aboutsummaryrefslogtreecommitdiff
path: root/importer/providers
diff options
context:
space:
mode:
author gabrielgio <gabriel.giovanini@pm.me> 2021-07-18 19:56:59 +0200
committer gabrielgio <gabriel.giovanini@pm.me> 2021-07-18 19:56:59 +0200
commit 10cbc378ad0daf0e80f5ceed92d70fdbf573df88 (patch)
tree a4217e75f591632ed383e334ed8e61935cd2b096 /importer/providers
parent b453f05d18c261d3ce3b20bb5aaa2504da562756 (diff)
download reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.tar.gz
reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.tar.bz2
reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.zip
ref: Move to OO implementation
Heavily inspired by the `youtube-dl` implementation, I moved to an OO implementation where every source type now has its own class, making it easy to add new providers. It also now has a fallback: if no provider is chosen, it will try to download with `YoutubeDlProviderBase`. Added `_TEST` to each class to make it easy to add tests for new providers.
Diffstat (limited to 'importer/providers')
-rw-r--r-- importer/providers/__init__.py 10
-rw-r--r-- importer/providers/g_reddit.py 19
-rw-r--r-- importer/providers/gfycat.py 9
-rw-r--r-- importer/providers/i_reddit.py 9
-rw-r--r-- importer/providers/imgur.py 9
-rw-r--r-- importer/providers/providerbase.py 30
-rw-r--r-- importer/providers/raw_image_base.py 27
-rw-r--r-- importer/providers/redgifs.py 9
-rw-r--r-- importer/providers/v_reddit.py 10
-rw-r--r-- importer/providers/youtube.py 10
-rw-r--r-- importer/providers/youtube_dl_base.py 36
11 files changed, 178 insertions, 0 deletions
diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py
new file mode 100644
index 0000000..62c2d85
--- /dev/null
+++ b/importer/providers/__init__.py
@@ -0,0 +1,10 @@
+from .g_reddit import GReddit
+from .providerbase import ProviderBase
+from .gfycat import Gfycat
+from .i_reddit import IReddit
+from .imgur import Imgur
+from .raw_image_base import RawImageProviderBase
+from .redgifs import RedGifs
+from .v_reddit import VReddit
+from .youtube import Youtube
+from .youtube_dl_base import YoutubeDlProviderBase
diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py
new file mode 100644
index 0000000..53ee5df
--- /dev/null
+++ b/importer/providers/g_reddit.py
@@ -0,0 +1,19 @@
+from praw import Reddit
+
+from importer.providers.raw_image_base import RawImageProviderBase
+
+
+class GReddit(RawImageProviderBase):
+    regex = "^.*www.reddit.com/gallery.*$"
+
+    def __init__(self, url: str, reddit: Reddit):
+        super(GReddit, self).__init__(url)
+        self.reddit = reddit
+
+    def download(self):
+        submission = self.reddit.submission(url=self.url)
+        for key in submission.media_metadata:
+            value = submission.media_metadata[key]
+            url = value['s']['u']
+            path = self._download_raw_file(url)
+            self.paths.append(path)
diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py
new file mode 100644
index 0000000..70d9c05
--- /dev/null
+++ b/importer/providers/gfycat.py
@@ -0,0 +1,9 @@
+from importer.providers.youtube_dl_base import YoutubeDlProviderBase
+
+
+class Gfycat(YoutubeDlProviderBase):
+    regex = "^.*gfycat.com.*$"
+    _TEST = [{
+        "url": "https://gfycat.com/presentdangerousdromedary",
+        "paths": "source_presentdangerousdromedary.mp4"
+    }]
diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py
new file mode 100644
index 0000000..797ce43
--- /dev/null
+++ b/importer/providers/i_reddit.py
@@ -0,0 +1,9 @@
+from importer.providers.raw_image_base import RawImageProviderBase
+
+
+class IReddit(RawImageProviderBase):
+    regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg)$"
+    _TEST = [{
+        "url": "https://i.redd.it/pjj1ll1b2rr41.jpg",
+        "paths": ["source_pjj1ll1b2rr41.jpg"]
+    }]
diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py
new file mode 100644
index 0000000..dd8fb6e
--- /dev/null
+++ b/importer/providers/imgur.py
@@ -0,0 +1,9 @@
+from importer.providers.raw_image_base import RawImageProviderBase
+
+
+class Imgur(RawImageProviderBase):
+    regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$"
+    _TEST = [{
+        "url": "https://i.imgur.com/fXLMjfp.jpg",
+        "paths": ["source_fXLMjfp.jpg"],
+    }]
diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py
new file mode 100644
index 0000000..374b9af
--- /dev/null
+++ b/importer/providers/providerbase.py
@@ -0,0 +1,30 @@
+import os
+from typing import List
+
+
+class ProviderBase:
+    paths: List[str]
+    downloaded: bool
+    regex: str
+
+    _TEST = [{
+        "url": "https://i.imgur.com/fXLMjfp.jpg",
+        "paths": ["source_fXLMjfp.jpg"],
+    }]
+
+    def __init__(self, url: str):
+        self.url = url
+        self.paths = []
+        self.downloaded = False
+
+    def download(self):
+        pass
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.paths:
+            for path in self.paths:
+                if os.path.exists(path):
+                    os.unlink(path)
diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py
new file mode 100644
index 0000000..267dcad
--- /dev/null
+++ b/importer/providers/raw_image_base.py
@@ -0,0 +1,27 @@
+import os
+import shutil
+from urllib.parse import urlparse
+
+import requests
+
+from importer.providers.providerbase import ProviderBase
+
+
+class RawImageProviderBase(ProviderBase):
+    regex = "^.*i.(jpg|jpeg|mp4)$"
+
+    def download(self):
+        path = self._download_raw_file(self.url)
+        self.paths.append(path)
+        self.downloaded = True
+
+    @staticmethod
+    def _download_raw_file(url: str) -> str:
+        a = urlparse(url)
+        path = f'source_{os.path.basename(a.path)}'
+        r = requests.get(url, stream=True)
+        if r.status_code == 200:
+            with open(path, 'wb') as f:
+                r.raw.decode_content = True
+                shutil.copyfileobj(r.raw, f)
+        return path
diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py
new file mode 100644
index 0000000..e15468f
--- /dev/null
+++ b/importer/providers/redgifs.py
@@ -0,0 +1,9 @@
+from importer.providers.youtube_dl_base import YoutubeDlProviderBase
+
+
+class RedGifs(YoutubeDlProviderBase):
+    regex = "^.*redgifs\\.com.*$"
+    _TEST = [{
+        "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab",
+        "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4']
+    }]
diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py
new file mode 100644
index 0000000..2917fee
--- /dev/null
+++ b/importer/providers/v_reddit.py
@@ -0,0 +1,10 @@
+from importer.providers.youtube_dl_base import YoutubeDlProviderBase
+
+
+class VReddit(YoutubeDlProviderBase):
+    regex = "^.*v\\.redd\\.it.*$"
+    format = 'bestvideo+bestaudio/bestvideo'
+    _TEST = [{
+        "url": "https://v.redd.it/42j6r7i8z7151",
+        "paths": ["source_42j6r7i8z7151.mp4"]
+    }]
diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py
new file mode 100644
index 0000000..d880aa0
--- /dev/null
+++ b/importer/providers/youtube.py
@@ -0,0 +1,10 @@
+from importer.providers.youtube_dl_base import YoutubeDlProviderBase
+
+
+class Youtube(YoutubeDlProviderBase):
+    regex = "^.*\\.youtube\\.com.*$"
+    format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio"
+    _TEST = [{
+        "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew",
+        "paths": ["source_Wjrrgrvq1ew.mp4"]
+    }]
diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py
new file mode 100644
index 0000000..3bb2fb8
--- /dev/null
+++ b/importer/providers/youtube_dl_base.py
@@ -0,0 +1,36 @@
+import os
+
+import youtube_dl
+
+from importer.providers.providerbase import ProviderBase
+
+
+class YoutubeDlProviderBase(ProviderBase):
+    regex = ".*"
+    output_template: str = 'source_%(id)s.%(ext)s'
+    format: str = "best"
+    merge_format_output: str = "mp4"
+
+    _TEST = [{
+        "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew",
+        "paths": ["source_Wjrrgrvq1ew.mp4"]
+    }]
+
+    def download(self):
+        ydl_opts = {
+            'format': self.format,
+            'merge_output_format': self.merge_format_output,
+            'outtmpl': self.output_template
+        }
+
+        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(self.url, download=True)
+            if info.get('_type', None) == 'playlist':
+                for entry in info['entries']:
+                    r = ydl.prepare_filename(entry)
+                    self.paths.append(f'{os.path.splitext(r)[0]}.mp4')
+            else:
+                r = ydl.prepare_filename(info)
+                self.paths.append(f'{os.path.splitext(r)[0]}.mp4')
+
+        self.downloaded = True