From 10cbc378ad0daf0e80f5ceed92d70fdbf573df88 Mon Sep 17 00:00:00 2001 From: gabrielgio Date: Sun, 18 Jul 2021 19:56:59 +0200 Subject: ref: Move to OO implementation Heavily inspired by the `youtube-dl` implementation, I moved to an OO implementation where every source type now has its own class, making it easy to add new providers. It now also has a fallback: if no provider is chosen, it will try to download with `YoutubeDlProviderBase`. Add `_TEST` to each class to make it easy to add tests for new providers. --- importer/providers/__init__.py | 10 ++++++++++ importer/providers/g_reddit.py | 19 ++++++++++++++++++ importer/providers/gfycat.py | 9 +++++++++ importer/providers/i_reddit.py | 9 +++++++++ importer/providers/imgur.py | 9 +++++++++ importer/providers/providerbase.py | 30 +++++++++++++++++++++++++++++ importer/providers/raw_image_base.py | 27 ++++++++++++++++++++++++++ importer/providers/redgifs.py | 9 +++++++++ importer/providers/v_reddit.py | 10 ++++++++++ importer/providers/youtube.py | 10 ++++++++++ importer/providers/youtube_dl_base.py | 36 +++++++++++++++++++++++++++++++++++ 11 files changed, 178 insertions(+) create mode 100644 importer/providers/__init__.py create mode 100644 importer/providers/g_reddit.py create mode 100644 importer/providers/gfycat.py create mode 100644 importer/providers/i_reddit.py create mode 100644 importer/providers/imgur.py create mode 100644 importer/providers/providerbase.py create mode 100644 importer/providers/raw_image_base.py create mode 100644 importer/providers/redgifs.py create mode 100644 importer/providers/v_reddit.py create mode 100644 importer/providers/youtube.py create mode 100644 importer/providers/youtube_dl_base.py (limited to 'importer/providers') diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py new file mode 100644 index 0000000..62c2d85 --- /dev/null +++ b/importer/providers/__init__.py @@ -0,0 +1,10 @@ +from .g_reddit import GReddit +from .providerbase import ProviderBase 
+from .gfycat import Gfycat +from .i_reddit import IReddit +from .imgur import Imgur +from .raw_image_base import RawImageProviderBase +from .redgifs import RedGifs +from .v_reddit import VReddit +from .youtube import Youtube +from .youtube_dl_base import YoutubeDlProviderBase diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py new file mode 100644 index 0000000..53ee5df --- /dev/null +++ b/importer/providers/g_reddit.py @@ -0,0 +1,19 @@ +from praw import Reddit + +from importer.providers.raw_image_base import RawImageProviderBase + + +class GReddit(RawImageProviderBase): + regex = "^.*www.reddit.com/gallery.*$" + + def __init__(self, url: str, reddit: Reddit): + super(GReddit, self).__init__(url) + self.reddit = reddit + + def download(self): + submission = self.reddit.submission(url=self.url) + for key in submission.media_metadata: + value = submission.media_metadata[key] + url = value['s']['u'] + path = self._download_raw_file(url) + self.paths.append(path) diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py new file mode 100644 index 0000000..70d9c05 --- /dev/null +++ b/importer/providers/gfycat.py @@ -0,0 +1,9 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class Gfycat(YoutubeDlProviderBase): + regex = "^.*gfycat.com.*$" + _TEST = [{ + "url": "https://gfycat.com/presentdangerousdromedary", + "paths": "source_presentdangerousdromedary.mp4" + }] diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py new file mode 100644 index 0000000..797ce43 --- /dev/null +++ b/importer/providers/i_reddit.py @@ -0,0 +1,9 @@ +from importer.providers.raw_image_base import RawImageProviderBase + + +class IReddit(RawImageProviderBase): + regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg)$" + _TEST = [{ + "url": "https://i.redd.it/pjj1ll1b2rr41.jpg", + "paths": ["source_pjj1ll1b2rr41.jpg"] + }] diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py new file mode 100644 index 
0000000..dd8fb6e --- /dev/null +++ b/importer/providers/imgur.py @@ -0,0 +1,9 @@ +from importer.providers.raw_image_base import RawImageProviderBase + + +class Imgur(RawImageProviderBase): + regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$" + _TEST = [{ + "url": "https://i.imgur.com/fXLMjfp.jpg", + "paths": ["source_fXLMjfp.jpg"], + }] diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py new file mode 100644 index 0000000..374b9af --- /dev/null +++ b/importer/providers/providerbase.py @@ -0,0 +1,30 @@ +import os +from typing import List + + +class ProviderBase: + paths: List[str] + downloaded: bool + regex: str + + _TEST = [{ + "url": "https://i.imgur.com/fXLMjfp.jpg", + "paths": ["source_fXLMjfp.jpg"], + }] + + def __init__(self, url: str): + self.url = url + self.paths = [] + self.downloaded = False + + def download(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.paths: + for path in self.paths: + if os.path.exists(path): + os.unlink(path) diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py new file mode 100644 index 0000000..267dcad --- /dev/null +++ b/importer/providers/raw_image_base.py @@ -0,0 +1,27 @@ +import os +import shutil +from urllib.parse import urlparse + +import requests + +from importer.providers.providerbase import ProviderBase + + +class RawImageProviderBase(ProviderBase): + regex = "^.*i.(jpg|jpeg|mp4)$" + + def download(self): + path = self._download_raw_file(self.url) + self.paths.append(path) + self.downloaded = True + + @staticmethod + def _download_raw_file(url: str) -> str: + a = urlparse(url) + path = f'source_{os.path.basename(a.path)}' + r = requests.get(url, stream=True) + if r.status_code == 200: + with open(path, 'wb') as f: + r.raw.decode_content = True + shutil.copyfileobj(r.raw, f) + return path diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py new file mode 100644 index 
0000000..e15468f --- /dev/null +++ b/importer/providers/redgifs.py @@ -0,0 +1,9 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class RedGifs(YoutubeDlProviderBase): + regex = "^.*redgifs\\.com.*$" + _TEST = [{ + "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab", + "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4'] + }] diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py new file mode 100644 index 0000000..2917fee --- /dev/null +++ b/importer/providers/v_reddit.py @@ -0,0 +1,10 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class VReddit(YoutubeDlProviderBase): + regex = "^.*v\\.redd\\.it.*$" + format = 'bestvideo+bestaudio/bestvideo' + _TEST = [{ + "url": "https://v.redd.it/42j6r7i8z7151", + "paths": ["source_42j6r7i8z7151.mp4"] + }] diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py new file mode 100644 index 0000000..d880aa0 --- /dev/null +++ b/importer/providers/youtube.py @@ -0,0 +1,10 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class Youtube(YoutubeDlProviderBase): + regex = "^.*\\.youtube\\.com.*$" + format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio" + _TEST = [{ + "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", + "paths": ["source_Wjrrgrvq1ew.mp4"] + }] diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py new file mode 100644 index 0000000..3bb2fb8 --- /dev/null +++ b/importer/providers/youtube_dl_base.py @@ -0,0 +1,36 @@ +import os + +import youtube_dl + +from importer.providers.providerbase import ProviderBase + + +class YoutubeDlProviderBase(ProviderBase): + regex = ".*" + output_template: str = 'source_%(id)s.%(ext)s' + format: str = "best" + merge_format_output: str = "mp4" + + _TEST = [{ + "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", + "paths": ["source_Wjrrgrvq1ew.mp4"] + }] + + def 
download(self): + ydl_opts = { + 'format': self.format, + 'merge_output_format': self.merge_format_output, + 'outtmpl': self.output_template + } + + with youtube_dl.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(self.url, download=True) + if info.get('_type', None) == 'playlist': + for entry in info['entries']: + r = ydl.prepare_filename(entry) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + else: + r = ydl.prepare_filename(info) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + + self.downloaded = True -- cgit v1.2.3