diff options
author | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-18 19:56:59 +0200 |
---|---|---|
committer | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-18 19:56:59 +0200 |
commit | 10cbc378ad0daf0e80f5ceed92d70fdbf573df88 (patch) | |
tree | a4217e75f591632ed383e334ed8e61935cd2b096 /importer | |
parent | b453f05d18c261d3ce3b20bb5aaa2504da562756 (diff) | |
download | reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.tar.gz reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.tar.bz2 reddit-nextcloud-importer-10cbc378ad0daf0e80f5ceed92d70fdbf573df88.zip |
ref: Move to OO implementation
Heavily inspired by the `youtube-dl` implementation, I moved to an OO
implementation where every source type now has its own class, making
it easy to add new providers.
Also, it now has a fallback: if no provider is chosen, it will
try to download with `YoutubeDlProviderBase`.
Add `_TEST` to each class to make it easy to add tests for new providers.
Diffstat (limited to 'importer')
-rw-r--r-- | importer/downloader.py | 155 | ||||
-rw-r--r-- | importer/providers/__init__.py | 10 | ||||
-rw-r--r-- | importer/providers/g_reddit.py | 19 | ||||
-rw-r--r-- | importer/providers/gfycat.py | 9 | ||||
-rw-r--r-- | importer/providers/i_reddit.py | 9 | ||||
-rw-r--r-- | importer/providers/imgur.py | 9 | ||||
-rw-r--r-- | importer/providers/providerbase.py | 30 | ||||
-rw-r--r-- | importer/providers/raw_image_base.py | 27 | ||||
-rw-r--r-- | importer/providers/redgifs.py | 9 | ||||
-rw-r--r-- | importer/providers/v_reddit.py | 10 | ||||
-rw-r--r-- | importer/providers/youtube.py | 10 | ||||
-rw-r--r-- | importer/providers/youtube_dl_base.py | 36 |
12 files changed, 188 insertions, 145 deletions
diff --git a/importer/downloader.py b/importer/downloader.py index c67834b..1dd1684 100644 --- a/importer/downloader.py +++ b/importer/downloader.py @@ -1,157 +1,22 @@ -import os import re -import shutil -from enum import Enum -from urllib.parse import urlparse - -import youtube_dl -import requests from praw import Reddit - -class SourceType(Enum): - VREDDIT = 1 - IREDDIT = 2 - YOUTUBE = 4 - REDGIFS = 5 - IMAGURJPG = 6 - GFYCAT = 7 - GREDDIT = 8 - UNKNOWN = 1000 - - -OUTTMPL = 'source_%(id)s.%(ext)s' +from importer.providers import GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, YoutubeDlProviderBase, \ + RawImageProviderBase, Youtube class Downloader: - reddit: Reddit - username: str - downloaded: bool - post_id: str - source_type: SourceType - paths: list[str] + providers = [GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, Youtube, RawImageProviderBase, + YoutubeDlProviderBase] def __init__(self, url: str, reddit: Reddit): - self.reddit = reddit - self.downloaded = False + self.Provider = next(filter(lambda x: re.match(x.regex, url), self.providers)) self.url = url - self.source_type = self._get_source_type(url) - self.paths = [] - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.delete() + self.reddit = reddit def download(self): - try: - if self.source_type == SourceType.VREDDIT: - self._download_vreddit() - elif self.source_type == SourceType.REDGIFS: - self._download_redgifs() - elif self.source_type == SourceType.GFYCAT: - self._download_gifycat() - elif self.source_type == SourceType.YOUTUBE: - self._download_youtube() - elif self.source_type in (SourceType.IMAGURJPG, SourceType.IREDDIT): - self._download_raw_file() - elif self.source_type == SourceType.GREDDIT: - self._download_gallery_reddit() - except Exception as e: - self.downloaded = False - - def delete(self): - if self.paths: - for path in self.paths: - if os.path.exists(path): - os.unlink(path) - - def _download_youtube_dls(self, ydl_opts): - with 
youtube_dl.YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(self.url, download=True) - if info.get('_type', None) == 'playlist': - for entry in info['entries']: - r = ydl.prepare_filename(entry) - self.paths.append(f'{os.path.splitext(r)[0]}.mp4') - else: - r = ydl.prepare_filename(info) - self.paths.append(f'{os.path.splitext(r)[0]}.mp4') - - self.downloaded = True - - def _download_redgifs(self): - ydl_opts = { - 'format': 'best', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_gifycat(self): - ydl_opts = { - 'format': 'best', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_vreddit(self): - ydl_opts = { - 'format': 'bestvideo+bestaudio/bestvideo', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_youtube(self): - ydl_opts = { - 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_raw_file(self): - a = urlparse(self.url) - path = f'source_{os.path.basename(a.path)}' - - r = requests.get(self.url, stream=True) - if r.status_code == 200: - self.downloaded = True - with open(path, 'wb') as f: - r.raw.decode_content = True - shutil.copyfileobj(r.raw, f) - self.paths.append(path) - else: - self.downloaded = False - - def _download_gallery_reddit(self): - url = self.url - submission = self.reddit.submission(url=self.url) - for key in submission.media_metadata: - value = submission.media_metadata[key] - self.url = value['s']['u'] - self._download_raw_file() - - self.url = url - - @staticmethod - def _get_source_type(url): - if re.match("^.*v\\.redd\\.it.*$", url): - return SourceType.VREDDIT - if re.match("^.*i\\.redd\\.it.*\\.(jpg|jpeg)$", url): - return SourceType.IREDDIT - if re.match("^.*\\.youtube\\.com.*$", url): - return 
SourceType.YOUTUBE - if re.match("^.*redgifs\\.com.*$", url): - return SourceType.REDGIFS - if re.match("^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$", url): - return SourceType.IMAGURJPG - if re.match("^.*gfycat.com.*$", url): - return SourceType.GFYCAT - if re.match("^.*www.reddit.com/gallery.*$", url): - return SourceType.GREDDIT - - return SourceType.UNKNOWN + with self.Provider(url=self.url, reddit=self.reddit) as provider: + provider.download() + self.paths = provider.paths + self.downloaded = provider.downloaded diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py new file mode 100644 index 0000000..62c2d85 --- /dev/null +++ b/importer/providers/__init__.py @@ -0,0 +1,10 @@ +from .g_reddit import GReddit +from .providerbase import ProviderBase +from .gfycat import Gfycat +from .i_reddit import IReddit +from .imgur import Imgur +from .raw_image_base import RawImageProviderBase +from .redgifs import RedGifs +from .v_reddit import VReddit +from .youtube import Youtube +from .youtube_dl_base import YoutubeDlProviderBase diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py new file mode 100644 index 0000000..53ee5df --- /dev/null +++ b/importer/providers/g_reddit.py @@ -0,0 +1,19 @@ +from praw import Reddit + +from importer.providers.raw_image_base import RawImageProviderBase + + +class GReddit(RawImageProviderBase): + regex = "^.*www.reddit.com/gallery.*$" + + def __init__(self, url: str, reddit: Reddit): + super(GReddit, self).__init__(url) + self.reddit = reddit + + def download(self): + submission = self.reddit.submission(url=self.url) + for key in submission.media_metadata: + value = submission.media_metadata[key] + url = value['s']['u'] + path = self._download_raw_file(url) + self.paths.append(path) diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py new file mode 100644 index 0000000..70d9c05 --- /dev/null +++ b/importer/providers/gfycat.py @@ -0,0 +1,9 @@ +from importer.providers.youtube_dl_base 
import YoutubeDlProviderBase + + +class Gfycat(YoutubeDlProviderBase): + regex = "^.*gfycat.com.*$" + _TEST = [{ + "url": "https://gfycat.com/presentdangerousdromedary", + "paths": "source_presentdangerousdromedary.mp4" + }] diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py new file mode 100644 index 0000000..797ce43 --- /dev/null +++ b/importer/providers/i_reddit.py @@ -0,0 +1,9 @@ +from importer.providers.raw_image_base import RawImageProviderBase + + +class IReddit(RawImageProviderBase): + regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg)$" + _TEST = [{ + "url": "https://i.redd.it/pjj1ll1b2rr41.jpg", + "paths": ["source_pjj1ll1b2rr41.jpg"] + }] diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py new file mode 100644 index 0000000..dd8fb6e --- /dev/null +++ b/importer/providers/imgur.py @@ -0,0 +1,9 @@ +from importer.providers.raw_image_base import RawImageProviderBase + + +class Imgur(RawImageProviderBase): + regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$" + _TEST = [{ + "url": "https://i.imgur.com/fXLMjfp.jpg", + "paths": ["source_fXLMjfp.jpg"], + }] diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py new file mode 100644 index 0000000..374b9af --- /dev/null +++ b/importer/providers/providerbase.py @@ -0,0 +1,30 @@ +import os +from typing import List + + +class ProviderBase: + paths: List[str] + downloaded: bool + regex: str + + _TEST = [{ + "url": "https://i.imgur.com/fXLMjfp.jpg", + "paths": ["source_fXLMjfp.jpg"], + }] + + def __init__(self, url: str): + self.url = url + self.paths = [] + self.downloaded = False + + def download(self): + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.paths: + for path in self.paths: + if os.path.exists(path): + os.unlink(path) diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py new file mode 100644 index 0000000..267dcad --- /dev/null +++ 
b/importer/providers/raw_image_base.py @@ -0,0 +1,27 @@ +import os +import shutil +from urllib.parse import urlparse + +import requests + +from importer.providers.providerbase import ProviderBase + + +class RawImageProviderBase(ProviderBase): + regex = "^.*i.(jpg|jpeg|mp4)$" + + def download(self): + path = self._download_raw_file(self.url) + self.paths.append(path) + self.downloaded = True + + @staticmethod + def _download_raw_file(url: str) -> str: + a = urlparse(url) + path = f'source_{os.path.basename(a.path)}' + r = requests.get(url, stream=True) + if r.status_code == 200: + with open(path, 'wb') as f: + r.raw.decode_content = True + shutil.copyfileobj(r.raw, f) + return path diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py new file mode 100644 index 0000000..e15468f --- /dev/null +++ b/importer/providers/redgifs.py @@ -0,0 +1,9 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class RedGifs(YoutubeDlProviderBase): + regex = "^.*redgifs\\.com.*$" + _TEST = [{ + "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab", + "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4'] + }] diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py new file mode 100644 index 0000000..2917fee --- /dev/null +++ b/importer/providers/v_reddit.py @@ -0,0 +1,10 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class VReddit(YoutubeDlProviderBase): + regex = "^.*v\\.redd\\.it.*$" + format = 'bestvideo+bestaudio/bestvideo' + _TEST = [{ + "url": "https://v.redd.it/42j6r7i8z7151", + "paths": ["source_42j6r7i8z7151.mp4"] + }] diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py new file mode 100644 index 0000000..d880aa0 --- /dev/null +++ b/importer/providers/youtube.py @@ -0,0 +1,10 @@ +from importer.providers.youtube_dl_base import YoutubeDlProviderBase + + +class Youtube(YoutubeDlProviderBase): + regex = 
"^.*\\.youtube\\.com.*$" + format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio" + _TEST = [{ + "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", + "paths": ["source_Wjrrgrvq1ew.mp4"] + }] diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py new file mode 100644 index 0000000..3bb2fb8 --- /dev/null +++ b/importer/providers/youtube_dl_base.py @@ -0,0 +1,36 @@ +import os + +import youtube_dl + +from importer.providers.providerbase import ProviderBase + + +class YoutubeDlProviderBase(ProviderBase): + regex = ".*" + output_template: str = 'source_%(id)s.%(ext)s' + format: str = "best" + merge_format_output: str = "mp4" + + _TEST = [{ + "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", + "paths": ["source_Wjrrgrvq1ew.mp4"] + }] + + def download(self): + ydl_opts = { + 'format': self.format, + 'merge_output_format': self.merge_format_output, + 'outtmpl': self.output_template + } + + with youtube_dl.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(self.url, download=True) + if info.get('_type', None) == 'playlist': + for entry in info['entries']: + r = ydl.prepare_filename(entry) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + else: + r = ydl.prepare_filename(info) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + + self.downloaded = True |