From b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda Mon Sep 17 00:00:00 2001 From: gabrielgio Date: Tue, 27 Jul 2021 23:15:53 +0200 Subject: feat: Move to gallery-dl As it turns out there is already a project that does what I want but better. --- .gitlab-ci.yml | 2 +- importer/__init__.py | 0 importer/downloader.py | 19 ----------- importer/providers/__init__.py | 10 ------ importer/providers/g_reddit.py | 26 --------------- importer/providers/gfycat.py | 9 ------ importer/providers/i_reddit.py | 15 --------- importer/providers/imgur.py | 9 ------ importer/providers/providerbase.py | 30 ----------------- importer/providers/raw_image_base.py | 27 ---------------- importer/providers/redgifs.py | 9 ------ importer/providers/v_reddit.py | 10 ------ importer/providers/youtube.py | 10 ------ importer/providers/youtube_dl_base.py | 37 --------------------- main.py | 44 +++++++++++++++++++------ requirements.txt | 1 + test/test_providers.py | 61 ----------------------------------- 17 files changed, 36 insertions(+), 283 deletions(-) delete mode 100644 importer/__init__.py delete mode 100644 importer/downloader.py delete mode 100644 importer/providers/__init__.py delete mode 100644 importer/providers/g_reddit.py delete mode 100644 importer/providers/gfycat.py delete mode 100644 importer/providers/i_reddit.py delete mode 100644 importer/providers/imgur.py delete mode 100644 importer/providers/providerbase.py delete mode 100644 importer/providers/raw_image_base.py delete mode 100644 importer/providers/redgifs.py delete mode 100644 importer/providers/v_reddit.py delete mode 100644 importer/providers/youtube.py delete mode 100644 importer/providers/youtube_dl_base.py delete mode 100644 test/test_providers.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4291a2e..5e43e8d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,7 +38,7 @@ test: stage: test script: - pip install -r dev-requirements.txt - - pytest -n $(nproc) --junitxml=report.xml --cov=importer + - pytest --junitxml=report.xml --cov=importer artifacts: reports: junit: report.xml diff --git a/importer/__init__.py b/importer/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/importer/downloader.py b/importer/downloader.py deleted file mode 100644 index 290c9e2..0000000 --- a/importer/downloader.py +++ /dev/null @@ -1,19 +0,0 @@ -import re - -from praw import Reddit - -from importer.providers import GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, YoutubeDlProviderBase, \ - RawImageProviderBase, Youtube - - -class Downloader: - providers = [GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, Youtube, RawImageProviderBase, - YoutubeDlProviderBase] - - def __init__(self, url: str, reddit: Reddit): - self.Provider = next(filter(lambda x: re.match(x.regex, url), self.providers)) - self.url = url - self.reddit = reddit - - def provider(self): - return self.Provider(url=self.url, reddit=self.reddit) diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py deleted file mode 100644 index 62c2d85..0000000 --- a/importer/providers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .g_reddit import GReddit -from .providerbase import ProviderBase -from .gfycat import Gfycat -from .i_reddit import IReddit -from .imgur import Imgur -from .raw_image_base import RawImageProviderBase -from .redgifs import RedGifs -from .v_reddit import VReddit -from .youtube import Youtube -from .youtube_dl_base import YoutubeDlProviderBase diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py deleted file mode 100644 index e0341e0..0000000 --- a/importer/providers/g_reddit.py +++ /dev/null @@ -1,26 +0,0 @@ -from praw import Reddit - -from importer.providers.raw_image_base import RawImageProviderBase - - -class GReddit(RawImageProviderBase): - regex = "^.*www.reddit.com/gallery.*$" - _TEST = [{ - "url": "https://www.reddit.com/gallery/mik7c9", - "paths": ['source_hlokpsyhgrq61.jpg', 'source_n31c2y7igrq61.jpg', 'source_7eg0o76igrq61.jpg', - 'source_whl12jbigrq61.jpg', 'source_4uok762igrq61.jpg', 'source_t3pgm64igrq61.jpg', - 'source_ymc4hv9igrq61.jpg'] - }] - - def __init__(self, url: str, reddit: Reddit): - super(GReddit, self).__init__(url) - self.reddit = reddit - - def download(self): - submission = self.reddit.submission(url=self.url) - for key in submission.media_metadata: - value = submission.media_metadata[key] - url = value['s']['u'] - path = self._download_raw_file(url) - self.paths.append(path) - self.downloaded = True diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py deleted file mode 100644 index 70d9c05..0000000 --- a/importer/providers/gfycat.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class Gfycat(YoutubeDlProviderBase): - regex = "^.*gfycat.com.*$" - _TEST = [{ - "url": "https://gfycat.com/presentdangerousdromedary", - "paths": "source_presentdangerousdromedary.mp4" - }] diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py deleted file mode 100644 index 13fc70e..0000000 --- a/importer/providers/i_reddit.py +++ /dev/null @@ -1,15 +0,0 @@ -from importer.providers.raw_image_base import RawImageProviderBase - - -class IReddit(RawImageProviderBase): - regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg|gif)$" - _TEST = [ - { - "url": "https://i.redd.it/pjj1ll1b2rr41.jpg", - "paths": ["source_pjj1ll1b2rr41.jpg"] - }, - { - "url": "https://i.redd.it/55vpi6ol5jc71.gif", - "paths": ['source_55vpi6ol5jc71.gif'] - } - ] diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py deleted file mode 100644 index dd8fb6e..0000000 --- a/importer/providers/imgur.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.raw_image_base import RawImageProviderBase - - -class Imgur(RawImageProviderBase): - regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$" - _TEST = [{ - "url": "https://i.imgur.com/fXLMjfp.jpg", - "paths": ["source_fXLMjfp.jpg"], - }] diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py deleted file mode 100644 index 51b525e..0000000 --- a/importer/providers/providerbase.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import List - - -class ProviderBase: - paths: List[str] - downloaded: bool - regex: str - - _TEST = [{ - "url": "https://i.imgur.com/fXLMjfp.jpg", - "paths": ["source_fXLMjfp.jpg"], - }] - - def __init__(self, url: str, **kwargs): - self.url = url - self.paths = [] - self.downloaded = False - - def download(self): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.paths: - for path in self.paths: - if os.path.exists(path): - os.unlink(path) diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py deleted file mode 100644 index 89ecfca..0000000 --- a/importer/providers/raw_image_base.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import shutil -from urllib.parse import urlparse - -import requests - -from importer.providers.providerbase import ProviderBase - - -class RawImageProviderBase(ProviderBase): - regex = "^.*.(jpg|jpeg|mp4|gif)$" - - def download(self): - path = self._download_raw_file(self.url) - self.paths.append(path) - self.downloaded = True - - @staticmethod - def _download_raw_file(url: str) -> str: - a = urlparse(url) - path = f'source_{os.path.basename(a.path)}' - r = requests.get(url, stream=True) - if r.status_code == 200: - with open(path, 'wb') as f: - r.raw.decode_content = True - shutil.copyfileobj(r.raw, f) - return path diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py deleted file mode 100644 index e15468f..0000000 --- a/importer/providers/redgifs.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class RedGifs(YoutubeDlProviderBase): - regex = "^.*redgifs\\.com.*$" - _TEST = [{ - "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab", - "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4'] - }] diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py deleted file mode 100644 index 2917fee..0000000 --- a/importer/providers/v_reddit.py +++ /dev/null @@ -1,10 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class VReddit(YoutubeDlProviderBase): - regex = "^.*v\\.redd\\.it.*$" - format = 'bestvideo+bestaudio/bestvideo' - _TEST = [{ - "url": "https://v.redd.it/42j6r7i8z7151", - "paths": ["source_42j6r7i8z7151.mp4"] - }] diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py deleted file mode 100644 index d880aa0..0000000 --- a/importer/providers/youtube.py +++ /dev/null @@ -1,10 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class Youtube(YoutubeDlProviderBase): - regex = "^.*\\.youtube\\.com.*$" - format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio" - _TEST = [{ - "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", - "paths": ["source_Wjrrgrvq1ew.mp4"] - }] diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py deleted file mode 100644 index 3d67738..0000000 --- a/importer/providers/youtube_dl_base.py +++ /dev/null @@ -1,37 +0,0 @@ -import os - -import youtube_dl - -from importer.providers.providerbase import ProviderBase - - -class YoutubeDlProviderBase(ProviderBase): - regex = ".*" - output_template: str = 'source_%(id)s.%(ext)s' - format: str = "best" - merge_format_output: str = "mp4" - - _TEST = [{ - "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", - "paths": ["source_Wjrrgrvq1ew.mp4"] - }] - - def download(self): - ydl_opts = { - 'quiet': True, - 'format': self.format, - 'merge_output_format': self.merge_format_output, - 'outtmpl': self.output_template - } - - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(self.url, download=True) - if info.get('_type', None) == 'playlist': - for entry in info['entries']: - r = ydl.prepare_filename(entry) - self.paths.append(f'{os.path.splitext(r)[0]}.{entry["ext"]}') - else: - r = ydl.prepare_filename(info) - self.paths.append(f'{os.path.splitext(r)[0]}.{info["ext"]}') - - self.downloaded = True diff --git a/main.py b/main.py index 036aab3..2f34b3a 100644 --- a/main.py +++ b/main.py @@ -7,8 +7,8 @@ import praw from nextcloud import NextCloud from praw.models.util import stream_generator +from gallery_dl.job import DownloadJob -from importer.downloader import Downloader from importer.uploader import upload_file, create_folders levels = { @@ -55,6 +55,33 @@ parser.add_argument('-l', '--log-level', choices=levels.keys(), help=f'it will set log level.') + +def get_list_of_files(dirName): + # create a list of file and sub directories + # names in the given directory + listOfFile = os.listdir(dirName) + allFiles = list() + # Iterate over all the entries + for entry in listOfFile: + # Create full path + fullPath = os.path.join(dirName, entry) + # If entry is a directory then get the list of files in this directory + if os.path.isdir(fullPath): + allFiles = allFiles + get_list_of_files(fullPath) + else: + allFiles.append(fullPath) + + return allFiles + + +def download(url) -> [str]: + d = DownloadJob(url=url) + d.run() + basedir = d.pathfmt.basedirectory + files = get_list_of_files(basedir) + return files + + if __name__ == "__main__": args = parser.parse_args() @@ -81,15 +108,12 @@ if __name__ == "__main__": try: url = post.url create_folders(f"{args.nextcloud_path}/{post.subreddit}/", nxc) - d = Downloader(url=url, reddit=reddit) - with d.provider() as provider: - provider.download() - logging.info(f"{post.id} from {post.subreddit} downloaded") - for path in provider.paths: - if "-mobile" in path: # Remove mobile version - continue - upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{path}", nxc) - logging.info(f"{path} uploaded") + logging.info(f"{post.id} from {post.subreddit} downloaded") + for path in download(url): + filename = os.path.basename(path) + upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{filename}", nxc) + os.unlink(path) + logging.info(f"{path} uploaded") except Exception as e: logging.error(e) diff --git a/requirements.txt b/requirements.txt index 896d94d..a80d705 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ requests==2.26.0 praw==7.3.0 jsonpickle==2.0.0 nextcloud-api-wrapper==0.2.1.5 +gallery-dl==1.18.2 \ No newline at end of file diff --git a/test/test_providers.py b/test/test_providers.py deleted file mode 100644 index 32d81bd..0000000 --- a/test/test_providers.py +++ /dev/null @@ -1,61 +0,0 @@ -import os - -import praw -import pytest - -import importer.providers as providers - -reddit_env = pytest.mark.skipif( - os.environ.get('CLIENT_ID', '') == '' or - os.environ.get('CLIENT_SECRET', '') == '' or - os.environ.get('USERNAME', '') == '' or - os.environ.get('PASSWORD', '') == '' - , reason="Require reddit env variables to be set." -) - - -@pytest.fixture -def mock_ydl_download(mocker): - # this function is responsible for downloading the file - return mocker.patch('importer.providers.youtube_dl_base.youtube_dl.YoutubeDL.process_info') - - -@pytest.mark.parametrize("provider", - [ - providers.IReddit, - providers.Imgur, - providers.RawImageProviderBase, - providers.RedGifs, - providers.Youtube, - providers.YoutubeDlProviderBase - ]) -def test_provider(provider, mock_ydl_download): - for test in provider._TEST: - with provider(url=test['url']) as p: - p.download() - assert p.downloaded - assert p.paths == test['paths'] - - -@reddit_env -@pytest.mark.parametrize("provider", - [ - providers.GReddit - ]) -def test_provider_with_reddit(provider, mock_ydl_download): - username = os.environ.get('USERNAME', '') - password = os.environ.get('PASSWORD', '') - client_id = os.environ.get('CLIENT_ID', '') - client_secret = os.environ.get('CLIENT_SECRET', '') - - reddit = praw.Reddit(client_id=client_id, - client_secret=client_secret, - password=password, - user_agent="reddit-nextcloud-importer", - username=username) - - for test in provider._TEST: - with provider(url=test['url'], reddit=reddit) as p: - p.download() - assert p.downloaded - assert p.paths == test['paths'] -- cgit v1.2.3