diff options
| author | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-27 23:15:53 +0200 | 
|---|---|---|
| committer | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-27 23:28:51 +0200 | 
| commit | b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda (patch) | |
| tree | 6652fd8b3e89b652791167e73b293b57d0b6496b | |
| parent | 3d54b3d91d0c175feae82c413fd0139545d46e2a (diff) | |
| download | reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.tar.gz reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.tar.bz2 reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.zip | |
feat: Move to gallery-dl
As it turns out, there is already a project (gallery-dl) that does what I
want, but better.
| -rw-r--r-- | .gitlab-ci.yml | 2 | ||||
| -rw-r--r-- | importer/__init__.py | 0 | ||||
| -rw-r--r-- | importer/downloader.py | 19 | ||||
| -rw-r--r-- | importer/providers/__init__.py | 10 | ||||
| -rw-r--r-- | importer/providers/g_reddit.py | 26 | ||||
| -rw-r--r-- | importer/providers/gfycat.py | 9 | ||||
| -rw-r--r-- | importer/providers/i_reddit.py | 15 | ||||
| -rw-r--r-- | importer/providers/imgur.py | 9 | ||||
| -rw-r--r-- | importer/providers/providerbase.py | 30 | ||||
| -rw-r--r-- | importer/providers/raw_image_base.py | 27 | ||||
| -rw-r--r-- | importer/providers/redgifs.py | 9 | ||||
| -rw-r--r-- | importer/providers/v_reddit.py | 10 | ||||
| -rw-r--r-- | importer/providers/youtube.py | 10 | ||||
| -rw-r--r-- | importer/providers/youtube_dl_base.py | 37 | ||||
| -rw-r--r-- | main.py | 44 | ||||
| -rw-r--r-- | requirements.txt | 1 | ||||
| -rw-r--r-- | test/test_providers.py | 61 | 
17 files changed, 36 insertions, 283 deletions
| diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4291a2e..5e43e8d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,7 +38,7 @@ test:    stage: test    script:      - pip install -r dev-requirements.txt -    - pytest -n $(nproc) --junitxml=report.xml --cov=importer +    - pytest --junitxml=report.xml --cov=importer    artifacts:      reports:        junit: report.xml diff --git a/importer/__init__.py b/importer/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/importer/__init__.py +++ /dev/null diff --git a/importer/downloader.py b/importer/downloader.py deleted file mode 100644 index 290c9e2..0000000 --- a/importer/downloader.py +++ /dev/null @@ -1,19 +0,0 @@ -import re - -from praw import Reddit - -from importer.providers import GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, YoutubeDlProviderBase, \ -    RawImageProviderBase, Youtube - - -class Downloader: -    providers = [GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, Youtube, RawImageProviderBase, -                 YoutubeDlProviderBase] - -    def __init__(self, url: str, reddit: Reddit): -        self.Provider = next(filter(lambda x: re.match(x.regex, url), self.providers)) -        self.url = url -        self.reddit = reddit - -    def provider(self): -        return self.Provider(url=self.url, reddit=self.reddit) diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py deleted file mode 100644 index 62c2d85..0000000 --- a/importer/providers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .g_reddit import GReddit -from .providerbase import ProviderBase -from .gfycat import Gfycat -from .i_reddit import IReddit -from .imgur import Imgur -from .raw_image_base import RawImageProviderBase -from .redgifs import RedGifs -from .v_reddit import VReddit -from .youtube import Youtube -from .youtube_dl_base import YoutubeDlProviderBase diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py deleted file mode 100644 index e0341e0..0000000 --- 
a/importer/providers/g_reddit.py +++ /dev/null @@ -1,26 +0,0 @@ -from praw import Reddit - -from importer.providers.raw_image_base import RawImageProviderBase - - -class GReddit(RawImageProviderBase): -    regex = "^.*www.reddit.com/gallery.*$" -    _TEST = [{ -        "url": "https://www.reddit.com/gallery/mik7c9", -        "paths": ['source_hlokpsyhgrq61.jpg', 'source_n31c2y7igrq61.jpg', 'source_7eg0o76igrq61.jpg', -                  'source_whl12jbigrq61.jpg', 'source_4uok762igrq61.jpg', 'source_t3pgm64igrq61.jpg', -                  'source_ymc4hv9igrq61.jpg'] -    }] - -    def __init__(self, url: str, reddit: Reddit): -        super(GReddit, self).__init__(url) -        self.reddit = reddit - -    def download(self): -        submission = self.reddit.submission(url=self.url) -        for key in submission.media_metadata: -            value = submission.media_metadata[key] -            url = value['s']['u'] -            path = self._download_raw_file(url) -            self.paths.append(path) -        self.downloaded = True diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py deleted file mode 100644 index 70d9c05..0000000 --- a/importer/providers/gfycat.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class Gfycat(YoutubeDlProviderBase): -    regex = "^.*gfycat.com.*$" -    _TEST = [{ -        "url": "https://gfycat.com/presentdangerousdromedary", -        "paths": "source_presentdangerousdromedary.mp4" -    }] diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py deleted file mode 100644 index 13fc70e..0000000 --- a/importer/providers/i_reddit.py +++ /dev/null @@ -1,15 +0,0 @@ -from importer.providers.raw_image_base import RawImageProviderBase - - -class IReddit(RawImageProviderBase): -    regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg|gif)$" -    _TEST = [ -        { -            "url": "https://i.redd.it/pjj1ll1b2rr41.jpg", -            "paths": 
["source_pjj1ll1b2rr41.jpg"] -        }, -        { -            "url": "https://i.redd.it/55vpi6ol5jc71.gif", -            "paths": ['source_55vpi6ol5jc71.gif'] -        } -    ] diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py deleted file mode 100644 index dd8fb6e..0000000 --- a/importer/providers/imgur.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.raw_image_base import RawImageProviderBase - - -class Imgur(RawImageProviderBase): -    regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$" -    _TEST = [{ -        "url": "https://i.imgur.com/fXLMjfp.jpg", -        "paths": ["source_fXLMjfp.jpg"], -    }] diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py deleted file mode 100644 index 51b525e..0000000 --- a/importer/providers/providerbase.py +++ /dev/null @@ -1,30 +0,0 @@ -import os -from typing import List - - -class ProviderBase: -    paths: List[str] -    downloaded: bool -    regex: str - -    _TEST = [{ -        "url": "https://i.imgur.com/fXLMjfp.jpg", -        "paths": ["source_fXLMjfp.jpg"], -    }] - -    def __init__(self, url: str, **kwargs): -        self.url = url -        self.paths = [] -        self.downloaded = False - -    def download(self): -        pass - -    def __enter__(self): -        return self - -    def __exit__(self, exc_type, exc_val, exc_tb): -        if self.paths: -            for path in self.paths: -                if os.path.exists(path): -                    os.unlink(path) diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py deleted file mode 100644 index 89ecfca..0000000 --- a/importer/providers/raw_image_base.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import shutil -from urllib.parse import urlparse - -import requests - -from importer.providers.providerbase import ProviderBase - - -class RawImageProviderBase(ProviderBase): -    regex = "^.*.(jpg|jpeg|mp4|gif)$" - -    def download(self): -        path = 
self._download_raw_file(self.url) -        self.paths.append(path) -        self.downloaded = True - -    @staticmethod -    def _download_raw_file(url: str) -> str: -        a = urlparse(url) -        path = f'source_{os.path.basename(a.path)}' -        r = requests.get(url, stream=True) -        if r.status_code == 200: -            with open(path, 'wb') as f: -                r.raw.decode_content = True -                shutil.copyfileobj(r.raw, f) -        return path diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py deleted file mode 100644 index e15468f..0000000 --- a/importer/providers/redgifs.py +++ /dev/null @@ -1,9 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class RedGifs(YoutubeDlProviderBase): -    regex = "^.*redgifs\\.com.*$" -    _TEST = [{ -        "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab", -        "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4'] -    }] diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py deleted file mode 100644 index 2917fee..0000000 --- a/importer/providers/v_reddit.py +++ /dev/null @@ -1,10 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class VReddit(YoutubeDlProviderBase): -    regex = "^.*v\\.redd\\.it.*$" -    format = 'bestvideo+bestaudio/bestvideo' -    _TEST = [{ -        "url": "https://v.redd.it/42j6r7i8z7151", -        "paths": ["source_42j6r7i8z7151.mp4"] -    }] diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py deleted file mode 100644 index d880aa0..0000000 --- a/importer/providers/youtube.py +++ /dev/null @@ -1,10 +0,0 @@ -from importer.providers.youtube_dl_base import YoutubeDlProviderBase - - -class Youtube(YoutubeDlProviderBase): -    regex = "^.*\\.youtube\\.com.*$" -    format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio" -    _TEST = [{ -        "url": 
"https://www.youtube.com/watch?v=Wjrrgrvq1ew", -        "paths": ["source_Wjrrgrvq1ew.mp4"] -    }] diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py deleted file mode 100644 index 3d67738..0000000 --- a/importer/providers/youtube_dl_base.py +++ /dev/null @@ -1,37 +0,0 @@ -import os - -import youtube_dl - -from importer.providers.providerbase import ProviderBase - - -class YoutubeDlProviderBase(ProviderBase): -    regex = ".*" -    output_template: str = 'source_%(id)s.%(ext)s' -    format: str = "best" -    merge_format_output: str = "mp4" - -    _TEST = [{ -        "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew", -        "paths": ["source_Wjrrgrvq1ew.mp4"] -    }] - -    def download(self): -        ydl_opts = { -            'quiet': True, -            'format': self.format, -            'merge_output_format': self.merge_format_output, -            'outtmpl': self.output_template -        } - -        with youtube_dl.YoutubeDL(ydl_opts) as ydl: -            info = ydl.extract_info(self.url, download=True) -            if info.get('_type', None) == 'playlist': -                for entry in info['entries']: -                    r = ydl.prepare_filename(entry) -                    self.paths.append(f'{os.path.splitext(r)[0]}.{entry["ext"]}') -            else: -                r = ydl.prepare_filename(info) -                self.paths.append(f'{os.path.splitext(r)[0]}.{info["ext"]}') - -            self.downloaded = True @@ -7,8 +7,8 @@ import praw  from nextcloud import NextCloud  from praw.models.util import stream_generator +from gallery_dl.job import DownloadJob -from importer.downloader import Downloader  from importer.uploader import upload_file, create_folders  levels = { @@ -55,6 +55,33 @@ parser.add_argument('-l', '--log-level',                      choices=levels.keys(),                      help=f'it will set log level.') + +def get_list_of_files(dirName): +    # create a list of file and sub directories +    
# names in the given directory +    listOfFile = os.listdir(dirName) +    allFiles = list() +    # Iterate over all the entries +    for entry in listOfFile: +        # Create full path +        fullPath = os.path.join(dirName, entry) +        # If entry is a directory then get the list of files in this directory +        if os.path.isdir(fullPath): +            allFiles = allFiles + get_list_of_files(fullPath) +        else: +            allFiles.append(fullPath) + +    return allFiles + + +def download(url) -> [str]: +    d = DownloadJob(url=url) +    d.run() +    basedir = d.pathfmt.basedirectory +    files = get_list_of_files(basedir) +    return files + +  if __name__ == "__main__":      args = parser.parse_args() @@ -81,15 +108,12 @@ if __name__ == "__main__":          try:              url = post.url              create_folders(f"{args.nextcloud_path}/{post.subreddit}/", nxc) -            d = Downloader(url=url, reddit=reddit) -            with d.provider() as provider: -                provider.download() -                logging.info(f"{post.id} from {post.subreddit} downloaded") -                for path in provider.paths: -                    if "-mobile" in path:  # Remove mobile version -                        continue -                    upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{path}", nxc) -                    logging.info(f"{path} uploaded") +            logging.info(f"{post.id} from {post.subreddit} downloaded") +            for path in download(url): +                filename = os.path.basename(path) +                upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{filename}", nxc) +                os.unlink(path) +                logging.info(f"{path} uploaded")          except Exception as e:              logging.error(e) diff --git a/requirements.txt b/requirements.txt index 896d94d..a80d705 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ requests==2.26.0  praw==7.3.0  jsonpickle==2.0.0  
nextcloud-api-wrapper==0.2.1.5 +gallery-dl==1.18.2
\ No newline at end of file diff --git a/test/test_providers.py b/test/test_providers.py deleted file mode 100644 index 32d81bd..0000000 --- a/test/test_providers.py +++ /dev/null @@ -1,61 +0,0 @@ -import os - -import praw -import pytest - -import importer.providers as providers - -reddit_env = pytest.mark.skipif( -    os.environ.get('CLIENT_ID', '') == '' or -    os.environ.get('CLIENT_SECRET', '') == '' or -    os.environ.get('USERNAME', '') == '' or -    os.environ.get('PASSWORD', '') == '' -    , reason="Require reddit env variables to be set." -) - - -@pytest.fixture -def mock_ydl_download(mocker): -    # this function is responsible for downloading the file -    return mocker.patch('importer.providers.youtube_dl_base.youtube_dl.YoutubeDL.process_info') - - -@pytest.mark.parametrize("provider", -                         [ -                             providers.IReddit, -                             providers.Imgur, -                             providers.RawImageProviderBase, -                             providers.RedGifs, -                             providers.Youtube, -                             providers.YoutubeDlProviderBase -                         ]) -def test_provider(provider, mock_ydl_download): -    for test in provider._TEST: -        with provider(url=test['url']) as p: -            p.download() -            assert p.downloaded -            assert p.paths == test['paths'] - - -@reddit_env -@pytest.mark.parametrize("provider", -                         [ -                             providers.GReddit -                         ]) -def test_provider_with_reddit(provider, mock_ydl_download): -    username = os.environ.get('USERNAME', '') -    password = os.environ.get('PASSWORD', '') -    client_id = os.environ.get('CLIENT_ID', '') -    client_secret = os.environ.get('CLIENT_SECRET', '') - -    reddit = praw.Reddit(client_id=client_id, -                         client_secret=client_secret, -                         password=password, -           
              user_agent="reddit-nextcloud-importer", -                         username=username) - -    for test in provider._TEST: -        with provider(url=test['url'], reddit=reddit) as p: -            p.download() -            assert p.downloaded -            assert p.paths == test['paths'] | 
