about summary refs log tree commit diff
diff options
context:
space:
mode:
author gabrielgio <gabriel.giovanini@pm.me> 2021-07-27 23:15:53 +0200
committer gabrielgio <gabriel.giovanini@pm.me> 2021-07-27 23:28:51 +0200
commit b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda (patch)
tree 6652fd8b3e89b652791167e73b293b57d0b6496b
parent 3d54b3d91d0c175feae82c413fd0139545d46e2a (diff)
download reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.tar.gz
reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.tar.bz2
reddit-nextcloud-importer-b8d69f9bf5a03fd6d8b6a477f3b7ca8f10c27bda.zip
feat: Move to gallery-dl
As it turns out there is already a project that does what I want but better.
-rw-r--r-- .gitlab-ci.yml 2
-rw-r--r-- importer/__init__.py 0
-rw-r--r-- importer/downloader.py 19
-rw-r--r-- importer/providers/__init__.py 10
-rw-r--r-- importer/providers/g_reddit.py 26
-rw-r--r-- importer/providers/gfycat.py 9
-rw-r--r-- importer/providers/i_reddit.py 15
-rw-r--r-- importer/providers/imgur.py 9
-rw-r--r-- importer/providers/providerbase.py 30
-rw-r--r-- importer/providers/raw_image_base.py 27
-rw-r--r-- importer/providers/redgifs.py 9
-rw-r--r-- importer/providers/v_reddit.py 10
-rw-r--r-- importer/providers/youtube.py 10
-rw-r--r-- importer/providers/youtube_dl_base.py 37
-rw-r--r-- main.py 44
-rw-r--r-- requirements.txt 1
-rw-r--r-- test/test_providers.py 61
17 files changed, 36 insertions, 283 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4291a2e..5e43e8d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,7 +38,7 @@ test:
stage: test
script:
- pip install -r dev-requirements.txt
- - pytest -n $(nproc) --junitxml=report.xml --cov=importer
+ - pytest --junitxml=report.xml --cov=importer
artifacts:
reports:
junit: report.xml
diff --git a/importer/__init__.py b/importer/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/importer/__init__.py
+++ /dev/null
diff --git a/importer/downloader.py b/importer/downloader.py
deleted file mode 100644
index 290c9e2..0000000
--- a/importer/downloader.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import re
-
-from praw import Reddit
-
-from importer.providers import GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, YoutubeDlProviderBase, \
- RawImageProviderBase, Youtube
-
-
-class Downloader:
- providers = [GReddit, Gfycat, IReddit, Imgur, RedGifs, VReddit, Youtube, RawImageProviderBase,
- YoutubeDlProviderBase]
-
- def __init__(self, url: str, reddit: Reddit):
- self.Provider = next(filter(lambda x: re.match(x.regex, url), self.providers))
- self.url = url
- self.reddit = reddit
-
- def provider(self):
- return self.Provider(url=self.url, reddit=self.reddit)
diff --git a/importer/providers/__init__.py b/importer/providers/__init__.py
deleted file mode 100644
index 62c2d85..0000000
--- a/importer/providers/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .g_reddit import GReddit
-from .providerbase import ProviderBase
-from .gfycat import Gfycat
-from .i_reddit import IReddit
-from .imgur import Imgur
-from .raw_image_base import RawImageProviderBase
-from .redgifs import RedGifs
-from .v_reddit import VReddit
-from .youtube import Youtube
-from .youtube_dl_base import YoutubeDlProviderBase
diff --git a/importer/providers/g_reddit.py b/importer/providers/g_reddit.py
deleted file mode 100644
index e0341e0..0000000
--- a/importer/providers/g_reddit.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from praw import Reddit
-
-from importer.providers.raw_image_base import RawImageProviderBase
-
-
-class GReddit(RawImageProviderBase):
- regex = "^.*www.reddit.com/gallery.*$"
- _TEST = [{
- "url": "https://www.reddit.com/gallery/mik7c9",
- "paths": ['source_hlokpsyhgrq61.jpg', 'source_n31c2y7igrq61.jpg', 'source_7eg0o76igrq61.jpg',
- 'source_whl12jbigrq61.jpg', 'source_4uok762igrq61.jpg', 'source_t3pgm64igrq61.jpg',
- 'source_ymc4hv9igrq61.jpg']
- }]
-
- def __init__(self, url: str, reddit: Reddit):
- super(GReddit, self).__init__(url)
- self.reddit = reddit
-
- def download(self):
- submission = self.reddit.submission(url=self.url)
- for key in submission.media_metadata:
- value = submission.media_metadata[key]
- url = value['s']['u']
- path = self._download_raw_file(url)
- self.paths.append(path)
- self.downloaded = True
diff --git a/importer/providers/gfycat.py b/importer/providers/gfycat.py
deleted file mode 100644
index 70d9c05..0000000
--- a/importer/providers/gfycat.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from importer.providers.youtube_dl_base import YoutubeDlProviderBase
-
-
-class Gfycat(YoutubeDlProviderBase):
- regex = "^.*gfycat.com.*$"
- _TEST = [{
- "url": "https://gfycat.com/presentdangerousdromedary",
- "paths": "source_presentdangerousdromedary.mp4"
- }]
diff --git a/importer/providers/i_reddit.py b/importer/providers/i_reddit.py
deleted file mode 100644
index 13fc70e..0000000
--- a/importer/providers/i_reddit.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from importer.providers.raw_image_base import RawImageProviderBase
-
-
-class IReddit(RawImageProviderBase):
- regex = "^.*i\\.redd\\.it.*\\.(jpg|jpeg|gif)$"
- _TEST = [
- {
- "url": "https://i.redd.it/pjj1ll1b2rr41.jpg",
- "paths": ["source_pjj1ll1b2rr41.jpg"]
- },
- {
- "url": "https://i.redd.it/55vpi6ol5jc71.gif",
- "paths": ['source_55vpi6ol5jc71.gif']
- }
- ]
diff --git a/importer/providers/imgur.py b/importer/providers/imgur.py
deleted file mode 100644
index dd8fb6e..0000000
--- a/importer/providers/imgur.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from importer.providers.raw_image_base import RawImageProviderBase
-
-
-class Imgur(RawImageProviderBase):
- regex = "^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$"
- _TEST = [{
- "url": "https://i.imgur.com/fXLMjfp.jpg",
- "paths": ["source_fXLMjfp.jpg"],
- }]
diff --git a/importer/providers/providerbase.py b/importer/providers/providerbase.py
deleted file mode 100644
index 51b525e..0000000
--- a/importer/providers/providerbase.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import os
-from typing import List
-
-
-class ProviderBase:
- paths: List[str]
- downloaded: bool
- regex: str
-
- _TEST = [{
- "url": "https://i.imgur.com/fXLMjfp.jpg",
- "paths": ["source_fXLMjfp.jpg"],
- }]
-
- def __init__(self, url: str, **kwargs):
- self.url = url
- self.paths = []
- self.downloaded = False
-
- def download(self):
- pass
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- if self.paths:
- for path in self.paths:
- if os.path.exists(path):
- os.unlink(path)
diff --git a/importer/providers/raw_image_base.py b/importer/providers/raw_image_base.py
deleted file mode 100644
index 89ecfca..0000000
--- a/importer/providers/raw_image_base.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-import shutil
-from urllib.parse import urlparse
-
-import requests
-
-from importer.providers.providerbase import ProviderBase
-
-
-class RawImageProviderBase(ProviderBase):
- regex = "^.*.(jpg|jpeg|mp4|gif)$"
-
- def download(self):
- path = self._download_raw_file(self.url)
- self.paths.append(path)
- self.downloaded = True
-
- @staticmethod
- def _download_raw_file(url: str) -> str:
- a = urlparse(url)
- path = f'source_{os.path.basename(a.path)}'
- r = requests.get(url, stream=True)
- if r.status_code == 200:
- with open(path, 'wb') as f:
- r.raw.decode_content = True
- shutil.copyfileobj(r.raw, f)
- return path
diff --git a/importer/providers/redgifs.py b/importer/providers/redgifs.py
deleted file mode 100644
index e15468f..0000000
--- a/importer/providers/redgifs.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from importer.providers.youtube_dl_base import YoutubeDlProviderBase
-
-
-class RedGifs(YoutubeDlProviderBase):
- regex = "^.*redgifs\\.com.*$"
- _TEST = [{
- "url": "https://redgifs.com/watch/ripesnivelingfiddlercrab",
- "paths": ["source_RipeSnivelingFiddlercrab.mp4", 'source_RipeSnivelingFiddlercrab-mobile.mp4']
- }]
diff --git a/importer/providers/v_reddit.py b/importer/providers/v_reddit.py
deleted file mode 100644
index 2917fee..0000000
--- a/importer/providers/v_reddit.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from importer.providers.youtube_dl_base import YoutubeDlProviderBase
-
-
-class VReddit(YoutubeDlProviderBase):
- regex = "^.*v\\.redd\\.it.*$"
- format = 'bestvideo+bestaudio/bestvideo'
- _TEST = [{
- "url": "https://v.redd.it/42j6r7i8z7151",
- "paths": ["source_42j6r7i8z7151.mp4"]
- }]
diff --git a/importer/providers/youtube.py b/importer/providers/youtube.py
deleted file mode 100644
index d880aa0..0000000
--- a/importer/providers/youtube.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from importer.providers.youtube_dl_base import YoutubeDlProviderBase
-
-
-class Youtube(YoutubeDlProviderBase):
- regex = "^.*\\.youtube\\.com.*$"
- format = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio"
- _TEST = [{
- "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew",
- "paths": ["source_Wjrrgrvq1ew.mp4"]
- }]
diff --git a/importer/providers/youtube_dl_base.py b/importer/providers/youtube_dl_base.py
deleted file mode 100644
index 3d67738..0000000
--- a/importer/providers/youtube_dl_base.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import os
-
-import youtube_dl
-
-from importer.providers.providerbase import ProviderBase
-
-
-class YoutubeDlProviderBase(ProviderBase):
- regex = ".*"
- output_template: str = 'source_%(id)s.%(ext)s'
- format: str = "best"
- merge_format_output: str = "mp4"
-
- _TEST = [{
- "url": "https://www.youtube.com/watch?v=Wjrrgrvq1ew",
- "paths": ["source_Wjrrgrvq1ew.mp4"]
- }]
-
- def download(self):
- ydl_opts = {
- 'quiet': True,
- 'format': self.format,
- 'merge_output_format': self.merge_format_output,
- 'outtmpl': self.output_template
- }
-
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- info = ydl.extract_info(self.url, download=True)
- if info.get('_type', None) == 'playlist':
- for entry in info['entries']:
- r = ydl.prepare_filename(entry)
- self.paths.append(f'{os.path.splitext(r)[0]}.{entry["ext"]}')
- else:
- r = ydl.prepare_filename(info)
- self.paths.append(f'{os.path.splitext(r)[0]}.{info["ext"]}')
-
- self.downloaded = True
diff --git a/main.py b/main.py
index 036aab3..2f34b3a 100644
--- a/main.py
+++ b/main.py
@@ -7,8 +7,8 @@ import praw
from nextcloud import NextCloud
from praw.models.util import stream_generator
+from gallery_dl.job import DownloadJob
-from importer.downloader import Downloader
from importer.uploader import upload_file, create_folders
levels = {
@@ -55,6 +55,33 @@ parser.add_argument('-l', '--log-level',
choices=levels.keys(),
help=f'it will set log level.')
+
+def get_list_of_files(dirName):
+ # create a list of file and sub directories
+ # names in the given directory
+ listOfFile = os.listdir(dirName)
+ allFiles = list()
+ # Iterate over all the entries
+ for entry in listOfFile:
+ # Create full path
+ fullPath = os.path.join(dirName, entry)
+ # If entry is a directory then get the list of files in this directory
+ if os.path.isdir(fullPath):
+ allFiles = allFiles + get_list_of_files(fullPath)
+ else:
+ allFiles.append(fullPath)
+
+ return allFiles
+
+
+def download(url) -> [str]:
+ d = DownloadJob(url=url)
+ d.run()
+ basedir = d.pathfmt.basedirectory
+ files = get_list_of_files(basedir)
+ return files
+
+
if __name__ == "__main__":
args = parser.parse_args()
@@ -81,15 +108,12 @@ if __name__ == "__main__":
try:
url = post.url
create_folders(f"{args.nextcloud_path}/{post.subreddit}/", nxc)
- d = Downloader(url=url, reddit=reddit)
- with d.provider() as provider:
- provider.download()
- logging.info(f"{post.id} from {post.subreddit} downloaded")
- for path in provider.paths:
- if "-mobile" in path: # Remove mobile version
- continue
- upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{path}", nxc)
- logging.info(f"{path} uploaded")
+ logging.info(f"{post.id} from {post.subreddit} downloaded")
+ for path in download(url):
+ filename = os.path.basename(path)
+ upload_file(path, f"{args.nextcloud_path}/{post.subreddit}/{filename}", nxc)
+ os.unlink(path)
+ logging.info(f"{path} uploaded")
except Exception as e:
logging.error(e)
diff --git a/requirements.txt b/requirements.txt
index 896d94d..a80d705 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ requests==2.26.0
praw==7.3.0
jsonpickle==2.0.0
nextcloud-api-wrapper==0.2.1.5
+gallery-dl==1.18.2
\ No newline at end of file
diff --git a/test/test_providers.py b/test/test_providers.py
deleted file mode 100644
index 32d81bd..0000000
--- a/test/test_providers.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import os
-
-import praw
-import pytest
-
-import importer.providers as providers
-
-reddit_env = pytest.mark.skipif(
- os.environ.get('CLIENT_ID', '') == '' or
- os.environ.get('CLIENT_SECRET', '') == '' or
- os.environ.get('USERNAME', '') == '' or
- os.environ.get('PASSWORD', '') == ''
- , reason="Require reddit env variables to be set."
-)
-
-
-@pytest.fixture
-def mock_ydl_download(mocker):
- # this function is responsible for downloading the file
- return mocker.patch('importer.providers.youtube_dl_base.youtube_dl.YoutubeDL.process_info')
-
-
-@pytest.mark.parametrize("provider",
- [
- providers.IReddit,
- providers.Imgur,
- providers.RawImageProviderBase,
- providers.RedGifs,
- providers.Youtube,
- providers.YoutubeDlProviderBase
- ])
-def test_provider(provider, mock_ydl_download):
- for test in provider._TEST:
- with provider(url=test['url']) as p:
- p.download()
- assert p.downloaded
- assert p.paths == test['paths']
-
-
-@reddit_env
-@pytest.mark.parametrize("provider",
- [
- providers.GReddit
- ])
-def test_provider_with_reddit(provider, mock_ydl_download):
- username = os.environ.get('USERNAME', '')
- password = os.environ.get('PASSWORD', '')
- client_id = os.environ.get('CLIENT_ID', '')
- client_secret = os.environ.get('CLIENT_SECRET', '')
-
- reddit = praw.Reddit(client_id=client_id,
- client_secret=client_secret,
- password=password,
- user_agent="reddit-nextcloud-importer",
- username=username)
-
- for test in provider._TEST:
- with provider(url=test['url'], reddit=reddit) as p:
- p.download()
- assert p.downloaded
- assert p.paths == test['paths']