From 417fb93a9368057e85e4c8bbaabc2ae5aca123d3 Mon Sep 17 00:00:00 2001 From: gabrielgio Date: Wed, 14 Jul 2021 21:47:48 +0200 Subject: fix: Fix pathing The module imports aren't working. --- Dockerfile | 6 +- importer/__init__.py | 0 importer/downloader.py | 157 +++++++++++++++++++++++++++++++++++++++++++++++++ importer/uploader.py | 19 ++++++ main.py | 70 ++++++++++++++++++++++ src/downloader.py | 157 ------------------------------------------------- src/main.py | 70 ---------------------- src/uploader.py | 19 ------ test/test_download.py | 4 +- test/test_uploader.py | 2 +- 10 files changed, 252 insertions(+), 252 deletions(-) create mode 100644 importer/__init__.py create mode 100644 importer/downloader.py create mode 100644 importer/uploader.py create mode 100644 main.py delete mode 100644 src/downloader.py delete mode 100644 src/main.py delete mode 100644 src/uploader.py diff --git a/Dockerfile b/Dockerfile index 307aede..a3b227c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,9 +7,9 @@ WORKDIR /opt COPY requirements.txt . RUN pip install -r requirements.txt -COPY src/downloader.py . -COPY src/main.py . -COPY src/uploader.py . +COPY importer/downloader.py . +COPY importer/uploader.py . +COPY main.py . ENTRYPOINT python main.py \ No newline at end of file diff --git a/importer/__init__.py b/importer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/importer/downloader.py b/importer/downloader.py new file mode 100644 index 0000000..c67834b --- /dev/null +++ b/importer/downloader.py @@ -0,0 +1,157 @@ +import os +import re +import shutil +from enum import Enum +from urllib.parse import urlparse + +import youtube_dl +import requests + +from praw import Reddit + + +class SourceType(Enum): + VREDDIT = 1 + IREDDIT = 2 + YOUTUBE = 4 + REDGIFS = 5 + IMAGURJPG = 6 + GFYCAT = 7 + GREDDIT = 8 + UNKNOWN = 1000 + + +OUTTMPL = 'source_%(id)s.%(ext)s' + + +class Downloader: + reddit: Reddit + username: str + downloaded: bool + post_id: str + source_type: SourceType + paths: list[str] + + def __init__(self, url: str, reddit: Reddit): + self.reddit = reddit + self.downloaded = False + self.url = url + self.source_type = self._get_source_type(url) + self.paths = [] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.delete() + + def download(self): + try: + if self.source_type == SourceType.VREDDIT: + self._download_vreddit() + elif self.source_type == SourceType.REDGIFS: + self._download_redgifs() + elif self.source_type == SourceType.GFYCAT: + self._download_gifycat() + elif self.source_type == SourceType.YOUTUBE: + self._download_youtube() + elif self.source_type in (SourceType.IMAGURJPG, SourceType.IREDDIT): + self._download_raw_file() + elif self.source_type == SourceType.GREDDIT: + self._download_gallery_reddit() + except Exception as e: + self.downloaded = False + + def delete(self): + if self.paths: + for path in self.paths: + if os.path.exists(path): + os.unlink(path) + + def _download_youtube_dls(self, ydl_opts): + with youtube_dl.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(self.url, download=True) + if info.get('_type', None) == 'playlist': + for entry in info['entries']: + r = ydl.prepare_filename(entry) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + else: + r = ydl.prepare_filename(info) + self.paths.append(f'{os.path.splitext(r)[0]}.mp4') + + self.downloaded = True + + def _download_redgifs(self): + ydl_opts = { + 'format': 'best', + 'merge_output_format': 'mp4', + 'outtmpl': OUTTMPL + } + self._download_youtube_dls(ydl_opts) + + def _download_gifycat(self): + ydl_opts = { + 'format': 'best', + 'merge_output_format': 'mp4', + 'outtmpl': OUTTMPL + } + self._download_youtube_dls(ydl_opts) + + def _download_vreddit(self): + ydl_opts = { + 'format': 'bestvideo+bestaudio/bestvideo', + 'merge_output_format': 'mp4', + 'outtmpl': OUTTMPL + } + self._download_youtube_dls(ydl_opts) + + def _download_youtube(self): + ydl_opts = { + 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio', + 'merge_output_format': 'mp4', + 'outtmpl': OUTTMPL + } + self._download_youtube_dls(ydl_opts) + + def _download_raw_file(self): + a = urlparse(self.url) + path = f'source_{os.path.basename(a.path)}' + + r = requests.get(self.url, stream=True) + if r.status_code == 200: + self.downloaded = True + with open(path, 'wb') as f: + r.raw.decode_content = True + shutil.copyfileobj(r.raw, f) + self.paths.append(path) + else: + self.downloaded = False + + def _download_gallery_reddit(self): + url = self.url + submission = self.reddit.submission(url=self.url) + for key in submission.media_metadata: + value = submission.media_metadata[key] + self.url = value['s']['u'] + self._download_raw_file() + + self.url = url + + @staticmethod + def _get_source_type(url): + if re.match("^.*v\\.redd\\.it.*$", url): + return SourceType.VREDDIT + if re.match("^.*i\\.redd\\.it.*\\.(jpg|jpeg)$", url): + return SourceType.IREDDIT + if re.match("^.*\\.youtube\\.com.*$", url): + return SourceType.YOUTUBE + if re.match("^.*redgifs\\.com.*$", url): + return SourceType.REDGIFS + if re.match("^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$", url): + return SourceType.IMAGURJPG + if re.match("^.*gfycat.com.*$", url): + return SourceType.GFYCAT + if re.match("^.*www.reddit.com/gallery.*$", url): + return SourceType.GREDDIT + + return SourceType.UNKNOWN diff --git a/importer/uploader.py b/importer/uploader.py new file mode 100644 index 0000000..7c7641a --- /dev/null +++ b/importer/uploader.py @@ -0,0 +1,19 @@ +from functools import reduce + +from nextcloud import NextCloud + + +def _create_folder(folder: str, nxc: NextCloud) -> str: + nxc.create_folder(folder, True) + return folder + + +def create_folders(path: str, nxc: NextCloud): + # remove first "/" if there is one. + path = path if path[0] != '/' else path[1:] + folders = path.split("/") + reduce(lambda x, y: _create_folder(f"{x}/{y}", nxc), folders, "") + + +def upload_file(local_filename: str, remote_filename: str, nxc: NextCloud): + nxc.upload_file(local_filename, remote_filename) diff --git a/main.py b/main.py new file mode 100644 index 0000000..266bb53 --- /dev/null +++ b/main.py @@ -0,0 +1,70 @@ +import argparse +import os + +import praw +from time import sleep + +from nextcloud import NextCloud +from praw.models.util import stream_generator + +from downloader import Downloader +from uploader import create_folders, upload_file + +parser = argparse.ArgumentParser(description="Monitor saved") +parser.add_argument('-c', '--client-id', + help="Reddit client id", + default=os.environ.get('CLIENT_ID', '')) +parser.add_argument('-s', '--client-secret', + help="Reddit client secret", + default=os.environ.get('CLIENT_SECRET', '')) +parser.add_argument('-u', '--reddit-username', + help="Reddit username", + default=os.environ.get('REDDIT_USERNAME', '')) +parser.add_argument('-p', '--reddit-password', + help="Reddit user password", + default=os.environ.get('REDDIT_PASSWORD', '')) +parser.add_argument('-P', '--nextcloud-password', + help="Nextcloud Password", + default=os.environ.get('NEXTCLOUD_PASSWORD', '')) +parser.add_argument('-U', '--nextcloud-username', + help="Nextcloud Username", + default=os.environ.get('NEXTCLOUD_USERNAME', '')) +parser.add_argument('-o', '--nextcloud-host', + help="Nextcloud Host", + default=os.environ.get('NEXTCLOUD_HOST', 'localhost')) +parser.add_argument('-d', '--nextcloud-path', + help="Nextcloud root folder", + default=os.environ.get('NEXTCLOUD_PATH', 'im')) + +if __name__ == "__main__": + args = parser.parse_args() + reddit = praw.Reddit(client_id=args.client_id, + client_secret=args.client_secret, + password=args.reddit_password, + user_agent="hcrawler", + username=args.reddit_username) + + nxc = NextCloud( + args.nextcloud_host, + user=args.nextcloud_username, + password=args.nextcloud_password, + session_kwargs={'verify': False} + ) + + redditor = reddit.redditor(args.reddit_username) + + + def upload(post): + url = post.url + create_folders(f"{args.nextcloud_path}/{post.subreddit}/", nxc) + with Downloader(url=url, reddit=reddit) as d: + d.download() + for path in d.paths: + if "-mobile" in path: # Remove mobile version + continue + upload_file(path, f"im/{post.subreddit}/{path}", nxc) + + + generator = stream_generator(redditor.saved, attribute_name="name") + for post in generator: + upload(post) diff --git a/src/downloader.py b/src/downloader.py deleted file mode 100644 index c67834b..0000000 --- a/src/downloader.py +++ /dev/null @@ -1,157 +0,0 @@ -import os -import re -import shutil -from enum import Enum -from urllib.parse import urlparse - -import youtube_dl -import requests - -from praw import Reddit - - -class SourceType(Enum): - VREDDIT = 1 - IREDDIT = 2 - YOUTUBE = 4 - REDGIFS = 5 - IMAGURJPG = 6 - GFYCAT = 7 - GREDDIT = 8 - UNKNOWN = 1000 - - -OUTTMPL = 'source_%(id)s.%(ext)s' - - -class Downloader: - reddit: Reddit - username: str - downloaded: bool - post_id: str - source_type: SourceType - paths: list[str] - - def __init__(self, url: str, reddit: Reddit): - self.reddit = reddit - self.downloaded = False - self.url = url - self.source_type = self._get_source_type(url) - self.paths = [] - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.delete() - - def download(self): - try: - if self.source_type == SourceType.VREDDIT: - self._download_vreddit() - elif self.source_type == SourceType.REDGIFS: - self._download_redgifs() - elif self.source_type == SourceType.GFYCAT: - self._download_gifycat() - elif self.source_type == SourceType.YOUTUBE: - self._download_youtube() - elif self.source_type in (SourceType.IMAGURJPG, SourceType.IREDDIT): - self._download_raw_file() - elif self.source_type == SourceType.GREDDIT: - self._download_gallery_reddit() - except Exception as e: - self.downloaded = False - - def delete(self): - if self.paths: - for path in self.paths: - if os.path.exists(path): - os.unlink(path) - - def _download_youtube_dls(self, ydl_opts): - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(self.url, download=True) - if info.get('_type', None) == 'playlist': - for entry in info['entries']: - r = ydl.prepare_filename(entry) - self.paths.append(f'{os.path.splitext(r)[0]}.mp4') - else: - r = ydl.prepare_filename(info) - self.paths.append(f'{os.path.splitext(r)[0]}.mp4') - - self.downloaded = True - - def _download_redgifs(self): - ydl_opts = { - 'format': 'best', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_gifycat(self): - ydl_opts = { - 'format': 'best', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_vreddit(self): - ydl_opts = { - 'format': 'bestvideo+bestaudio/bestvideo', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_youtube(self): - ydl_opts = { - 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio', - 'merge_output_format': 'mp4', - 'outtmpl': OUTTMPL - } - self._download_youtube_dls(ydl_opts) - - def _download_raw_file(self): - a = urlparse(self.url) - path = f'source_{os.path.basename(a.path)}' - - r = requests.get(self.url, stream=True) - if r.status_code == 200: - self.downloaded = True - with open(path, 'wb') as f: - r.raw.decode_content = True - shutil.copyfileobj(r.raw, f) - self.paths.append(path) - else: - self.downloaded = False - - def _download_gallery_reddit(self): - url = self.url - submission = self.reddit.submission(url=self.url) - for key in submission.media_metadata: - value = submission.media_metadata[key] - self.url = value['s']['u'] - self._download_raw_file() - - self.url = url - - @staticmethod - def _get_source_type(url): - if re.match("^.*v\\.redd\\.it.*$", url): - return SourceType.VREDDIT - if re.match("^.*i\\.redd\\.it.*\\.(jpg|jpeg)$", url): - return SourceType.IREDDIT - if re.match("^.*\\.youtube\\.com.*$", url): - return SourceType.YOUTUBE - if re.match("^.*redgifs\\.com.*$", url): - return SourceType.REDGIFS - if re.match("^.*i\\.imgur\\.com.*\\.(jpg|jpeg)$", url): - return SourceType.IMAGURJPG - if re.match("^.*gfycat.com.*$", url): - return SourceType.GFYCAT - if re.match("^.*www.reddit.com/gallery.*$", url): - return SourceType.GREDDIT - - return SourceType.UNKNOWN diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 266bb53..0000000 --- a/src/main.py +++ /dev/null @@ -1,70 +0,0 @@ -import argparse -import os - -import praw -from time import sleep - -from nextcloud import NextCloud -from praw.models.util import stream_generator - -from downloader import Downloader -from uploader import create_folders, upload_file - -parser = argparse.ArgumentParser(description="Monitor saved") -parser.add_argument('-c', '--client-id', - help="Reddit client id", - default=os.environ.get('CLIENT_ID', '')) -parser.add_argument('-s', '--client-secret', - help="Reddit client secret", - default=os.environ.get('CLIENT_SECRET', '')) -parser.add_argument('-u', '--reddit-username', - help="Reddit username", - default=os.environ.get('REDDIT_USERNAME', '')) -parser.add_argument('-p', '--reddit-password', - help="Reddit user password", - default=os.environ.get('REDDIT_PASSWORD', '')) -parser.add_argument('-P', '--nextcloud-password', - help="Nextcloud Password", - default=os.environ.get('NEXTCLOUD_PASSWORD', '')) -parser.add_argument('-U', '--nextcloud-username', - help="Nextcloud Username", - default=os.environ.get('NEXTCLOUD_USERNAME', '')) -parser.add_argument('-o', '--nextcloud-host', - help="Nextcloud Host", - default=os.environ.get('NEXTCLOUD_HOST', 'localhost')) -parser.add_argument('-d', '--nextcloud-path', - help="Nextcloud root folder", - default=os.environ.get('NEXTCLOUD_PATH', 'im')) - -if __name__ == "__main__": - args = parser.parse_args() - reddit = praw.Reddit(client_id=args.client_id, - client_secret=args.client_secret, - password=args.reddit_password, - user_agent="hcrawler", - username=args.reddit_username) - - nxc = NextCloud( - args.nextcloud_host, - user=args.nextcloud_username, - password=args.nextcloud_password, - session_kwargs={'verify': False} - ) - - redditor = reddit.redditor(args.reddit_username) - - - def upload(post): - url = post.url - create_folders(f"{args.nextcloud_path}/{post.subreddit}/", nxc) - with Downloader(url=url, reddit=reddit) as d: - d.download() - for path in d.paths: - if "-mobile" in path: # Remove mobile version - continue - upload_file(path, f"im/{post.subreddit}/{path}", nxc) - - - generator = stream_generator(redditor.saved, attribute_name="name") - for post in generator: - upload(post) diff --git a/src/uploader.py b/src/uploader.py deleted file mode 100644 index 7c7641a..0000000 --- a/src/uploader.py +++ /dev/null @@ -1,19 +0,0 @@ -from functools import reduce - -from nextcloud import NextCloud - - -def _create_folder(folder: str, nxc: NextCloud) -> str: - nxc.create_folder(folder, True) - return folder - - -def create_folders(path: str, nxc: NextCloud): - # remove first "/" if there is one. - path = path if path[0] != '/' else path[1:] - folders = path.split("/") - reduce(lambda x, y: _create_folder(f"{x}/{y}", nxc), folders, "") - - -def upload_file(local_filename: str, remote_filename: str, nxc: NextCloud): - nxc.upload_file(local_filename, remote_filename) diff --git a/test/test_download.py b/test/test_download.py index 9bafc1c..a5c9386 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -2,7 +2,7 @@ import os import pytest -from src.downloader import SourceType, Downloader +from importer.downloader import SourceType, Downloader reddit_env = pytest.mark.skipif( os.environ.get('CLIENT_ID', '') == '' or @@ -16,7 +16,7 @@ reddit_env = pytest.mark.skipif( @pytest.fixture def mock_ydl_download(mocker): # this function is responsible for downloading the file - return mocker.patch('downloader.youtube_dl.YoutubeDL.process_info') + return mocker.patch('importer.downloader.youtube_dl.YoutubeDL.process_info') @pytest.mark.parametrize('url,source_type', [ diff --git a/test/test_uploader.py b/test/test_uploader.py index bcdf04b..7fe2f44 100644 --- a/test/test_uploader.py +++ b/test/test_uploader.py @@ -1,6 +1,6 @@ import pytest -from uploader import create_folders +from importer.uploader import create_folders @pytest.fixture -- cgit v1.2.3