diff options
author | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-14 21:23:44 +0200 |
---|---|---|
committer | gabrielgio <gabriel.giovanini@pm.me> | 2021-07-14 21:23:44 +0200 |
commit | 4d43e402b2b4e27bbbbfe557216d95963a27af72 (patch) | |
tree | 46f3013be10a1062280d64052bcb654d67084f91 /src | |
parent | 6887fcc0e42b1a64ed80f8565fe3099aaa818930 (diff) | |
download | reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.tar.gz reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.tar.bz2 reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.zip |
feat: Add nextcloud path param
Add a new param to input the nextcloud root path.
Diffstat (limited to 'src')
-rw-r--r-- | src/downloader.py | 157 | ||||
-rw-r--r-- | src/main.py | 70 | ||||
-rw-r--r-- | src/uploader.py | 19 |
3 files changed, 246 insertions, 0 deletions
# diff --git a/src/downloader.py b/src/downloader.py new file mode 100644 index 0000000..c67834b --- /dev/null +++ b/src/downloader.py @@ -0,0 +1,157 @@
"""Download the media referenced by a Reddit post URL to local files."""
import logging
import os
import re
import shutil
from enum import Enum
from urllib.parse import urlparse

import youtube_dl
import requests

from praw import Reddit

logger = logging.getLogger(__name__)


class SourceType(Enum):
    """Kind of host a post URL points at; selects the download strategy."""

    VREDDIT = 1
    IREDDIT = 2
    YOUTUBE = 4
    REDGIFS = 5
    IMAGURJPG = 6
    GFYCAT = 7
    GREDDIT = 8
    UNKNOWN = 1000


# Output file name template handed to youtube_dl.
OUTTMPL = 'source_%(id)s.%(ext)s'

# Checked in order; the first pattern that matches decides the source type.
_SOURCE_PATTERNS = (
    (re.compile(r"^.*v\.redd\.it.*$"), SourceType.VREDDIT),
    (re.compile(r"^.*i\.redd\.it.*\.(jpg|jpeg)$"), SourceType.IREDDIT),
    (re.compile(r"^.*\.youtube\.com.*$"), SourceType.YOUTUBE),
    (re.compile(r"^.*redgifs\.com.*$"), SourceType.REDGIFS),
    (re.compile(r"^.*i\.imgur\.com.*\.(jpg|jpeg)$"), SourceType.IMAGURJPG),
    # Dots are escaped so that e.g. "gfycatXcom" is no longer accepted.
    (re.compile(r"^.*gfycat\.com.*$"), SourceType.GFYCAT),
    (re.compile(r"^.*www\.reddit\.com/gallery.*$"), SourceType.GREDDIT),
)


class Downloader:
    """Fetch the media behind one URL and remember the files it wrote.

    Usable as a context manager: every file listed in ``paths`` is removed
    from disk when the ``with`` block exits.
    """

    reddit: Reddit
    username: str  # declared but never assigned by this class
    downloaded: bool
    post_id: str  # declared but never assigned by this class
    source_type: SourceType
    url: str
    paths: list[str]

    def __init__(self, url: str, reddit: Reddit):
        """Classify *url* and start with no downloaded files.

        Args:
            url: Address of the media (or gallery) to download.
            reddit: Client used to look up gallery submissions.
        """
        self.reddit = reddit
        self.downloaded = False
        self.url = url
        self.source_type = self._get_source_type(url)
        self.paths = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Local files are only a staging area; always clean them up.
        self.delete()

    def download(self):
        """Download ``self.url`` with the strategy for its source type.

        Best-effort: an exception raised by a strategy is logged and leaves
        ``downloaded`` set to False instead of propagating to the caller.
        URLs classified as ``SourceType.UNKNOWN`` are ignored.
        """
        handlers = {
            SourceType.VREDDIT: self._download_vreddit,
            SourceType.REDGIFS: self._download_redgifs,
            SourceType.GFYCAT: self._download_gifycat,
            SourceType.YOUTUBE: self._download_youtube,
            SourceType.IMAGURJPG: self._download_raw_file,
            SourceType.IREDDIT: self._download_raw_file,
            SourceType.GREDDIT: self._download_gallery_reddit,
        }
        handler = handlers.get(self.source_type)
        if handler is None:
            return

        try:
            handler()
        except Exception:
            # Keep the caller's loop alive, but leave a trace of what failed.
            logger.exception("Failed to download %s", self.url)
            self.downloaded = False

    def delete(self):
        """Remove every recorded file that still exists on disk."""
        for path in self.paths:
            if os.path.exists(path):
                os.unlink(path)

    def _download_youtube_dls(self, ydl_opts):
        """Download ``self.url`` through youtube_dl and record the files.

        Args:
            ydl_opts: Options dictionary passed to ``youtube_dl.YoutubeDL``.
        """
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(self.url, download=True)
            if info.get('_type') == 'playlist':
                entries = info['entries']
            else:
                entries = [info]

            for entry in entries:
                filename = ydl.prepare_filename(entry)
                # NOTE(review): the extension is forced to .mp4, presumably
                # to match 'merge_output_format'; confirm this also holds for
                # the 'best' format, where no merge may take place.
                self.paths.append(f'{os.path.splitext(filename)[0]}.mp4')

        self.downloaded = True

    def _download_best_as_mp4(self):
        """Shared strategy for hosts where the single best format is enough."""
        ydl_opts = {
            'format': 'best',
            'merge_output_format': 'mp4',
            'outtmpl': OUTTMPL
        }
        self._download_youtube_dls(ydl_opts)

    def _download_redgifs(self):
        """Download a redgifs URL."""
        self._download_best_as_mp4()

    def _download_gifycat(self):
        """Download a gfycat URL."""
        self._download_best_as_mp4()

    def _download_vreddit(self):
        """Download a v.redd.it video, falling back to video only."""
        ydl_opts = {
            'format': 'bestvideo+bestaudio/bestvideo',
            'merge_output_format': 'mp4',
            'outtmpl': OUTTMPL
        }
        self._download_youtube_dls(ydl_opts)

    def _download_youtube(self):
        """Download a YouTube video, preferring mp4 video with m4a audio."""
        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio',
            'merge_output_format': 'mp4',
            'outtmpl': OUTTMPL
        }
        self._download_youtube_dls(ydl_opts)

    def _download_raw_file(self):
        """Stream ``self.url`` into ``source_<basename of the URL path>``.

        Sets ``downloaded`` to False on a non-200 response. A file that
        could not be written completely is removed before the error is
        re-raised, so no untracked partial file stays behind.
        """
        parsed = urlparse(self.url)
        path = f'source_{os.path.basename(parsed.path)}'

        # The context manager releases the streamed connection in all cases.
        with requests.get(self.url, stream=True) as r:
            if r.status_code != 200:
                self.downloaded = False
                return

            r.raw.decode_content = True
            try:
                with open(path, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
            except Exception:
                if os.path.exists(path):
                    os.unlink(path)
                raise

        self.paths.append(path)
        self.downloaded = True

    def _download_gallery_reddit(self):
        """Download every image of a Reddit gallery submission.

        ``self.url`` is pointed at each image in turn so that
        ``_download_raw_file`` can be reused, and is always restored to the
        gallery URL afterwards, even when one of the downloads raises.
        """
        gallery_url = self.url
        submission = self.reddit.submission(url=gallery_url)
        try:
            for value in submission.media_metadata.values():
                self.url = value['s']['u']
                self._download_raw_file()
        finally:
            self.url = gallery_url

    @staticmethod
    def _get_source_type(url):
        """Return the SourceType whose pattern matches *url*, else UNKNOWN."""
        for pattern, source_type in _SOURCE_PATTERNS:
            if pattern.match(url):
                return source_type

        return SourceType.UNKNOWN


# diff --git  (patch header for src/main.py continues on the next line)
# a/src/main.py b/src/main.py new file mode 100644 index 0000000..266bb53 --- /dev/null +++ b/src/main.py @@ -0,0 +1,70 @@
"""Watch a Reddit user's saved posts and copy their media to Nextcloud."""
import argparse
import os

import praw
from time import sleep

from nextcloud import NextCloud
from praw.models.util import stream_generator

from downloader import Downloader
from uploader import create_folders, upload_file

# Every option falls back to an environment variable of the same meaning.
parser = argparse.ArgumentParser(description="Monitor saved")
parser.add_argument('-c', '--client-id',
                    help="Reddit client id",
                    default=os.environ.get('CLIENT_ID', ''))
parser.add_argument('-s', '--client-secret',
                    help="Reddit client secret",
                    default=os.environ.get('CLIENT_SECRET', ''))
parser.add_argument('-u', '--reddit-username',
                    help="Reddit username",
                    default=os.environ.get('REDDIT_USERNAME', ''))
parser.add_argument('-p', '--reddit-password',
                    help="Reddit user password",
                    default=os.environ.get('REDDIT_PASSWORD', ''))
parser.add_argument('-P', '--nextcloud-password',
                    help="Nextcloud Password",
                    default=os.environ.get('NEXTCLOUD_PASSWORD', ''))
parser.add_argument('-U', '--nextcloud-username',
                    help="Nextcloud Username",
                    default=os.environ.get('NEXTCLOUD_USERNAME', ''))
parser.add_argument('-o', '--nextcloud-host',
                    help="Nextcloud Host",
                    default=os.environ.get('NEXTCLOUD_HOST', 'localhost'))
parser.add_argument('-d', '--nextcloud-path',
                    help="Nextcloud root folder",
                    default=os.environ.get('NEXTCLOUD_PATH', 'im'))


def _upload(post, reddit, nxc, root):
    """Download the media of *post* and upload it below ``<root>/<subreddit>``.

    Args:
        post: Saved item; its ``url`` and ``subreddit`` attributes are read.
        reddit: Reddit client handed to the downloader.
        nxc: Nextcloud client used to create folders and upload files.
        root: Nextcloud root folder, without leading or trailing slashes.
    """
    # Build the folder once so that folder creation and upload cannot drift
    # apart; previously the upload always went to a hard-coded "im/" root and
    # ignored the configured --nextcloud-path.
    folder = "/".join(part for part in (root, str(post.subreddit)) if part)
    create_folders(f"{folder}/", nxc)

    with Downloader(url=post.url, reddit=reddit) as d:
        d.download()
        for path in d.paths:
            if "-mobile" in path:  # Remove mobile version
                continue
            upload_file(path, f"{folder}/{path}", nxc)


def main():
    """Parse the options, connect to both services and mirror saved posts."""
    args = parser.parse_args()
    reddit = praw.Reddit(client_id=args.client_id,
                         client_secret=args.client_secret,
                         password=args.reddit_password,
                         user_agent="hcrawler",
                         username=args.reddit_username)

    # NOTE(security): 'verify': False turns off TLS certificate verification
    # for every request sent to Nextcloud. It is kept to preserve the current
    # behaviour (e.g. self-signed hosts) but should become configurable.
    nxc = NextCloud(
        args.nextcloud_host,
        user=args.nextcloud_username,
        password=args.nextcloud_password,
        session_kwargs={'verify': False}
    )

    redditor = reddit.redditor(args.reddit_username)
    # Accept "im", "/im" and "im/" alike.
    root = args.nextcloud_path.strip("/")

    generator = stream_generator(redditor.saved, attribute_name="name")
    for post in generator:
        _upload(post, reddit, nxc, root)


if __name__ == "__main__":
    main()


# diff --git a/src/uploader.py b/src/uploader.py new file mode 100644 index 0000000..7c7641a --- /dev/null +++ b/src/uploader.py @@ -0,0 +1,19 @@
# Thin helpers around the Nextcloud client used by main.py.
from functools import reduce

from nextcloud import NextCloud


def _create_folder(folder: str, nxc: NextCloud) -> str:
    """Create *folder* on the server and return it unchanged."""
    # NOTE(review): the second positional argument is presumably the
    # client's "create recursively" flag; confirm against the client API.
    nxc.create_folder(folder, True)
    return folder


def create_folders(path: str, nxc: NextCloud):
    """Create every folder along *path*, one level at a time.

    For ``"a/b/"`` the folders ``"/a"`` and ``"/a/b"`` are created, in that
    order. Empty segments (leading, trailing or doubled slashes) are
    skipped, so an empty *path* is a no-op instead of raising IndexError.

    Args:
        path: Slash-separated folder path; a leading "/" is optional.
        nxc: Nextcloud client used to create the folders.
    """
    current = ""
    for name in path.split("/"):
        if not name:
            continue
        current = _create_folder(f"{current}/{name}", nxc)


def upload_file(local_filename: str, remote_filename: str, nxc: NextCloud):
    """Upload *local_filename* to *remote_filename* on the server."""
    nxc.upload_file(local_filename, remote_filename)