aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author gabrielgio <gabriel.giovanini@pm.me> 2021-07-14 21:23:44 +0200
committer gabrielgio <gabriel.giovanini@pm.me> 2021-07-14 21:23:44 +0200
commit 4d43e402b2b4e27bbbbfe557216d95963a27af72 (patch)
tree 46f3013be10a1062280d64052bcb654d67084f91 /src
parent 6887fcc0e42b1a64ed80f8565fe3099aaa818930 (diff)
download reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.tar.gz
reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.tar.bz2
reddit-nextcloud-importer-4d43e402b2b4e27bbbbfe557216d95963a27af72.zip
feat: Add nextcloud path param
Add a new param to input the nextcloud root path.
Diffstat (limited to 'src')
-rw-r--r-- src/downloader.py 157
-rw-r--r-- src/main.py 70
-rw-r--r-- src/uploader.py 19
3 files changed, 246 insertions, 0 deletions
diff --git a/src/downloader.py b/src/downloader.py
new file mode 100644
index 0000000..c67834b
--- /dev/null
+++ b/src/downloader.py
@@ -0,0 +1,157 @@
+import os
+import re
+import shutil
+from enum import Enum
+from urllib.parse import urlparse
+
+import youtube_dl
+import requests
+
+from praw import Reddit
+
+
class SourceType(Enum):
    """Kinds of media host that a post URL can be classified as.

    The integer values are plain identifiers (3 is unused).  ``UNKNOWN`` is
    the fallback for a URL that matches none of the recognised hosts.
    """

    VREDDIT = 1      # video hosted on v.redd.it
    IREDDIT = 2      # JPEG hosted on i.redd.it
    YOUTUBE = 4      # YouTube video
    REDGIFS = 5      # redgifs.com clip
    IMAGURJPG = 6    # JPEG hosted on i.imgur.com
    GFYCAT = 7       # gfycat.com clip
    GREDDIT = 8      # reddit gallery post
    UNKNOWN = 1000   # anything else
+
+
+OUTTMPL = 'source_%(id)s.%(ext)s'  # youtube-dl output template: files are written as source_<id>.<ext>
+
+
class Downloader:
    """Fetch the media behind a single post URL as local files.

    The URL is classified once at construction time (``source_type``) and
    ``download()`` dispatches on that classification.  Every file written is
    recorded in ``paths``.  Used as a context manager, the instance deletes
    those files again on exit, so callers must finish with them inside the
    ``with`` block.
    """

    # Annotations naming types defined outside this class are quoted so that
    # they are not evaluated while the class body executes.
    reddit: "Reddit"
    username: str  # declared for callers; never assigned by this class
    downloaded: bool  # outcome of the most recent download attempt
    post_id: str  # declared for callers; never assigned by this class
    source_type: "SourceType"
    paths: list[str]  # local files produced so far
    url: str

    # Seconds a raw HTTP download may wait on the server before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, url: str, reddit: "Reddit"):
        """Remember ``url`` and ``reddit`` and classify the URL.

        Args:
            url: address of the media (or gallery) to download.
            reddit: client used to resolve gallery submissions.
        """
        self.reddit = reddit
        self.downloaded = False
        self.url = url
        self.source_type = self._get_source_type(url)
        self.paths = []

    def __enter__(self):
        """Return the downloader itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Delete every downloaded file; exceptions are not suppressed."""
        self.delete()

    def download(self):
        """Download ``self.url`` with the handler matching its source type.

        URLs of an unhandled source type are ignored.  The operation is
        best-effort: a failure is logged and reported through
        ``self.downloaded = False`` instead of being raised to the caller.
        """
        import logging

        handlers = {
            SourceType.VREDDIT: self._download_vreddit,
            SourceType.REDGIFS: self._download_redgifs,
            SourceType.GFYCAT: self._download_gifycat,
            SourceType.YOUTUBE: self._download_youtube,
            SourceType.IMAGURJPG: self._download_raw_file,
            SourceType.IREDDIT: self._download_raw_file,
            SourceType.GREDDIT: self._download_gallery_reddit,
        }
        handler = handlers.get(self.source_type)
        if handler is None:
            return
        try:
            handler()
        except Exception:
            # Deliberately broad: one bad post must not abort the caller,
            # but the failure should at least be visible in the logs.
            logging.getLogger(__name__).exception(
                "Failed to download %s", self.url)
            self.downloaded = False

    def delete(self):
        """Remove every recorded file that still exists on disk."""
        for path in self.paths:
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Never written, or already removed: nothing left to do.
                pass

    @staticmethod
    def _final_path(prepared):
        """Return the on-disk name for a file youtube-dl reported as ``prepared``.

        The ``.mp4`` variant of the name is preferred (merged downloads end
        up there).  Only when that file is absent and the originally named
        file exists is the original name returned instead.
        """
        merged = f'{os.path.splitext(prepared)[0]}.mp4'
        if not os.path.exists(merged) and os.path.exists(prepared):
            return prepared
        return merged

    @staticmethod
    def _ydl_opts(format_selector):
        """Build the youtube-dl options shared by all video handlers."""
        return {
            'format': format_selector,
            'merge_output_format': 'mp4',
            'outtmpl': OUTTMPL
        }

    def _download_youtube_dls(self, ydl_opts):
        """Run youtube-dl on ``self.url`` and record the resulting files.

        Args:
            ydl_opts: option dictionary handed to ``youtube_dl.YoutubeDL``.
        """
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(self.url, download=True)
            # A playlist result carries one info dict per entry; a single
            # video is its own (only) entry.
            if info.get('_type') == 'playlist':
                entries = info['entries']
            else:
                entries = [info]
            for entry in entries:
                self.paths.append(
                    self._final_path(ydl.prepare_filename(entry)))

        self.downloaded = True

    def _download_redgifs(self):
        """Download a redgifs clip in the best single-file format."""
        self._download_youtube_dls(self._ydl_opts('best'))

    def _download_gifycat(self):
        """Download a gfycat clip in the best single-file format."""
        self._download_youtube_dls(self._ydl_opts('best'))

    def _download_vreddit(self):
        """Download a v.redd.it video, with audio when available."""
        self._download_youtube_dls(
            self._ydl_opts('bestvideo+bestaudio/bestvideo'))

    def _download_youtube(self):
        """Download a YouTube video, preferring mp4 video with m4a audio."""
        self._download_youtube_dls(self._ydl_opts(
            'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio'))

    def _download_raw_file(self, url=None):
        """Stream a file over HTTP into ``source_<basename of the URL path>``.

        Args:
            url: address to fetch; defaults to ``self.url``.

        ``self.downloaded`` is set to True and the path recorded only after
        the whole body was written; a non-200 response sets it to False.
        """
        if url is None:
            url = self.url
        path = f'source_{os.path.basename(urlparse(url).path)}'

        # The context manager releases the streamed connection on every
        # exit path; the timeout keeps a stalled server from hanging us.
        with requests.get(url, stream=True,
                          timeout=self.REQUEST_TIMEOUT) as r:
            if r.status_code != 200:
                self.downloaded = False
                return
            r.raw.decode_content = True
            with open(path, 'wb') as f:
                shutil.copyfileobj(r.raw, f)

        self.paths.append(path)
        self.downloaded = True

    def _download_gallery_reddit(self):
        """Download every image of the reddit gallery at ``self.url``.

        ``self.url`` is left untouched; each image URL is passed straight to
        ``_download_raw_file``.
        """
        submission = self.reddit.submission(url=self.url)
        # NOTE(review): assumes each metadata entry has the image URL under
        # ['s']['u']; a missing/empty media_metadata is treated as no images.
        metadata = getattr(submission, 'media_metadata', None) or {}
        for item in metadata.values():
            self._download_raw_file(item['s']['u'])

    @staticmethod
    def _get_source_type(url):
        """Classify ``url`` by host; the first matching rule wins.

        Returns:
            The matching ``SourceType`` member, or ``SourceType.UNKNOWN``.
        """
        rules = (
            (r"v\.redd\.it", SourceType.VREDDIT),
            (r"i\.redd\.it.*\.(jpg|jpeg)$", SourceType.IREDDIT),
            # Also accepts the bare domain and youtu.be short links.
            (r"[./](youtube\.com|youtu\.be)", SourceType.YOUTUBE),
            (r"redgifs\.com", SourceType.REDGIFS),
            (r"i\.imgur\.com.*\.(jpg|jpeg)$", SourceType.IMAGURJPG),
            (r"gfycat\.com", SourceType.GFYCAT),
            (r"reddit\.com/gallery", SourceType.GREDDIT),
        )
        for pattern, source_type in rules:
            if re.search(pattern, url):
                return source_type

        return SourceType.UNKNOWN
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..266bb53
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,70 @@
+import argparse
+import os
+
+import praw
+from time import sleep
+
+from nextcloud import NextCloud
+from praw.models.util import stream_generator
+
+from downloader import Downloader
+from uploader import create_folders, upload_file
+
def _build_parser():
    """Assemble the command-line parser for the saved-post monitor.

    Each option falls back to an environment variable, and failing that to
    a fixed default, so the tool can be configured by flags or environment.
    """
    # (short flag, long flag, help text, environment variable, fallback)
    option_table = (
        ('-c', '--client-id', "Reddit client id", 'CLIENT_ID', ''),
        ('-s', '--client-secret', "Reddit client secret",
         'CLIENT_SECRET', ''),
        ('-u', '--reddit-username', "Reddit username",
         'REDDIT_USERNAME', ''),
        ('-p', '--reddit-password', "Reddit user password",
         'REDDIT_PASSWORD', ''),
        ('-P', '--nextcloud-password', "Nextcloud Password",
         'NEXTCLOUD_PASSWORD', ''),
        ('-U', '--nextcloud-username', "Nextcloud Username",
         'NEXTCLOUD_USERNAME', ''),
        ('-o', '--nextcloud-host', "Nextcloud Host",
         'NEXTCLOUD_HOST', 'localhost'),
        ('-d', '--nextcloud-path', "Nextcloud root folder",
         'NEXTCLOUD_PATH', 'im'),
    )
    built = argparse.ArgumentParser(description="Monitor saved")
    for short_flag, long_flag, help_text, env_name, fallback in option_table:
        built.add_argument(short_flag, long_flag,
                           help=help_text,
                           default=os.environ.get(env_name, fallback))
    return built


parser = _build_parser()
+
def remote_path(root, subreddit, filename):
    """Return the upload destination ``<root>/<subreddit>/<filename>``."""
    return f"{root}/{subreddit}/{filename}"


def upload(post, reddit, nxc, root):
    """Copy the media of one saved item into Nextcloud below ``root``.

    Args:
        post: saved item; its ``url`` and ``subreddit`` are used.
        reddit: Reddit client handed to the downloader.
        nxc: Nextcloud client used for folder creation and upload.
        root: remote root folder; files go to ``<root>/<subreddit>/``.
    """
    # NOTE(review): saved listings can presumably contain comments as well,
    # which carry no ``url``; such items are skipped instead of crashing
    # the whole stream.
    url = getattr(post, "url", None)
    if not url:
        return

    create_folders(f"{root}/{post.subreddit}/", nxc)
    with Downloader(url=url, reddit=reddit) as d:
        d.download()
        # Upload inside the ``with`` block: the files are deleted on exit.
        for path in d.paths:
            if "-mobile" in path:  # Remove mobile version
                continue
            # Upload below the same root the folders were created under
            # (previously this was hard-coded to "im").
            upload_file(path, remote_path(root, post.subreddit, path), nxc)


def main():
    """Parse the options, connect to both services and mirror saved posts."""
    args = parser.parse_args()
    reddit = praw.Reddit(client_id=args.client_id,
                         client_secret=args.client_secret,
                         password=args.reddit_password,
                         user_agent="hcrawler",
                         username=args.reddit_username)

    # SECURITY(review): 'verify': False presumably disables TLS certificate
    # verification for the Nextcloud session.  Kept as-is so self-signed
    # hosts keep working, but it should become opt-in.
    nxc = NextCloud(
        args.nextcloud_host,
        user=args.nextcloud_username,
        password=args.nextcloud_password,
        session_kwargs={'verify': False}
    )

    redditor = reddit.redditor(args.reddit_username)

    for post in stream_generator(redditor.saved, attribute_name="name"):
        upload(post, reddit, nxc, args.nextcloud_path)


if __name__ == "__main__":
    main()
diff --git a/src/uploader.py b/src/uploader.py
new file mode 100644
index 0000000..7c7641a
--- /dev/null
+++ b/src/uploader.py
@@ -0,0 +1,19 @@
+from functools import reduce
+
+from nextcloud import NextCloud
+
+
def _create_folder(folder: str, nxc: "NextCloud") -> str:
    """Create ``folder`` through ``nxc`` and return the same path.

    Returning the path lets the caller keep extending it level by level.
    """
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by the nextcloud client library - confirm against its docs.
    nxc.create_folder(folder, True)
    return folder


def create_folders(path: str, nxc: "NextCloud"):
    """Create every folder level of ``path`` on the server, top-down.

    ``"a/b/c"`` results in create calls for ``/a``, ``/a/b`` and ``/a/b/c``.
    Leading, trailing and doubled slashes are ignored, and an empty path
    creates nothing.

    Args:
        path: slash-separated remote folder path.
        nxc: Nextcloud client used to create the folders.
    """
    current = ""
    for segment in path.split("/"):
        if not segment:
            # Empty piece from a leading/trailing/double slash.
            continue
        current = _create_folder(f"{current}/{segment}", nxc)
+
+
def upload_file(local_filename: str, remote_filename: str, nxc: "NextCloud"):
    """Upload one local file to the server through ``nxc.upload_file``.

    Args:
        local_filename: path of the file on this machine.
        remote_filename: destination path on the server.
        nxc: Nextcloud client performing the transfer.

    The client's return value is discarded; this function returns None.
    """
    send = nxc.upload_file
    send(local_filename, remote_filename)