1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
import os
import pytest
from importer.downloader import SourceType, Downloader
# Skip marker for tests that need reddit credentials: the test is skipped
# when any of the listed environment variables is unset or empty.
reddit_env = pytest.mark.skipif(
    any(
        os.environ.get(name, '') == ''
        for name in ('CLIENT_ID', 'CLIENT_SECRET', 'USERNAME', 'PASSWORD')
    ),
    reason="Require reddit env variables to be set.",
)
@pytest.fixture
def mock_ydl_download(mocker):
    """Patch ``YoutubeDL.process_info`` as seen from ``importer.downloader``.

    ``process_info`` is the function responsible for downloading the file,
    so it is replaced with a mock; the mock is returned so tests can assert
    on its calls.
    """
    patch_target = 'importer.downloader.youtube_dl.YoutubeDL.process_info'
    return mocker.patch(patch_target)
@pytest.mark.parametrize(
    ('url', 'source_type'),
    [
        ("https://i.redd.it/pjj1ll1b2rr41.jpg", SourceType.IREDDIT),
        ("https://gfycat.com/presentdangerousdromedary", SourceType.GFYCAT),
        ("https://i.imgur.com/fXLMjfp.jpg", SourceType.IMAGURJPG),
        (
            "https://redgifs.com/watch/ripesnivelingfiddlercrab",
            SourceType.REDGIFS,
        ),
        ("https://www.youtube.com/watch?v=oLkdqptmfng", SourceType.YOUTUBE),
        ("https://v.redd.it/42j6r7i8z7151", SourceType.VREDDIT),
        ("https://www.reddit.com/gallery/mik7c9", SourceType.GREDDIT),
        ("https://duckduckgo.com", SourceType.UNKNOWN),
    ],
)
def test_source_type(url, source_type):
    """Check that a ``Downloader`` built for ``url`` reports ``source_type``."""
    with Downloader(url, "1-A") as downloader:
        detected = downloader.source_type
        assert detected == source_type
@pytest.mark.parametrize(
    ('url', 'paths'),
    [
        (
            "https://gfycat.com/presentdangerousdromedary",
            ["source_presentdangerousdromedary.mp4"],
        ),
        (
            "https://redgifs.com/watch/ripesnivelingfiddlercrab",
            [
                "source_RipeSnivelingFiddlercrab.mp4",
                "source_RipeSnivelingFiddlercrab-mobile.mp4",
            ],
        ),
        (
            "https://www.youtube.com/watch?v=oLkdqptmfng",
            ["source_oLkdqptmfng.mp4"],
        ),
        ("https://v.redd.it/42j6r7i8z7151", ["source_42j6r7i8z7151.mp4"]),
    ],
)
def test_download_youtube_dl(url, paths, mock_ydl_download):
    """Check ``download()`` for URLs exercised with ``process_info`` patched.

    With the ``mock_ydl_download`` fixture active, the test verifies that
    ``downloaded`` goes from ``False`` to ``True``, that ``paths`` equals the
    expected list, and that the patched function was called.
    """
    with Downloader(url, "1-A") as downloader:
        # Nothing has been fetched before download() is invoked.
        assert downloader.downloaded is False

        downloader.download()

        assert downloader.downloaded is True
        assert downloader.paths == paths
        mock_ydl_download.assert_called()
@pytest.mark.parametrize(
    ('url', 'path'),
    [
        ("https://i.redd.it/pjj1ll1b2rr41.jpg", "source_pjj1ll1b2rr41.jpg"),
        ("https://i.imgur.com/fXLMjfp.jpg", "source_fXLMjfp.jpg"),
    ],
)
def test_download_raw_data(url, path):
    """Check ``download()`` for direct image URLs.

    Verifies that ``paths`` holds exactly the expected file name and that
    ``downloaded`` goes from ``False`` to ``True``.
    """
    # NOTE(review): nothing is patched here, so download() presumably hits
    # the real URL -- confirm whether network access is intended.
    with Downloader(url, "1-A") as downloader:
        assert downloader.downloaded is False

        downloader.download()

        expected_paths = [path]
        assert downloader.paths == expected_paths
        assert downloader.downloaded is True
@reddit_env
def test_praw_download():
    """Check that a reddit gallery URL downloads the expected set of files.

    Credentials are read from the ``CLIENT_ID``, ``CLIENT_SECRET``,
    ``USERNAME`` and ``PASSWORD`` environment variables and passed to
    ``Downloader`` as keyword arguments. File order is not checked: the
    resulting ``paths`` are compared as a set.
    """
    credentials = {
        'client_id': os.environ.get('CLIENT_ID', ''),
        'client_secret': os.environ.get('CLIENT_SECRET', ''),
        'username': os.environ.get('USERNAME', ''),
        'password': os.environ.get('PASSWORD', ''),
    }
    expected_files = {
        'source_hlokpsyhgrq61.jpg',
        'source_n31c2y7igrq61.jpg',
        'source_7eg0o76igrq61.jpg',
        'source_whl12jbigrq61.jpg',
        'source_4uok762igrq61.jpg',
        'source_t3pgm64igrq61.jpg',
        'source_ymc4hv9igrq61.jpg',
    }
    gallery_url = "https://www.reddit.com/gallery/mik7c9"

    with Downloader(
        gallery_url,
        "1-A",
        user_agent="hcrawler",
        **credentials,
    ) as downloader:
        assert downloader.downloaded is False

        downloader.download()

        assert downloader.downloaded is True
        assert set(downloader.paths) == expected_files
|