aboutsummaryrefslogtreecommitdiff
path: root/jnfilter/__init__.py
blob: 35be6bd4c97fbe3e44b81d07c076bb3662780ae4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import re
import httpx

from functools import reduce
from typing import List, Iterator, Union
from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace

from flask import Flask, Response, request

# Flask application object; the route handlers in this module are registered
# on it through the `@app.route` decorators.
app =  Flask(__name__)

# Address of the upstream podcast feed that every request downloads.
URL = "https://jovemnerd.com.br/feed-nerdcast/"

# Maps each series key (the names accepted in the `q` query parameter) to the
# regular expression that recognises an episode of that series. The patterns
# are applied with `re.match`, so they are anchored at the start of the title.
RegexCollection = {
    "nerdcast": r"NerdCast [0-9]+[a-c]* -",
    "empreendedor": r"Empreendedor [0-9]+ -",
    "mamicas": r"Caneca de Mamicas [0-9]+ -",
    "english": r"Speak English [0-9]+ -",
    "nerdcash": r"NerdCash [0-9]+ -",
    # One or more spaces before the episode number: the previous pattern
    # demanded exactly two, so titles written with a single space (like every
    # other series) were never matched. Double-spaced titles still match.
    "bunker": r"Lá do Bunker +[0-9]+ -",
    "tech": r"NerdTech [0-9]+ -",
    "genera": r"Generacast [0-9]+ -",
}

# Namespace URIs of the feed extensions that appear in the podcast XML.
GOOGLEPLAY = "http://www.google.com/schemas/play-podcasts/1.0"
ITUNES = "http://www.itunes.com/dtds/podcast-1.0.dtd"
ATOM = "http://www.w3.org/2005/Atom"

# Bind a readable prefix to each namespace so that serialised output uses
# these prefixes for the extension elements.
for _prefix, _uri in (
    ("googleplay", GOOGLEPLAY),
    ("itunes", ITUNES),
    ("atom", ATOM),
):
    register_namespace(_prefix, _uri)



def match(title: str, series: List[str]) -> bool:
    """Tell whether *title* belongs to at least one of the given series.

    Each entry of *series* is a key of ``RegexCollection``; its pattern is
    tried against the start of *title*. Evaluation stops at the first hit.
    An empty *series* list never matches. A key that is not present in
    ``RegexCollection`` raises ``KeyError`` when it is reached.
    """
    return any(
        re.match(RegexCollection[name], title) is not None
        for name in series
    )


def filter_xml(
    xml_str: Union[str, bytes],
    series: List[str],
    tag: Union[bool, None] = False,
) -> bytes:
    """Remove from the feed every item that is not part of *series*.

    Args:
        xml_str: The RSS document, as text or as raw bytes.
        series: Keys of ``RegexCollection`` naming the series to keep.
        tag: When truthy, a marker of the form ``" [A,B]"`` (the series keys,
            comma joined and upper-cased) is appended to the channel's
            title, description, author, subtitle and summary, and to the
            channel link after a ``?``.

    Returns:
        The filtered document serialised as UTF-8 encoded bytes.

    Raises:
        KeyError: Propagated from ``match`` when *series* holds a key that is
            not in ``RegexCollection``.
    """
    tree_root = fromstring(xml_str)
    # Kept in its own name so the boolean ``tag`` flag is never overwritten.
    marker = f' [{",".join(series)}]'.upper()

    def append_text(parent, path: str, suffix: str) -> None:
        """Append *suffix* to the text of ``parent/path`` if that child exists."""
        element = parent.find(path)
        if element is not None:
            # ``text`` is None for an empty element such as ``<title/>``.
            element.text = (element.text or "") + suffix

    for channel in tree_root.findall("./channel"):
        if tag:
            # Channels are not required to carry every extension element, so
            # missing ones are skipped instead of raising.
            for path in (
                "title",
                "description",
                f"{{{ITUNES}}}author",
                f"{{{GOOGLEPLAY}}}author",
                f"{{{ITUNES}}}subtitle",
                f"{{{ITUNES}}}summary",
            ):
                append_text(channel, path, marker)
            append_text(channel, "link", f"?{marker}")

        # ``findall`` returns a list, so removing while looping is safe.
        for item in channel.findall("item"):
            title = item.findtext("title")
            # An item without a usable title cannot belong to any series.
            if not title or not match(title, series):
                channel.remove(item)

    return tostring(tree_root, encoding='utf8', method='xml')


def filter_titles_xml(xml_str) -> Iterator[str]:
    """Yield the title of each ``channel/item`` of the feed, in document order.

    Items that have no ``<title>`` child, or whose title is empty, are
    skipped so that only real strings are produced (a ``None`` would break
    callers that join the titles).

    Args:
        xml_str: The RSS document, as text or as raw bytes.
    """
    for item in fromstring(xml_str).findall("./channel/item"):
        # ``findtext`` gives None for a missing child and "" for an empty one.
        title = item.findtext("title")
        if title:
            yield title


def load_and_filter(series: str, tag: Union[bool, None] = False) -> bytes:
    """Download the feed and keep only the items of the requested series.

    Args:
        series: Comma separated keys of ``RegexCollection``. Surrounding
            whitespace and empty entries are ignored; when nothing is left
            the ``nerdcast`` series is used.
        tag: Forwarded to ``filter_xml``; when truthy the channel metadata is
            marked with the selected series.

    Returns:
        The filtered feed as produced by ``filter_xml``.

    Raises:
        httpx.HTTPStatusError: When the feed request does not succeed.
    """
    names = [name.strip() for name in (series or "").split(",") if name.strip()]
    if not names:
        names = ["nerdcast"]

    with httpx.Client() as client:
        response = client.get(URL)
        # Fail with a clear HTTP error rather than trying to parse an error
        # page as if it were the feed.
        response.raise_for_status()

    # The body has been fully read by now, so the client can already be closed.
    return filter_xml(response.content, names, tag)


def load_titles() -> Iterator[str]:
    """Download the feed and return an iterator over its item titles.

    Raises:
        httpx.HTTPStatusError: When the feed request does not succeed.
    """
    with httpx.Client() as client:
        response = client.get(URL)
        # Fail with a clear HTTP error rather than trying to parse an error
        # page as if it were the feed.
        response.raise_for_status()

    # The body has been fully read by now, so the lazy title iterator does
    # not need the client to stay open.
    return filter_titles_xml(response.content)

@app.route("/", methods=['GET', 'HEAD'])
def root(q: str = '', tag: Union[bool, None] = False):
    """Serve the feed filtered by the series named in the query string.

    Query parameters:
        q: Comma separated series keys (see ``RegexCollection``). Whitespace
            around a key and empty entries are ignored. When omitted, the
            default of ``load_and_filter`` applies.
        tag: Enables marking of the channel metadata. Any non-empty value
            turns it on, except ``0``, ``false``, ``no`` and ``off``
            (case-insensitive).

    The function parameters are placeholders only: both values are always
    read from the request.

    Returns:
        The filtered XML with status 200, or a plain-text message with
        status 400 when ``q`` names a series that does not exist.
    """
    requested = request.args.get("q", "")
    names = [name.strip() for name in requested.split(",") if name.strip()]

    # Reject unknown keys up front; otherwise the lookup in ``match`` raises
    # KeyError and the client only sees an opaque server error.
    unknown = [name for name in names if name not in RegexCollection]
    if unknown:
        message = f"unknown series: {', '.join(unknown)}"
        return message, 400, {'Content-Type': 'text/plain; charset=utf-8'}

    # Query values are strings, so "false" or "0" would otherwise be truthy.
    raw_tag = request.args.get("tag", "")
    tag = raw_tag.strip().lower() not in ("", "0", "false", "no", "off")

    return load_and_filter(",".join(names), tag), 200,  {'Content-Type': 'application/xml'}


@app.route("/titles", methods=['GET'])
def titles():
    """Serve the titles of all feed items, one per line, as plain text."""
    body = "\n".join(load_titles())
    # Declare the body as plain text: with the framework's default HTML type
    # the line breaks collapse and titles coming from the remote feed would
    # be interpreted as markup.
    return body, 200, {'Content-Type': 'text/plain; charset=utf-8'}


@app.route("/series", methods=['GET'])
def series():
    """Return the series keys defined in ``RegexCollection``, in definition order."""
    return list(RegexCollection)