diff options
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 87 |
1 files changed, 87 insertions, 0 deletions
"""Filtered proxy for the Jovem Nerd podcast RSS feed.

Downloads the upstream feed at ``URL`` and serves it back containing only
the episodes whose title matches one of the requested series.
"""
import re
from functools import reduce
from typing import Iterator, List, Union
from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace

import httpx
from fastapi import FastAPI
from starlette.responses import Response, PlainTextResponse

app = FastAPI()

URL = "https://jovemnerd.com.br/feed-nerdcast/"

# Series name -> regular expression matched against the START of an episode
# title (the patterns are used with ``re.match``).
RegexCollection = {
    "nerdcast": "NerdCast [0-9]+[a-c]* -",
    "empreendedor": "Empreendedor [0-9]+ -",
    "mamicas": "Caneca de Mamicas [0-9]+ -",
    "english": "Speak English [0-9]+ -",
    "nerdcash": "NerdCash [0-9]+ -",
    "bunker": "Lá do Bunker [0-9]+ -",
}

# Series served when the client does not ask for any.
DEFAULT_SERIES = "nerdcast"

# Register the prefixes used by the feed so that ``tostring`` writes them
# back instead of auto-generated ``ns0``/``ns1`` prefixes.
register_namespace("googleplay", "http://www.google.com/schemas/play-podcasts/1.0")
register_namespace("itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
register_namespace("atom", "http://www.w3.org/2005/Atom")


class XMLResponse(Response):
    """Response whose content type is ``application/xml``."""

    media_type = "application/xml"


def match(title: str, series: List[str]) -> bool:
    """Return True when *title* matches the pattern of any name in *series*.

    Names that have no entry in ``RegexCollection`` never match, and a
    missing/empty title matches nothing; neither case raises.
    """
    if not title:
        return False
    patterns = (RegexCollection.get(name) for name in series)
    return any(
        pattern is not None and re.match(pattern, title) is not None
        for pattern in patterns
    )


def filter_xml(xml_str: Union[str, bytes], series: List[str]) -> bytes:
    """Drop every feed item whose title does not belong to *series*.

    Returns the filtered document serialized as bytes (with an XML
    declaration), which is what ``tostring`` produces for ``encoding='utf8'``.
    """
    feed = fromstring(xml_str)
    for channel in feed.findall("./channel"):
        # findall() returns a list, so removing children while looping over
        # it does not disturb the iteration.
        for item in channel.findall("item"):
            # findtext() tolerates items with a missing or empty <title>.
            if not match(item.findtext("title", default=""), series):
                channel.remove(item)

    return tostring(feed, encoding='utf8', method='xml')


def filter_titles_xml(xml_str: Union[str, bytes]) -> Iterator[str]:
    """Yield the title of every item in the feed, in document order.

    Items with a missing or empty ``<title>`` yield an empty string.
    """
    feed = fromstring(xml_str)
    for item in feed.findall("./channel/item"):
        yield item.findtext("title", default="")


def _parse_series(raw: str) -> List[str]:
    """Split a comma-separated series string, falling back to the default."""
    names = [name.strip() for name in (raw or "").split(",")]
    names = [name for name in names if name]
    return names or [DEFAULT_SERIES]


async def _fetch_feed() -> bytes:
    """Download the upstream feed and return its raw body."""
    async with httpx.AsyncClient() as client:
        response = await client.get(URL)
        # Fail loudly on an upstream error instead of trying to parse an
        # error page as XML.
        response.raise_for_status()
        return response.content


async def load_and_filter(series: str) -> bytes:
    """Fetch the feed and keep only the comma-separated *series* names.

    An empty *series* selects ``DEFAULT_SERIES``.
    """
    return filter_xml(await _fetch_feed(), _parse_series(series))


async def load_titles() -> Iterator[str]:
    """Fetch the feed and return an iterator over all episode titles."""
    return filter_titles_xml(await _fetch_feed())


@app.get("/", response_class=XMLResponse)
async def root(q: str = ''):
    """Serve the feed filtered by the comma-separated series in ``q``."""
    return await load_and_filter(q)


@app.get("/titles", response_class=PlainTextResponse)
async def titles():
    """Serve every episode title of the upstream feed, one per line."""
    return "\n".join(await load_titles())


@app.get("/series")
async def list_series():
    """Serve the names of the series that can be passed to ``/?q=``."""
    return list(RegexCollection)