From 74aa2ec7fcd8881b442dce1e6cd24e53ee1c2919 Mon Sep 17 00:00:00 2001 From: Gabriel Arakaki Giovanini Date: Sun, 3 Jul 2022 18:33:43 +0200 Subject: feat: Adiciona tag opcionalmente Agora para ativar a tag no metadata é preciso passar um _query param_ `tag=true`. --- LICENSE | 22 +++++++++++ README.md | 17 +++++--- jnfilter/__init__.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ jnfilter/__main__.py | 4 ++ jnfilter/main.py | 110 --------------------------------------------------- setup.py | 11 +++--- 6 files changed, 152 insertions(+), 121 deletions(-) create mode 100644 LICENSE create mode 100644 jnfilter/__init__.py create mode 100644 jnfilter/__main__.py delete mode 100644 jnfilter/main.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..588d327 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2021 Gabriel Arakaki Giovanini + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next + paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
diff --git a/README.md b/README.md index 71a700f..30a5a41 100644 --- a/README.md +++ b/README.md @@ -45,21 +45,26 @@ Adicionei recentemente uma parte no código pare mexer na metadata do feed (mudar titulo, nome, descrição, etc) para fazer com que o Pocketcast reconheça o feed gerado como se fosse outro feed. Eles usam a metadata para decidir se um feed e novo, então como eu não mexia na metadata ele gerava um link para o feed -original do jovem nerd. +original do jovem nerd. Para ativar essa funcionalidade use o parâmetro +`tag=true`, exemplo: + +``` +https://jnfilter.gabrielgio.me/?q=nerdcash,nerdcast&tag=true +``` Entao os usuarios do Pocketcast tem que ir ao https://pocketcasts.com/submit/ para submeterem a sua URL. Observação, esse processo de submit deles e meio cagado, então se ele não oferecer o feed certo tente trocar as ordens dos parâmetro, se tiver `nerdcast,mamicas` troque para `mamicas,nercast` -o ideal e que cliente de podcast nao obrigue a fazer isso mas fazer o que as outras -opções fazem pior. +o ideal é que cliente de podcast não obrigue a fazer isso mas fazer o que as +outras opções fazem pior. ## Para programadores -E um projeto simples feito em cima do FastApi. Ele vai pegar o _feed_ e -filtrar os itens do _feed_ do podcast. Não tem cache nem nada sendo armazenado, -todo processamento e feito a partir do feed para cada requisição. +É um projeto simples feito em cima do FastAPI. Ele vai pegar o _feed_ e filtrar +os itens do _feed_ do podcast. Não tem cache nem nada sendo armazenado, todo +processamento é feito a partir do feed para cada requisição. 
Para rodar basta instalar os requirements e rodar o seguinte código: diff --git a/jnfilter/__init__.py b/jnfilter/__init__.py new file mode 100644 index 0000000..2c7641a --- /dev/null +++ b/jnfilter/__init__.py @@ -0,0 +1,109 @@ +import re +import httpx +import uvicorn + +from functools import reduce +from typing import List, Iterator, Union +from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace +from fastapi import FastAPI +from starlette.responses import Response, PlainTextResponse + + +app = FastAPI() + +URL = "https://jovemnerd.com.br/feed-nerdcast/" + +RegexCollection = { + "nerdcast": "NerdCast [0-9]+[a-c]* -", + "empreendedor": "Empreendedor [0-9]+ -", + "mamicas": "Caneca de Mamicas [0-9]+ -", + "english": "Speak English [0-9]+ -", + "nerdcash": "NerdCash [0-9]+ -", + "bunker": "Lá do Bunker [0-9]+ -", + "tech": "NerdTech [0-9]+ -", + "genera": "Generacast [0-9]+ -", +} + +ATOM = "http://www.w3.org/2005/Atom" +ITUNES = "http://www.itunes.com/dtds/podcast-1.0.dtd" +GOOGLEPLAY = "http://www.google.com/schemas/play-podcasts/1.0" + +register_namespace("googleplay", GOOGLEPLAY) +register_namespace("itunes", ITUNES) +register_namespace("atom", ATOM) + + +class XMLResponse(Response): + media_type = "application/xml" + + +def match(title: str, series: List[str]) -> bool: + def _match(s): + return re.match(RegexCollection[s], title) is not None + + return any(_match(s) for s in series) + + +def filter_xml(xml_str: str, series: List[str], tag: Union[bool, None] = False) -> str: + tree = ElementTree(fromstring(xml_str)) + tree_root = tree.getroot() + for channel in tree_root.findall("./channel"): + + if tag: + tag = f' [{",".join(series)}]'.upper() + channel.find("title").text += tag + channel.find("description").text += tag + channel.find("link").text += f"?{tag}" + channel.find(f"{{{ITUNES}}}author").text += tag + channel.find(f"{{{GOOGLEPLAY}}}author").text += tag + channel.find(f"{{{ITUNES}}}subtitle").text += 
tag + channel.find(f"{{{ITUNES}}}summary").text += tag + + for item in channel.findall("item"): + title = item.find("title").text + if not match(title, series): + channel.remove(item) + + return tostring(tree_root, encoding='utf8', method='xml') + + +def filter_titles_xml(xml_str) -> Iterator[str]: + tree = ElementTree(fromstring(xml_str)) + tree_root = tree.getroot() + for item in tree_root.findall("./channel/item"): + yield item.find("title").text + + +async def load_and_filter(series: str, tag: Union[bool, None] = False) -> str: + series = series or 'nerdcast' + series = series.split(',') + async with httpx.AsyncClient() as client: + response = await client.get(URL) + xml_str = response.content + return filter_xml(xml_str, series, tag) + + +async def load_titles() -> Iterator[str]: + async with httpx.AsyncClient() as client: + response = await client.get(URL) + xml_str = response.content + return filter_titles_xml(xml_str) + +@app.head("/") +@app.get("/", response_class=XMLResponse) +async def root(q: str = '', tag: Union[bool, None] = False): + return await load_and_filter(q, tag) + + +@app.get("/titles", response_class=PlainTextResponse) +async def titles(): + titles = await load_titles() + return "\n".join(titles) + + +@app.get("/series") +async def series(): + return list(RegexCollection) + +def run(): + uvicorn.run(app=app, host="0.0.0.0", port=32000) diff --git a/jnfilter/__main__.py b/jnfilter/__main__.py new file mode 100644 index 0000000..ff2b876 --- /dev/null +++ b/jnfilter/__main__.py @@ -0,0 +1,4 @@ +from . 
import run + +if __name__ == '__main__': + run() diff --git a/jnfilter/main.py b/jnfilter/main.py deleted file mode 100644 index 193975f..0000000 --- a/jnfilter/main.py +++ /dev/null @@ -1,110 +0,0 @@ -import re -import httpx -import uvicorn - -from functools import reduce -from typing import List, Iterator -from xml.etree.ElementTree import ElementTree, fromstring, tostring, register_namespace -from fastapi import FastAPI -from starlette.responses import Response, PlainTextResponse - - -app = FastAPI() - -URL = "https://jovemnerd.com.br/feed-nerdcast/" - -RegexCollection = { - "nerdcast": "NerdCast [0-9]+[a-c]* -", - "empreendedor": "Empreendedor [0-9]+ -", - "mamicas": "Caneca de Mamicas [0-9]+ -", - "english": "Speak English [0-9]+ -", - "nerdcash": "NerdCash [0-9]+ -", - "bunker": "Lá do Bunker [0-9]+ -", - "tech": "NerdTech [0-9]+ -", - "genera": "Generacast [0-9]+ -", -} - -ATOM = "http://www.w3.org/2005/Atom" -ITUNES = "http://www.itunes.com/dtds/podcast-1.0.dtd" -GOOGLEPLAY = "http://www.google.com/schemas/play-podcasts/1.0" - -register_namespace("googleplay", GOOGLEPLAY) -register_namespace("itunes", ITUNES) -register_namespace("atom", ATOM) - - -class XMLResponse(Response): - media_type = "application/xml" - - -def match(title: str, series: List[str]) -> bool: - def _match(s): - return re.match(RegexCollection[s], title) is not None - - return reduce(lambda x, y: x or _match(y), series, False) - - -def filter_xml(xml_str: str, series: List[str]) -> str: - tree = ElementTree(fromstring(xml_str)) - tree_root = tree.getroot() - for channel in tree_root.findall("./channel"): - tag = f' [{",".join(series)}]'.upper() - - channel.find("title").text += tag - channel.find("description").text += tag - channel.find("link").text += f"?{tag}" - channel.find(f"{{{ITUNES}}}author").text += tag - channel.find(f"{{{GOOGLEPLAY}}}author").text += tag - channel.find(f"{{{ITUNES}}}subtitle").text += tag - channel.find(f"{{{ITUNES}}}summary").text += tag - - print({elem.tag 
for elem in channel.iter()}) - - for item in channel.findall("item"): - title = item.find("title").text - if not match(title, series): - channel.remove(item) - - return tostring(tree_root, encoding='utf8', method='xml') - - -def filter_titles_xml(xml_str) -> Iterator[str]: - tree = ElementTree(fromstring(xml_str)) - tree_root = tree.getroot() - for item in tree_root.findall("./channel/item"): - yield item.find("title").text - - -async def load_and_filter(series: str) -> str: - series = series or 'nerdcast' - series = series.split(',') - async with httpx.AsyncClient() as client: - response = await client.get(URL) - xml_str = response.content - return filter_xml(xml_str, series) - - -async def load_titles() -> Iterator[str]: - async with httpx.AsyncClient() as client: - response = await client.get(URL) - xml_str = response.content - return filter_titles_xml(xml_str) - -@app.head("/") -@app.get("/", response_class=XMLResponse) -async def root(q: str = ''): - return await load_and_filter(q) - - -@app.get("/titles", response_class=PlainTextResponse) -async def titles(): - titles = await load_titles() - return "\n".join(titles) - - -@app.get("/series") -async def titles(): - return [i[0] for i in RegexCollection.items()] - -def run(): - uvicorn.run(app=app, host="0.0.0.0", port=32000) diff --git a/setup.py b/setup.py index 8f95a75..07add82 100644 --- a/setup.py +++ b/setup.py @@ -8,16 +8,17 @@ requirements = [ setup(name='jnfilter', - version='0.2.1', + version='0.3.0', description='A FastAPI server to filter Nercast podcast feed', url='https://git.sr.ht/~gabrielgio/jnfilter', author='Gabriel Arakaki Giovanini', author_email='mail@gabrielgio.me', license='MIT', packages=['jnfilter'], - entry_points=""" - [console_scripts] - jnfilterd=jnfilter.main:run - """, + entry_points={ + 'console_scripts': [ + 'jnfilterd=jnfilter:run', + ] + }, install_requires=requirements, zip_safe=False) -- cgit v1.2.3