mirror of
https://codeberg.org/Ewen/rudibridge.git
synced 2024-12-22 11:12:32 +00:00
feat: first somewhat (hardcoded) working version
Works only for Ouest-France, need to generalize now.
This commit is contained in:
parent
d21ad550d9
commit
8aebaca9ad
25
api/Dockerfile
Normal file
25
api/Dockerfile
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
FROM python:3.11-alpine as requirements
|
||||||
|
|
||||||
|
RUN apk update \
|
||||||
|
&& apk add --no-cache \
|
||||||
|
build-base \
|
||||||
|
chromium \
|
||||||
|
gcc \
|
||||||
|
libc-dev \
|
||||||
|
linux-headers \
|
||||||
|
pipx \
|
||||||
|
python3-dev \
|
||||||
|
&& pipx install poetry
|
||||||
|
|
||||||
|
ENV PATH=/root/.local/bin:${PATH}
|
||||||
|
|
||||||
|
RUN mkdir /app
|
||||||
|
# With several sources, the destination must be a directory path ending in "/".
COPY pyproject.toml poetry.lock /app/
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
RUN poetry --version
|
||||||
|
RUN /root/.local/bin/poetry install
|
||||||
|
|
||||||
|
|
||||||
|
#CMD ["poetry", "run", "gunicorn", "--bind=0.0.0.0:8080", "--reload", "app:app"]
|
||||||
|
CMD ["poetry", "run", "flask", "--app", "api", "run", "--host=0.0.0.0", "--port=8080", "--debug"]
|
2
api/README.md
Normal file
2
api/README.md
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
## Rudibridge
|
||||||
|
A rudimentary RSS bridge for changing web pages that don't have an RSS feed.
|
32
api/api/__init__.py
Normal file
32
api/api/__init__.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(test_config=None):
    """Application factory: build and configure the Flask app.

    Args:
        test_config: Optional mapping of configuration values. When given,
            it is applied on top of the defaults and the environment is
            not consulted; when ``None``, settings are loaded with
            ``app.config.from_prefixed_env()``.

    Returns:
        The configured ``Flask`` application, with the database hooks and
        the feed blueprint registered.

    Raises:
        OSError: If the instance folder cannot be created (an already
            existing folder is not an error).
    """
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # FIXME: development-only secret key, must be overridden in production.
        SECRET_KEY="dev",
        DATABASE=os.path.join(app.instance_path, "db.sqlite"),
    )

    if test_config is None:
        app.config.from_prefixed_env()
    else:
        app.config.from_mapping(test_config)

    # The SQLite file lives in the instance folder, so it has to exist.
    # exist_ok tolerates "already there" while still surfacing real failures
    # (permissions, read-only filesystem) instead of hiding them.
    os.makedirs(app.instance_path, exist_ok=True)

    # Imported here rather than at module level, as in the original layout.
    from . import db

    db.init_app(app)

    from . import feed

    app.register_blueprint(feed.bp)

    return app
|
11
api/api/app.py
Normal file
11
api/api/app.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route("/")
def hello():
    """Return a small HTML greeting for the root URL."""
    greeting = "<p>Coucou.</p>"
    return greeting
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app.run()
|
0
api/api/config.py
Normal file
0
api/api/config.py
Normal file
38
api/api/db.py
Normal file
38
api/api/db.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import sqlite3
|
||||||
|
import click
|
||||||
|
from flask import current_app, g
|
||||||
|
|
||||||
|
|
||||||
|
def get_db():
    """Return the SQLite connection stored on ``g``, opening it on first use.

    The connection targets ``current_app.config["DATABASE"]`` and yields
    ``sqlite3.Row`` rows.
    """
    if "db" in g:
        return g.db

    connection = sqlite3.connect(
        current_app.config["DATABASE"],
        detect_types=sqlite3.PARSE_DECLTYPES,
    )
    connection.row_factory = sqlite3.Row
    g.db = connection
    return g.db
|
||||||
|
|
||||||
|
|
||||||
|
def close_db(e=None):
    """Close and forget the connection stored on ``g``, if there is one.

    Args:
        e: Unused; accepted so the function can be registered as a
            teardown callback.
    """
    connection = g.pop("db", None)
    if connection is None:
        return
    connection.close()
|
||||||
|
|
||||||
|
|
||||||
|
def init_db():
    """Execute the bundled ``schema.sql`` script against the database."""
    connection = get_db()

    with current_app.open_resource("schema.sql") as schema_file:
        script = schema_file.read().decode("utf8")
        connection.executescript(script)
|
||||||
|
|
||||||
|
|
||||||
|
# Registered as the "init-db" CLI command.
# (Documented with comments on purpose: a docstring would become the
# command's help text.)
@click.command("init-db")
def init_db_command():
    # Run the schema script, then confirm on the console.
    init_db()
    click.echo("Initialized the database.")
|
||||||
|
|
||||||
|
|
||||||
|
def init_app(app):
    """Hook the database helpers into ``app``.

    Adds the ``init-db`` command to the app's CLI and registers
    ``close_db`` as an app-context teardown callback.
    """
    teardown_callback = close_db
    app.teardown_appcontext(teardown_callback)

    cli_command = init_db_command
    app.cli.add_command(cli_command)
|
19
api/api/feed.py
Normal file
19
api/api/feed.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from flask import Blueprint, make_response, render_template
|
||||||
|
|
||||||
|
from api.db import get_db
|
||||||
|
|
||||||
|
from .scraper import scrape
|
||||||
|
|
||||||
|
bp = Blueprint("feed", __name__, url_prefix="/feed")
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/", methods=("GET",))
def parse_page():
    """Scrape the hard-coded Ouest-France page and serve it as an RSS feed.

    Returns:
        A response whose body is the rendered ``rss.xml`` template and
        whose ``Content-Type`` is ``application/rss+xml``.
    """
    from datetime import timezone

    link = "https://www.ouest-france.fr/bretagne/rennes-35000/"
    feed = scrape(link)

    # The template prints build_date followed by a literal "+0000" offset,
    # so the timestamp must really be UTC rather than naive local time.
    build_date = datetime.now(timezone.utc)

    rss_xml = render_template("rss.xml", feed=feed, build_date=build_date)
    response = make_response(rss_xml)
    response.headers["Content-Type"] = "application/rss+xml"
    return response
|
6
api/api/schema.sql
Normal file
6
api/api/schema.sql
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
DROP TABLE IF EXISTS feed;
|
||||||
|
|
||||||
|
CREATE TABLE feed (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
url TEXT NOT NULL
|
||||||
|
);
|
63
api/api/scraper.py
Normal file
63
api/api/scraper.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import botasaurus as bt
|
||||||
|
import dateparser
|
||||||
|
|
||||||
|
|
||||||
|
class FeedItem:
    """A single entry of a generated feed.

    Attributes are plain values consumed by the RSS template:
    ``title``, ``content``, ``author``, ``link`` and ``item_datetime``
    (an ISO-8601 string, or ``None`` when the date is unknown).
    """

    # Sentinel telling "argument omitted" apart from an explicit None.
    _UNSET = object()

    def __init__(self, title, content, author, link, item_datetime=_UNSET):
        """Store the item fields.

        Args:
            title: Item title, or ``None``.
            content: Item summary text, or ``None``.
            author: Author name.
            link: URL of the item, or ``None``.
            item_datetime: ``datetime`` of publication. When omitted, the
                current time at construction is used (not the time the
                module was imported). An explicit ``None`` means
                "unknown" and is stored as ``None``.
        """
        if item_datetime is self._UNSET:
            item_datetime = datetime.now()

        self.title = title
        self.content = content
        self.author = author
        self.link = link
        self.item_datetime = (
            item_datetime.isoformat() if item_datetime is not None else None
        )

    def __lt__(self, other):
        """Order by date when both items have one, otherwise by title.

        Missing titles compare as empty strings so that sorting never
        raises on ``None``.
        """
        if not isinstance(other, FeedItem):
            return NotImplemented
        if self.item_datetime and other.item_datetime:
            return self.item_datetime < other.item_datetime
        return (self.title or "") < (other.title or "")
|
||||||
|
|
||||||
|
class Feed:
    """Container for a scraped feed: its title, source URL and items."""

    def __init__(self, title, url, items):
        """Keep the given title, source URL and item list as attributes."""
        self.title, self.url, self.items = title, url, items
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@bt.request(output=None)
def scrape(request, link):
    """Scrape an Ouest-France listing page into a ``Feed``.

    Args:
        request: Request helper supplied by the ``bt.request`` decorator;
            only its ``bs4(url)`` method is used here.
        link: URL of the listing page to scrape.

    Returns:
        A ``Feed`` whose items are sorted in descending order. The feed is
        empty when the page has no ``section.liste-articles``.
    """
    from urllib.parse import urljoin

    soup = request.bs4(link)

    # Fall back to the URL when the page has no <title>.
    page_title = soup.title.get_text() if soup.title else link
    feed = Feed(title=page_title, url=link, items=[])

    section = soup.find("section", {"class": "liste-articles"})
    if section is None:
        # Layout changed or the page failed to load: serve an empty feed
        # instead of raising AttributeError.
        return feed

    for article in section.find_all("article", {"class": "teaser-media-liste"}):
        title_tag = article.find("h2")
        summary_tag = article.find("p")
        anchor = article.find("a", {"class": "titre-lien"})
        time_tag = article.find("time")

        href = anchor.get("href") if anchor else None

        item_fields = {
            "title": title_tag.get_text() if title_tag else None,
            "content": summary_tag.get_text() if summary_tag else None,
            "author": "Ouest-France",
            # Resolve relative hrefs against the scraped page.
            "link": urljoin(link, href) if href else None,
        }

        raw_datetime = time_tag.get("datetime") if time_tag else None
        published = dateparser.parse(raw_datetime) if raw_datetime else None
        if published is not None:
            # Only pass a date that was actually parsed; FeedItem applies
            # its own default otherwise.
            item_fields["item_datetime"] = published

        feed.items.append(FeedItem(**item_fields))

    feed.items.sort(reverse=True)

    return feed
|
27
api/api/templates/rss.xml
Normal file
27
api/api/templates/rss.xml
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||||
|
<channel>
|
||||||
|
<title>{{ feed.title }}</title>
|
||||||
|
<atom:link href="{{ request.base_url }}" rel="self" type="application/rss+xml"/>
|
||||||
|
<link>{{ request.base_url }}</link>
|
||||||
|
<description>A feed generated from {{feed.url}} with Rudibridge</description>
|
||||||
|
<lastBuildDate>{{ build_date.strftime("%a, %d %b %Y %T") }} +0000</lastBuildDate>
|
||||||
|
{% for item in feed.items %}
|
||||||
|
<item>
|
||||||
|
{% if item.title %}
|
||||||
|
<title>{{ item.title }}</title>
|
||||||
|
{% endif %}
|
||||||
|
{% if item.link %}
|
||||||
|
<link>{{ item.link }}</link>
|
||||||
|
<guid>{{ item.link }}</guid>
|
||||||
|
{% endif %}
|
||||||
|
{% if item.content %}
|
||||||
|
<description>{{ item.content }}</description>
|
||||||
|
{% endif %}
|
||||||
|
{% if item.item_datetime %}
|
||||||
|
<pubDate>{{ item.item_datetime }}</pubDate>
|
||||||
|
{% endif %}
|
||||||
|
</item>
|
||||||
|
{% endfor %}
|
||||||
|
</channel>
|
||||||
|
</rss>
|
1
api/local_storage.json
Normal file
1
api/local_storage.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{}
|
1186
api/poetry.lock
generated
Normal file
1186
api/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
1
api/profiles.json
Normal file
1
api/profiles.json
Normal file
|
@ -0,0 +1 @@
|
||||||
|
{}
|
22
api/pyproject.toml
Normal file
22
api/pyproject.toml
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "api"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "The API behind Rudibridge"
|
||||||
|
authors = ["Ewen <darempred@korr.bzh>"]
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.11"
|
||||||
|
flask = "^3.0.3"
|
||||||
|
gunicorn = "^22.0.0"
|
||||||
|
flask-sqlalchemy = "^3.1.1"
|
||||||
|
pytest = "^8.2.0"
|
||||||
|
coverage = "^7.5.1"
|
||||||
|
requests = "^2.31.0"
|
||||||
|
botasaurus = "^4.0.14"
|
||||||
|
dateparser = "^1.2.0"
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
0
api/tests/__init__.py
Normal file
0
api/tests/__init__.py
Normal file
11
docker-compose.yml
Normal file
11
docker-compose.yml
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
env_file:
|
||||||
|
- .env.dev
|
||||||
|
build:
|
||||||
|
context: ./api
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
ports:
|
||||||
|
- 8080:8080
|
||||||
|
volumes:
|
||||||
|
- ./api:/app
|
Loading…
Reference in a new issue