mirror of
https://codeberg.org/Ewen/rudibridge.git
synced 2024-12-22 11:12:32 +00:00
feat: first somewhat (hardcoded) working version
Works only for Ouest-France, need to generalize now.
This commit is contained in:
parent
d21ad550d9
commit
8aebaca9ad
25
api/Dockerfile
Normal file
25
api/Dockerfile
Normal file
|
@ -0,0 +1,25 @@
|
|||
# Development image for the Rudibridge API: Python 3.11 on Alpine with the
# build toolchain, Chromium and Poetry-managed dependencies.
FROM python:3.11-alpine AS requirements

# `apk add --no-cache` fetches a fresh package index by itself, so a separate
# `apk update` (which would leave an index cache in the layer) is not needed.
RUN apk add --no-cache \
        build-base \
        chromium \
        gcc \
        libc-dev \
        linux-headers \
        pipx \
        python3-dev \
    && pipx install poetry

# pipx installs its entry points under /root/.local/bin.
ENV PATH=/root/.local/bin:${PATH}

# WORKDIR creates /app when it does not exist yet.
WORKDIR /app

# With several sources the destination must be a directory ending in "/".
COPY pyproject.toml poetry.lock /app/

RUN poetry --version
# Only the dependencies are installed: the project sources are not part of the
# image (docker-compose mounts them on /app at run time), so installing the
# root package itself would fail.
RUN poetry install --no-root

#CMD ["poetry", "run", "gunicorn", "--bind=0.0.0.0:8080", "--reload", "app:app"]
CMD ["poetry", "run", "flask", "--app", "api", "run", "--host=0.0.0.0", "--port=8080", "--debug"]
|
2
api/README.md
Normal file
2
api/README.md
Normal file
|
@ -0,0 +1,2 @@
|
|||
## Rudibridge
|
||||
A rudimentary RSS bridge that generates feeds for regularly updated web pages that don't provide one.
|
32
api/api/__init__.py
Normal file
32
api/api/__init__.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
import os
|
||||
|
||||
from flask import Flask
|
||||
|
||||
|
||||
def create_app(test_config=None):
    """Application factory: build, configure and return the Flask app.

    Args:
        test_config: Optional mapping of configuration values. When given it
            is applied on top of the defaults and the environment is not
            consulted; when ``None`` the configuration is completed from
            prefixed environment variables instead.

    Returns:
        The configured Flask application with the database hooks and the
        feed blueprint registered.
    """
    flask_app = Flask(__name__, instance_relative_config=True)

    default_settings = {
        #!FIXME: secret key to change for production
        "SECRET_KEY": "dev",
        "DATABASE": os.path.join(flask_app.instance_path, "db.sqlite"),
    }
    flask_app.config.from_mapping(default_settings)

    if test_config is not None:
        flask_app.config.from_mapping(test_config)
    else:
        flask_app.config.from_prefixed_env()

    # Best effort: any OSError while creating the instance folder (for
    # example because it already exists) is deliberately ignored.
    try:
        os.makedirs(flask_app.instance_path)
    except OSError:
        pass

    # Imported here rather than at module level, as in the original layout.
    from . import db

    db.init_app(flask_app)

    from . import feed

    flask_app.register_blueprint(feed.bp)

    return flask_app
|
11
api/api/app.py
Normal file
11
api/api/app.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
from flask import Flask
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/")
def hello():
    """Answer requests on the root URL with a fixed HTML greeting."""
    greeting = "<p>Coucou.</p>"
    return greeting
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run()
|
0
api/api/config.py
Normal file
0
api/api/config.py
Normal file
38
api/api/db.py
Normal file
38
api/api/db.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
import sqlite3
|
||||
import click
|
||||
from flask import current_app, g
|
||||
|
||||
|
||||
def get_db():
    """Return the SQLite connection stored on ``g``, opening it on first use.

    The connection targets the path in the ``DATABASE`` config entry and
    yields rows as ``sqlite3.Row`` objects.
    """
    if "db" in g:
        return g.db

    database_path = current_app.config["DATABASE"]
    connection = sqlite3.connect(
        database_path, detect_types=sqlite3.PARSE_DECLTYPES
    )
    connection.row_factory = sqlite3.Row
    g.db = connection
    return connection
|
||||
|
||||
|
||||
def close_db(e=None):
    """Close the connection stored on ``g``, if one was opened.

    Args:
        e: Unused; accepted so the function can be registered as an
            app-context teardown callback.
    """
    connection = g.pop("db", None)
    if connection is None:
        return
    connection.close()
|
||||
|
||||
|
||||
def init_db():
    """Run the bundled ``schema.sql`` script against the database."""
    connection = get_db()

    with current_app.open_resource("schema.sql") as schema_file:
        schema_script = schema_file.read().decode("utf8")
        connection.executescript(schema_script)
|
||||
|
||||
|
||||
# CLI entry point ``init-db``: runs the schema script, then reports success.
# Documented with comments only, because click would expose a docstring as
# the command's help text.
@click.command("init-db")
def init_db_command():
    init_db()
    confirmation = "Initialized the database."
    click.echo(confirmation)
|
||||
|
||||
|
||||
def init_app(app):
    """Hook the database helpers into ``app``.

    Registers the ``init-db`` CLI command and makes sure the connection is
    closed whenever an application context is torn down.
    """
    app.cli.add_command(init_db_command)
    app.teardown_appcontext(close_db)
|
19
api/api/feed.py
Normal file
19
api/api/feed.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
from datetime import datetime
|
||||
|
||||
from flask import Blueprint, make_response, render_template
|
||||
|
||||
from api.db import get_db
|
||||
|
||||
from .scraper import scrape
|
||||
|
||||
bp = Blueprint("feed", __name__, url_prefix="/feed")
|
||||
|
||||
|
||||
@bp.route("/", methods=("GET",))
def parse_page():
    """Scrape the configured page and return it as an RSS document.

    Returns:
        A Flask response whose body is the rendered ``rss.xml`` template and
        whose ``Content-Type`` is ``application/rss+xml``.
    """
    from datetime import timezone

    # TODO: the source page is still hard-coded (Ouest-France, Rennes).
    link = "https://www.ouest-france.fr/bretagne/rennes-35000/"
    feed = scrape(link)

    # The template prints this value followed by a literal "+0000", so it
    # has to be expressed in UTC rather than in the server's local time.
    build_date = datetime.now(timezone.utc)

    rss_xml = render_template("rss.xml", feed=feed, build_date=build_date)
    response = make_response(rss_xml)
    response.headers["Content-Type"] = "application/rss+xml"
    return response
|
6
api/api/schema.sql
Normal file
6
api/api/schema.sql
Normal file
|
@ -0,0 +1,6 @@
|
|||
-- Schema script executed by init_db().
-- It starts from a clean slate: an existing feed table is dropped first.
DROP TABLE IF EXISTS feed;

-- Stores feed URLs; presumably one row per bridged page (TODO confirm).
CREATE TABLE feed (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  url TEXT NOT NULL
);
|
63
api/api/scraper.py
Normal file
63
api/api/scraper.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
from datetime import datetime
|
||||
|
||||
import botasaurus as bt
|
||||
import dateparser
|
||||
|
||||
|
||||
class FeedItem:
    """A single entry of a generated feed.

    Attributes are plain values read by the RSS template: ``title``,
    ``content``, ``author``, ``link`` and ``item_datetime`` (an ISO 8601
    string, or ``None`` when the publication date is unknown).
    """

    # Sentinel telling "argument omitted" apart from an explicit ``None``.
    _UNSET = object()

    def __init__(self, title, content, author, link, item_datetime=_UNSET):
        """Create an item.

        Args:
            title: Item title, or ``None``.
            content: Item summary/description, or ``None``.
            author: Name of the item's author or source.
            link: URL of the item, or ``None``.
            item_datetime: Publication ``datetime``. When omitted, the
                current time at construction is used (the previous default
                was evaluated only once, at import time). An explicit
                ``None`` means "unknown" and is stored as ``None``.
        """
        self.title = title
        self.content = content
        self.author = author
        self.link = link

        if item_datetime is FeedItem._UNSET:
            item_datetime = datetime.now()

        if item_datetime is None:
            self.item_datetime = None
        else:
            self.item_datetime = item_datetime.isoformat()

    def __lt__(self, other):
        """Order items by date, falling back to the title.

        The date comparison is used only when both items carry one;
        otherwise titles are compared, with a missing title treated as "".
        """
        if self.item_datetime and other.item_datetime:
            return self.item_datetime < other.item_datetime
        return (self.title or "") < (other.title or "")
|
||||
|
||||
class Feed:
    """Container for a scraped feed: its title, source URL and items."""

    def __init__(self, title, url, items):
        """Store the feed title, the URL it came from and its item list."""
        self.title, self.url, self.items = title, url, items
|
||||
|
||||
|
||||
|
||||
@bt.request(output=None)
def scrape(request, link):
    """Fetch ``link`` and turn its article list into a ``Feed``.

    The selectors are specific to Ouest-France listing pages: articles are
    ``article.teaser-media-liste`` elements inside ``section.liste-articles``.

    Args:
        request: Request helper supplied by the ``bt.request`` decorator;
            only its ``bs4`` method is used.
        link: URL of the page to scrape.

    Returns:
        A ``Feed`` whose items are sorted in descending order. The feed is
        empty when the page has no article section.
    """
    from urllib.parse import urljoin

    soup = request.bs4(link)

    # Fall back to the URL when the page has no <title>.
    page_title = soup.title.get_text() if soup.title else link
    feed = Feed(title=page_title, url=link, items=[])

    section = soup.find("section", {"class": "liste-articles"})
    if section is None:
        # Unexpected page layout: return an empty feed instead of crashing.
        return feed

    for article in section.find_all("article", {"class": "teaser-media-liste"}):
        heading = article.find("h2")
        summary = article.find("p")
        anchor = article.find("a", {"class": "titre-lien"})
        time_tag = article.find("time")

        href = anchor.get("href") if anchor else None
        raw_datetime = time_tag.get("datetime") if time_tag else None
        published = dateparser.parse(raw_datetime) if raw_datetime else None

        item_kwargs = {
            "title": heading.get_text() if heading else None,
            "content": summary.get_text() if summary else None,
            "author": "Ouest-France",
            # Resolve relative hrefs against the page URL.
            "link": urljoin(link, href) if href else None,
        }
        # Pass the date only when one was parsed, so that FeedItem applies
        # its own default instead of receiving ``None``.
        if published is not None:
            item_kwargs["item_datetime"] = published

        feed.items.append(FeedItem(**item_kwargs))

    # Sort once, after all items have been collected.
    feed.items.sort(reverse=True)

    return feed
|
27
api/api/templates/rss.xml
Normal file
27
api/api/templates/rss.xml
Normal file
|
@ -0,0 +1,27 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
{# RSS 2.0 template. Context: `feed` (title, url, items) and `build_date` (a datetime). #}
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>{{ feed.title }}</title>
    <atom:link href="{{ request.base_url }}" rel="self" type="application/rss+xml"/>
    <link>{{ request.base_url }}</link>
    <description>A feed generated from {{feed.url}} with Rudibridge</description>
    {# A literal "+0000" is appended, so build_date is expected to be in UTC. #}
    <lastBuildDate>{{ build_date.strftime("%a, %d %b %Y %T") }} +0000</lastBuildDate>
    {# Each element below is emitted only when the item carries a value for it. #}
    {% for item in feed.items %}
    <item>
      {% if item.title %}
      <title>{{ item.title }}</title>
      {% endif %}
      {% if item.link %}
      <link>{{ item.link }}</link>
      <guid>{{ item.link }}</guid>
      {% endif %}
      {% if item.content %}
      <description>{{ item.content }}</description>
      {% endif %}
      {% if item.item_datetime %}
      {# NOTE(review): item_datetime is an ISO 8601 string, while RSS 2.0 expects RFC 822 dates in pubDate. #}
      <pubDate>{{ item.item_datetime }}</pubDate>
      {% endif %}
    </item>
    {% endfor %}
  </channel>
</rss>
|
1
api/local_storage.json
Normal file
1
api/local_storage.json
Normal file
|
@ -0,0 +1 @@
|
|||
{}
|
1186
api/poetry.lock
generated
Normal file
1186
api/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
1
api/profiles.json
Normal file
1
api/profiles.json
Normal file
|
@ -0,0 +1 @@
|
|||
{}
|
22
api/pyproject.toml
Normal file
22
api/pyproject.toml
Normal file
|
@ -0,0 +1,22 @@
|
|||
[tool.poetry]
|
||||
name = "api"
|
||||
version = "0.1.0"
|
||||
description = "The API behind Rudibridge"
|
||||
authors = ["Ewen <darempred@korr.bzh>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
flask = "^3.0.3"
|
||||
gunicorn = "^22.0.0"
|
||||
flask-sqlalchemy = "^3.1.1"
|
||||
pytest = "^8.2.0"
|
||||
coverage = "^7.5.1"
|
||||
requests = "^2.31.0"
|
||||
botasaurus = "^4.0.14"
|
||||
dateparser = "^1.2.0"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
0
api/tests/__init__.py
Normal file
0
api/tests/__init__.py
Normal file
11
docker-compose.yml
Normal file
11
docker-compose.yml
Normal file
|
@ -0,0 +1,11 @@
|
|||
services:
|
||||
api:
|
||||
env_file:
|
||||
- .env.dev
|
||||
build:
|
||||
context: ./api
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
volumes:
|
||||
- ./api:/app
|
Loading…
Reference in a new issue