fix: handle parameters that are not provided

This commit is contained in:
Ewen 2024-05-09 08:02:33 +02:00
parent 18e102f5af
commit d6c6b58d59

View file

@ -38,12 +38,12 @@ def scrape(request, args):
# If section is provided, use it, else use the entire page/soup # If section is provided, use it, else use the entire page/soup
section = soup section = soup
if args.get("section"): if args.get("section"):
section_class = None section_class = {}
if args.get("sectionClass"): if args.get("sectionClass"):
section_class = {"class": args.get("sectionClass")} section_class = {"class": args.get("sectionClass")}
section = soup.find(args.get("section"), section_class) section = soup.find(args.get("section"), section_class)
article_class = None article_class = {}
if args.get("articleClass"): if args.get("articleClass"):
article_class = {"class": args.get("articleClass")} article_class = {"class": args.get("articleClass")}
articles = section.find_all(args.get("article"), article_class) articles = section.find_all(args.get("article"), article_class)
@ -51,15 +51,15 @@ def scrape(request, args):
feed = Feed(title=soup.title.get_text(), url=args.get("url"), items=[]) feed = Feed(title=soup.title.get_text(), url=args.get("url"), items=[])
for article in articles: for article in articles:
title_class = None title_class = {}
if args.get("titleClass"): if args.get("titleClass"):
title_class = {"class": args.get("titleClass")} title_class = {"class": args.get("titleClass")}
title = article.find(args.get("title"), title_class) title = article.find(args.get("title"), title_class)
if title: if title:
title = title.get_text() title = title.get_text()
content_tag = "p" content_tag = "p"
content_class = None content_class = {}
if args.get("content"): if args.get("content"):
content_tag = args.get("content") content_tag = args.get("content")
if args.get("contentClass"): if args.get("contentClass"):
@ -70,7 +70,7 @@ def scrape(request, args):
content = "<br>".join([p.get_text() for p in paragraphs]) content = "<br>".join([p.get_text() for p in paragraphs])
link_tag = "a" link_tag = "a"
link_class = None link_class = {}
if args.get("link"): if args.get("link"):
link_tag = args.get("link") link_tag = args.get("link")
if args.get("linkClass"): if args.get("linkClass"):
@ -80,7 +80,7 @@ def scrape(request, args):
link = link["href"] link = link["href"]
item_datetime = None item_datetime = None
item_datetime_class = None item_datetime_class = {}
if args.get("datetime"): if args.get("datetime"):
if args.get("datetimeClass"): if args.get("datetimeClass"):
item_datetime_class = {"class": args.get("datetimeClass")} item_datetime_class = {"class": args.get("datetimeClass")}
@ -89,7 +89,7 @@ def scrape(request, args):
item_datetime = dateparser.parse(item_datetime["datetime"]) item_datetime = dateparser.parse(item_datetime["datetime"])
author = None author = None
author_class = None author_class = {}
if args.get("author"): if args.get("author"):
if args.get("authorClass"): if args.get("authorClass"):
author_class = {"class": args.get("authorClass")} author_class = {"class": args.get("authorClass")}