From d6c6b58d59e4811b42ea937ad4986f625c131c93 Mon Sep 17 00:00:00 2001 From: Ewen Date: Thu, 9 May 2024 08:02:33 +0200 Subject: [PATCH] fix: handling ungiven parameters --- api/api/scraper.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/api/api/scraper.py b/api/api/scraper.py index 15325fb..f2fd185 100644 --- a/api/api/scraper.py +++ b/api/api/scraper.py @@ -38,12 +38,12 @@ def scrape(request, args): # If section is provided, use it, else use the entire page/soup section = soup if args.get("section"): - section_class = None + section_class = {} if args.get("sectionClass"): section_class = {"class": args.get("sectionClass")} section = soup.find(args.get("section"), section_class) - article_class = None + article_class = {} if args.get("articleClass"): article_class = {"class": args.get("articleClass")} articles = section.find_all(args.get("article"), article_class) @@ -51,15 +51,15 @@ def scrape(request, args): feed = Feed(title=soup.title.get_text(), url=args.get("url"), items=[]) for article in articles: - title_class = None + title_class = {} if args.get("titleClass"): title_class = {"class": args.get("titleClass")} title = article.find(args.get("title"), title_class) if title: title = title.get_text() - + content_tag = "p" - content_class = None + content_class = {} if args.get("content"): content_tag = args.get("content") if args.get("contentClass"): @@ -70,7 +70,7 @@ def scrape(request, args): content = "
".join([p.get_text() for p in paragraphs]) link_tag = "a" - link_class = None + link_class = {} if args.get("link"): link_tag = args.get("link") if args.get("linkClass"): @@ -80,7 +80,7 @@ def scrape(request, args): link = link["href"] item_datetime = None - item_datetime_class = None + item_datetime_class = {} if args.get("datetime"): if args.get("datetimeClass"): item_datetime_class = {"class": args.get("datetimeClass")} @@ -89,7 +89,7 @@ def scrape(request, args): item_datetime = dateparser.parse(item_datetime["datetime"]) author = None - author_class = None + author_class = {} if args.get("author"): if args.get("authorClass"): author_class = {"class": args.get("authorClass")}