diff --git a/media/scraper.py b/media/scraper.py index ec91d5e..02e8057 100644 --- a/media/scraper.py +++ b/media/scraper.py @@ -56,6 +56,8 @@ class BedetequeScraper: regex_subtitle = r'

\s*(.*)

' regex_publish_date = r'datePublished\" content=\"([\d-]*)\">' regex_nb_of_pages = r'numberOfPages\">(\d*)(((?!<).)*)' + regex_illustrator = r'(((?!<).)*)' data = { 'external_url': bd_url, @@ -87,4 +89,16 @@ class BedetequeScraper: if search_nb_pages and search_nb_pages.group(1).isnumeric(): data['number_of_pages'] = search_nb_pages.group(1) + # Get author and illustrator + author = re.search(regex_author, content) + if not 'author' in data: + data['author'] = list() + if author: + data['author'].append(author.group(1)) + illustrator = re.search(regex_illustrator, content) + if illustrator: + data['author'].append(illustrator.group(1)) + + print(data) + return data