Temporarily use ISBNDB to scrape data

This commit is contained in:
Yohann D'ANELLO 2020-10-03 15:36:28 +02:00
parent 80c520d76c
commit 3af19c0f27
1 changed file with 24 additions and 9 deletions


@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 import json
+import os
 import re
 import unicodedata
 import urllib.request
@@ -90,6 +91,18 @@ class MediaAdminForm(ModelForm):
                {'data-generated-side-identifier': generate_side_identifier(title, authors, subtitle)})
            side_identifier_field.widget.template_name = "media/generate_side_identifier.html"
 
+    def download_data_isbndb(self, isbn):
+        api_url = "https://api2.isbndb.com/book/" + str(isbn) + "?Authorization=" + os.getenv("ISBNDB_KEY")
+        with urllib.request.urlopen(api_url) as url:
+            data = json.loads(url.read().decode())
+        self.cleaned_data["title"] = data["title_long"]
+        self.cleaned_data["published_date"] = data["date_published"][:10]
+        self.cleaned_data["number_of_pages"] = data["pages"]
+        self.cleaned_data["authors"] = \
+            list(Auteur.objects.get_or_create(name=author_name)[0] for author_name in data["authors"])
+        self.cleaned_data["external_url"] = data["image"]
+        return True
+
     def download_data_bedeteque(self, isbn):
         """
         Download data from bedeteque
@@ -249,17 +262,19 @@ class MediaAdminForm(ModelForm):
             self.data['_addanother'] = 42
             self.request.POST = self.data
         if isbn:
-            # ISBN is present, try with bedeteque
-            scrap_result = self.download_data_bedeteque(isbn)
+            scrap_result = self.download_data_isbndb(isbn)
             if not scrap_result:
-                # Try with Google
-                scrap_result = self.download_data_google(isbn)
+                # ISBN is present, try with bedeteque
+                scrap_result = self.download_data_bedeteque(isbn)
                 if not scrap_result:
-                    # Try with OpenLibrary
-                    if not self.download_data_openlibrary(isbn):
-                        self.add_error('isbn',
-                                       _("This ISBN is not found."))
-            return self.cleaned_data
+                    # Try with Google
+                    scrap_result = self.download_data_google(isbn)
+                    if not scrap_result:
+                        # Try with OpenLibrary
+                        if not self.download_data_openlibrary(isbn):
+                            self.add_error('isbn',
+                                           _("This ISBN is not found."))
+            return self.cleaned_data
         if self.cleaned_data['title']:
             self.cleaned_data['title'] = re.sub(
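
For context, here is a minimal standalone sketch of the lookup the new download_data_isbndb method performs, handy for checking the ISBNDB_KEY setup outside Django. It mirrors the commit's approach of appending the key as an "Authorization" query parameter to https://api2.isbndb.com/book/<isbn>; the helper name fetch_isbndb_book and the example ISBN are hypothetical, and the response fields (title_long, date_published, pages, authors, image) are simply those read in the diff above.

import json
import os
import urllib.request


def fetch_isbndb_book(isbn):
    """Fetch raw book metadata from ISBNDB for one ISBN (sketch, not the project's code).

    Assumes the ISBNDB_KEY environment variable holds a valid API key and,
    like the commit, sends it as an "Authorization" query parameter.
    """
    api_url = ("https://api2.isbndb.com/book/" + str(isbn)
               + "?Authorization=" + os.getenv("ISBNDB_KEY", ""))
    with urllib.request.urlopen(api_url) as response:
        return json.loads(response.read().decode())


if __name__ == "__main__":
    data = fetch_isbndb_book("9780140328721")  # hypothetical example ISBN
    print(data.get("title_long"), data.get("date_published", "")[:10])

Note that the form method itself has no error handling around urlopen, so a missing key or an unrecognised ISBN raises an exception rather than returning False and falling back to the bedeteque, Google and OpenLibrary lookups shown in the last hunk.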