From 3af19c0f27459e140aeba4fe359d2179e99369b5 Mon Sep 17 00:00:00 2001
From: Yohann D'ANELLO
Date: Sat, 3 Oct 2020 15:36:28 +0200
Subject: [PATCH] Temporary use ISBNDB to scrap data

---
 media/forms.py | 68 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/media/forms.py b/media/forms.py
index 00fad64..ff89b61 100644
--- a/media/forms.py
+++ b/media/forms.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 
 import json
+import os
 import re
 import unicodedata
 import urllib.request
@@ -90,6 +91,53 @@ class MediaAdminForm(ModelForm):
                 {'data-generated-side-identifier': generate_side_identifier(title, authors, subtitle)})
             side_identifier_field.widget.template_name = "media/generate_side_identifier.html"
 
+    def download_data_isbndb(self, isbn):
+        """
+        Download data from ISBNdb
+
+        The API key is read from the ISBNDB_KEY environment variable.
+        Return True when the book was found and cleaned_data was filled,
+        False otherwise so that the caller can try another data source.
+        """
+        api_key = os.getenv("ISBNDB_KEY")
+        if not api_key:
+            # No key configured: skip this source instead of crashing
+            return False
+
+        # ISBNdb expects the key in the Authorization header, which also
+        # keeps it out of the URL
+        request = urllib.request.Request(
+            "https://api2.isbndb.com/book/" + str(isbn),
+            headers={"Authorization": api_key})
+        try:
+            with urllib.request.urlopen(request) as response:
+                payload = json.loads(response.read().decode())
+        except (OSError, ValueError):
+            # Unknown ISBN (HTTP error), network failure or invalid JSON
+            return False
+
+        # The record is nested under "book"; accept a flat payload too
+        data = payload.get("book", payload) \
+            if isinstance(payload, dict) else None
+        if not isinstance(data, dict):
+            return False
+        title = data.get("title_long") or data.get("title")
+        if not title:
+            return False
+
+        self.cleaned_data["title"] = title
+        if "date_published" in data:
+            self.cleaned_data["published_date"] = \
+                str(data["date_published"])[:10]
+        if "pages" in data:
+            self.cleaned_data["number_of_pages"] = data["pages"]
+        self.cleaned_data["authors"] = [
+            Auteur.objects.get_or_create(name=author_name)[0]
+            for author_name in data.get("authors") or []]
+        if "image" in data:
+            self.cleaned_data["external_url"] = data["image"]
+        return True
+
     def download_data_bedeteque(self, isbn):
         """
         Download data from bedeteque
@@ -249,17 +297,19 @@ class MediaAdminForm(ModelForm):
                 self.data['_addanother'] = 42
                 self.request.POST = self.data
             if isbn:
-                # ISBN is present, try with bedeteque
-                scrap_result = self.download_data_bedeteque(isbn)
+                scrap_result = self.download_data_isbndb(isbn)
                 if not scrap_result:
-                    # Try with Google
-                    scrap_result = self.download_data_google(isbn)
+                    # ISBN is present, try with bedeteque
+                    scrap_result = self.download_data_bedeteque(isbn)
                     if not scrap_result:
-                        # Try with OpenLibrary
-                        if not self.download_data_openlibrary(isbn):
-                            self.add_error('isbn',
-                                           _("This ISBN is not found."))
-                            return self.cleaned_data
+                        # Try with Google
+                        scrap_result = self.download_data_google(isbn)
+                        if not scrap_result:
+                            # Try with OpenLibrary
+                            if not self.download_data_openlibrary(isbn):
+                                self.add_error('isbn',
+                                               _("This ISBN is not found."))
+                                return self.cleaned_data
 
         if self.cleaned_data['title']:
             self.cleaned_data['title'] = re.sub(