med/media/forms.py

# -*- mode: python; coding: utf-8 -*-
# Copyright (C) 2017-2021 by BDE ENS Paris-Saclay
# SPDX-License-Identifier: GPL-3.0-or-later

import json
import os
import re
import unicodedata
from urllib.error import HTTPError
import urllib.request

from django.core.exceptions import ValidationError
from django.db.models import QuerySet
from django.forms import ModelForm
from django.utils.translation import gettext_lazy as _

from .models import Author, Comic
from .scraper import BedetequeScraper


# Leading articles dropped from titles. They are tried once each, in this
# order, so "DE LA TERRE" loses both "DE " and "LA ".
_LEADING_ARTICLES = (
    'DE ', 'DES ', 'LE ', 'LA ', 'LES ', 'L\'', 'UN ', 'UNE ', 'THE ',
)


def _fold_to_upper_ascii(text):
    """
    Upper-case ``text`` after dropping accents and special characters.

    The text is casefolded and NFKD-decomposed; every character whose
    Unicode category starts with M, P, Z or C is dropped, except the plain
    space, which is kept.
    """
    decomposed = unicodedata.normalize('NFKD', text.casefold())
    return ''.join(
        char for char in decomposed
        if char == ' ' or unicodedata.category(char)[0] not in 'MPZC'
    ).casefold().upper()


def generate_side_identifier(title, authors, subtitle=None):
    """
    Build the side identifier of a medium: ``AUT TIT`` or ``AUT TIT NN``.

    :param title: title of the medium; one leading article of each kind
        listed in ``_LEADING_ARTICLES`` is ignored
    :param authors: iterable (list, tuple, queryset...) of objects exposing
        ``name``, ``note`` and ``pk``; it is not modified. When it is empty
        the author part of the identifier is left empty.
    :param subtitle: optional subtitle; when its first word is a number
        (optionally prefixed with ``R``), that number is appended,
        left-padded with zeros to two characters
    :return the upper-cased identifier, without accents
    """
    title_normalized = title.upper().replace('’', '\'')
    for article in _LEADING_ARTICLES:
        if title_normalized.startswith(article):
            title_normalized = title_normalized[len(article):]
    title_normalized = title_normalized.replace('Œ', 'OE').replace(' ', '')
    title_normalized = _fold_to_upper_ascii(title_normalized)
    title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)

    def sort_key(author):
        # NOTE(review): keys are compared as strings, so among authors with
        # a positive note the lowest note comes first, and authors with
        # note 0 come after them - confirm this ordering is intended.
        return "{:042d}".format(-author.note) + author.name.split(" ")[-1]\
               + ".{:042d}".format(author.pk)

    # min() returns the first smallest element, which is the element a
    # stable sort would put first, without requiring a mutable list.
    primary_author = min(authors, key=sort_key, default=None)
    author_name = ''
    if primary_author is not None:
        author_name = primary_author.name.upper()
        # "First Last" keeps the last word; "Last, First" is kept whole.
        if ',' not in author_name and ' ' in author_name:
            author_name = author_name.split(' ')[-1]
        author_name = re.sub(
            "[^A-Z]", "", _fold_to_upper_ascii(author_name))

    side_identifier = "{:.3} {:.3}".format(author_name, title_normalized)

    if subtitle:
        subtitle = subtitle.replace('</span>', '')
        # Only strip the tag itself: a greedy ".*" would also swallow any
        # subtitle text located before a later ">".
        subtitle = re.sub(r'<span[^>]*>', '', subtitle)
        start = subtitle.split(' ')[0].replace('.', '')
        start = re.sub("^R?", "", start)

        if start.isnumeric():
            side_identifier += " {:0>2}".format(start)

    # Normalize side identifier, in order to remove accents
    return _fold_to_upper_ascii(side_identifier)


class MediaAdminForm(ModelForm):
    """
    Admin form for comics, able to pre-fill its fields from an ISBN.

    When the form is submitted with ``_isbn`` or ``_isbn_addanother`` in its
    data, ``clean()`` queries ISBNdb, bedeteque, Google Books and OpenLibrary
    in that order and lets the first provider that knows the ISBN fill
    ``cleaned_data``.
    """

    # Month tokens accepted in OpenLibrary dates, keyed by their first three
    # letters. "Mai" is kept because that spelling was already accepted.
    _MONTH_NUMBERS = {
        "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04",
        "May": "05", "Mai": "05", "Jun": "06", "Jul": "07",
        "Aug": "08", "Sep": "09", "Oct": "10", "Nov": "11",
        "Dec": "12",
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        isbn_field = self.fields.get('isbn')
        if isbn_field:
            isbn_field.widget.template_name = "media/isbn_button.html"
            isbn_field.widget.attrs.update({'autofocus': 'autofocus'})
        side_identifier_field = self.fields.get('side_identifier')
        if side_identifier_field and self.instance and self.instance.pk:
            # Expose the computed identifier to the widget template for
            # media that are already saved.
            instance = self.instance
            generated = generate_side_identifier(
                instance.title,
                instance.authors.all(),
                getattr(instance, "subtitle", None),
            )
            widget = side_identifier_field.widget
            widget.attrs.update({'data-generated-side-identifier': generated})
            widget.template_name = "media/generate_side_identifier.html"

    def _add_authors(self, names):
        """
        Append the authors called ``names`` to ``cleaned_data['authors']``,
        creating the missing ``Author`` rows. The entry is reset to an empty
        list first when it is missing or empty.
        """
        if not self.cleaned_data.get('authors'):
            self.cleaned_data['authors'] = []
        for name in names:
            author_obj = Author.objects.get_or_create(name=name)[0]
            self.cleaned_data['authors'].append(author_obj)

    def download_data_isbndb(self, isbn):
        """
        Download data from ISBNdb
        :return True if success
        """
        api_key = os.getenv("ISBNDB_KEY", "")
        # NOTE(review): the key is sent both in the query string and in the
        # Authorization header; confirm whether the query string copy is
        # still required, since it exposes the key in the URL.
        api_url = "https://api2.isbndb.com/book/" + str(isbn)\
                  + "?Authorization=" + api_key
        req = urllib.request.Request(api_url)
        req.add_header("Authorization", api_key)
        try:
            with urllib.request.urlopen(req) as url:
                data = json.loads(url.read().decode()).get("book")
        except HTTPError:
            return False
        if not data:
            # No usable "book" entry: let the next provider try.
            return False

        data.setdefault("title", "")
        data.setdefault("date_published", "1970-01-01")
        data.setdefault("pages", 0)
        data.setdefault("authors", [])
        data.setdefault("image", "")

        # Complete "YYYY" and "YYYY-MM" values to a full "YYYY-MM-DD" date.
        publish_date = str(data["date_published"])[:10]
        while len(publish_date) in (4, 7):
            publish_date += "-01"

        self.cleaned_data["title"] = data["title"]
        self.cleaned_data["publish_date"] = publish_date
        self.cleaned_data["number_of_pages"] = data["pages"]
        self.cleaned_data["authors"] = [
            Author.objects.get_or_create(name=author_name)[0]
            for author_name in data["authors"]
        ]
        self.cleaned_data["external_url"] = data["image"]
        return True

    def download_data_bedeteque(self, isbn):
        """
        Download data from bedeteque
        :return True if success
        """
        scraper = BedetequeScraper()
        results = scraper.search_by_isbn(isbn)
        if not results:
            return False
        # If results, then take the most accurate
        self.cleaned_data.update(scraper.scrap_comic_info(results[0]))
        return True

    def download_data_google(self, isbn):
        """
        Download data from google books
        :return True if success
        """
        api_url = "https://www.googleapis.com/books/v1/volumes?q=ISBN:{}"\
            .format(isbn)
        try:
            with urllib.request.urlopen(api_url) as url:
                data = json.loads(url.read().decode())
        except HTTPError:
            # Same policy as ISBNdb: a failing provider is skipped.
            return False

        if not data or not data.get('totalItems'):
            return False

        # Only accept a volume that really carries the queried ISBN.
        for item in data.get('items', []):
            identifiers = item.get("volumeInfo", {})\
                .get("industryIdentifiers", [])
            if any(identifier.get("identifier") == isbn
                   for identifier in identifiers):
                # Fill the data
                self.parse_data_google(item)
                return True
        return False

    def parse_data_google(self, data):
        """
        Copy the fields of a Google Books volume into ``cleaned_data``
        :param data: one entry of the ``items`` list of the API response
        """
        info = data['volumeInfo']
        self.cleaned_data['external_url'] = info['canonicalVolumeLink']
        if 'title' in info:
            self.cleaned_data['title'] = info['title']
        # The subtitle lives in volumeInfo, next to the title.
        if 'subtitle' in info:
            self.cleaned_data['subtitle'] = info['subtitle']

        if 'pageCount' in info:
            self.cleaned_data['number_of_pages'] = info['pageCount']
        elif not self.cleaned_data.get('number_of_pages'):
            self.cleaned_data['number_of_pages'] = 0

        if 'publishedDate' in info:
            published = info['publishedDate']
            if "-" not in published:
                # Year only
                published += "-01-01"
            elif len(published) == 7:
                # Year and month only
                published += "-01"
            self.cleaned_data['publish_date'] = published[:10]

        self._add_authors(info.get('authors', []))

    def download_data_openlibrary(self, isbn):
        """
        Download data from openlibrary
        :return True if success
        """
        api_url = "https://openlibrary.org/api/books?bibkeys=ISBN:{}" \
                  "&format=json&jscmd=data".format(isbn)
        try:
            with urllib.request.urlopen(api_url) as url:
                data = json.loads(url.read().decode())
        except HTTPError:
            # Same policy as ISBNdb: a failing provider is skipped.
            return False

        book = data.get('ISBN:{}'.format(isbn)) if data else None
        if book and 'url' in book:
            # Fill the data
            self.parse_data_openlibrary(book)
            return True
        return False

    def _openlibrary_date_to_iso(self, raw_date):
        """
        Convert an OpenLibrary publication date to ``YYYY-MM-DD``.

        Handles "2009", "2009-03", "March 2009" and "March 1, 2009".
        :return the ISO date, or None when the format is not recognised
        """
        tokens = raw_date.replace(',', '').split()
        if not tokens:
            return None
        if len(tokens) == 1:
            token = tokens[0]
            if "-" not in token:
                return token + "-01-01"
            if len(token) == 7:
                return token + "-01"
            return token[:10]

        month = self._MONTH_NUMBERS.get(tokens[0][:3].capitalize())
        if month is None:
            return None
        if len(tokens) == 2:
            # "Month Year": no day given, use the first of the month.
            day, year = "01", tokens[1]
        else:
            day, year = tokens[1].zfill(2), tokens[2]
        return "{}-{}-{}".format(year, month, day)

    def parse_data_openlibrary(self, data):
        """
        Copy the fields of an OpenLibrary book record into ``cleaned_data``
        :param data: the ``ISBN:<isbn>`` entry of the API response
        """
        self.cleaned_data['external_url'] = data['url']
        if 'title' in data:
            self.cleaned_data['title'] = data['title']
        if 'subtitle' in data:
            self.cleaned_data['subtitle'] = data['subtitle']

        if 'number_of_pages' in data:
            self.cleaned_data['number_of_pages'] = data['number_of_pages']
        elif not self.cleaned_data.get('number_of_pages'):
            self.cleaned_data['number_of_pages'] = 0

        if 'publish_date' in data:
            iso_date = self._openlibrary_date_to_iso(data['publish_date'])
            # An unrecognised date leaves the submitted value untouched.
            if iso_date:
                self.cleaned_data['publish_date'] = iso_date

        self._add_authors(
            author['name'] for author in data.get('authors', []))

    def clean(self):
        """
        If user fetch ISBN data, then download data before validating the form
        """
        super().clean()

        fetch_and_add_another = "_isbn_addanother" in self.data
        if "_isbn" not in self.data and not fetch_and_add_another:
            return self.cleaned_data

        isbn = self.cleaned_data.get('isbn')
        if fetch_and_add_another:
            self.data = self.data.copy()
            self.data['_addanother'] = 42
            # NOTE(review): ``request`` is not set anywhere in this class;
            # presumably it is attached by the admin - verify.
            self.request.POST = self.data
        if not isbn:
            return self.cleaned_data

        # Providers are tried in order; evaluation stops at the first hit.
        found = (
            self.download_data_isbndb(isbn)
            or self.download_data_bedeteque(isbn)
            or self.download_data_google(isbn)
            or self.download_data_openlibrary(isbn)
        )
        if not found:
            self.add_error('isbn', _("This ISBN is not found."))
            return self.cleaned_data

        title = self.cleaned_data.get('title')
        if title:
            self.cleaned_data['title'] = re.sub(r'\(AUT\) ', '', title)

        authors = self.cleaned_data.get('authors')
        if authors:
            self.cleaned_data['side_identifier'] = generate_side_identifier(
                self.cleaned_data.get("title") or "",
                authors,
                self.cleaned_data.get("subtitle"),
            )

        return self.cleaned_data

    def _clean_fields(self):
        """
        Clean the ISBN first, then the other fields; when a valid ISBN was
        submitted the other fields are stored without being validated.
        """
        # First clean ISBN field
        isbn_field = self.fields.get('isbn')
        if isbn_field is not None:
            raw_isbn = isbn_field.widget.value_from_datadict(
                self.data, self.files, self.add_prefix('isbn'))
            try:
                self.cleaned_data['isbn'] = isbn_field.clean(raw_isbn)
            except ValidationError:
                # Not reported here: without a cleaned ISBN the loop below
                # validates every field, ISBN included, and records the
                # error exactly once.
                pass

        for name, field in self.fields.items():
            # value_from_datadict() gets the data from the data dictionaries.
            # Each widget type knows how to retrieve its own data, because some
            # widgets split data over several HTML fields.
            if field.disabled:
                value = self.get_initial_for_field(field, name)
            else:
                value = field.widget.value_from_datadict(
                    self.data, self.files, self.add_prefix(name))
            try:
                # We don't want to check a field when we enter an ISBN.
                # NOTE(review): this tests the "isbn" field key, not the
                # "_isbn" button key used by clean() - confirm intended.
                if "isbn" not in self.data \
                        or not self.cleaned_data.get('isbn'):
                    value = field.clean(value)
                self.cleaned_data[name] = value
                if hasattr(self, 'clean_%s' % name):
                    value = getattr(self, 'clean_%s' % name)()
                    self.cleaned_data[name] = value
            except ValidationError as e:
                self.add_error(name, e)

    class Meta:
        model = Comic
        fields = ('isbn', 'title', 'subtitle', 'external_url',
                  'side_identifier', 'authors', 'number_of_pages',
                  'publish_date', 'present', )
-												Massive cleanup (1)

											
										
										
											2019-08-02 12:57:53 +00:00
+								# -*- mode: python; coding: utf-8 -*-
-												Prepare the website for 2021

											
										
										
											2020-12-27 17:22:52 +00:00
+								# Copyright (C) 2017-2021 by BDE ENS Paris-Saclay
-												Massive cleanup (1)

											
										
										
											2019-08-02 12:57:53 +00:00
+								# SPDX-License-Identifier: GPL-3.0-or-later
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
+								import json
-												Temporary use ISBNDB to scrap data

											
										
										
											2020-10-03 13:36:28 +00:00
+								import os
-												Side identifiers cannot begin with le/la/les/l'/the

											
										
										
											2020-02-10 16:51:09 +00:00
+								import re
-												Remove special chars from side identifiers

											
										
										
											2020-02-10 15:47:05 +00:00
+								import unicodedata
-												Too much 404...

											
										
										
											2020-10-03 13:51:25 +00:00
+								from urllib.error import HTTPError
-												FUCK LINTERS TEST

											
										
										
											2020-02-10 20:12:12 +00:00
+								import urllib.request
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
-												Fix ISBN scraper

											
										
										
											2021-10-26 13:35:47 +00:00
+								from django.core.exceptions import ValidationError
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								from django.db.models import QuerySet
-												Massive cleanup (1)

											
										
										
											2019-08-02 12:57:53 +00:00
+								from django.forms import ModelForm
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								from django.utils.translation import gettext_lazy as _
-												Massive cleanup (1)

											
										
										
											2019-08-02 12:57:53 +00:00
-												Translate models in english

											
										
										
											2021-10-23 16:31:03 +00:00
+								from .models import Author, Comic
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								from .scraper import BedetequeScraper
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								def generate_side_identifier(title, authors, subtitle=None):
 								    if isinstance(authors, QuerySet):
 								        authors = list(authors)
 								    title_normalized = title.upper()
 								    title_normalized = title_normalized.replace('’', '\'')
 								    title_normalized = re.sub(r'^DE ', '', title_normalized)
-												Remove Des from title

											
										
										
											2020-10-03 09:03:22 +00:00
+								    title_normalized = re.sub(r'^DES ', '', title_normalized)
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								    title_normalized = re.sub(r'^LE ', '', title_normalized)
 								    title_normalized = re.sub(r'^LA ', '', title_normalized)
 								    title_normalized = re.sub(r'^LES ', '', title_normalized)
 								    title_normalized = re.sub(r'^L\'', '', title_normalized)
-												Normalize after replacing UN

											
										
										
											2020-10-02 14:20:24 +00:00
+								    title_normalized = re.sub(r'^UN ', '', title_normalized)
 								    title_normalized = re.sub(r'^UNE ', '', title_normalized)
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								    title_normalized = re.sub(r'^THE ', '', title_normalized)
 								    title_normalized = re.sub(r'Œ', 'OE', title_normalized)
 								    title_normalized = title_normalized.replace(' ', '')
-												Normalize after replacing L'

											
										
										
											2020-10-02 11:54:16 +00:00
+								    title_normalized = ''.join(
 								        char
 								        for char in unicodedata.normalize(
 								            'NFKD', title_normalized.casefold())
 								        if all(not unicodedata.category(char).startswith(cat)
 								               for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
 								    ).casefold().upper()
-												Normalize before removing wrong letters

											
										
										
											2020-10-02 14:59:58 +00:00
+								    title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								    authors = authors.copy()
 								    def sort(author):
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								        return "{:042d}".format(-author.note) + author.name.split(" ")[-1]\
 								               + ".{:042d}".format(author.pk)
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
 								    authors.sort(key=sort)
 								    primary_author = authors[0]
 								    author_name = primary_author.name.upper()
 								    if ',' not in author_name and ' ' in author_name:
 								        author_name = author_name.split(' ')[-1]
 								    author_name = ''.join(
 								        char for char in unicodedata.normalize('NFKD', author_name.casefold())
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								        if all(not unicodedata.category(char).startswith(cat)
 								               for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
+								    ).casefold().upper()
 								    author_name = re.sub("[^A-Z]", "", author_name)
 								    side_identifier = "{:.3} {:.3}".format(author_name, title_normalized, )
 								    if subtitle:
 								        subtitle = re.sub(r'</span>', '', subtitle)
 								        subtitle = re.sub(r'<span.*>', '', subtitle)
 								        start = subtitle.split(' ')[0].replace('.', '')
-												Allow to have a R in front of subtitles

											
										
										
											2020-10-02 14:43:04 +00:00
+								        start = re.sub("^R?", "", start)
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
 								        if start.isnumeric():
 								            side_identifier += " {:0>2}".format(start, )
 								    # Normalize side identifier, in order to remove accents
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								    side_identifier = ''.join(
 								        char for char in unicodedata.normalize('NFKD',
 								                                               side_identifier.casefold())
 								        if all(not unicodedata.category(char).startswith(cat)
 								               for cat in {'M', 'P', 'Z', 'C'})
 								        or char == ' ').casefold().upper()
-												Add script to regenerate side identifiers

											
										
										
											2020-09-28 08:51:21 +00:00
 								    return side_identifier
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
+								class MediaAdminForm(ModelForm):
 								    def __init__(self, *args, **kwargs):
 								        super().__init__(*args, **kwargs)
-												Fix isbn widget

											
										
										
											2019-08-15 14:30:44 +00:00
+								        isbn_field = self.fields.get('isbn')
 								        if isbn_field:
 								            isbn_field.widget.template_name = "media/isbn_button.html"
 								            isbn_field.widget.attrs.update({'autofocus': 'autofocus'})
-												Add button to generate a side identifier

											
										
										
											2020-10-02 11:12:40 +00:00
+								        side_identifier_field = self.fields.get('side_identifier')
 								        if side_identifier_field and self.instance and self.instance.pk:
 								            instance = self.instance
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								            title, authors, subtitle = instance.title,\
 								                instance.authors.all(), None
-												Add button to generate a side identifier

											
										
										
											2020-10-02 11:12:40 +00:00
+								            if hasattr(instance, "subtitle"):
 								                subtitle = instance.subtitle
 								            side_identifier_field.widget.attrs.update(
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								                {'data-generated-side-identifier':
 								                 generate_side_identifier(title, authors, subtitle)})
 								            side_identifier_field.widget.template_name =\
 								                "media/generate_side_identifier.html"
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
-												Temporary use ISBNDB to scrap data

											
										
										
											2020-10-03 13:36:28 +00:00
+								    def download_data_isbndb(self, isbn):
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								        api_url = "https://api2.isbndb.com/book/" + str(isbn)\
 								                  + "?Authorization=" + os.getenv("ISBNDB_KEY", "")
-												Incorrect use of Authorization header

											
										
										
											2020-10-03 13:40:33 +00:00
+								        req = urllib.request.Request(api_url)
-												Fix CI, add django22-py39 CI step

											
										
										
											2020-12-28 21:55:48 +00:00
+								        req.add_header("Authorization", os.getenv("ISBNDB_KEY", ""))
-												Too much 404...

											
										
										
											2020-10-03 13:51:25 +00:00
+								        try:
 								            with urllib.request.urlopen(req) as url:
 								                data: dict = json.loads(url.read().decode())["book"]
 								        except HTTPError:
 								            return False
-												Incorrect use of Authorization header

											
										
										
											2020-10-03 13:41:49 +00:00
+								        print(data)
-												Wrong use of ISBNDB API

											
										
										
											2020-10-03 13:47:22 +00:00
+								        data.setdefault("title", "")
 								        data.setdefault("date_published", "1970-01-01")
 								        data.setdefault("pages", 0)
 								        data.setdefault("authors", [])
 								        data.setdefault("image", "")
-												Incorrect use of Authorization header

											
										
										
											2020-10-03 13:41:49 +00:00
+								        self.cleaned_data["title"] = data["title"]
-												Better publish date

											
										
										
											2020-10-03 13:55:56 +00:00
+								        self.cleaned_data["publish_date"] = data["date_published"][:10]
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								        while len(self.cleaned_data["publish_date"]) == 4 \
 								                or len(self.cleaned_data["publish_date"]) == 7:
-												Better publish date

											
										
										
											2020-10-03 13:55:56 +00:00
+								            self.cleaned_data["publish_date"] += "-01"
-												Temporary use ISBNDB to scrap data

											
										
										
											2020-10-03 13:36:28 +00:00
+								        self.cleaned_data["number_of_pages"] = data["pages"]
 								        self.cleaned_data["authors"] = \
-												Translate models in english

											
										
										
											2021-10-23 16:31:03 +00:00
+								            list(Author.objects.get_or_create(name=author_name)[0]
-												Linting

											
										
										
											2020-12-28 22:12:27 +00:00
+								                 for author_name in data["authors"])
-												Temporary use ISBNDB to scrap data

											
										
										
											2020-10-03 13:36:28 +00:00
+								        self.cleaned_data["external_url"] = data["image"]
 								        return True
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								    def download_data_bedeteque(self, isbn):
 								        """
 								        Download data from bedeteque
 								        :return True if success
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
+								        """
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								        scraper = BedetequeScraper()
 								        r = scraper.search_by_isbn(isbn)
 								        if not r:
 								            return False
 								        # If results, then take the most accurate
-												Fix tests

											
										
										
											2021-10-23 16:33:51 +00:00
+								        data = scraper.scrap_comic_info(r[0])
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								        self.cleaned_data.update(data)
 								        return True
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								    def download_data_google(self, isbn):
 								        """
 								        Download data from google books
 								        :return True if success
 								        """
-												Fix linters

											
										
										
											2020-05-12 15:34:53 +00:00
+								        api_url = "https://www.googleapis.com/books/v1/volumes?q=ISBN:{}"\
 								            .format(isbn)
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								        with urllib.request.urlopen(api_url) as url:
 								            data = json.loads(url.read().decode())
 								        if data and data['totalItems']:
-												Ensure that the fetched roman ISBN is the queried ISBN

											
										
										
											2020-10-03 13:10:43 +00:00
+								            fetched_item = None
 								            for item in data['items']:
 								                for identifiers in item["volumeInfo"]["industryIdentifiers"]:
 								                    if identifiers["identifier"] == isbn:
 								                        fetched_item = item
 								                        break
 								                if fetched_item:
 								                    break
 								            if not fetched_item:
 								                return False
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								            # Fill the data
-												Ensure that the fetched roman ISBN is the queried ISBN

											
										
										
											2020-10-03 13:10:43 +00:00
+								            self.parse_data_google(fetched_item)
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								            return True
 								        return False
 								    def parse_data_google(self, data):
 								        """
 								        Fill the cleaned data from one Google Books volume.

 								        :param data: a volume item; its ``volumeInfo`` entry must contain
 								            ``canonicalVolumeLink``, every other key is optional.
 								        """
 								        info = data['volumeInfo']
 								        self.cleaned_data['external_url'] = info['canonicalVolumeLink']
 								        if 'title' in info:
 								            self.cleaned_data['title'] = info['title']
 								        # The subtitle is read from ``volumeInfo``, so this is also where
 								        # its presence has to be checked.
 								        if 'subtitle' in info:
 								            self.cleaned_data['subtitle'] = info['subtitle']
 								        if 'pageCount' in info:
 								            self.cleaned_data['number_of_pages'] = info['pageCount']
 								        elif not self.cleaned_data.get('number_of_pages'):
 								            self.cleaned_data['number_of_pages'] = 0
 								        if 'publishedDate' in info:
 								            # Work on a local copy so the caller's dict is left untouched.
 								            published = info['publishedDate']
 								            if "-" not in published:
 								                # Year only: use the first day of the year.
 								                published += "-01-01"
 								            elif len(published) == 7:
 								                # ``YYYY-MM``: use the first day of the month.
 								                published += "-01"
 								            # Keep only the ten first characters (``YYYY-MM-DD``).
 								            self.cleaned_data['publish_date'] = published[:10]
 								        # Start from an empty list unless authors were already provided.
 								        if not self.cleaned_data.get('authors'):
 								            self.cleaned_data['authors'] = []
 								        for author in info.get('authors', ()):
 								            author_obj = Author.objects.get_or_create(name=author)[0]
 								            self.cleaned_data['authors'].append(author_obj)
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								    def download_data_openlibrary(self, isbn):
 								        """
 								        Download data from openlibrary
 								        :return True if success
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
+								        """
 								        api_url = "https://openlibrary.org/api/books?bibkeys=ISBN:{}" \
 								                  "&format=json&jscmd=data".format(isbn)
 								        with urllib.request.urlopen(api_url) as url:
 								            data = json.loads(url.read().decode())
-												Clean code

											
										
										
											2020-02-10 10:47:59 +00:00
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
+								        if data and data['ISBN:' + isbn]:
 								            data = data['ISBN:' + isbn]
-												Fix data dict for isbn

											
										
										
											2019-08-11 09:19:24 +00:00
+								            if 'url' in data:
-												Add bedeteque scraper

											
										
										
											2019-08-19 10:17:49 +00:00
+								                # Fill the data
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								                self.parse_data_openlibrary(data)
 								                return True
 								        return False
-												Clean code

											
										
										
											2020-02-10 10:47:59 +00:00
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								    def parse_data_openlibrary(self, data):
 								        self.cleaned_data['external_url'] = data['url']
 								        if 'title' in data:
 								            self.cleaned_data['title'] = data['title']
 								        if 'subtitle' in data:
 								            self.cleaned_data['subtitle'] = data['subtitle']
-												Clean code

											
										
										
											2020-02-10 10:47:59 +00:00
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								        if 'number_of_pages' in data:
 								            self.cleaned_data['number_of_pages'] = \
 								                data['number_of_pages']
 								        elif not self.cleaned_data['number_of_pages']:
 								            self.cleaned_data['number_of_pages'] = 0
-												Clean code

											
										
										
											2020-02-10 10:47:59 +00:00
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								        if 'publish_date' in data:
 								            months = ['January', 'February', "March", "April", "Mai",
 								                      "June", "July", "August", "September",
 								                      "October", "November", "December"]
 								            split = data['publish_date'].replace(',', '').split(' ')
-												Fixed publish date when there is only the year

											
										
										
											2020-02-10 18:33:34 +00:00
+								            if len(split) == 1:
 								                self.cleaned_data['publish_date'] = split[0] + "-01-01"
 								            else:
-												Fix openlibrary scrap, fix linters

											
										
										
											2020-05-12 12:56:31 +00:00
+								                month_to_number = dict(
 								                    Jan="01",
 								                    Feb="02",
 								                    Mar="03",
 								                    Apr="04",
 								                    May="05",
 								                    Jun="06",
 								                    Jul="07",
 								                    Aug="08",
 								                    Sep="09",
 								                    Oct="10",
 								                    Nov="11",
 								                    Dec="12",
 								                )
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								                if split[0][:3] in month_to_number:
-												Fix openlibrary scrap, fix linters

											
										
										
											2020-05-12 12:56:31 +00:00
+								                    self.cleaned_data['publish_date']\
 								                        = split[2] + "-" \
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								                        + month_to_number[split[0][:3]] + "-" + split[1]
-												Fix openlibrary scrap, fix linters

											
										
										
											2020-05-12 12:56:31 +00:00
+								                else:
 								                    self.cleaned_data['publish_date'] = "{}-{:02d}-{:02d}" \
 								                        .format(split[2], months.index(split[0])
 								                                + 1, int(split[1]), )
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
-												Fix synthax

											
										
										
											2020-05-21 15:07:50 +00:00
+								        if 'authors' not in self.cleaned_data \
 								                or not self.cleaned_data['authors']:
-												Scrap from Google books API

											
										
										
											2020-05-12 15:08:40 +00:00
+								            self.cleaned_data['authors'] = list()
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								        if 'authors' in data:
 								            for author in data['authors']:
-												Translate models in english

											
										
										
											2021-10-23 16:31:03 +00:00
+								                author_obj = Author.objects.get_or_create(
-												Less complexity for download_data_openlibrary

											
										
										
											2020-02-10 11:08:48 +00:00
+								                    name=author['name'])[0]
 								                self.cleaned_data['authors'].append(author_obj)
-												Add ISBN data downloader

											
										
										
											2019-08-11 08:40:39 +00:00
 								    def clean(self):
 								        """
 								        Run the regular form cleaning, then, when the user asked to fetch
 								        the data from the ISBN, download it and fill the cleaned data.

 								        :return: the cleaned data
 								        """
 								        super().clean()

 								        fetch_and_add = "_isbn_addanother" in self.data
 								        if "_isbn" not in self.data and not fetch_and_add:
 								            # Regular submission: nothing to download.
 								            return self.cleaned_data

 								        isbn = self.cleaned_data.get('isbn')

 								        if fetch_and_add:
 								            # Inject ``_addanother`` into a copy of the submitted data and
 								            # hand that copy back to the request.
 								            # NOTE(review): presumably so the submission is then handled
 								            # like "save and add another" - confirm against the admin.
 								            self.data = self.data.copy()
 								            self.data['_addanother'] = 42
 								            self.request.POST = self.data

 								        if not isbn:
 								            return self.cleaned_data

 								        # Sources are tried in this order; the first one that succeeds
 								        # wins and the remaining ones are not queried.
 								        scrapers = (
 								            self.download_data_isbndb,
 								            self.download_data_bedeteque,
 								            self.download_data_google,
 								            self.download_data_openlibrary,
 								        )
 								        if not any(scraper(isbn) for scraper in scrapers):
 								            self.add_error('isbn', _("This ISBN is not found."))
 								            return self.cleaned_data

 								        # Remove `(AUT)` markers from the downloaded title.
 								        if self.cleaned_data['title']:
 								            self.cleaned_data['title'] = re.sub(
 								                r'\(AUT\) ', '', self.cleaned_data['title'])

 								        # The side identifier can only be derived when authors are known.
 								        if self.cleaned_data['authors']:
 								            self.cleaned_data['side_identifier'] = \
 								                generate_side_identifier(
 								                    self.cleaned_data["title"],
 								                    self.cleaned_data["authors"],
 								                    self.cleaned_data["subtitle"],
 								                )

 								        return self.cleaned_data
 								    def _clean_fields(self):
 								        """
 								        Clean the form fields, skipping per-field validation when a
 								        valid ISBN has been typed.

 								        The ISBN is cleaned first because the loop below looks at the
 								        cleaned ISBN to decide whether the other fields are validated.
 								        Validation errors are recorded with ``add_error`` and never
 								        raised to the caller.
 								        """
 								        # First clean ISBN field
 								        isbn_field = self.fields['isbn']
 								        isbn = isbn_field.widget.value_from_datadict(
 								            self.data, self.files, self.add_prefix('isbn'))
 								        try:
 								            self.cleaned_data['isbn'] = isbn_field.clean(isbn)
 								        except ValidationError:
 								            # Do not let the error escape this method. With no cleaned
 								            # ISBN the loop below validates every field, the ISBN
 								            # included, so the error is recorded there exactly once.
 								            self.cleaned_data['isbn'] = None

 								        for name, field in self.fields.items():
 								            # value_from_datadict() gets the data from the data dictionaries.
 								            # Each widget type knows how to retrieve its own data, because some
 								            # widgets split data over several HTML fields.
 								            if field.disabled:
 								                value = self.get_initial_for_field(field, name)
 								            else:
 								                value = field.widget.value_from_datadict(
 								                    self.data, self.files, self.add_prefix(name))
 								            try:
 								                # We don't want to check a field when we enter an ISBN.
 								                if "isbn" not in self.data \
 								                        or not self.cleaned_data.get('isbn'):
 								                    value = field.clean(value)
 								                self.cleaned_data[name] = value
 								                # Field-specific ``clean_<name>`` hooks still run.
 								                if hasattr(self, 'clean_%s' % name):
 								                    value = getattr(self, 'clean_%s' % name)()
 								                    self.cleaned_data[name] = value
 								            except ValidationError as e:
 								                self.add_error(name, e)
-												Split comic strips and mangas

											
										
										
											2020-05-21 14:56:41 +00:00
 								    class Meta:
 								        # Model edited through this form.
 								        model = Comic
 								        # Model fields exposed by the form, in display order.
 								        fields = (
 								            'isbn',
 								            'title',
 								            'subtitle',
 								            'external_url',
 								            'side_identifier',
 								            'authors',
 								            'number_of_pages',
 								            'publish_date',
 								            'present',
 								        )