# Leading articles dropped from a title; each one is tried once, in this order.
_LEADING_ARTICLES = ("DE ", "LE ", "LA ", "LES ", "L'", "THE ")


def _strip_marks(text, keep=" "):
    """
    Return ``text`` upper-cased and stripped of accents and punctuation.

    The text is case-folded and NFKD-decomposed, then every character whose
    Unicode category starts with M (marks), P (punctuation), Z (separators)
    or C (control) is dropped, unless it appears in ``keep``.
    """
    decomposed = unicodedata.normalize('NFKD', text.casefold())
    return ''.join(
        char
        for char in decomposed
        if char in keep or unicodedata.category(char)[0] not in 'MPZC'
    ).casefold().upper()


def generate_side_identifier(title, authors, subtitle=None):
    """
    Build the side identifier of a medium.

    The identifier is made of the first three letters of the primary
    author's name, a space, the first three characters of the normalized
    title and, when the subtitle starts with a number, that number padded
    to two digits (e.g. ``CAM ETR`` or ``GOS AST 03``).

    :param title: title of the medium.
    :param authors: iterable of authors (a list or a queryset); each author
        must expose ``name``, ``note`` and ``pk``. The iterable is copied,
        the caller's object is never reordered.
    :param subtitle: optional subtitle; only its first space-separated token
        is inspected.
    :return: the upper-case, accent-free side identifier.
    :raises IndexError: if ``authors`` is empty.
    """
    # Materialize once: works for querysets, lists and any other iterable,
    # and guarantees that the caller's collection is left untouched.
    authors = list(authors)

    title_normalized = title.upper().replace('’', '\'')
    # The apostrophe must survive this pass, otherwise the "L'" article
    # could never be recognised below. It is removed by the final regex.
    title_normalized = _strip_marks(title_normalized, keep=" '")
    for article in _LEADING_ARTICLES:
        if title_normalized.startswith(article):
            title_normalized = title_normalized[len(article):]
    title_normalized = title_normalized.replace('Œ', 'OE')
    title_normalized = re.sub(r"[^A-Z0-9$]", "", title_normalized)

    def sort_key(author):
        # Compare notes as numbers (highest first): a string key would rank
        # a note of 5 before a note of 60. Ties are broken on the last word
        # of the name, then on the primary key.
        return -author.note, author.name.split(" ")[-1], author.pk

    primary_author = sorted(authors, key=sort_key)[0]
    author_name = primary_author.name.upper()
    # "First Last" keeps only the last word; "Last, First" is left as is.
    if ',' not in author_name and ' ' in author_name:
        author_name = author_name.split(' ')[-1]
    author_name = re.sub(r"[^A-Z]", "", _strip_marks(author_name))

    side_identifier = "{:.3} {:.3}".format(author_name, title_normalized)

    if subtitle:
        start = subtitle.split(' ')[0].replace('.', '')
        if start.isnumeric():
            side_identifier += " {:0>2}".format(start)

    # Normalize side identifier, in order to remove accents
    return _strip_marks(side_identifier)
+ author.name - - authors.sort(key=sort) - author_name = self.cleaned_data['authors'][0].name - if ',' not in author_name and ' ' in author_name: - author_name = author_name.split(' ')[-1] - title_normalized = self.cleaned_data['title'].upper() - title_normalized = re.sub(r'^LE ', '', title_normalized) - title_normalized = re.sub(r'^LA ', '', title_normalized) - title_normalized = re.sub(r'^LES ', '', title_normalized) - title_normalized = re.sub(r'^L\'', '', title_normalized) - title_normalized = re.sub(r'^THE ', '', title_normalized) - title_normalized = re.sub(r'Œ', 'OE', title_normalized) - side_identifier = "{:.3} {:.3}".format( - author_name, - title_normalized.replace(' ', ''), ) - - if self.cleaned_data['subtitle']: - self.cleaned_data['subtitle'] = re.sub( - r'', - '', - self.cleaned_data['subtitle'] - ) - self.cleaned_data['subtitle'] = re.sub( - r'', - '', - self.cleaned_data['subtitle'] - ) - start = self.cleaned_data['subtitle'].split(' ')[0] \ - .replace('.', '') - - if start.isnumeric(): - side_identifier += " {:0>2}".format(start, ) - - # Normalize side identifier, in order to remove accents - side_identifier = ''.join( - char - for char in unicodedata.normalize( - 'NFKD', side_identifier.casefold()) - if all(not unicodedata.category(char).startswith(cat) - for cat in {'M', 'P', 'Z', 'C'}) or char == ' ' - ).casefold().upper() self.cleaned_data['side_identifier'] = side_identifier return self.cleaned_data diff --git a/media/management/commands/import_no_isbn_roman.py b/media/management/commands/import_no_isbn_roman.py index 3e4c52d..0bbb739 100644 --- a/media/management/commands/import_no_isbn_roman.py +++ b/media/management/commands/import_no_isbn_roman.py @@ -1,10 +1,10 @@ -import re -import unicodedata from argparse import FileType from sys import stdin from django.core.management import BaseCommand -from media.models import Auteur, Roman + +from media.forms import generate_side_identifier +from media.models import Roman, Auteur class 
class Command(BaseCommand):
    """
    Regenerate the side identifiers of every medium of a given type.

    Each stored identifier is compared with the one computed by
    ``generate_side_identifier``; differing identifiers are replaced, after
    confirmation unless the non-interactive mode is enabled.
    """

    def add_arguments(self, parser):
        """Declare the ``--type`` and ``--noninteractivemode`` options."""
        parser.add_argument('--type', '-t',
                            type=str,
                            default='bd',
                            choices=['bd', 'manga', 'roman'],
                            help="Type of medium where the sides need to be regenerated.")
        parser.add_argument('--noninteractivemode', '-n', action="store_true",
                            help="Disable the interaction mode and replace existing side identifiers.")

    @transaction.atomic
    def handle(self, *args, **options):
        """
        Walk through all media of the selected type and update their sides.

        The whole run happens in a single transaction, so an unexpected
        error leaves the database untouched.
        """
        medium_classes = {"bd": BD, "manga": Manga, "roman": Roman}
        medium_class = medium_classes[options["type"]]
        interactive_mode = not options["noninteractivemode"]

        replaced = 0

        for obj in medium_class.objects.all():
            # Evaluate the relation once instead of once per use.
            authors = list(obj.authors.all())
            if not authors:
                # A side identifier needs a primary author: report the
                # medium and go on rather than aborting the whole run.
                self.stdout.write(self.style.WARNING(f"{obj} has no author, skipping."))
                continue

            current_side_identifier = obj.side_identifier
            subtitle = getattr(obj, "subtitle", None)
            generated_side_identifier = generate_side_identifier(obj.title, authors, subtitle)
            if current_side_identifier == generated_side_identifier:
                continue

            answer = 'y'
            if interactive_mode:
                answer = ''
                while answer not in ('y', 'n'):
                    # Slicing (not indexing) so that an empty answer simply
                    # asks again instead of raising IndexError.
                    answer = input(f"For medium {obj}, current side: {current_side_identifier}, generated side: "
                                   f"{generated_side_identifier}, would you like to replace ? [y/n]"
                                   ).strip().lower()[:1]

            if answer == 'y':
                self.stdout.write(self.style.WARNING(f"Replace side of {obj} from {current_side_identifier} "
                                                     f"to {generated_side_identifier}..."))
                obj.side_identifier = generated_side_identifier
                obj.save()
                replaced += 1

        if replaced:
            self.stdout.write(self.style.SUCCESS(f"{replaced} side identifiers were replaced."))
        else:
            self.stdout.write(self.style.WARNING("Nothing changed."))