mirror of
https://gitlab.crans.org/mediatek/med.git
synced 2024-11-27 00:47:10 +00:00
Add script to regenerate side identifiers
This commit is contained in:
parent
be76bf4857
commit
9ecd876923
107
media/forms.py
107
media/forms.py
@ -1,5 +1,5 @@
|
|||||||
# -*- mode: python; coding: utf-8 -*-
|
# -*- mode: python; coding: utf-8 -*-
|
||||||
# Copyright (C) 2017-2019 by BDE ENS Paris-Saclay
|
# Copyright (C) 2017-2020 by BDE ENS Paris-Saclay
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@ -7,6 +7,7 @@ import re
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
|
from django.db.models import QuerySet
|
||||||
from django.forms import ModelForm
|
from django.forms import ModelForm
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
@ -14,6 +15,60 @@ from .models import Auteur, BD
|
|||||||
from .scraper import BedetequeScraper
|
from .scraper import BedetequeScraper
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_marks(text, keep=' '):
    """
    Return ``text`` upper-cased with marks, punctuation, separators and
    control characters removed.

    The text is case-folded and NFKD-decomposed so that accents become
    separate combining marks, then every character whose Unicode category
    starts with ``M``, ``P``, ``Z`` or ``C`` is dropped, except for the
    characters listed in ``keep``.
    """
    return ''.join(
        char
        for char in unicodedata.normalize('NFKD', text.casefold())
        if char in keep or unicodedata.category(char)[0] not in 'MPZC'
    ).casefold().upper()


def generate_side_identifier(title, authors, subtitle=None):
    """
    Build the side identifier of a medium.

    The identifier is made of the first three letters of the primary
    author's name, the first three characters of the normalized title and,
    when the subtitle starts with a number, that number padded to two
    digits (for instance ``GOS AST 03``).

    :param title: title of the medium.
    :param authors: iterable (list, tuple, queryset...) of author objects
        exposing ``name``, ``note`` and ``pk`` attributes.
    :param subtitle: optional subtitle; ``<span>`` tags are stripped
        before its leading number is read.
    :return: the generated side identifier, in upper case.
    :raises IndexError: if ``authors`` is empty.
    """
    # Title: unify the apostrophes, then drop accents and punctuation.
    # The straight apostrophe is kept for now, otherwise the "L'" article
    # below could never be recognised.
    title_normalized = title.upper().replace('’', '\'')
    title_normalized = _strip_marks(title_normalized, keep=" '")

    # Leading articles are removed one after the other, in this order,
    # so "DE LA TERRE" loses both "DE " and "LA ".
    for article in ('DE ', 'LE ', 'LA ', 'LES ', 'L\'', 'THE '):
        if title_normalized.startswith(article):
            title_normalized = title_normalized[len(article):]

    title_normalized = title_normalized.replace('Œ', 'OE')
    # Also removes the remaining spaces and apostrophes.
    title_normalized = re.sub(r"[^A-Z0-9$]", "", title_normalized)

    def sort_key(author):
        # Compare the note numerically (highest first); a string key would
        # order "-10" before "-11". Ties are broken by the last word of
        # the name, then by primary key.
        return -author.note, author.name.split(" ")[-1], author.pk

    # sorted() copies any iterable, so the caller's collection is untouched.
    primary_author = sorted(authors, key=sort_key)[0]

    author_name = primary_author.name.upper()
    if ',' not in author_name and ' ' in author_name:
        # "First Last" form: keep only the last word.
        author_name = author_name.split(' ')[-1]
    author_name = re.sub(r"[^A-Z]", "", _strip_marks(author_name))

    side_identifier = "{:.3} {:.3}".format(author_name, title_normalized)

    if subtitle:
        subtitle = subtitle.replace('</span>', '')
        # Non-greedy on purpose: only the tag itself is removed, never the
        # text located between two tags.
        subtitle = re.sub(r'<span[^>]*>', '', subtitle)
        start = subtitle.split(' ')[0].replace('.', '')

        if start.isnumeric():
            side_identifier += " {:0>2}".format(start)

    # Normalize side identifier, in order to remove accents
    return _strip_marks(side_identifier)
|
||||||
|
|
||||||
|
|
||||||
class MediaAdminForm(ModelForm):
|
class MediaAdminForm(ModelForm):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
@ -188,54 +243,12 @@ class MediaAdminForm(ModelForm):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.cleaned_data['authors']:
|
if self.cleaned_data['authors']:
|
||||||
authors = self.cleaned_data['authors']
|
side_identifier = generate_side_identifier(
|
||||||
old_authors = authors.copy()
|
self.cleaned_data["title"],
|
||||||
|
self.cleaned_data["authors"],
|
||||||
def sort(author):
|
self.cleaned_data["subtitle"],
|
||||||
return str(-author.note) + "." \
|
|
||||||
+ str(old_authors.index(author)) \
|
|
||||||
+ "." + author.name
|
|
||||||
|
|
||||||
authors.sort(key=sort)
|
|
||||||
author_name = self.cleaned_data['authors'][0].name
|
|
||||||
if ',' not in author_name and ' ' in author_name:
|
|
||||||
author_name = author_name.split(' ')[-1]
|
|
||||||
title_normalized = self.cleaned_data['title'].upper()
|
|
||||||
title_normalized = re.sub(r'^LE ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^LA ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^LES ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^L\'', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^THE ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
|
|
||||||
side_identifier = "{:.3} {:.3}".format(
|
|
||||||
author_name,
|
|
||||||
title_normalized.replace(' ', ''), )
|
|
||||||
|
|
||||||
if self.cleaned_data['subtitle']:
|
|
||||||
self.cleaned_data['subtitle'] = re.sub(
|
|
||||||
r'</span>',
|
|
||||||
'',
|
|
||||||
self.cleaned_data['subtitle']
|
|
||||||
)
|
)
|
||||||
self.cleaned_data['subtitle'] = re.sub(
|
|
||||||
r'<span.*>',
|
|
||||||
'',
|
|
||||||
self.cleaned_data['subtitle']
|
|
||||||
)
|
|
||||||
start = self.cleaned_data['subtitle'].split(' ')[0] \
|
|
||||||
.replace('.', '')
|
|
||||||
|
|
||||||
if start.isnumeric():
|
|
||||||
side_identifier += " {:0>2}".format(start, )
|
|
||||||
|
|
||||||
# Normalize side identifier, in order to remove accents
|
|
||||||
side_identifier = ''.join(
|
|
||||||
char
|
|
||||||
for char in unicodedata.normalize(
|
|
||||||
'NFKD', side_identifier.casefold())
|
|
||||||
if all(not unicodedata.category(char).startswith(cat)
|
|
||||||
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
|
|
||||||
).casefold().upper()
|
|
||||||
self.cleaned_data['side_identifier'] = side_identifier
|
self.cleaned_data['side_identifier'] = side_identifier
|
||||||
|
|
||||||
return self.cleaned_data
|
return self.cleaned_data
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import re
|
|
||||||
import unicodedata
|
|
||||||
from argparse import FileType
|
from argparse import FileType
|
||||||
from sys import stdin
|
from sys import stdin
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from media.models import Auteur, Roman
|
|
||||||
|
from media.forms import generate_side_identifier
|
||||||
|
from media.models import Roman, Auteur
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
@ -29,27 +29,9 @@ class Command(BaseCommand):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
title = book[1]
|
title = book[1]
|
||||||
title_normalized = title.upper()
|
|
||||||
title_normalized = title_normalized.replace('’', '\'')
|
|
||||||
title_normalized = ''.join(
|
|
||||||
char
|
|
||||||
for char in unicodedata.normalize(
|
|
||||||
'NFKD', title_normalized.casefold())
|
|
||||||
if all(not unicodedata.category(char).startswith(cat)
|
|
||||||
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
|
|
||||||
).casefold().upper()
|
|
||||||
title_normalized = re.sub(r'^DE ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^LE ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^LA ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^LES ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^L\'', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'^THE ', '', title_normalized)
|
|
||||||
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
|
|
||||||
title_normalized = title_normalized.replace(' ', '')
|
|
||||||
authors = [Auteur.objects.get_or_create(name=n)[0]
|
authors = [Auteur.objects.get_or_create(name=n)[0]
|
||||||
for n in book[0].split(';')]
|
for n in book[0].split(';')]
|
||||||
side_identifier = "{:.3} {:.3}" \
|
side_identifier = generate_side_identifier(title, authors)
|
||||||
.format(authors[0].name.upper(), title_normalized, )
|
|
||||||
roman = Roman.objects.create(
|
roman = Roman.objects.create(
|
||||||
title=title,
|
title=title,
|
||||||
side_identifier=side_identifier,
|
side_identifier=side_identifier,
|
||||||
|
56
media/management/commands/regenerate_side_identifiers.py
Normal file
56
media/management/commands/regenerate_side_identifiers.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
from django.core.management import BaseCommand
|
||||||
|
from django.db import transaction
|
||||||
|
|
||||||
|
from media.forms import generate_side_identifier
|
||||||
|
from media.models import BD, Manga, Roman
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """
    Recompute the side identifiers of every medium of a given type and
    replace the stored ones that differ, optionally asking for a
    confirmation before each replacement.
    """

    help = "Regenerate the side identifiers of the media of a given type."

    def add_arguments(self, parser):
        """Declare the ``--type`` and ``--noninteractivemode`` options."""
        parser.add_argument('--type', '-t',
                            type=str,
                            default='bd',
                            choices=['bd', 'manga', 'roman'],
                            help="Type of medium where the sides need to be regenerated.")
        parser.add_argument('--noninteractivemode', '-n', action="store_true",
                            help="Disable the interaction mode and replace existing side identifiers.")

    @transaction.atomic
    def handle(self, *args, **options):
        """
        Walk through all media of the selected type and update the side
        identifiers that no longer match the generated ones.

        The whole run is wrapped in a single database transaction.
        """
        # argparse restricts --type to these three keys.
        medium_class = {"bd": BD, "manga": Manga, "roman": Roman}[options["type"]]
        interactive_mode = not options["noninteractivemode"]

        replaced = 0

        for obj in medium_class.objects.all():
            current_side_identifier = obj.side_identifier

            # Fetch the authors once and reuse them below.
            authors = list(obj.authors.all())
            if not authors:
                # No identifier can be generated without an author: report
                # the medium and go on instead of aborting the whole run.
                self.stdout.write(self.style.WARNING(f"Skipping {obj}: no author."))
                continue

            # Not every medium type is guaranteed to have a subtitle.
            subtitle = getattr(obj, "subtitle", None)
            generated_side_identifier = generate_side_identifier(obj.title, authors, subtitle)
            if current_side_identifier == generated_side_identifier:
                continue

            answer = 'y'
            if interactive_mode:
                answer = ''
                while answer != 'y' and answer != 'n':
                    # [:1] instead of [0]: an empty answer asks again
                    # rather than raising IndexError.
                    answer = input(f"For medium {obj}, current side: {current_side_identifier}, generated side: "
                                   f"{generated_side_identifier}, would you like to replace ? [y/n]"
                                   ).strip().lower()[:1]

            if answer == 'y':
                self.stdout.write(self.style.WARNING(f"Replace side of {obj} from {current_side_identifier} "
                                                     f"to {generated_side_identifier}..."))
                obj.side_identifier = generated_side_identifier
                obj.save()
                replaced += 1

        if replaced:
            self.stdout.write(self.style.SUCCESS(f"{replaced} side identifiers were replaced."))
        else:
            self.stdout.write(self.style.WARNING("Nothing changed."))
|
Loading…
Reference in New Issue
Block a user