1
0
mirror of https://gitlab.crans.org/mediatek/med.git synced 2024-12-25 17:42:25 +00:00

Import romans without any ISBN

This commit is contained in:
Yohann D'ANELLO 2020-05-24 16:01:05 +02:00
parent 963ff25506
commit 44abcaf202

View File

@ -0,0 +1,65 @@
import re
import unicodedata
from argparse import FileType
from sys import stdin
from django.core.management import BaseCommand
from media.models import Auteur, Roman
class Command(BaseCommand):
    """Import romans that have no ISBN from a pipe-separated listing.

    Every input line is expected to have the form ``authors|title`` where
    ``authors`` is a ``;``-separated list of author names.  Lines that do
    not split into exactly two fields are skipped.  For each remaining
    line the authors are fetched or created, and a ``Roman`` is created
    with a side identifier made of the first three characters of the
    first author's upper-cased name and the first three characters of the
    normalized title.
    """

    # Leading articles dropped from the normalized title.  They are tried
    # in this order, each one at most once, like the original chain of
    # anchored substitutions.
    _LEADING_ARTICLES = ('LE ', 'LA ', 'LES ', 'THE ')

    # Prefixes of the Unicode general categories removed from titles:
    # marks, punctuation, separators and control/other characters.
    _DROPPED_CATEGORIES = ('M', 'P', 'Z', 'C')

    def add_arguments(self, parser):
        """Declare the optional ``input`` file argument (default: stdin)."""
        parser.add_argument('input', nargs='?',
                            type=FileType('r'),
                            default=stdin,
                            help="Romans to be imported, one "
                                 "'authors|title' entry per line.")

    @classmethod
    def _normalize_title(cls, title):
        """Return the upper-cased, article-free, unpunctuated *title*.

        The result contains no spaces and is only used to build the side
        identifier; the title stored on the roman is left untouched.
        """
        # Treat the typographic apostrophe like the ASCII one.
        normalized = title.upper().replace('\u2019', '\'')
        # The elided article has to be removed here: the filter below
        # strips every punctuation character, apostrophes included, so a
        # later check for "L'" could never match.
        if normalized.startswith('L\''):
            normalized = normalized[2:]
        # Decompose the characters, then drop diacritics, punctuation,
        # separators and control characters while keeping plain spaces so
        # that the leading articles can still be recognised.
        normalized = ''.join(
            char
            for char in unicodedata.normalize(
                'NFKD', normalized.casefold())
            if char == ' '
            or not unicodedata.category(char).startswith(
                cls._DROPPED_CATEGORIES)
        ).casefold().upper()
        for article in cls._LEADING_ARTICLES:
            if normalized.startswith(article):
                normalized = normalized[len(article):]
        # NFKD does not decompose this ligature, so expand it explicitly.
        normalized = normalized.replace('Œ', 'OE')
        return normalized.replace(' ', '')

    def handle(self, *args, **options):
        """Read the input listing and create one roman per valid line."""
        # rstrip() instead of slicing off the last character: the final
        # line of the input is not guaranteed to end with a newline.
        romans = [line.rstrip('\n').split('|')
                  for line in options["input"]]
        self.stdout.write(
            "Registering {} romans".format(len(romans)))

        imported = 0
        for line_number, book in enumerate(romans, start=1):
            if len(book) != 2:
                # Blank lines are ignored quietly; anything else that is
                # malformed is reported so that no entry vanishes
                # unnoticed.
                if any(book):
                    self.stderr.write(self.style.WARNING(
                        "Line {}: expected 'authors|title', skipped"
                        .format(line_number)))
                continue

            author_names, title = book
            title_normalized = self._normalize_title(title)

            authors = [Auteur.objects.get_or_create(name=n)[0]
                       for n in author_names.split(';')]
            # "{:.3}" truncates each part to at most three characters.
            side_identifier = "{:.3} {:.3}".format(
                authors[0].name.upper(), title_normalized)

            roman = Roman.objects.create(
                title=title,
                side_identifier=side_identifier,
            )
            roman.authors.set(authors)
            # NOTE(review): save() after set() is kept from the original
            # flow; confirm whether the model relies on this second save.
            roman.save()
            self.stdout.write(self.style.SUCCESS(
                "Roman imported"))
            imported += 1

        self.stdout.write(self.style.SUCCESS(
            "{count} romans imported".format(count=imported)))