import re
import unicodedata
from argparse import FileType
from sys import stdin

from django.core.management import BaseCommand

from media.models import Auteur, Roman

# Leading articles removed from a normalized title, tried in this order.
_LEADING_ARTICLE_PATTERNS = (r'^LE ', r'^LA ', r'^LES ', r"^L'", r'^THE ')

# Unicode general-category prefixes filtered out of titles:
# M (marks), P (punctuation), Z (separators), C (other/control).
_STRIPPED_CATEGORIES = ('M', 'P', 'Z', 'C')

# Characters kept despite belonging to a stripped category: the space and the
# apostrophe are still needed to recognise leading articles ("LE ", "L'").
_KEPT_CHARACTERS = " '"


def _normalize_title(title):
    """Return the upper-case, article-free key derived from ``title``.

    The title is upper-cased, typographic apostrophes are replaced by ASCII
    ones, the text is NFKD-decomposed and every character whose Unicode
    category starts with M, P, Z or C is dropped (spaces and apostrophes are
    kept until the articles are handled). A leading "LE ", "LA ", "LES ",
    "L'" or "THE " is then removed, "Œ" is expanded to "OE", and finally the
    remaining apostrophes and spaces are removed.
    """
    text = title.upper().replace('’', '\'')
    text = ''.join(
        char
        for char in unicodedata.normalize('NFKD', text.casefold())
        if char in _KEPT_CHARACTERS
        or not unicodedata.category(char).startswith(_STRIPPED_CATEGORIES)
    ).casefold().upper()

    # Apostrophes used as opening quotes must not hide a leading article.
    text = text.lstrip('\'')

    # The apostrophe is still present here, so "L'" can actually match;
    # stripping it together with the rest of the punctuation beforehand made
    # that substitution a no-op.
    for pattern in _LEADING_ARTICLE_PATTERNS:
        text = re.sub(pattern, '', text)

    text = re.sub(r'Œ', 'OE', text)
    return text.replace('\'', '').replace(' ', '')


class Command(BaseCommand):
    """Import romans from a text file with one ``authors|title`` per line.

    Several authors can be given for one roman by separating their names
    with ``;``.
    """

    def add_arguments(self, parser):
        """Declare the optional ``input`` file argument (defaults to stdin)."""
        parser.add_argument('input', nargs='?',
                            type=FileType('r'),
                            default=stdin,
                            help="Romans to be imported.")

    def handle(self, *args, **options):
        """Create one ``Roman`` per well-formed input line.

        Lines that do not split into exactly two ``|``-separated fields are
        skipped. Authors are fetched or created by name. The side identifier
        is made of the first three characters of the first author's
        upper-cased name, a space, and the first three characters of the
        normalized title.
        """
        source = options["input"]
        # Only strip the line terminator: slicing off the last character
        # would truncate the title of a final line lacking a newline.
        romans = [line.rstrip('\r\n').split('|') for line in source]

        self.stdout.write("Registering {} romans".format(len(romans)))

        imported = 0

        for book in romans:
            if len(book) != 2:
                continue

            author_names, title = book
            title_normalized = _normalize_title(title)

            authors = [Auteur.objects.get_or_create(name=name)[0]
                       for name in author_names.split(';')]
            side_identifier = "{:.3} {:.3}".format(
                authors[0].name.upper(),
                title_normalized,
            )
            roman = Roman.objects.create(
                title=title,
                side_identifier=side_identifier,
            )
            roman.authors.set(authors)
            roman.save()
            self.stdout.write(self.style.SUCCESS(
                "Roman imported"))
            imported += 1

        self.stdout.write(self.style.SUCCESS(
            "{count} romans imported".format(count=imported)))