# med/media/management/commands/import_no_isbn_roman.py

import re
import unicodedata
from argparse import FileType
from sys import stdin

from django.core.management import BaseCommand

from media.models import Auteur, Roman


class Command(BaseCommand):
    """Import novels ("romans") that have no ISBN from a text file.

    Each input line is expected to look like ``author1;author2|title``:
    the authors are separated by ``;`` and the author list is separated
    from the title by ``|``. For every well-formed line the authors are
    fetched or created, a ``Roman`` is created with a side identifier built
    from the first author's name and the normalized title, and the authors
    are attached to it.
    """

    # Leading articles removed from the normalized title, tried in this
    # order (one after the other) before the side identifier is built.
    _LEADING_ARTICLES = ('LE ', 'LA ', 'LES ', 'THE ')

    def add_arguments(self, parser):
        """Declare the optional ``input`` file argument (defaults to stdin)."""
        parser.add_argument('input', nargs='?',
                            type=FileType('r'),
                            default=stdin,
                            help="Romans to be imported.")

    @staticmethod
    def _normalize_title(title):
        """Return the upper-case, article-free, ASCII-like key of *title*.

        The result has no diacritics, punctuation, control characters or
        spaces, and no leading ``L'``/``LE``/``LA``/``LES``/``THE`` article.
        """
        normalized = title.upper().replace('’', '\'')
        # The elided article has to be removed *before* punctuation is
        # filtered out below: once the apostrophe is gone, "L'ETRANGER"
        # becomes "LETRANGER" and the article can no longer be recognised.
        if normalized.startswith('L\''):
            normalized = normalized[2:]
        # Decompose accented characters, then drop marks (M), punctuation
        # (P), separators (Z) and control characters (C). Plain spaces are
        # kept for now so that the article prefixes below can still match.
        normalized = ''.join(
            char
            for char in unicodedata.normalize('NFKD', normalized.casefold())
            if char == ' ' or not unicodedata.category(char).startswith(
                ('M', 'P', 'Z', 'C'))
        ).casefold().upper()
        for article in Command._LEADING_ARTICLES:
            if normalized.startswith(article):
                normalized = normalized[len(article):]
        normalized = normalized.replace('Œ', 'OE')
        return normalized.replace(' ', '')

    def handle(self, *args, **options):
        """Read the input file and create one ``Roman`` per valid line.

        Blank lines are ignored. Lines that do not split into exactly two
        ``|``-separated fields are reported on stderr and skipped.
        """
        source = options["input"]
        entries = []
        for line_number, line in enumerate(source, start=1):
            # rstrip rather than line[:-1]: the last line of the input may
            # not end with a newline, and slicing would eat a real character.
            line = line.rstrip('\n')
            if not line.strip():
                continue
            entries.append((line_number, line.split('|')))

        self.stdout.write(
            "Registering {count} romans".format(count=len(entries)))

        imported = 0

        for line_number, fields in entries:
            if len(fields) != 2:
                self.stderr.write(
                    "Line {number} skipped: expected 'authors|title'"
                    .format(number=line_number))
                continue

            author_names, title = fields
            title_normalized = self._normalize_title(title)
            authors = [Auteur.objects.get_or_create(name=name)[0]
                       for name in author_names.split(';')]
            # "{:.3}" truncates each part to its first three characters.
            side_identifier = "{:.3} {:.3}".format(
                authors[0].name.upper(), title_normalized)
            roman = Roman.objects.create(
                title=title,
                side_identifier=side_identifier,
            )
            roman.authors.set(authors)
            roman.save()

            self.stdout.write(self.style.SUCCESS(
                "Roman imported"))
            imported += 1

        self.stdout.write(self.style.SUCCESS(
            "{count} romans imported".format(count=imported)))