From 82efeba272bbc20b173d7e68007752f153fbfee1 Mon Sep 17 00:00:00 2001 From: Yohann D'ANELLO Date: Thu, 21 May 2020 16:56:41 +0200 Subject: [PATCH] Split comic strips and mangas --- entrypoint.sh | 2 + media/admin.py | 3 +- media/forms.py | 25 ++++--- media/management/__init__.py | 0 media/management/commands/__init__.py | 0 .../management/commands/split_media_types.py | 50 ++++++++++++++ media/migrations/0027_futuremedia.py | 26 +++++++ media/migrations/0028_manga.py | 34 ++++++++++ media/models.py | 68 +++++++++++++++++++ media/scraper.py | 6 ++ 10 files changed, 202 insertions(+), 12 deletions(-) create mode 100644 media/management/__init__.py create mode 100644 media/management/commands/__init__.py create mode 100644 media/management/commands/split_media_types.py create mode 100644 media/migrations/0027_futuremedia.py create mode 100644 media/migrations/0028_manga.py diff --git a/entrypoint.sh b/entrypoint.sh index ed17c5f..50b05fe 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -13,6 +13,8 @@ sleep 2 python manage.py migrate python manage.py collectstatic --no-input +python manage.py runserver 0.0.0.0:8000 + # harakiri parameter respawns processes taking more than 20 seconds # max-requests parameter respawns processes after serving 5000 requests # vacuum parameter cleans up when stopped diff --git a/media/admin.py b/media/admin.py index 23432ec..e679f91 100644 --- a/media/admin.py +++ b/media/admin.py @@ -9,7 +9,7 @@ from reversion.admin import VersionAdmin from med.admin import admin_site from .forms import MediaAdminForm -from .models import Auteur, Emprunt, FutureMedia, Jeu, Media +from .models import Auteur, Emprunt, FutureMedia, Jeu, Media, Manga class AuteurAdmin(VersionAdmin): @@ -121,6 +121,7 @@ class JeuAdmin(VersionAdmin): admin_site.register(Auteur, AuteurAdmin) admin_site.register(Media, MediaAdmin) +admin_site.register(Manga, MediaAdmin) admin_site.register(FutureMedia, FutureMediaAdmin) admin_site.register(Emprunt, EmpruntAdmin) admin_site.register(Jeu, JeuAdmin) diff --git a/media/forms.py b/media/forms.py index 2a46052..0d9b0e4 100644 --- a/media/forms.py +++ b/media/forms.py @@ -10,7 +10,7 @@ import urllib.request from django.forms import ModelForm from django.utils.translation import gettext_lazy as _ -from .models import Auteur +from .models import Auteur, Media from .scraper import BedetequeScraper @@ -54,7 +54,6 @@ class MediaAdminForm(ModelForm): return False def parse_data_google(self, data): - print(data) info = data['volumeInfo'] self.cleaned_data['external_url'] = info['canonicalVolumeLink'] if 'title' in info: @@ -71,7 +70,7 @@ class MediaAdminForm(ModelForm): if 'publishedDate' in info: self.cleaned_data['publish_date'] = info['publishedDate'] - if 'authors' not in self.cleaned_data: + if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']: self.cleaned_data['authors'] = list() if 'authors' in info: @@ -144,7 +143,7 @@ class MediaAdminForm(ModelForm): .format(split[2], months.index(split[0]) + 1, int(split[1]), ) - if 'authors' not in self.cleaned_data: + if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']: self.cleaned_data['authors'] = list() if 'authors' in data: @@ -159,17 +158,17 @@ class MediaAdminForm(ModelForm): """ super().clean() - if "_isbn" in self.request.POST\ - or "_isbn_addanother" in self.request.POST: + if "_isbn" in self.data\ + or "_isbn_addanother" in self.data: isbn = self.cleaned_data.get('isbn') - if "_isbn_addanother" in self.request.POST: - self.request.POST = self.request.POST.copy() - self.request.POST['_addanother'] = 42 + if "_isbn_addanother" in self.data: + self.data = self.data.copy() + self.data['_addanother'] = 42 if isbn: # ISBN is present, try with bedeteque scrap_result = self.download_data_bedeteque(isbn) if not scrap_result: - # Try with Fnac + # Try with Google scrap_result = self.download_data_google(isbn) if not scrap_result: # Try with OpenLibrary @@ -251,7 +250,7 @@ class MediaAdminForm(ModelForm): from django.core.exceptions import ValidationError try: # We don't want to check a field when we enter an ISBN. - if "isbn" not in self.request.POST \ + if "isbn" not in self.data \ or not self.cleaned_data.get('isbn'): value = field.clean(value) self.cleaned_data[name] = value @@ -260,3 +259,7 @@ class MediaAdminForm(ModelForm): self.cleaned_data[name] = value except ValidationError as e: self.add_error(name, e) + + class Meta: + model = Media + fields = '__all__' diff --git a/media/management/__init__.py b/media/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/media/management/commands/__init__.py b/media/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/media/management/commands/split_media_types.py b/media/management/commands/split_media_types.py new file mode 100644 index 0000000..7e30a8a --- /dev/null +++ b/media/management/commands/split_media_types.py @@ -0,0 +1,50 @@ +from django.core.management import BaseCommand + +from media.forms import MediaAdminForm +from media.models import Media, Manga + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('--view-only', action="store_true", + help="Display only modifications. " + + "Only useful for debug.") + + def handle(self, *args, **options): + converted = 0 + + for media in Media.objects.all(): + self.stdout.write(str(media)) + form = MediaAdminForm(instance=media, data={"isbn": media.isbn, "_isbn": True, }) + form.full_clean() + + if not "format" in form.cleaned_data: + self.stdout.write("Format not specified. Assume it is a comic strip.") + continue + + format = form.cleaned_data["format"] + self.stdout.write("Format: {}".format(format)) + + if not options["view_only"]: + if format == "manga": + self.stdout.write(self.style.WARNING("This media is a manga. Transfer it into a new object...")) + manga = Manga.objects.create( + isbn=media.isbn, + title=media.title, + subtitle=media.subtitle, + external_url=media.external_url, + side_identifier=media.side_identifier, + number_of_pages=media.number_of_pages, + publish_date=media.publish_date, + ) + + manga.authors.set(media.authors.all()) + manga.save() + + self.stdout.write(self.style.SUCCESS("Manga successfully saved. Deleting old medium...")) + + media.delete() + self.stdout.write(self.style.SUCCESS("Medium deleted")) + + converted += 1 + self.stdout.write(self.style.SUCCESS("Successfully saved {:d} mangas".format(converted))) diff --git a/media/migrations/0027_futuremedia.py b/media/migrations/0027_futuremedia.py new file mode 100644 index 0000000..13ef53c --- /dev/null +++ b/media/migrations/0027_futuremedia.py @@ -0,0 +1,26 @@ +# Generated by Django 2.2.10 on 2020-05-12 15:23 + +from django.db import migrations, models +import media.fields +import media.validators + + +class Migration(migrations.Migration): + + dependencies = [ + ('media', '0026_auto_20200210_1740'), + ] + + operations = [ + migrations.CreateModel( + name='FutureMedia', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('isbn', media.fields.ISBNField(blank=True, help_text='You may be able to scan it from a bar code.', max_length=28, null=True, unique=True, validators=[media.validators.isbn_validator], verbose_name='ISBN')), + ], + options={ + 'verbose_name': 'future medium', + 'verbose_name_plural': 'future media', + }, + ), + ] diff --git a/media/migrations/0028_manga.py b/media/migrations/0028_manga.py new file mode 100644 index 0000000..2afd409 --- /dev/null +++ b/media/migrations/0028_manga.py @@ -0,0 +1,34 @@ +# Generated by Django 2.2.10 on 2020-05-21 14:28 + +from django.db import migrations, models +import media.fields +import media.validators + + +class Migration(migrations.Migration): + + dependencies = [ + ('media', '0027_futuremedia'), + ] + + operations = [ + migrations.CreateModel( + name='Manga', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('isbn', media.fields.ISBNField(blank=True, help_text='You may be able to scan it from a bar code.', max_length=28, null=True, unique=True, validators=[media.validators.isbn_validator], verbose_name='ISBN')), + ('title', models.CharField(max_length=255, verbose_name='title')), + ('subtitle', models.CharField(blank=True, max_length=255, null=True, verbose_name='subtitle')), + ('external_url', models.URLField(blank=True, null=True, verbose_name='external URL')), + ('side_identifier', models.CharField(max_length=255, verbose_name='side identifier')), + ('number_of_pages', models.PositiveIntegerField(blank=True, null=True, verbose_name='number of pages')), + ('publish_date', models.DateField(blank=True, null=True, verbose_name='publish date')), + ('authors', models.ManyToManyField(to='media.Auteur', verbose_name='authors')), + ], + options={ + 'verbose_name': 'medium', + 'verbose_name_plural': 'media', + 'ordering': ['title', 'subtitle'], + }, + ), + ] diff --git a/media/models.py b/media/models.py index 3a67389..60f7c2f 100644 --- a/media/models.py +++ b/media/models.py @@ -38,34 +38,41 @@ class Media(models.Model): blank=True, null=True, ) + title = models.CharField( verbose_name=_('title'), max_length=255, ) + subtitle = models.CharField( verbose_name=_('subtitle'), max_length=255, blank=True, null=True, ) + external_url = models.URLField( verbose_name=_('external URL'), blank=True, null=True, ) + side_identifier = models.CharField( verbose_name=_('side identifier'), max_length=255, ) + authors = models.ManyToManyField( 'Auteur', verbose_name=_('authors'), ) + number_of_pages = models.PositiveIntegerField( verbose_name=_('number of pages'), blank=True, null=True, ) + publish_date = models.DateField( verbose_name=_('publish date'), blank=True, @@ -84,6 +91,67 @@ class Media(models.Model): ordering = ['title', 'subtitle'] +class Manga(models.Model): + isbn = ISBNField( + _('ISBN'), + help_text=_('You may be able to scan it from a bar code.'), + unique=True, + blank=True, + null=True, + ) + + title = models.CharField( + verbose_name=_('title'), + max_length=255, + ) + + subtitle = models.CharField( + verbose_name=_('subtitle'), + max_length=255, + blank=True, + null=True, + ) + + external_url = models.URLField( + verbose_name=_('external URL'), + blank=True, + null=True, + ) + + side_identifier = models.CharField( + verbose_name=_('side identifier'), + max_length=255, + ) + + authors = models.ManyToManyField( + 'Auteur', + verbose_name=_('authors'), + ) + + number_of_pages = models.PositiveIntegerField( + verbose_name=_('number of pages'), + blank=True, + null=True, + ) + + publish_date = models.DateField( + verbose_name=_('publish date'), + blank=True, + null=True, + ) + + def __str__(self): + if self.subtitle: + return "{} : {}".format(self.title, self.subtitle) + else: + return self.title + + class Meta: + verbose_name = _("manga") + verbose_name_plural = _("mangas") + ordering = ['title', 'subtitle'] + + class FutureMedia(models.Model): isbn = ISBNField( _('ISBN'), diff --git a/media/scraper.py b/media/scraper.py index 045a8b0..84de5a8 100644 --- a/media/scraper.py +++ b/media/scraper.py @@ -58,6 +58,7 @@ class BedetequeScraper: regex_subtitle = r'

\s*(.*)

' regex_publish_date = r'datePublished\" content=\"([\d-]*)\">' regex_nb_of_pages = r'numberOfPages\">(\d*)Format : Format (\w+)' regex_author = r'(((?!<).)*)' regex_illustrator = r'span itemprop=\"illustrator\">(((?!<).)*)