Split comic strips and mangas
This commit is contained in:
parent
1657f5c42c
commit
82efeba272
|
@ -13,6 +13,8 @@ sleep 2
|
|||
python manage.py migrate
|
||||
python manage.py collectstatic --no-input
|
||||
|
||||
python manage.py runserver 0.0.0.0:8000
|
||||
|
||||
# harakiri parameter respawns processes taking more than 20 seconds
|
||||
# max-requests parameter respawns processes after serving 5000 requests
|
||||
# vacuum parameter cleans up when stopped
|
||||
|
|
|
@ -9,7 +9,7 @@ from reversion.admin import VersionAdmin
|
|||
|
||||
from med.admin import admin_site
|
||||
from .forms import MediaAdminForm
|
||||
from .models import Auteur, Emprunt, FutureMedia, Jeu, Media
|
||||
from .models import Auteur, Emprunt, FutureMedia, Jeu, Media, Manga
|
||||
|
||||
|
||||
class AuteurAdmin(VersionAdmin):
|
||||
|
@ -121,6 +121,7 @@ class JeuAdmin(VersionAdmin):
|
|||
|
||||
admin_site.register(Auteur, AuteurAdmin)
|
||||
admin_site.register(Media, MediaAdmin)
|
||||
admin_site.register(Manga, MediaAdmin)
|
||||
admin_site.register(FutureMedia, FutureMediaAdmin)
|
||||
admin_site.register(Emprunt, EmpruntAdmin)
|
||||
admin_site.register(Jeu, JeuAdmin)
|
||||
|
|
|
@ -10,7 +10,7 @@ import urllib.request
|
|||
from django.forms import ModelForm
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
from .models import Auteur
|
||||
from .models import Auteur, Media
|
||||
from .scraper import BedetequeScraper
|
||||
|
||||
|
||||
|
@ -54,7 +54,6 @@ class MediaAdminForm(ModelForm):
|
|||
return False
|
||||
|
||||
def parse_data_google(self, data):
|
||||
print(data)
|
||||
info = data['volumeInfo']
|
||||
self.cleaned_data['external_url'] = info['canonicalVolumeLink']
|
||||
if 'title' in info:
|
||||
|
@ -71,7 +70,7 @@ class MediaAdminForm(ModelForm):
|
|||
if 'publishedDate' in info:
|
||||
self.cleaned_data['publish_date'] = info['publishedDate']
|
||||
|
||||
if 'authors' not in self.cleaned_data:
|
||||
if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']:
|
||||
self.cleaned_data['authors'] = list()
|
||||
|
||||
if 'authors' in info:
|
||||
|
@ -144,7 +143,7 @@ class MediaAdminForm(ModelForm):
|
|||
.format(split[2], months.index(split[0])
|
||||
+ 1, int(split[1]), )
|
||||
|
||||
if 'authors' not in self.cleaned_data:
|
||||
if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']:
|
||||
self.cleaned_data['authors'] = list()
|
||||
|
||||
if 'authors' in data:
|
||||
|
@ -159,17 +158,17 @@ class MediaAdminForm(ModelForm):
|
|||
"""
|
||||
super().clean()
|
||||
|
||||
if "_isbn" in self.request.POST\
|
||||
or "_isbn_addanother" in self.request.POST:
|
||||
if "_isbn" in self.data\
|
||||
or "_isbn_addanother" in self.data:
|
||||
isbn = self.cleaned_data.get('isbn')
|
||||
if "_isbn_addanother" in self.request.POST:
|
||||
self.request.POST = self.request.POST.copy()
|
||||
self.request.POST['_addanother'] = 42
|
||||
if "_isbn_addanother" in self.data:
|
||||
self.data = self.data.copy()
|
||||
self.data['_addanother'] = 42
|
||||
if isbn:
|
||||
# ISBN is present, try with bedeteque
|
||||
scrap_result = self.download_data_bedeteque(isbn)
|
||||
if not scrap_result:
|
||||
# Try with Fnac
|
||||
# Try with Google
|
||||
scrap_result = self.download_data_google(isbn)
|
||||
if not scrap_result:
|
||||
# Try with OpenLibrary
|
||||
|
@ -251,7 +250,7 @@ class MediaAdminForm(ModelForm):
|
|||
from django.core.exceptions import ValidationError
|
||||
try:
|
||||
# We don't want to check a field when we enter an ISBN.
|
||||
if "isbn" not in self.request.POST \
|
||||
if "isbn" not in self.data \
|
||||
or not self.cleaned_data.get('isbn'):
|
||||
value = field.clean(value)
|
||||
self.cleaned_data[name] = value
|
||||
|
@ -260,3 +259,7 @@ class MediaAdminForm(ModelForm):
|
|||
self.cleaned_data[name] = value
|
||||
except ValidationError as e:
|
||||
self.add_error(name, e)
|
||||
|
||||
class Meta:
|
||||
model = Media
|
||||
fields = '__all__'
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
from django.core.management import BaseCommand
|
||||
|
||||
from media.forms import MediaAdminForm
|
||||
from media.models import Media, Manga
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Move every ``Media`` whose looked-up format is "manga" into ``Manga``.

    Each medium is run through ``MediaAdminForm`` with its ISBN so that the
    form's cleaning step can fill in a ``format`` entry. Media reported as
    "manga" are copied field by field into a new ``Manga`` row and the
    original ``Media`` row is deleted.
    """

    help = "Convert the media identified as mangas into Manga objects."

    def add_arguments(self, parser):
        """Register ``--view-only``: print detected formats, change nothing."""
        parser.add_argument(
            '--view-only',
            action="store_true",
            help="Display only modifications. Only useful for debug.",
        )

    def handle(self, *args, **options):
        """Inspect every medium and convert the mangas.

        With ``--view-only`` the detected format of each medium is printed
        but no object is created or deleted.
        """
        # Local import: keeps this block independent of the file's import header.
        from django.db import transaction

        converted = 0

        for media in Media.objects.all():
            self.stdout.write(str(media))
            # The form is bound with the "_isbn" marker, as when a lookup by
            # ISBN is requested from the admin form.
            form = MediaAdminForm(
                instance=media,
                data={"isbn": media.isbn, "_isbn": True},
            )
            form.full_clean()

            if "format" not in form.cleaned_data:
                self.stdout.write("Format not specified. Assume it is a comic strip.")
                continue

            media_format = form.cleaned_data["format"]
            self.stdout.write("Format: {}".format(media_format))

            if options["view_only"] or media_format != "manga":
                continue

            self.stdout.write(self.style.WARNING(
                "This media is a manga. Transfer it into a new object..."))

            # Create the manga and delete the old medium as one unit, so a
            # failure half-way never leaves both objects (or neither).
            # NOTE(review): deleting the medium may also affect rows that
            # reference it (e.g. loans) -- confirm this is intended.
            with transaction.atomic():
                manga = Manga.objects.create(
                    isbn=media.isbn,
                    title=media.title,
                    subtitle=media.subtitle,
                    external_url=media.external_url,
                    side_identifier=media.side_identifier,
                    number_of_pages=media.number_of_pages,
                    publish_date=media.publish_date,
                )
                # create() already saved the row and set() writes the
                # many-to-many links itself, so no extra save() is needed.
                manga.authors.set(media.authors.all())

                self.stdout.write(self.style.SUCCESS(
                    "Manga successfully saved. Deleting old medium..."))

                media.delete()

            self.stdout.write(self.style.SUCCESS("Medium deleted"))
            converted += 1

        self.stdout.write(self.style.SUCCESS(
            "Successfully saved {:d} mangas".format(converted)))
|
|
@ -0,0 +1,26 @@
|
|||
# Generated by Django 2.2.10 on 2020-05-12 15:23
|
||||
|
||||
from django.db import migrations, models
|
||||
import media.fields
|
||||
import media.validators
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``FutureMedia`` model: an auto id plus a unique, optional ISBN."""

    dependencies = [
        ('media', '0026_auto_20200210_1740'),
    ]

    operations = [
        migrations.CreateModel(
            name='FutureMedia',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'isbn',
                    media.fields.ISBNField(
                        blank=True,
                        help_text='You may be able to scan it from a bar code.',
                        max_length=28,
                        null=True,
                        unique=True,
                        validators=[media.validators.isbn_validator],
                        verbose_name='ISBN',
                    ),
                ),
            ],
            options={
                'verbose_name': 'future medium',
                'verbose_name_plural': 'future media',
            },
        ),
    ]
|
|
@ -0,0 +1,34 @@
|
|||
# Generated by Django 2.2.10 on 2020-05-21 14:28
|
||||
|
||||
from django.db import migrations, models
|
||||
import media.fields
|
||||
import media.validators
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``Manga`` model, with the same columns as a medium.

    The recorded options use the 'manga' / 'mangas' verbose names so that
    the migration state agrees with ``Manga.Meta``; these options are
    state-only and do not change the database schema.
    """

    dependencies = [
        ('media', '0027_futuremedia'),
    ]

    operations = [
        migrations.CreateModel(
            name='Manga',
            fields=[
                ('id', models.AutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('isbn', media.fields.ISBNField(
                    blank=True,
                    help_text='You may be able to scan it from a bar code.',
                    max_length=28,
                    null=True,
                    unique=True,
                    validators=[media.validators.isbn_validator],
                    verbose_name='ISBN',
                )),
                ('title', models.CharField(
                    max_length=255,
                    verbose_name='title',
                )),
                ('subtitle', models.CharField(
                    blank=True,
                    max_length=255,
                    null=True,
                    verbose_name='subtitle',
                )),
                ('external_url', models.URLField(
                    blank=True,
                    null=True,
                    verbose_name='external URL',
                )),
                ('side_identifier', models.CharField(
                    max_length=255,
                    verbose_name='side identifier',
                )),
                ('number_of_pages', models.PositiveIntegerField(
                    blank=True,
                    null=True,
                    verbose_name='number of pages',
                )),
                ('publish_date', models.DateField(
                    blank=True,
                    null=True,
                    verbose_name='publish date',
                )),
                ('authors', models.ManyToManyField(
                    to='media.Auteur',
                    verbose_name='authors',
                )),
            ],
            options={
                # Must match Manga.Meta, otherwise makemigrations reports a
                # pending options change for this model.
                'verbose_name': 'manga',
                'verbose_name_plural': 'mangas',
                'ordering': ['title', 'subtitle'],
            },
        ),
    ]
|
|
@ -38,34 +38,41 @@ class Media(models.Model):
|
|||
blank=True,
|
||||
null=True,
|
||||
)
|
||||
|
||||
title = models.CharField(
|
||||
verbose_name=_('title'),
|
||||
max_length=255,
|
||||
)
|
||||
|
||||
subtitle = models.CharField(
|
||||
verbose_name=_('subtitle'),
|
||||
max_length=255,
|
||||
blank=True,
|
||||
null=True,
|
||||
)
|
||||
|
||||
external_url = models.URLField(
|
||||
verbose_name=_('external URL'),
|
||||
blank=True,
|
||||
null=True,
|
||||
)
|
||||
|
||||
side_identifier = models.CharField(
|
||||
verbose_name=_('side identifier'),
|
||||
max_length=255,
|
||||
)
|
||||
|
||||
authors = models.ManyToManyField(
|
||||
'Auteur',
|
||||
verbose_name=_('authors'),
|
||||
)
|
||||
|
||||
number_of_pages = models.PositiveIntegerField(
|
||||
verbose_name=_('number of pages'),
|
||||
blank=True,
|
||||
null=True,
|
||||
)
|
||||
|
||||
publish_date = models.DateField(
|
||||
verbose_name=_('publish date'),
|
||||
blank=True,
|
||||
|
@ -84,6 +91,67 @@ class Media(models.Model):
|
|||
ordering = ['title', 'subtitle']
|
||||
|
||||
|
||||
class Manga(models.Model):
    """A manga, described by the same fields as a medium.

    Only the title and the side identifier are mandatory; the ISBN is
    optional but must be unique when given.
    """

    isbn = ISBNField(
        _('ISBN'),
        help_text=_('You may be able to scan it from a bar code.'),
        unique=True,
        blank=True,
        null=True,
    )

    # For Django's built-in fields the first positional argument is the
    # verbose name, except for relations where it is the target model.
    title = models.CharField(_('title'), max_length=255)

    subtitle = models.CharField(
        _('subtitle'),
        max_length=255,
        blank=True,
        null=True,
    )

    external_url = models.URLField(_('external URL'), blank=True, null=True)

    side_identifier = models.CharField(_('side identifier'), max_length=255)

    authors = models.ManyToManyField('Auteur', verbose_name=_('authors'))

    number_of_pages = models.PositiveIntegerField(
        _('number of pages'),
        blank=True,
        null=True,
    )

    publish_date = models.DateField(_('publish date'), blank=True, null=True)

    def __str__(self):
        """Return the title, followed by " : subtitle" when a subtitle is set."""
        if not self.subtitle:
            return self.title
        return "{} : {}".format(self.title, self.subtitle)

    class Meta:
        verbose_name = _("manga")
        verbose_name_plural = _("mangas")
        ordering = ['title', 'subtitle']
|
||||
|
||||
|
||||
class FutureMedia(models.Model):
|
||||
isbn = ISBNField(
|
||||
_('ISBN'),
|
||||
|
|
|
@ -58,6 +58,7 @@ class BedetequeScraper:
|
|||
regex_subtitle = r'<h2>\s*(.*)</h2>'
|
||||
regex_publish_date = r'datePublished\" content=\"([\d-]*)\">'
|
||||
regex_nb_of_pages = r'numberOfPages\">(\d*)</span'
|
||||
regex_format = r'<label>Format : </label>Format (\w+)</li>'
|
||||
regex_author = r'<span itemprop=\"author\">(((?!<).)*)</span>'
|
||||
regex_illustrator = r'span itemprop=\"illustrator\">(((?!<).)*)</span'
|
||||
|
||||
|
@ -89,6 +90,11 @@ class BedetequeScraper:
|
|||
elif 'number_of_pages' not in data:
|
||||
data['number_of_pages'] = 0
|
||||
|
||||
# Get format of the book
|
||||
search_format = re.search(regex_format, content)
|
||||
if search_format:
|
||||
data['format'] = search_format.group(1).lower()
|
||||
|
||||
# Get author and illustrator
|
||||
author = re.search(regex_author, content)
|
||||
if 'author' not in data:
|
||||
|
|
Loading…
Reference in New Issue