Split comic strips and mangas
This commit is contained in:
parent
1657f5c42c
commit
82efeba272
|
@ -13,6 +13,8 @@ sleep 2
|
||||||
python manage.py migrate
|
python manage.py migrate
|
||||||
python manage.py collectstatic --no-input
|
python manage.py collectstatic --no-input
|
||||||
|
|
||||||
|
python manage.py runserver 0.0.0.0:8000
|
||||||
|
|
||||||
# harakiri parameter respawns processes taking more than 20 seconds
|
# harakiri parameter respawns processes taking more than 20 seconds
|
||||||
# max-requests parameter respawns processes after serving 5000 requests
|
# max-requests parameter respawns processes after serving 5000 requests
|
||||||
# vacuum parameter cleans up when stopped
|
# vacuum parameter cleans up when stopped
|
||||||
|
|
|
@ -9,7 +9,7 @@ from reversion.admin import VersionAdmin
|
||||||
|
|
||||||
from med.admin import admin_site
|
from med.admin import admin_site
|
||||||
from .forms import MediaAdminForm
|
from .forms import MediaAdminForm
|
||||||
from .models import Auteur, Emprunt, FutureMedia, Jeu, Media
|
from .models import Auteur, Emprunt, FutureMedia, Jeu, Media, Manga
|
||||||
|
|
||||||
|
|
||||||
class AuteurAdmin(VersionAdmin):
|
class AuteurAdmin(VersionAdmin):
|
||||||
|
@ -121,6 +121,7 @@ class JeuAdmin(VersionAdmin):
|
||||||
|
|
||||||
admin_site.register(Auteur, AuteurAdmin)
|
admin_site.register(Auteur, AuteurAdmin)
|
||||||
admin_site.register(Media, MediaAdmin)
|
admin_site.register(Media, MediaAdmin)
|
||||||
|
admin_site.register(Manga, MediaAdmin)
|
||||||
admin_site.register(FutureMedia, FutureMediaAdmin)
|
admin_site.register(FutureMedia, FutureMediaAdmin)
|
||||||
admin_site.register(Emprunt, EmpruntAdmin)
|
admin_site.register(Emprunt, EmpruntAdmin)
|
||||||
admin_site.register(Jeu, JeuAdmin)
|
admin_site.register(Jeu, JeuAdmin)
|
||||||
|
|
|
@ -10,7 +10,7 @@ import urllib.request
|
||||||
from django.forms import ModelForm
|
from django.forms import ModelForm
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
from .models import Auteur
|
from .models import Auteur, Media
|
||||||
from .scraper import BedetequeScraper
|
from .scraper import BedetequeScraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,7 +54,6 @@ class MediaAdminForm(ModelForm):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def parse_data_google(self, data):
|
def parse_data_google(self, data):
|
||||||
print(data)
|
|
||||||
info = data['volumeInfo']
|
info = data['volumeInfo']
|
||||||
self.cleaned_data['external_url'] = info['canonicalVolumeLink']
|
self.cleaned_data['external_url'] = info['canonicalVolumeLink']
|
||||||
if 'title' in info:
|
if 'title' in info:
|
||||||
|
@ -71,7 +70,7 @@ class MediaAdminForm(ModelForm):
|
||||||
if 'publishedDate' in info:
|
if 'publishedDate' in info:
|
||||||
self.cleaned_data['publish_date'] = info['publishedDate']
|
self.cleaned_data['publish_date'] = info['publishedDate']
|
||||||
|
|
||||||
if 'authors' not in self.cleaned_data:
|
if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']:
|
||||||
self.cleaned_data['authors'] = list()
|
self.cleaned_data['authors'] = list()
|
||||||
|
|
||||||
if 'authors' in info:
|
if 'authors' in info:
|
||||||
|
@ -144,7 +143,7 @@ class MediaAdminForm(ModelForm):
|
||||||
.format(split[2], months.index(split[0])
|
.format(split[2], months.index(split[0])
|
||||||
+ 1, int(split[1]), )
|
+ 1, int(split[1]), )
|
||||||
|
|
||||||
if 'authors' not in self.cleaned_data:
|
if 'authors' not in self.cleaned_data or not self.cleaned_data['authors']:
|
||||||
self.cleaned_data['authors'] = list()
|
self.cleaned_data['authors'] = list()
|
||||||
|
|
||||||
if 'authors' in data:
|
if 'authors' in data:
|
||||||
|
@ -159,17 +158,17 @@ class MediaAdminForm(ModelForm):
|
||||||
"""
|
"""
|
||||||
super().clean()
|
super().clean()
|
||||||
|
|
||||||
if "_isbn" in self.request.POST\
|
if "_isbn" in self.data\
|
||||||
or "_isbn_addanother" in self.request.POST:
|
or "_isbn_addanother" in self.data:
|
||||||
isbn = self.cleaned_data.get('isbn')
|
isbn = self.cleaned_data.get('isbn')
|
||||||
if "_isbn_addanother" in self.request.POST:
|
if "_isbn_addanother" in self.data:
|
||||||
self.request.POST = self.request.POST.copy()
|
self.data = self.data.copy()
|
||||||
self.request.POST['_addanother'] = 42
|
self.data['_addanother'] = 42
|
||||||
if isbn:
|
if isbn:
|
||||||
# ISBN is present, try with bedeteque
|
# ISBN is present, try with bedeteque
|
||||||
scrap_result = self.download_data_bedeteque(isbn)
|
scrap_result = self.download_data_bedeteque(isbn)
|
||||||
if not scrap_result:
|
if not scrap_result:
|
||||||
# Try with Fnac
|
# Try with Google
|
||||||
scrap_result = self.download_data_google(isbn)
|
scrap_result = self.download_data_google(isbn)
|
||||||
if not scrap_result:
|
if not scrap_result:
|
||||||
# Try with OpenLibrary
|
# Try with OpenLibrary
|
||||||
|
@ -251,7 +250,7 @@ class MediaAdminForm(ModelForm):
|
||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
try:
|
try:
|
||||||
# We don't want to check a field when we enter an ISBN.
|
# We don't want to check a field when we enter an ISBN.
|
||||||
if "isbn" not in self.request.POST \
|
if "isbn" not in self.data \
|
||||||
or not self.cleaned_data.get('isbn'):
|
or not self.cleaned_data.get('isbn'):
|
||||||
value = field.clean(value)
|
value = field.clean(value)
|
||||||
self.cleaned_data[name] = value
|
self.cleaned_data[name] = value
|
||||||
|
@ -260,3 +259,7 @@ class MediaAdminForm(ModelForm):
|
||||||
self.cleaned_data[name] = value
|
self.cleaned_data[name] = value
|
||||||
except ValidationError as e:
|
except ValidationError as e:
|
||||||
self.add_error(name, e)
|
self.add_error(name, e)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
model = Media
|
||||||
|
fields = '__all__'
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
from django.core.management import BaseCommand
|
||||||
|
|
||||||
|
from media.forms import MediaAdminForm
|
||||||
|
from media.models import Media, Manga
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Move every medium whose detected format is "manga" into ``Manga``.

    Each ``Media`` row is run through ``MediaAdminForm`` with the ``_isbn``
    flag set, which makes the form look the ISBN up through its scrapers.
    When the cleaned data carries a ``format`` entry equal to ``"manga"``,
    the medium is copied into a new ``Manga`` row and the original is deleted.
    """

    help = "Convert media identified as mangas into Manga objects."

    def add_arguments(self, parser):
        """Register the ``--view-only`` flag (report formats, change nothing)."""
        parser.add_argument(
            '--view-only',
            action="store_true",
            help="Display only modifications. "
                 + "Only useful for debug.",
        )

    def handle(self, *args, **options):
        """Inspect all media and convert the mangas unless ``--view-only``.

        NOTE(review): deleting a medium also applies whatever ``on_delete``
        rules other models (e.g. borrowings) declare towards ``Media``;
        those models are not visible here, so confirm before running this
        on production data.
        """
        # Imported locally so the module's import block is left untouched.
        from django.db import transaction

        converted = 0

        for media in Media.objects.all():
            self.stdout.write(str(media))
            # "_isbn" asks the form's clean() to fetch data for this ISBN.
            form = MediaAdminForm(
                instance=media,
                data={"isbn": media.isbn, "_isbn": True},
            )
            form.full_clean()

            if "format" not in form.cleaned_data:
                self.stdout.write(
                    "Format not specified. Assume it is a comic strip.")
                continue

            media_format = form.cleaned_data["format"]
            self.stdout.write("Format: {}".format(media_format))

            if options["view_only"] or media_format != "manga":
                continue

            self.stdout.write(self.style.WARNING(
                "This media is a manga. Transfer it into a new object..."))

            # Create the manga and delete the medium as one unit, so that a
            # failure while deleting does not leave both rows behind.
            with transaction.atomic():
                manga = Manga.objects.create(
                    isbn=media.isbn,
                    title=media.title,
                    subtitle=media.subtitle,
                    external_url=media.external_url,
                    side_identifier=media.side_identifier,
                    number_of_pages=media.number_of_pages,
                    publish_date=media.publish_date,
                )
                # create() has already saved the row and set() writes the
                # relation directly, so no further save() is required.
                manga.authors.set(media.authors.all())

                self.stdout.write(self.style.SUCCESS(
                    "Manga successfully saved. Deleting old medium..."))

                media.delete()

            self.stdout.write(self.style.SUCCESS("Medium deleted"))
            converted += 1

        self.stdout.write(self.style.SUCCESS(
            "Successfully saved {:d} mangas".format(converted)))
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Generated by Django 2.2.10 on 2020-05-12 15:23
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import media.fields
|
||||||
|
import media.validators
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Create the ``FutureMedia`` table: an auto id plus a unique ISBN."""

    dependencies = [
        ('media', '0026_auto_20200210_1740'),
    ]

    operations = [
        migrations.CreateModel(
            name='FutureMedia',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'isbn',
                    media.fields.ISBNField(
                        blank=True,
                        help_text='You may be able to scan it from a bar code.',
                        max_length=28,
                        null=True,
                        unique=True,
                        validators=[media.validators.isbn_validator],
                        verbose_name='ISBN',
                    ),
                ),
            ],
            options={
                'verbose_name': 'future medium',
                'verbose_name_plural': 'future media',
            },
        ),
    ]
|
|
@ -0,0 +1,34 @@
|
||||||
|
# Generated by Django 2.2.10 on 2020-05-21 14:28
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import media.fields
|
||||||
|
import media.validators
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Create the ``Manga`` table, with the same columns as a medium.

    The recorded model options use the "manga" / "mangas" verbose names so
    that the migration state agrees with ``Manga.Meta`` and no further
    options-only migration is detected.
    """

    dependencies = [
        ('media', '0027_futuremedia'),
    ]

    operations = [
        migrations.CreateModel(
            name='Manga',
            fields=[
                ('id', models.AutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('isbn', media.fields.ISBNField(
                    blank=True,
                    help_text='You may be able to scan it from a bar code.',
                    max_length=28,
                    null=True,
                    unique=True,
                    validators=[media.validators.isbn_validator],
                    verbose_name='ISBN',
                )),
                ('title', models.CharField(
                    max_length=255,
                    verbose_name='title',
                )),
                ('subtitle', models.CharField(
                    blank=True,
                    max_length=255,
                    null=True,
                    verbose_name='subtitle',
                )),
                ('external_url', models.URLField(
                    blank=True,
                    null=True,
                    verbose_name='external URL',
                )),
                ('side_identifier', models.CharField(
                    max_length=255,
                    verbose_name='side identifier',
                )),
                ('number_of_pages', models.PositiveIntegerField(
                    blank=True,
                    null=True,
                    verbose_name='number of pages',
                )),
                ('publish_date', models.DateField(
                    blank=True,
                    null=True,
                    verbose_name='publish date',
                )),
                ('authors', models.ManyToManyField(
                    to='media.Auteur',
                    verbose_name='authors',
                )),
            ],
            options={
                # Was 'medium' / 'media', apparently carried over from the
                # Media model; the Manga model declares manga / mangas.
                'verbose_name': 'manga',
                'verbose_name_plural': 'mangas',
                'ordering': ['title', 'subtitle'],
            },
        ),
    ]
|
|
@ -38,34 +38,41 @@ class Media(models.Model):
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
title = models.CharField(
|
title = models.CharField(
|
||||||
verbose_name=_('title'),
|
verbose_name=_('title'),
|
||||||
max_length=255,
|
max_length=255,
|
||||||
)
|
)
|
||||||
|
|
||||||
subtitle = models.CharField(
|
subtitle = models.CharField(
|
||||||
verbose_name=_('subtitle'),
|
verbose_name=_('subtitle'),
|
||||||
max_length=255,
|
max_length=255,
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
external_url = models.URLField(
|
external_url = models.URLField(
|
||||||
verbose_name=_('external URL'),
|
verbose_name=_('external URL'),
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
side_identifier = models.CharField(
|
side_identifier = models.CharField(
|
||||||
verbose_name=_('side identifier'),
|
verbose_name=_('side identifier'),
|
||||||
max_length=255,
|
max_length=255,
|
||||||
)
|
)
|
||||||
|
|
||||||
authors = models.ManyToManyField(
|
authors = models.ManyToManyField(
|
||||||
'Auteur',
|
'Auteur',
|
||||||
verbose_name=_('authors'),
|
verbose_name=_('authors'),
|
||||||
)
|
)
|
||||||
|
|
||||||
number_of_pages = models.PositiveIntegerField(
|
number_of_pages = models.PositiveIntegerField(
|
||||||
verbose_name=_('number of pages'),
|
verbose_name=_('number of pages'),
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
publish_date = models.DateField(
|
publish_date = models.DateField(
|
||||||
verbose_name=_('publish date'),
|
verbose_name=_('publish date'),
|
||||||
blank=True,
|
blank=True,
|
||||||
|
@ -84,6 +91,67 @@ class Media(models.Model):
|
||||||
ordering = ['title', 'subtitle']
|
ordering = ['title', 'subtitle']
|
||||||
|
|
||||||
|
|
||||||
|
class Manga(models.Model):
    """A manga, described by the same set of fields as a ``Media``."""

    # Optional but unique identifier of the volume.
    isbn = ISBNField(
        _('ISBN'),
        help_text=_('You may be able to scan it from a bar code.'),
        unique=True,
        blank=True,
        null=True,
    )

    title = models.CharField(verbose_name=_('title'), max_length=255)

    subtitle = models.CharField(
        verbose_name=_('subtitle'),
        max_length=255,
        blank=True,
        null=True,
    )

    external_url = models.URLField(
        verbose_name=_('external URL'),
        blank=True,
        null=True,
    )

    side_identifier = models.CharField(
        verbose_name=_('side identifier'),
        max_length=255,
    )

    authors = models.ManyToManyField('Auteur', verbose_name=_('authors'))

    number_of_pages = models.PositiveIntegerField(
        verbose_name=_('number of pages'),
        blank=True,
        null=True,
    )

    publish_date = models.DateField(
        verbose_name=_('publish date'),
        blank=True,
        null=True,
    )

    class Meta:
        verbose_name = _("manga")
        verbose_name_plural = _("mangas")
        ordering = ['title', 'subtitle']

    def __str__(self):
        """Return the title, followed by " : subtitle" when one is set."""
        if not self.subtitle:
            return self.title
        return "{} : {}".format(self.title, self.subtitle)
|
||||||
|
|
||||||
|
|
||||||
class FutureMedia(models.Model):
|
class FutureMedia(models.Model):
|
||||||
isbn = ISBNField(
|
isbn = ISBNField(
|
||||||
_('ISBN'),
|
_('ISBN'),
|
||||||
|
|
|
@ -58,6 +58,7 @@ class BedetequeScraper:
|
||||||
regex_subtitle = r'<h2>\s*(.*)</h2>'
|
regex_subtitle = r'<h2>\s*(.*)</h2>'
|
||||||
regex_publish_date = r'datePublished\" content=\"([\d-]*)\">'
|
regex_publish_date = r'datePublished\" content=\"([\d-]*)\">'
|
||||||
regex_nb_of_pages = r'numberOfPages\">(\d*)</span'
|
regex_nb_of_pages = r'numberOfPages\">(\d*)</span'
|
||||||
|
regex_format = r'<label>Format : </label>Format (\w+)</li>'
|
||||||
regex_author = r'<span itemprop=\"author\">(((?!<).)*)</span>'
|
regex_author = r'<span itemprop=\"author\">(((?!<).)*)</span>'
|
||||||
regex_illustrator = r'span itemprop=\"illustrator\">(((?!<).)*)</span'
|
regex_illustrator = r'span itemprop=\"illustrator\">(((?!<).)*)</span'
|
||||||
|
|
||||||
|
@ -89,6 +90,11 @@ class BedetequeScraper:
|
||||||
elif 'number_of_pages' not in data:
|
elif 'number_of_pages' not in data:
|
||||||
data['number_of_pages'] = 0
|
data['number_of_pages'] = 0
|
||||||
|
|
||||||
|
# Get format of the book
|
||||||
|
search_format = re.search(regex_format, content)
|
||||||
|
if search_format:
|
||||||
|
data['format'] = search_format.group(1).lower()
|
||||||
|
|
||||||
# Get author and illustrator
|
# Get author and illustrator
|
||||||
author = re.search(regex_author, content)
|
author = re.search(regex_author, content)
|
||||||
if 'author' not in data:
|
if 'author' not in data:
|
||||||
|
|
Loading…
Reference in New Issue