mirror of
https://gitlab.crans.org/mediatek/med.git
synced 2024-11-26 21:27:11 +00:00
Scrap from Google books API
This commit is contained in:
parent
43b3b5ccfe
commit
adbaf66401
@ -8,6 +8,7 @@ import unicodedata
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
from django.forms import ModelForm
|
from django.forms import ModelForm
|
||||||
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
from .models import Auteur
|
from .models import Auteur
|
||||||
from .scraper import BedetequeScraper
|
from .scraper import BedetequeScraper
|
||||||
@ -35,6 +36,51 @@ class MediaAdminForm(ModelForm):
|
|||||||
self.cleaned_data.update(data)
|
self.cleaned_data.update(data)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def download_data_google(self, isbn):
    """
    Download data from Google Books and hand it to ``parse_data_google``.

    Only the first volume returned for the ISBN query is used.

    :param isbn: ISBN to look up; it is percent-encoded before being
        inserted into the query string.
    :return: True if a volume was found and parsed, False if the API
        returned no volume or the response could not be fetched or decoded.
    """
    # Function-scope import so the method does not rely on the file's
    # import block exposing urllib.parse.
    from urllib.parse import quote

    api_url = "https://www.googleapis.com/books/v1/volumes?q=isbn:{}" \
        .format(quote(str(isbn), safe=""))

    try:
        with urllib.request.urlopen(api_url) as url:
            data = json.loads(url.read().decode())
    except (OSError, ValueError):
        # OSError covers connection/HTTP failures (URLError derives from
        # it); ValueError covers undecodable or non-JSON bodies. Returning
        # False lets the caller fall back to another source instead of
        # failing the whole form.
        return False

    # An error payload or an empty result carries no usable 'items' list.
    items = data.get('items') if isinstance(data, dict) else None
    if not items:
        return False

    # Fill the data
    self.parse_data_google(items[0])
    return True
|
||||||
|
|
||||||
|
def parse_data_google(self, data):
    """
    Copy the fields of one Google Books volume into ``cleaned_data``.

    Keys of ``volumeInfo`` that are absent are skipped, so a sparse record
    fills in what it can instead of raising.

    :param data: one volume entry of the API response; only its
        ``volumeInfo`` mapping is read.
    """
    info = data['volumeInfo']
    cleaned = self.cleaned_data

    if 'canonicalVolumeLink' in info:
        cleaned['external_url'] = info['canonicalVolumeLink']
    if 'title' in info:
        cleaned['title'] = info['title']
    # The subtitle sits next to the title inside volumeInfo, not at the
    # top level of the volume entry.
    if 'subtitle' in info:
        cleaned['subtitle'] = info['subtitle']

    if 'pageCount' in info:
        cleaned['number_of_pages'] = info['pageCount']
    elif not cleaned.get('number_of_pages'):
        # No page count from the API and none entered: default to 0.
        cleaned['number_of_pages'] = 0

    if info.get('publishedDate'):
        # The value may be a bare year or a year-month; pad it to
        # YYYY-MM-DD so it has the same shape as the dates produced by the
        # OpenLibrary parser.
        # NOTE(review): assumes publish_date expects a full date - confirm
        # against the model field.
        published = str(info['publishedDate'])
        missing_parts = max(2 - published.count('-'), 0)
        cleaned['publish_date'] = (published + '-01' * missing_parts)[:10]

    authors = cleaned.setdefault('authors', [])
    for author in info.get('authors', ()):
        # get_or_create returns (object, created); only the object is kept.
        author_obj = Auteur.objects.get_or_create(name=author)[0]
        authors.append(author_obj)
|
||||||
|
|
||||||
def download_data_openlibrary(self, isbn):
|
def download_data_openlibrary(self, isbn):
|
||||||
"""
|
"""
|
||||||
Download data from openlibrary
|
Download data from openlibrary
|
||||||
@ -54,7 +100,6 @@ class MediaAdminForm(ModelForm):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def parse_data_openlibrary(self, data):
|
def parse_data_openlibrary(self, data):
|
||||||
print(data)
|
|
||||||
self.cleaned_data['external_url'] = data['url']
|
self.cleaned_data['external_url'] = data['url']
|
||||||
if 'title' in data:
|
if 'title' in data:
|
||||||
self.cleaned_data['title'] = data['title']
|
self.cleaned_data['title'] = data['title']
|
||||||
@ -89,18 +134,19 @@ class MediaAdminForm(ModelForm):
|
|||||||
Nov="11",
|
Nov="11",
|
||||||
Dec="12",
|
Dec="12",
|
||||||
)
|
)
|
||||||
if split[0] in month_to_number:
|
if split[0][:3] in month_to_number:
|
||||||
self.cleaned_data['publish_date']\
|
self.cleaned_data['publish_date']\
|
||||||
= split[2] + "-" \
|
= split[2] + "-" \
|
||||||
+ month_to_number[split[0]] + "-" + split[1]
|
+ month_to_number[split[0][:3]] + "-" + split[1]
|
||||||
else:
|
else:
|
||||||
self.cleaned_data['publish_date'] = "{}-{:02d}-{:02d}" \
|
self.cleaned_data['publish_date'] = "{}-{:02d}-{:02d}" \
|
||||||
.format(split[2], months.index(split[0])
|
.format(split[2], months.index(split[0])
|
||||||
+ 1, int(split[1]), )
|
+ 1, int(split[1]), )
|
||||||
|
|
||||||
if 'authors' in data:
|
if 'authors' not in self.cleaned_data:
|
||||||
if 'author' not in self.cleaned_data:
|
|
||||||
self.cleaned_data['authors'] = list()
|
self.cleaned_data['authors'] = list()
|
||||||
|
|
||||||
|
if 'authors' in data:
|
||||||
for author in data['authors']:
|
for author in data['authors']:
|
||||||
author_obj = Auteur.objects.get_or_create(
|
author_obj = Auteur.objects.get_or_create(
|
||||||
name=author['name'])[0]
|
name=author['name'])[0]
|
||||||
@ -121,9 +167,14 @@ class MediaAdminForm(ModelForm):
|
|||||||
if isbn:
|
if isbn:
|
||||||
# ISBN is present, try with bedeteque
|
# ISBN is present, try with bedeteque
|
||||||
scrap_result = self.download_data_bedeteque(isbn)
|
scrap_result = self.download_data_bedeteque(isbn)
|
||||||
|
if not scrap_result:
|
||||||
|
# Try with Google Books
|
||||||
|
scrap_result = self.download_data_google(isbn)
|
||||||
if not scrap_result:
|
if not scrap_result:
|
||||||
# Try with OpenLibrary
|
# Try with OpenLibrary
|
||||||
self.download_data_openlibrary(isbn)
|
if not self.download_data_openlibrary(isbn):
|
||||||
|
self.add_error('isbn', _("This ISBN is not found."))
|
||||||
|
return self.cleaned_data
|
||||||
|
|
||||||
if self.cleaned_data['title']:
|
if self.cleaned_data['title']:
|
||||||
self.cleaned_data['title'] = re.sub(
|
self.cleaned_data['title'] = re.sub(
|
||||||
@ -183,6 +234,8 @@ class MediaAdminForm(ModelForm):
|
|||||||
).casefold().upper()
|
).casefold().upper()
|
||||||
self.cleaned_data['side_identifier'] = side_identifier
|
self.cleaned_data['side_identifier'] = side_identifier
|
||||||
|
|
||||||
|
print(self.cleaned_data)
|
||||||
|
|
||||||
return self.cleaned_data
|
return self.cleaned_data
|
||||||
|
|
||||||
def _clean_fields(self):
|
def _clean_fields(self):
|
||||||
@ -198,7 +251,7 @@ class MediaAdminForm(ModelForm):
|
|||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
try:
|
try:
|
||||||
# We don't want to check a field when we enter an ISBN.
|
# We don't want to check a field when we enter an ISBN.
|
||||||
if "_isbn" not in self.request.POST \
|
if "isbn" not in self.request.POST \
|
||||||
or not self.cleaned_data.get('isbn'):
|
or not self.cleaned_data.get('isbn'):
|
||||||
value = field.clean(value)
|
value = field.clean(value)
|
||||||
self.cleaned_data[name] = value
|
self.cleaned_data[name] = value
|
||||||
|
Loading…
Reference in New Issue
Block a user