Batch import Function
On utilise bulk_create de Django, mais il faut préparer tous les models au préalable. C'est *beaucoup* plus rapide.
This commit is contained in:
parent
64e8e88ed3
commit
b1366995e0
|
@ -0,0 +1,84 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from collections import defaultdict
|
||||
from django.apps import apps
|
||||
from django.db import transaction
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.db.models import Model
|
||||
from polymorphic.models import PolymorphicModel
|
||||
|
||||
|
||||
class ImportCommand(BaseCommand):
    """
    Generic command for import of NK15 database.

    Provides the shared helpers (styled output, in-place progress line)
    and the command-line options common to every NK15 import command.
    """

    def print_success(self, to_print):
        """Write *to_print* to stdout using the SUCCESS style."""
        return self.stdout.write(self.style.SUCCESS(to_print))

    def print_error(self, to_print):
        """Write *to_print* to stdout using the ERROR style."""
        return self.stdout.write(self.style.ERROR(to_print))

    def update_line(self, n, total, content):
        """
        Refresh an in-place progress line of the form ``(n/total) content``.

        The leading carriage return keeps everything on one terminal line;
        *content* is clipped/padded to exactly 10 characters.
        """
        n = str(n)
        total = str(total)
        # str.rjust returns a new string; the original discarded the result,
        # so the counter was never actually right-aligned.
        n = n.rjust(len(total))
        print(f"\r ({n}/{total}) {content:10.10}", end="")

    def create_parser(self, prog_name, subcommand, **kwargs):
        """Extend the default parser with the NK15 connection/mapping options."""
        parser = super().create_parser(prog_name, subcommand, **kwargs)
        parser.add_argument('--nk15db', action='store', default='nk15', help='NK15 database name')
        parser.add_argument('--nk15user', action='store', default='nk15_user', help='NK15 database owner')
        parser.add_argument('-s', '--save', action='store', help="save mapping of idbde")
        parser.add_argument('-m', '--map', action='store', help="import mapping of idbde")
        return parser
|
||||
|
||||
|
||||
class BulkCreateManager(object):
    """
    This helper class keeps track of ORM objects to be created for multiple
    model classes, and automatically creates those objects with `bulk_create`
    when the number of objects accumulated for a given model class exceeds
    `chunk_size`.
    Upon completion of the loop that's `add()`ing objects, the developer must
    call `done()` to ensure the final set of objects is created for all models.
    """

    def __init__(self, chunk_size=100):
        # One pending-object queue per model label (e.g. "note.Note").
        self._create_queues = defaultdict(list)
        self.chunk_size = chunk_size

    def _commit(self, model_class):
        # Flush the queued objects for `model_class` to the database.
        model_key = model_class._meta.label
        if model_class.__base__ in [Model, PolymorphicModel] or model_class is User:
            # Non-inherited model (or User): a plain bulk_create is enough.
            model_class.objects.bulk_create(self._create_queues[model_key])
        else:
            # ensure that parents models exists
            self._commit(model_class.__base__)
            with transaction.atomic():
                for obj in self._create_queues[model_key]:
                    # raw=True writes the row as-is, skipping signals and
                    # per-object parent handling (parents were committed above).
                    obj.save_base(raw=True)
        # Reset the queue for this model once committed.
        self._create_queues[model_key] = []

    def add(self, *args):
        """
        Add an object to the queue to be created, and call bulk_create if we
        have enough objs.
        """
        for obj in args:
            model_class = type(obj)
            model_key = model_class._meta.label
            self._create_queues[model_key].append(obj)
            if len(self._create_queues[model_key]) >= self.chunk_size:
                self._commit(model_class)

    def done(self):
        """
        Always call this upon completion to make sure the final partial chunk
        is saved.
        """
        for model_name, objs in self._create_queues.items():
            if len(objs) > 0:
                self._commit(apps.get_model(model_name))
|
|
@ -0,0 +1,199 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import datetime
import json

import psycopg2 as pg
import psycopg2.extras as pge
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import IntegrityError, transaction
from django.utils.timezone import make_aware, now

from member.models import Club, Profile
from note.models import Alias, Note, NoteClub, NoteUser

from ._import_utils import BulkCreateManager, ImportCommand
|
||||
|
||||
# Imported NK15 memberships all share one fixed duration and validity window.
M_DURATION = 396  # membership duration (presumably in days — TODO confirm)
M_START = datetime.date(2019, 8, 31)
M_END = datetime.date(2020, 9, 30)

# Maps an NK15 `idbde` to the pk of the corresponding Note in the new base.
# Non-positive ids are the special payment notes, hard-coded here; the rest
# is filled in by `import_account`.
MAP_IDBDE = {
    -4: 2,  # Credit card ("Carte Bancaire")
    -3: 4,  # Bank transfer ("Virement")
    -2: 1,  # Cash ("Especes")
    -1: 3,  # Check ("Chèque")
    0: 5,  # BDE
}
# NOTE(review): these queries run at module import time, so this module can
# only be loaded once migrations have created the content types.
note_user_type = ContentType.objects.get(app_label="note", model="noteuser")
note_club_type = ContentType.objects.get(app_label="note", model="noteclub")
|
||||
|
||||
|
||||
class Command(ImportCommand):
    """
    Import command for People base data (Comptes, and Aliases)
    """

    def add_arguments(self, parser):
        """Add the options specific to this command to the common parser."""
        parser.add_argument('-a', '--alias', action='store', help="import alias")
        parser.add_argument('-c', '--chunk', type=int, default=100, help="chunk size for bulk_create")

    @transaction.atomic
    def import_account(self, cur, chunk_size):
        """
        Import every account of the nk15 in a batch fashion.
        Every Model has to be manually created, and no magic `.save()`
        function is being called.

        :param cur: open DictCursor on the NK15 database
        :param chunk_size: number of queued objects per bulk_create batch
        """
        cur.execute("SELECT * FROM comptes WHERE idbde > 0 ORDER BY idbde;")
        # Primary keys are assigned manually; the first values are reserved
        # for pre-existing objects (presumably created by fixtures — TODO confirm).
        pk_club = 3
        pk_user = 1
        pk_profile = 1
        pk_note = 7  # pk 6 is Kfet!
        n = cur.rowcount

        bulk_mgr = BulkCreateManager(chunk_size=chunk_size)
        pseudo_list = set()
        for idx, row in enumerate(cur):
            pseudo = row["pseudo"]
            pseudo_norm = Alias.normalize(pseudo)
            self.update_line(idx, n, pseudo)
            # clean pseudo (normalized pseudo must be unique)
            if pseudo_norm in pseudo_list:
                pseudo = pseudo + str(row["idbde"])
            else:
                pseudo_list.add(pseudo_norm)
            # Data shared by user notes and club notes.
            note_dict = {
                "pk": pk_note,
                "balance": 0,
                "last_negative": None,
                "is_active": True,
                "display_image": "",
                "created_at": now()
            }
            if row["last_negatif"] is not None:
                # clean date: NK15 timestamps are naive, make them aware
                note_dict["last_negative"] = make_aware(row["last_negatif"])
            if row["type"] == "personne":
                # sanitize password: deleted accounts and "*|*" have no usable hash
                if row["passwd"] != "*|*" and not row["deleted"]:
                    passwd_nk15 = "$".join(["custom_nk15", "1", row["passwd"]])
                else:
                    passwd_nk15 = ''

                obj_dict = {
                    "pk": pk_user,
                    "username": row["pseudo"],
                    "password": passwd_nk15,
                    # NK15 stores the given name in "prenom" and the family
                    # name in "nom"; the original mapping was swapped.
                    "first_name": row["prenom"],
                    "last_name": row["nom"],
                    "email": row["mail"],
                    "is_active": True,  # temporary
                }
                profile_dict = {
                    "pk": pk_profile,
                    "user_id": pk_user,
                    "phone_number": row['tel'],
                    "address": row['adresse'],
                    "paid": row['normalien'],
                    "registration_valid": True,
                    "email_confirmed": True,
                }
                note_dict["polymorphic_ctype"] = note_user_type
                note_user_dict = {
                    "pk": pk_note,
                    "user_id": pk_user,
                }
                alias_dict = {
                    "pk": pk_note,
                    "name": pseudo,
                    "normalized_name": Alias.normalize(pseudo),
                    "note_id": pk_note,
                }

                bulk_mgr.add(User(**obj_dict),
                             Profile(**profile_dict),
                             Note(**note_dict),
                             NoteUser(**note_user_dict),
                             Alias(**alias_dict),)
                pk_user += 1
                pk_profile += 1

            else:  # club
                obj_dict = {
                    "pk": pk_club,
                    "name": row["pseudo"],
                    "email": row["mail"],
                    "membership_duration": M_DURATION,
                    "membership_start": M_START,
                    "membership_end": M_END,
                    "membership_fee_paid": 0,
                    "membership_fee_unpaid": 0,
                }
                note_club_dict = {
                    "pk": pk_note,
                    "club_id": pk_club,
                }
                alias_dict = {
                    "pk": pk_note,
                    "name": pseudo,
                    "normalized_name": Alias.normalize(pseudo),
                    "note_id": pk_note
                }
                note_dict["polymorphic_ctype"] = note_club_type
                bulk_mgr.add(Club(**obj_dict),
                             Note(**note_dict),
                             NoteClub(**note_club_dict),
                             Alias(**alias_dict))
                pk_club += 1
            # row import completed
            MAP_IDBDE[row["idbde"]] = pk_note
            pk_note += 1
        # Flush the last partial chunk; without this call, the final
        # len(queue) < chunk_size objects were never written to the database.
        bulk_mgr.done()
        self.print_success("comptes table imported")

    def import_alias(self, cur):
        """
        Import Alias from nk15
        We rely on validation of the models, but it is slow.
        """
        cur.execute("SELECT * FROM aliases ORDER by id")
        n = cur.rowcount
        for idx, row in enumerate(cur):
            self.update_line(idx, n, row["alias"])
            alias_name = row["alias"]
            # the alias field is capped at 255 chars: truncate with an ellipsis
            alias_name_good = (alias_name[:252] + '...') if len(alias_name) > 255 else alias_name
            obj_dict = {
                "note_id": MAP_IDBDE[row["idbde"]],
                "name": alias_name_good,
                "normalized_name": Alias.normalize(alias_name_good),
            }
            try:
                with transaction.atomic():
                    # get_or_create already persists the alias; the extra
                    # save() of the original was redundant (and `alias` was
                    # unbound on the exception path).
                    Alias.objects.get_or_create(**obj_dict)
            except IntegrityError as e:
                # aliases colliding after normalization are simply skipped
                if "unique" in e.args[0]:
                    continue
                else:
                    raise e
        return None

    def handle(self, *args, **kwargs):
        """
        Connect to the NK15 database, import the accounts and, on demand,
        the aliases; the idbde -> note pk mapping can be saved to JSON.
        """
        global MAP_IDBDE
        # default args, provided by ImportCommand.
        nk15db, nk15user = kwargs['nk15db'], kwargs['nk15user']
        # connecting to nk15 database
        conn = pg.connect(database=nk15db, user=nk15user)
        cur = conn.cursor(cursor_factory=pge.DictCursor)

        self.import_account(cur, kwargs["chunk"])
        if kwargs["save"]:
            filename = kwargs["save"]
            with open(filename, 'w') as fp:
                # NOTE: json.dump turns the integer keys of MAP_IDBDE into strings
                json.dump(MAP_IDBDE, fp, sort_keys=True, indent=2)
        # Alias Management
        if kwargs["alias"]:
            # was `self.impot_alias(cur)`: a typo raising AttributeError at runtime
            self.import_alias(cur)
        conn.close()
|
Loading…
Reference in New Issue