From 64dd63503914870615e91357d4c19f9798804e5b Mon Sep 17 00:00:00 2001 From: Pierre-antoine Comby Date: Mon, 18 May 2020 19:39:19 +0200 Subject: [PATCH] make alias import work --- management/commands/import_account.py | 58 ++++++++++++++++----------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/management/commands/import_account.py b/management/commands/import_account.py index 3f7a789..bf66e4d 100644 --- a/management/commands/import_account.py +++ b/management/commands/import_account.py @@ -3,6 +3,7 @@ import psycopg2 as pg import psycopg2.extras as pge import datetime +import json from django.utils.timezone import make_aware, now from django.contrib.auth.models import User @@ -28,6 +29,10 @@ MAP_IDBDE = { -1: 3, # Chèque 0: 5, # BDE } + +# some Aliases have been created in the fixtures +ALIAS_SET = {a[0] for a in Alias.objects.all().values_list("normalized_name")} + note_user_type = ContentType.objects.get(app_label="note", model="noteuser") note_club_type = ContentType.objects.get(app_label="note", model="noteclub") @@ -55,16 +60,15 @@ class Command(ImportCommand): n = cur.rowcount bulk_mgr = BulkCreateManager(chunk_size=chunk_size) - pseudo_list = set() for idx, row in enumerate(cur): pseudo = row["pseudo"] pseudo_norm = Alias.normalize(pseudo) self.update_line(idx, n, pseudo) # clean pseudo (normalized pseudo must be unique) - if pseudo_norm in pseudo_list: + if pseudo_norm in ALIAS_SET: pseudo = pseudo+str(row["idbde"]) else: - pseudo_list.add(pseudo_norm) + ALIAS_SET.add(pseudo_norm) # clean date note_dict = { "pk": pk_note, @@ -151,37 +155,42 @@ class Command(ImportCommand): # row import completed MAP_IDBDE[row["idbde"]] = pk_note pk_note += 1 + bulk_mgr.done() self.print_success("comptes table imported") - def import_alias(self, cur): + def import_alias(self, cur, chunk_size): """ Import Alias from nk15 We rely on validation of the models, but it is slow. """ cur.execute("SELECT * FROM aliases ORDER by id") n = cur.rowcount + + bulk_mgr = BulkCreateManager(chunk_size=chunk_size) + pk_alias = Alias.objects.order_by('-id').first().id + 1 for idx, row in enumerate(cur): self.update_line(idx, n, row["alias"]) alias_name = row["alias"] - alias_name_good = (alias_name[:252] + '...') if len(alias_name) > 255 else alias_name - obj_dict = { - "note_id": MAP_IDBDE[row["idbde"]], - "name": alias_name_good, - "normalized_name": Alias.normalize(alias_name_good), - } - try: - with transaction.atomic(): - alias, created = Alias.objects.get_or_create(**obj_dict) - except IntegrityError as e: - if "unique" in e.args[0]: - continue - else: - raise e - alias.save() - return None + alias_name = (alias_name[:252] + '...') if len(alias_name) > 255 else alias_name + alias_norm = Alias.normalize(alias_name) + # clean pseudo (normalized pseudo must be unique) + if alias_norm in ALIAS_SET: + continue + else: + print(alias_norm) + ALIAS_SET.add(alias_norm) + obj_dict = { + "pk": pk_alias, + "note_id": MAP_IDBDE[row["idbde"]], + "name": alias_name, + "normalized_name": alias_norm, + } + pk_alias += 1 + bulk_mgr.add(Alias(**obj_dict)) + bulk_mgr.done() + def handle(self, *args, **kwargs): - global MAP_IDBDE # default args, provided by ImportCommand. nk15db, nk15user = kwargs['nk15db'], kwargs['nk15user'] # connecting to nk15 database @@ -189,10 +198,11 @@ class Command(ImportCommand): cur = conn.cursor(cursor_factory=pge.DictCursor) self.import_account(cur,kwargs["chunk"]) + # Alias Management + if kwargs["alias"]: + self.import_alias(cur,kwargs["chunk"]) + #save to disk if kwargs["save"]: filename = kwargs["save"] with open(filename, 'w') as fp: json.dump(MAP_IDBDE, fp, sort_keys=True, indent=2) - # Alias Management - if kwargs["alias"]: - self.impot_alias(cur)