From 68b860668831d31e936bae59ecd3a69d77761d9a Mon Sep 17 00:00:00 2001 From: Emmy D'Anello Date: Sun, 12 May 2024 13:44:25 +0200 Subject: [PATCH] More optimizations --- .../commands/update_trainline_stations.py | 20 ++-- ...on_parent_station_alter_station_same_as.py | 40 ++++++++ trainvel/core/models.py | 4 +- .../commands/update_trainvel_gtfs.py | 99 +++++++++---------- 4 files changed, 99 insertions(+), 64 deletions(-) create mode 100644 trainvel/core/migrations/0002_alter_station_parent_station_alter_station_same_as.py diff --git a/trainvel/core/management/commands/update_trainline_stations.py b/trainvel/core/management/commands/update_trainline_stations.py index 11f1669..fc2b40e 100644 --- a/trainvel/core/management/commands/update_trainline_stations.py +++ b/trainvel/core/management/commands/update_trainline_stations.py @@ -1,7 +1,9 @@ import csv +from time import time import requests from django.core.management import BaseCommand +from django.db import transaction from tqdm import tqdm from trainvel.core.models import Station @@ -12,7 +14,7 @@ class Command(BaseCommand): def convert_value(value: str) -> str: return True if value == 't' else False if value == 'f' else (value or None) - stations, stations_without_fk = [], [] + stations = [] STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv" with requests.get(STATIONS_URL, stream=True) as resp: @@ -21,13 +23,13 @@ class Command(BaseCommand): values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline') .replace('same_as', 'same_as_id'): convert_value(v) for k, v in row.items()} - values_without_fk = values.copy() - del values_without_fk['same_as_id'] - del values_without_fk['parent_station_id'] stations.append(Station(**values)) - stations_without_fk.append(Station(**values_without_fk)) - Station.objects.bulk_create(stations_without_fk, update_conflicts=True, unique_fields=['id'], - update_fields=[k for k in values_without_fk.keys() if k != 'id']) - Station.objects.bulk_create(stations, update_conflicts=True, unique_fields=['id'], - update_fields=['same_as_id', 'parent_station_id']) + Station.objects.all().delete() + if options['verbosity'] >= 1: + self.stdout.write("Deleted all stations.") + start_time = time() + with transaction.atomic(): + Station.objects.bulk_create(stations) + if options['verbosity'] >= 1: + self.stdout.write(f"Inserted {len(stations)} stations in {time() - start_time:.2f} seconds.") diff --git a/trainvel/core/migrations/0002_alter_station_parent_station_alter_station_same_as.py b/trainvel/core/migrations/0002_alter_station_parent_station_alter_station_same_as.py new file mode 100644 index 0000000..4685a7b --- /dev/null +++ b/trainvel/core/migrations/0002_alter_station_parent_station_alter_station_same_as.py @@ -0,0 +1,40 @@ +# Generated by Django 5.0.6 on 2024-05-12 11:09 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="station", + name="parent_station", + field=models.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="children", + to="core.station", + verbose_name="parent station", + ), + ), + migrations.AlterField( + model_name="station", + name="same_as", + field=models.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="same_as_other", + to="core.station", + verbose_name="same as", + ), + ), + ] diff --git a/trainvel/core/models.py b/trainvel/core/models.py index 33230fc..bec865f 100644 --- a/trainvel/core/models.py +++ b/trainvel/core/models.py @@ -53,7 +53,7 @@ class Station(models.Model): parent_station = models.ForeignKey( "Station", - on_delete=models.CASCADE, + on_delete=models.SET_NULL, verbose_name=_("parent station"), blank=True, null=True, @@ -333,7 +333,7 @@ class Station(models.Model): same_as = models.ForeignKey( "Station", - on_delete=models.CASCADE, + on_delete=models.SET_NULL, verbose_name=_("same as"), blank=True, null=True, diff --git a/trainvel/gtfs/management/commands/update_trainvel_gtfs.py b/trainvel/gtfs/management/commands/update_trainvel_gtfs.py index bbb0cdb..bd9e201 100644 --- a/trainvel/gtfs/management/commands/update_trainvel_gtfs.py +++ b/trainvel/gtfs/management/commands/update_trainvel_gtfs.py @@ -8,6 +8,7 @@ from zoneinfo import ZoneInfo import requests from django.core.management import BaseCommand +from django.db import transaction from tqdm import tqdm from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \ @@ -216,67 +217,59 @@ class Command(BaseCommand): calendars[calendar.id] = calendar if len(calendars) >= bulk_size and not dry_run: - Calendar.objects.bulk_create(calendars.values(), - update_conflicts=True, - update_fields=['monday', 'tuesday', 'wednesday', 'thursday', - 'friday', 'saturday', 'sunday', 'start_date', - 'end_date', 'gtfs_feed'], - unique_fields=['id']) + Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size) calendars.clear() + if calendars and not dry_run: - Calendar.objects.bulk_create(calendars.values(), update_conflicts=True, - update_fields=['monday', 'tuesday', 'wednesday', 'thursday', - 'friday', 'saturday', 'sunday', 'start_date', - 'end_date', 'gtfs_feed'], - unique_fields=['id']) + Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size) calendars.clear() calendar_dates = [] - for calendar_date_dict in read_csv("calendar_dates.txt"): - calendar_date_dict: dict - calendar_date = CalendarDate( - id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}", - service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}", - date=calendar_date_dict['date'], - exception_type=calendar_date_dict['exception_type'], - ) - calendar_dates.append(calendar_date) + with transaction.atomic(): + for calendar_date_dict in read_csv("calendar_dates.txt"): + calendar_date_dict: dict + service_id = f"{gtfs_code}-{calendar_date_dict['service_id']}" + date = calendar_date_dict['date'] - if calendar_date.service_id not in calendars: - calendar = Calendar( - id=f"{gtfs_code}-{calendar_date_dict['service_id']}", - monday=False, - tuesday=False, - wednesday=False, - thursday=False, - friday=False, - saturday=False, - sunday=False, - start_date=calendar_date_dict['date'], - end_date=calendar_date_dict['date'], - gtfs_feed_id=gtfs_code, + calendar_date = CalendarDate( + id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}", + service_id=service_id, + date=date, + exception_type=calendar_date_dict['exception_type'], ) - calendars[calendar.id] = calendar - else: - calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"] - if calendar.start_date > calendar_date.date: - calendar.start_date = calendar_date.date - if calendar.end_date < calendar_date.date: - calendar.end_date = calendar_date.date + calendar_dates.append(calendar_date) - if calendar_dates and not dry_run: - Calendar.objects.bulk_create(calendars.values(), - batch_size=bulk_size, - update_conflicts=True, - update_fields=['start_date', 'end_date', 'gtfs_feed'], - unique_fields=['id']) - CalendarDate.objects.bulk_create(calendar_dates, - batch_size=bulk_size, - update_conflicts=True, - update_fields=['service_id', 'date', 'exception_type'], - unique_fields=['id']) - calendars.clear() - calendar_dates.clear() + if not Calendar.objects.filter(id=calendar_date.service_id).exists(): + calendar = Calendar( + id=service_id, + monday=False, + tuesday=False, + wednesday=False, + thursday=False, + friday=False, + saturday=False, + sunday=False, + start_date=calendar_date_dict['date'], + end_date=calendar_date_dict['date'], + gtfs_feed_id=gtfs_code, + ) + calendar.save() + else: + calendar = Calendar.objects.get(id=service_id) + if calendar.start_date.isoformat() > date: + calendar.start_date = date + calendar.save() + if calendar.end_date.isoformat() < date: + calendar.end_date = date + calendar.save() + + if len(calendar_dates) >= bulk_size and not dry_run: + CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size) + calendar_dates.clear() + + if calendar_dates and not dry_run: + CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size) + calendar_dates.clear() trips = [] # start_time = time()