More optimizations

This commit is contained in:
Emmy D'Anello 2024-05-12 13:44:25 +02:00
parent 15239117f5
commit 68b8606688
Signed by: ynerant
GPG Key ID: 3A75C55819C8CF85
4 changed files with 99 additions and 64 deletions

View File

@ -1,7 +1,9 @@
import csv
from time import time
import requests
from django.core.management import BaseCommand
from django.db import transaction
from tqdm import tqdm
from trainvel.core.models import Station
@ -12,7 +14,7 @@ class Command(BaseCommand):
def convert_value(value: str) -> str:
return True if value == 't' else False if value == 'f' else (value or None)
stations, stations_without_fk = [], []
stations = []
STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv"
with requests.get(STATIONS_URL, stream=True) as resp:
@ -21,13 +23,13 @@ class Command(BaseCommand):
values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline')
.replace('same_as', 'same_as_id'): convert_value(v)
for k, v in row.items()}
values_without_fk = values.copy()
del values_without_fk['same_as_id']
del values_without_fk['parent_station_id']
stations.append(Station(**values))
stations_without_fk.append(Station(**values_without_fk))
Station.objects.bulk_create(stations_without_fk, update_conflicts=True, unique_fields=['id'],
update_fields=[k for k in values_without_fk.keys() if k != 'id'])
Station.objects.bulk_create(stations, update_conflicts=True, unique_fields=['id'],
update_fields=['same_as_id', 'parent_station_id'])
Station.objects.all().delete()
if options['verbosity'] >= 1:
self.stdout.write("Deleted all stations.")
start_time = time()
with transaction.atomic():
Station.objects.bulk_create(stations)
if options['verbosity'] >= 1:
self.stdout.write(f"Inserted {len(stations)} stations in {time() - start_time:.2f} seconds.")

View File

@ -0,0 +1,40 @@
# Generated by Django 5.0.6 on 2024-05-12 11:09
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine the two self-referencing foreign keys of ``Station``.

    Both ``parent_station`` and ``same_as`` are declared as optional
    (nullable, blank, defaulting to ``None``) references to
    ``core.station`` that are set to NULL when the target row is deleted.
    """

    dependencies = [("core", "0001_initial")]

    operations = [
        # Station.parent_station -> Station, reverse accessor "children".
        migrations.AlterField(
            model_name="station",
            name="parent_station",
            field=models.ForeignKey(
                to="core.station",
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="children",
                verbose_name="parent station",
                null=True,
                blank=True,
                default=None,
            ),
        ),
        # Station.same_as -> Station, reverse accessor "same_as_other".
        migrations.AlterField(
            model_name="station",
            name="same_as",
            field=models.ForeignKey(
                to="core.station",
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="same_as_other",
                verbose_name="same as",
                null=True,
                blank=True,
                default=None,
            ),
        ),
    ]

View File

@ -53,7 +53,7 @@ class Station(models.Model):
parent_station = models.ForeignKey(
"Station",
on_delete=models.CASCADE,
on_delete=models.SET_NULL,
verbose_name=_("parent station"),
blank=True,
null=True,
@ -333,7 +333,7 @@ class Station(models.Model):
same_as = models.ForeignKey(
"Station",
on_delete=models.CASCADE,
on_delete=models.SET_NULL,
verbose_name=_("same as"),
blank=True,
null=True,

View File

@ -8,6 +8,7 @@ from zoneinfo import ZoneInfo
import requests
from django.core.management import BaseCommand
from django.db import transaction
from tqdm import tqdm
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
@ -216,67 +217,59 @@ class Command(BaseCommand):
calendars[calendar.id] = calendar
if len(calendars) >= bulk_size and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
calendars.clear()
if calendars and not dry_run:
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
calendars.clear()
calendar_dates = []
for calendar_date_dict in read_csv("calendar_dates.txt"):
calendar_date_dict: dict
calendar_date = CalendarDate(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
date=calendar_date_dict['date'],
exception_type=calendar_date_dict['exception_type'],
)
calendar_dates.append(calendar_date)
with transaction.atomic():
for calendar_date_dict in read_csv("calendar_dates.txt"):
calendar_date_dict: dict
service_id = f"{gtfs_code}-{calendar_date_dict['service_id']}"
date = calendar_date_dict['date']
if calendar_date.service_id not in calendars:
calendar = Calendar(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
monday=False,
tuesday=False,
wednesday=False,
thursday=False,
friday=False,
saturday=False,
sunday=False,
start_date=calendar_date_dict['date'],
end_date=calendar_date_dict['date'],
gtfs_feed_id=gtfs_code,
calendar_date = CalendarDate(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
service_id=service_id,
date=date,
exception_type=calendar_date_dict['exception_type'],
)
calendars[calendar.id] = calendar
else:
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
if calendar.start_date > calendar_date.date:
calendar.start_date = calendar_date.date
if calendar.end_date < calendar_date.date:
calendar.end_date = calendar_date.date
calendar_dates.append(calendar_date)
if calendar_dates and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
batch_size=bulk_size,
update_conflicts=True,
update_fields=['start_date', 'end_date', 'gtfs_feed'],
unique_fields=['id'])
CalendarDate.objects.bulk_create(calendar_dates,
batch_size=bulk_size,
update_conflicts=True,
update_fields=['service_id', 'date', 'exception_type'],
unique_fields=['id'])
calendars.clear()
calendar_dates.clear()
if not Calendar.objects.filter(id=calendar_date.service_id).exists():
calendar = Calendar(
id=service_id,
monday=False,
tuesday=False,
wednesday=False,
thursday=False,
friday=False,
saturday=False,
sunday=False,
start_date=calendar_date_dict['date'],
end_date=calendar_date_dict['date'],
gtfs_feed_id=gtfs_code,
)
calendar.save()
else:
calendar = Calendar.objects.get(id=service_id)
if calendar.start_date.isoformat() > date:
calendar.start_date = date
calendar.save()
if calendar.end_date.isoformat() < date:
calendar.end_date = date
calendar.save()
if len(calendar_dates) >= bulk_size and not dry_run:
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
calendar_dates.clear()
if calendar_dates and not dry_run:
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
calendar_dates.clear()
trips = []
# start_time = time()