More optimizations

This commit is contained in:
Emmy D'Anello 2024-05-12 13:44:25 +02:00
parent 15239117f5
commit 68b8606688
Signed by: ynerant
GPG Key ID: 3A75C55819C8CF85
4 changed files with 99 additions and 64 deletions

View File

@@ -1,7 +1,9 @@
import csv import csv
from time import time
import requests import requests
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.db import transaction
from tqdm import tqdm from tqdm import tqdm
from trainvel.core.models import Station from trainvel.core.models import Station
@@ -12,7 +14,7 @@ class Command(BaseCommand):
def convert_value(value: str) -> str: def convert_value(value: str) -> str:
return True if value == 't' else False if value == 'f' else (value or None) return True if value == 't' else False if value == 'f' else (value or None)
stations, stations_without_fk = [], [] stations = []
STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv" STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv"
with requests.get(STATIONS_URL, stream=True) as resp: with requests.get(STATIONS_URL, stream=True) as resp:
@@ -21,13 +23,13 @@ class Command(BaseCommand):
values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline') values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline')
.replace('same_as', 'same_as_id'): convert_value(v) .replace('same_as', 'same_as_id'): convert_value(v)
for k, v in row.items()} for k, v in row.items()}
values_without_fk = values.copy()
del values_without_fk['same_as_id']
del values_without_fk['parent_station_id']
stations.append(Station(**values)) stations.append(Station(**values))
stations_without_fk.append(Station(**values_without_fk))
Station.objects.bulk_create(stations_without_fk, update_conflicts=True, unique_fields=['id'], Station.objects.all().delete()
update_fields=[k for k in values_without_fk.keys() if k != 'id']) if options['verbosity'] >= 1:
Station.objects.bulk_create(stations, update_conflicts=True, unique_fields=['id'], self.stdout.write("Deleted all stations.")
update_fields=['same_as_id', 'parent_station_id']) start_time = time()
with transaction.atomic():
Station.objects.bulk_create(stations)
if options['verbosity'] >= 1:
self.stdout.write(f"Inserted {len(stations)} stations in {time() - start_time:.2f} seconds.")

View File

@@ -0,0 +1,40 @@
# Generated by Django 5.0.6 on 2024-05-12 11:09
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter the two self-referencing foreign keys of the ``station`` model.

    Both ``parent_station`` and ``same_as`` are redefined as optional
    (``null=True``, ``blank=True``, ``default=None``) foreign keys to
    ``core.station`` that use ``on_delete=SET_NULL``.
    """

    dependencies = [("core", "0001_initial")]

    # The two fields share every option except their name, reverse accessor
    # and verbose name, so the operations are generated from a single table.
    operations = [
        migrations.AlterField(
            model_name="station",
            name=field_name,
            field=models.ForeignKey(
                to="core.station",
                on_delete=django.db.models.deletion.SET_NULL,
                related_name=reverse_name,
                verbose_name=label,
                blank=True,
                null=True,
                default=None,
            ),
        )
        for field_name, reverse_name, label in (
            ("parent_station", "children", "parent station"),
            ("same_as", "same_as_other", "same as"),
        )
    ]

View File

@@ -53,7 +53,7 @@ class Station(models.Model):
parent_station = models.ForeignKey( parent_station = models.ForeignKey(
"Station", "Station",
on_delete=models.CASCADE, on_delete=models.SET_NULL,
verbose_name=_("parent station"), verbose_name=_("parent station"),
blank=True, blank=True,
null=True, null=True,
@@ -333,7 +333,7 @@ class Station(models.Model):
same_as = models.ForeignKey( same_as = models.ForeignKey(
"Station", "Station",
on_delete=models.CASCADE, on_delete=models.SET_NULL,
verbose_name=_("same as"), verbose_name=_("same as"),
blank=True, blank=True,
null=True, null=True,

View File

@@ -8,6 +8,7 @@ from zoneinfo import ZoneInfo
import requests import requests
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.db import transaction
from tqdm import tqdm from tqdm import tqdm
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \ from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
@@ -216,67 +217,59 @@ class Command(BaseCommand):
calendars[calendar.id] = calendar calendars[calendar.id] = calendar
if len(calendars) >= bulk_size and not dry_run: if len(calendars) >= bulk_size and not dry_run:
Calendar.objects.bulk_create(calendars.values(), Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear() calendars.clear()
if calendars and not dry_run: if calendars and not dry_run:
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True, Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear() calendars.clear()
calendar_dates = [] calendar_dates = []
for calendar_date_dict in read_csv("calendar_dates.txt"): with transaction.atomic():
calendar_date_dict: dict for calendar_date_dict in read_csv("calendar_dates.txt"):
calendar_date = CalendarDate( calendar_date_dict: dict
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}", service_id = f"{gtfs_code}-{calendar_date_dict['service_id']}"
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}", date = calendar_date_dict['date']
date=calendar_date_dict['date'],
exception_type=calendar_date_dict['exception_type'],
)
calendar_dates.append(calendar_date)
if calendar_date.service_id not in calendars: calendar_date = CalendarDate(
calendar = Calendar( id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
id=f"{gtfs_code}-{calendar_date_dict['service_id']}", service_id=service_id,
monday=False, date=date,
tuesday=False, exception_type=calendar_date_dict['exception_type'],
wednesday=False,
thursday=False,
friday=False,
saturday=False,
sunday=False,
start_date=calendar_date_dict['date'],
end_date=calendar_date_dict['date'],
gtfs_feed_id=gtfs_code,
) )
calendars[calendar.id] = calendar calendar_dates.append(calendar_date)
else:
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
if calendar.start_date > calendar_date.date:
calendar.start_date = calendar_date.date
if calendar.end_date < calendar_date.date:
calendar.end_date = calendar_date.date
if calendar_dates and not dry_run: if not Calendar.objects.filter(id=calendar_date.service_id).exists():
Calendar.objects.bulk_create(calendars.values(), calendar = Calendar(
batch_size=bulk_size, id=service_id,
update_conflicts=True, monday=False,
update_fields=['start_date', 'end_date', 'gtfs_feed'], tuesday=False,
unique_fields=['id']) wednesday=False,
CalendarDate.objects.bulk_create(calendar_dates, thursday=False,
batch_size=bulk_size, friday=False,
update_conflicts=True, saturday=False,
update_fields=['service_id', 'date', 'exception_type'], sunday=False,
unique_fields=['id']) start_date=calendar_date_dict['date'],
calendars.clear() end_date=calendar_date_dict['date'],
calendar_dates.clear() gtfs_feed_id=gtfs_code,
)
calendar.save()
else:
calendar = Calendar.objects.get(id=service_id)
if calendar.start_date.isoformat() > date:
calendar.start_date = date
calendar.save()
if calendar.end_date.isoformat() < date:
calendar.end_date = date
calendar.save()
if len(calendar_dates) >= bulk_size and not dry_run:
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
calendar_dates.clear()
if calendar_dates and not dry_run:
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
calendar_dates.clear()
trips = [] trips = []
# start_time = time() # start_time = time()