More optimizations
This commit is contained in:
parent
15239117f5
commit
68b8606688
|
@ -1,7 +1,9 @@
|
|||
import csv
|
||||
from time import time
|
||||
|
||||
import requests
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
from tqdm import tqdm
|
||||
|
||||
from trainvel.core.models import Station
|
||||
|
@ -12,7 +14,7 @@ class Command(BaseCommand):
|
|||
def convert_value(value: str) -> str:
|
||||
return True if value == 't' else False if value == 'f' else (value or None)
|
||||
|
||||
stations, stations_without_fk = [], []
|
||||
stations = []
|
||||
|
||||
STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv"
|
||||
with requests.get(STATIONS_URL, stream=True) as resp:
|
||||
|
@ -21,13 +23,13 @@ class Command(BaseCommand):
|
|||
values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline')
|
||||
.replace('same_as', 'same_as_id'): convert_value(v)
|
||||
for k, v in row.items()}
|
||||
values_without_fk = values.copy()
|
||||
del values_without_fk['same_as_id']
|
||||
del values_without_fk['parent_station_id']
|
||||
stations.append(Station(**values))
|
||||
stations_without_fk.append(Station(**values_without_fk))
|
||||
|
||||
Station.objects.bulk_create(stations_without_fk, update_conflicts=True, unique_fields=['id'],
|
||||
update_fields=[k for k in values_without_fk.keys() if k != 'id'])
|
||||
Station.objects.bulk_create(stations, update_conflicts=True, unique_fields=['id'],
|
||||
update_fields=['same_as_id', 'parent_station_id'])
|
||||
Station.objects.all().delete()
|
||||
if options['verbosity'] >= 1:
|
||||
self.stdout.write("Deleted all stations.")
|
||||
start_time = time()
|
||||
with transaction.atomic():
|
||||
Station.objects.bulk_create(stations)
|
||||
if options['verbosity'] >= 1:
|
||||
self.stdout.write(f"Inserted {len(stations)} stations in {time() - start_time:.2f} seconds.")
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-12 11:09
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
def _nullable_station_fk(related_name, verbose_name):
    """Build the optional ``core.station`` foreign key shared by both operations."""
    return models.ForeignKey(
        blank=True,
        default=None,
        null=True,
        on_delete=django.db.models.deletion.SET_NULL,
        related_name=related_name,
        to="core.station",
        verbose_name=verbose_name,
    )


class Migration(migrations.Migration):
    """Alter ``Station.parent_station`` and ``Station.same_as``.

    Both fields become nullable foreign keys to ``core.station`` that are set
    to NULL when the referenced station is deleted.
    """

    dependencies = [("core", "0001_initial")]

    operations = [
        migrations.AlterField(
            model_name="station",
            name="parent_station",
            field=_nullable_station_fk("children", "parent station"),
        ),
        migrations.AlterField(
            model_name="station",
            name="same_as",
            field=_nullable_station_fk("same_as_other", "same as"),
        ),
    ]
|
|
@ -53,7 +53,7 @@ class Station(models.Model):
|
|||
|
||||
parent_station = models.ForeignKey(
|
||||
"Station",
|
||||
on_delete=models.CASCADE,
|
||||
on_delete=models.SET_NULL,
|
||||
verbose_name=_("parent station"),
|
||||
blank=True,
|
||||
null=True,
|
||||
|
@ -333,7 +333,7 @@ class Station(models.Model):
|
|||
|
||||
same_as = models.ForeignKey(
|
||||
"Station",
|
||||
on_delete=models.CASCADE,
|
||||
on_delete=models.SET_NULL,
|
||||
verbose_name=_("same as"),
|
||||
blank=True,
|
||||
null=True,
|
||||
|
|
|
@ -8,6 +8,7 @@ from zoneinfo import ZoneInfo
|
|||
|
||||
import requests
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
from tqdm import tqdm
|
||||
|
||||
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
|
||||
|
@ -216,35 +217,31 @@ class Command(BaseCommand):
|
|||
calendars[calendar.id] = calendar
|
||||
|
||||
if len(calendars) >= bulk_size and not dry_run:
|
||||
Calendar.objects.bulk_create(calendars.values(),
|
||||
update_conflicts=True,
|
||||
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
|
||||
'friday', 'saturday', 'sunday', 'start_date',
|
||||
'end_date', 'gtfs_feed'],
|
||||
unique_fields=['id'])
|
||||
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
|
||||
calendars.clear()
|
||||
|
||||
if calendars and not dry_run:
|
||||
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
|
||||
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
|
||||
'friday', 'saturday', 'sunday', 'start_date',
|
||||
'end_date', 'gtfs_feed'],
|
||||
unique_fields=['id'])
|
||||
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
|
||||
calendars.clear()
|
||||
|
||||
calendar_dates = []
|
||||
with transaction.atomic():
|
||||
for calendar_date_dict in read_csv("calendar_dates.txt"):
|
||||
calendar_date_dict: dict
|
||||
service_id = f"{gtfs_code}-{calendar_date_dict['service_id']}"
|
||||
date = calendar_date_dict['date']
|
||||
|
||||
calendar_date = CalendarDate(
|
||||
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
|
||||
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
|
||||
date=calendar_date_dict['date'],
|
||||
service_id=service_id,
|
||||
date=date,
|
||||
exception_type=calendar_date_dict['exception_type'],
|
||||
)
|
||||
calendar_dates.append(calendar_date)
|
||||
|
||||
if calendar_date.service_id not in calendars:
|
||||
if not Calendar.objects.filter(id=calendar_date.service_id).exists():
|
||||
calendar = Calendar(
|
||||
id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
|
||||
id=service_id,
|
||||
monday=False,
|
||||
tuesday=False,
|
||||
wednesday=False,
|
||||
|
@ -256,26 +253,22 @@ class Command(BaseCommand):
|
|||
end_date=calendar_date_dict['date'],
|
||||
gtfs_feed_id=gtfs_code,
|
||||
)
|
||||
calendars[calendar.id] = calendar
|
||||
calendar.save()
|
||||
else:
|
||||
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
|
||||
if calendar.start_date > calendar_date.date:
|
||||
calendar.start_date = calendar_date.date
|
||||
if calendar.end_date < calendar_date.date:
|
||||
calendar.end_date = calendar_date.date
|
||||
calendar = Calendar.objects.get(id=service_id)
|
||||
if calendar.start_date.isoformat() > date:
|
||||
calendar.start_date = date
|
||||
calendar.save()
|
||||
if calendar.end_date.isoformat() < date:
|
||||
calendar.end_date = date
|
||||
calendar.save()
|
||||
|
||||
if len(calendar_dates) >= bulk_size and not dry_run:
|
||||
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
|
||||
calendar_dates.clear()
|
||||
|
||||
if calendar_dates and not dry_run:
|
||||
Calendar.objects.bulk_create(calendars.values(),
|
||||
batch_size=bulk_size,
|
||||
update_conflicts=True,
|
||||
update_fields=['start_date', 'end_date', 'gtfs_feed'],
|
||||
unique_fields=['id'])
|
||||
CalendarDate.objects.bulk_create(calendar_dates,
|
||||
batch_size=bulk_size,
|
||||
update_conflicts=True,
|
||||
update_fields=['service_id', 'date', 'exception_type'],
|
||||
unique_fields=['id'])
|
||||
calendars.clear()
|
||||
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
|
||||
calendar_dates.clear()
|
||||
|
||||
trips = []
|
||||
|
|
Loading…
Reference in New Issue