More optimizations
This commit is contained in:
parent
15239117f5
commit
68b8606688
|
@ -1,7 +1,9 @@
|
||||||
import csv
|
import csv
|
||||||
|
from time import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
|
from django.db import transaction
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from trainvel.core.models import Station
|
from trainvel.core.models import Station
|
||||||
|
@ -12,7 +14,7 @@ class Command(BaseCommand):
|
||||||
def convert_value(value: str) -> str:
|
def convert_value(value: str) -> str:
|
||||||
return True if value == 't' else False if value == 'f' else (value or None)
|
return True if value == 't' else False if value == 'f' else (value or None)
|
||||||
|
|
||||||
stations, stations_without_fk = [], []
|
stations = []
|
||||||
|
|
||||||
STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv"
|
STATIONS_URL = "https://raw.githubusercontent.com/trainline-eu/stations/master/stations.csv"
|
||||||
with requests.get(STATIONS_URL, stream=True) as resp:
|
with requests.get(STATIONS_URL, stream=True) as resp:
|
||||||
|
@ -21,13 +23,13 @@ class Command(BaseCommand):
|
||||||
values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline')
|
values = {k.replace(':', '_').replace('normalised_code', 'normalized_code_trainline')
|
||||||
.replace('same_as', 'same_as_id'): convert_value(v)
|
.replace('same_as', 'same_as_id'): convert_value(v)
|
||||||
for k, v in row.items()}
|
for k, v in row.items()}
|
||||||
values_without_fk = values.copy()
|
|
||||||
del values_without_fk['same_as_id']
|
|
||||||
del values_without_fk['parent_station_id']
|
|
||||||
stations.append(Station(**values))
|
stations.append(Station(**values))
|
||||||
stations_without_fk.append(Station(**values_without_fk))
|
|
||||||
|
|
||||||
Station.objects.bulk_create(stations_without_fk, update_conflicts=True, unique_fields=['id'],
|
Station.objects.all().delete()
|
||||||
update_fields=[k for k in values_without_fk.keys() if k != 'id'])
|
if options['verbosity'] >= 1:
|
||||||
Station.objects.bulk_create(stations, update_conflicts=True, unique_fields=['id'],
|
self.stdout.write("Deleted all stations.")
|
||||||
update_fields=['same_as_id', 'parent_station_id'])
|
start_time = time()
|
||||||
|
with transaction.atomic():
|
||||||
|
Station.objects.bulk_create(stations)
|
||||||
|
if options['verbosity'] >= 1:
|
||||||
|
self.stdout.write(f"Inserted {len(stations)} stations in {time() - start_time:.2f} seconds.")
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
# Generated by Django 5.0.6 on 2024-05-12 11:09
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("core", "0001_initial"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="station",
|
||||||
|
name="parent_station",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
default=None,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="children",
|
||||||
|
to="core.station",
|
||||||
|
verbose_name="parent station",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="station",
|
||||||
|
name="same_as",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
default=None,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="same_as_other",
|
||||||
|
to="core.station",
|
||||||
|
verbose_name="same as",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
|
@ -53,7 +53,7 @@ class Station(models.Model):
|
||||||
|
|
||||||
parent_station = models.ForeignKey(
|
parent_station = models.ForeignKey(
|
||||||
"Station",
|
"Station",
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.SET_NULL,
|
||||||
verbose_name=_("parent station"),
|
verbose_name=_("parent station"),
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
|
@ -333,7 +333,7 @@ class Station(models.Model):
|
||||||
|
|
||||||
same_as = models.ForeignKey(
|
same_as = models.ForeignKey(
|
||||||
"Station",
|
"Station",
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.SET_NULL,
|
||||||
verbose_name=_("same as"),
|
verbose_name=_("same as"),
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
|
|
|
@ -8,6 +8,7 @@ from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
|
from django.db import transaction
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
|
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
|
||||||
|
@ -216,67 +217,59 @@ class Command(BaseCommand):
|
||||||
calendars[calendar.id] = calendar
|
calendars[calendar.id] = calendar
|
||||||
|
|
||||||
if len(calendars) >= bulk_size and not dry_run:
|
if len(calendars) >= bulk_size and not dry_run:
|
||||||
Calendar.objects.bulk_create(calendars.values(),
|
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
|
||||||
update_conflicts=True,
|
|
||||||
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
|
|
||||||
'friday', 'saturday', 'sunday', 'start_date',
|
|
||||||
'end_date', 'gtfs_feed'],
|
|
||||||
unique_fields=['id'])
|
|
||||||
calendars.clear()
|
calendars.clear()
|
||||||
|
|
||||||
if calendars and not dry_run:
|
if calendars and not dry_run:
|
||||||
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
|
Calendar.objects.bulk_create(calendars.values(), batch_size=bulk_size)
|
||||||
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
|
|
||||||
'friday', 'saturday', 'sunday', 'start_date',
|
|
||||||
'end_date', 'gtfs_feed'],
|
|
||||||
unique_fields=['id'])
|
|
||||||
calendars.clear()
|
calendars.clear()
|
||||||
|
|
||||||
calendar_dates = []
|
calendar_dates = []
|
||||||
for calendar_date_dict in read_csv("calendar_dates.txt"):
|
with transaction.atomic():
|
||||||
calendar_date_dict: dict
|
for calendar_date_dict in read_csv("calendar_dates.txt"):
|
||||||
calendar_date = CalendarDate(
|
calendar_date_dict: dict
|
||||||
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
|
service_id = f"{gtfs_code}-{calendar_date_dict['service_id']}"
|
||||||
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
|
date = calendar_date_dict['date']
|
||||||
date=calendar_date_dict['date'],
|
|
||||||
exception_type=calendar_date_dict['exception_type'],
|
|
||||||
)
|
|
||||||
calendar_dates.append(calendar_date)
|
|
||||||
|
|
||||||
if calendar_date.service_id not in calendars:
|
calendar_date = CalendarDate(
|
||||||
calendar = Calendar(
|
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
|
||||||
id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
|
service_id=service_id,
|
||||||
monday=False,
|
date=date,
|
||||||
tuesday=False,
|
exception_type=calendar_date_dict['exception_type'],
|
||||||
wednesday=False,
|
|
||||||
thursday=False,
|
|
||||||
friday=False,
|
|
||||||
saturday=False,
|
|
||||||
sunday=False,
|
|
||||||
start_date=calendar_date_dict['date'],
|
|
||||||
end_date=calendar_date_dict['date'],
|
|
||||||
gtfs_feed_id=gtfs_code,
|
|
||||||
)
|
)
|
||||||
calendars[calendar.id] = calendar
|
calendar_dates.append(calendar_date)
|
||||||
else:
|
|
||||||
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
|
|
||||||
if calendar.start_date > calendar_date.date:
|
|
||||||
calendar.start_date = calendar_date.date
|
|
||||||
if calendar.end_date < calendar_date.date:
|
|
||||||
calendar.end_date = calendar_date.date
|
|
||||||
|
|
||||||
if calendar_dates and not dry_run:
|
if not Calendar.objects.filter(id=calendar_date.service_id).exists():
|
||||||
Calendar.objects.bulk_create(calendars.values(),
|
calendar = Calendar(
|
||||||
batch_size=bulk_size,
|
id=service_id,
|
||||||
update_conflicts=True,
|
monday=False,
|
||||||
update_fields=['start_date', 'end_date', 'gtfs_feed'],
|
tuesday=False,
|
||||||
unique_fields=['id'])
|
wednesday=False,
|
||||||
CalendarDate.objects.bulk_create(calendar_dates,
|
thursday=False,
|
||||||
batch_size=bulk_size,
|
friday=False,
|
||||||
update_conflicts=True,
|
saturday=False,
|
||||||
update_fields=['service_id', 'date', 'exception_type'],
|
sunday=False,
|
||||||
unique_fields=['id'])
|
start_date=calendar_date_dict['date'],
|
||||||
calendars.clear()
|
end_date=calendar_date_dict['date'],
|
||||||
calendar_dates.clear()
|
gtfs_feed_id=gtfs_code,
|
||||||
|
)
|
||||||
|
calendar.save()
|
||||||
|
else:
|
||||||
|
calendar = Calendar.objects.get(id=service_id)
|
||||||
|
if calendar.start_date.isoformat() > date:
|
||||||
|
calendar.start_date = date
|
||||||
|
calendar.save()
|
||||||
|
if calendar.end_date.isoformat() < date:
|
||||||
|
calendar.end_date = date
|
||||||
|
calendar.save()
|
||||||
|
|
||||||
|
if len(calendar_dates) >= bulk_size and not dry_run:
|
||||||
|
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
|
||||||
|
calendar_dates.clear()
|
||||||
|
|
||||||
|
if calendar_dates and not dry_run:
|
||||||
|
CalendarDate.objects.bulk_create(calendar_dates, batch_size=bulk_size)
|
||||||
|
calendar_dates.clear()
|
||||||
|
|
||||||
trips = []
|
trips = []
|
||||||
# start_time = time()
|
# start_time = time()
|
||||||
|
|
Loading…
Reference in New Issue