More optimization

This commit is contained in:
Emmy D'Anello 2024-05-12 11:52:44 +02:00
parent eade9e84de
commit b85a1b7734
Signed by: ynerant
GPG Key ID: 3A75C55819C8CF85
3 changed files with 381 additions and 337 deletions

View File

@ -1,6 +1,8 @@
import csv import csv
import os.path
import tempfile
from datetime import datetime, timedelta from datetime import datetime, timedelta
from io import BytesIO, TextIOWrapper from time import time
from zipfile import ZipFile from zipfile import ZipFile
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
@ -9,7 +11,7 @@ from django.core.management import BaseCommand
from tqdm import tqdm from tqdm import tqdm
from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \ from trainvel.gtfs.models import Agency, Calendar, CalendarDate, FeedInfo, GTFSFeed, Route, Stop, StopTime, \
Transfer, Trip, PickupType Transfer, Trip, PickupType, TripUpdate
class Command(BaseCommand): class Command(BaseCommand):
@ -17,7 +19,7 @@ class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument('--debug', '-d', action='store_true', help="Activate debug mode") parser.add_argument('--debug', '-d', action='store_true', help="Activate debug mode")
parser.add_argument('--bulk_size', type=int, default=1000, help="Number of objects to create in bulk.") parser.add_argument('--bulk_size', '-b', type=int, default=10000, help="Number of objects to create in bulk.")
parser.add_argument('--dry-run', action='store_true', parser.add_argument('--dry-run', action='store_true',
help="Do not update the database, only print what would be done.") help="Do not update the database, only print what would be done.")
parser.add_argument('--force', '-f', action='store_true', help="Force the update of the database.") parser.add_argument('--force', '-f', action='store_true', help="Force the update of the database.")
@ -30,8 +32,6 @@ class Command(BaseCommand):
self.stdout.write("Updating database...") self.stdout.write("Updating database...")
for gtfs_feed in GTFSFeed.objects.all(): for gtfs_feed in GTFSFeed.objects.all():
gtfs_code = gtfs_feed.code
if not force: if not force:
# Check if the source file was updated # Check if the source file was updated
resp = requests.head(gtfs_feed.feed_url, allow_redirects=True) resp = requests.head(gtfs_feed.feed_url, allow_redirects=True)
@ -51,340 +51,17 @@ class Command(BaseCommand):
self.stdout.write(f"Downloading GTFS feed for {gtfs_feed}...") self.stdout.write(f"Downloading GTFS feed for {gtfs_feed}...")
resp = requests.get(gtfs_feed.feed_url, allow_redirects=True, stream=True) resp = requests.get(gtfs_feed.feed_url, allow_redirects=True, stream=True)
with ZipFile(BytesIO(resp.content)) as zipfile:
def read_csv(filename):
with zipfile.open(filename, 'r') as zf:
with TextIOWrapper(zf, encoding='utf-8') as wrapper:
reader = csv.DictReader(wrapper)
reader.fieldnames = [field.replace('\ufeff', '').strip()
for field in reader.fieldnames]
for row in tqdm(reader, desc=filename, unit=' rows'):
yield {k.strip(): v.strip() for k, v in row.items()}
agencies = [] with tempfile.TemporaryFile(suffix=".zip") as file:
for agency_dict in read_csv("agency.txt"): for chunk in resp.iter_content(chunk_size=128):
agency_dict: dict file.write(chunk)
agency = Agency( file.seek(0)
id=f"{gtfs_code}-{agency_dict['agency_id']}",
name=agency_dict['agency_name'],
url=agency_dict['agency_url'],
timezone=agency_dict['agency_timezone'],
lang=agency_dict.get('agency_lang', "fr"),
phone=agency_dict.get('agency_phone', ""),
email=agency_dict.get('agency_email', ""),
gtfs_feed=gtfs_feed,
)
agencies.append(agency)
if agencies and not dry_run:
Agency.objects.bulk_create(agencies,
update_conflicts=True,
update_fields=['name', 'url', 'timezone', 'lang', 'phone', 'email',
'gtfs_feed'],
unique_fields=['id'])
agencies.clear()
stops = [] with tempfile.TemporaryDirectory() as tmp_dir:
for stop_dict in read_csv("stops.txt"): with ZipFile(file) as zipfile:
stop_dict: dict zipfile.extractall(tmp_dir)
stop_id = stop_dict['stop_id']
stop_id = f"{gtfs_code}-{stop_id}"
parent_station_id = stop_dict.get('parent_station', None) self.parse_gtfs(tmp_dir, gtfs_feed, bulk_size, dry_run, verbosity)
parent_station_id = f"{gtfs_code}-{parent_station_id}" if parent_station_id else None
stop = Stop(
id=stop_id,
name=stop_dict['stop_name'],
desc=stop_dict.get('stop_desc', ""),
lat=stop_dict['stop_lat'],
lon=stop_dict['stop_lon'],
zone_id=stop_dict.get('zone_id', ""),
url=stop_dict.get('stop_url', ""),
location_type=stop_dict.get('location_type', 0) or 0,
parent_station_id=parent_station_id,
timezone=stop_dict.get('stop_timezone', ""),
wheelchair_boarding=stop_dict.get('wheelchair_boarding', 0),
level_id=stop_dict.get('level_id', ""),
platform_code=stop_dict.get('platform_code', ""),
gtfs_feed=gtfs_feed,
)
stops.append(stop)
if stops and not dry_run:
Stop.objects.bulk_create(stops,
batch_size=bulk_size,
update_conflicts=True,
update_fields=['name', 'desc', 'lat', 'lon', 'zone_id', 'url',
'location_type', 'parent_station_id', 'timezone',
'wheelchair_boarding', 'level_id', 'platform_code',
'gtfs_feed'],
unique_fields=['id'])
stops.clear()
routes = []
for route_dict in read_csv("routes.txt"):
route_dict: dict
route_id = route_dict['route_id']
route_id = f"{gtfs_code}-{route_id}"
# Agency is optional there is only one
agency_id = route_dict.get('agency_id', "") or Agency.objects.get(gtfs_feed=gtfs_feed)
route = Route(
id=route_id,
agency_id=f"{gtfs_code}-{agency_id}",
short_name=route_dict['route_short_name'],
long_name=route_dict['route_long_name'],
desc=route_dict.get('route_desc', ""),
type=route_dict['route_type'],
url=route_dict.get('route_url', ""),
color=route_dict.get('route_color', ""),
text_color=route_dict.get('route_text_color', ""),
gtfs_feed=gtfs_feed,
)
routes.append(route)
if len(routes) >= bulk_size and not dry_run:
Route.objects.bulk_create(routes,
update_conflicts=True,
update_fields=['agency_id', 'short_name', 'long_name', 'desc',
'type', 'url', 'color', 'text_color',
'gtfs_feed'],
unique_fields=['id'])
routes.clear()
if routes and not dry_run:
Route.objects.bulk_create(routes,
update_conflicts=True,
update_fields=['agency_id', 'short_name', 'long_name', 'desc',
'type', 'url', 'color', 'text_color',
'gtfs_feed'],
unique_fields=['id'])
routes.clear()
Calendar.objects.filter(gtfs_feed=gtfs_feed).delete()
calendars = {}
if "calendar.txt" in zipfile.namelist():
for calendar_dict in read_csv("calendar.txt"):
calendar_dict: dict
calendar = Calendar(
id=f"{gtfs_code}-{calendar_dict['service_id']}",
monday=calendar_dict['monday'],
tuesday=calendar_dict['tuesday'],
wednesday=calendar_dict['wednesday'],
thursday=calendar_dict['thursday'],
friday=calendar_dict['friday'],
saturday=calendar_dict['saturday'],
sunday=calendar_dict['sunday'],
start_date=calendar_dict['start_date'],
end_date=calendar_dict['end_date'],
gtfs_feed=gtfs_feed,
)
calendars[calendar.id] = calendar
if len(calendars) >= bulk_size and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear()
if calendars and not dry_run:
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear()
calendar_dates = []
for calendar_date_dict in read_csv("calendar_dates.txt"):
calendar_date_dict: dict
calendar_date = CalendarDate(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
date=calendar_date_dict['date'],
exception_type=calendar_date_dict['exception_type'],
)
calendar_dates.append(calendar_date)
if calendar_date.service_id not in calendars:
calendar = Calendar(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
monday=False,
tuesday=False,
wednesday=False,
thursday=False,
friday=False,
saturday=False,
sunday=False,
start_date=calendar_date_dict['date'],
end_date=calendar_date_dict['date'],
gtfs_feed=gtfs_feed,
)
calendars[calendar.id] = calendar
else:
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
if calendar.start_date > calendar_date.date:
calendar.start_date = calendar_date.date
if calendar.end_date < calendar_date.date:
calendar.end_date = calendar_date.date
if calendar_dates and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
batch_size=bulk_size,
update_conflicts=True,
update_fields=['start_date', 'end_date', 'gtfs_feed'],
unique_fields=['id'])
CalendarDate.objects.bulk_create(calendar_dates,
batch_size=bulk_size,
update_conflicts=True,
update_fields=['service_id', 'date', 'exception_type'],
unique_fields=['id'])
calendars.clear()
calendar_dates.clear()
trips = []
for trip_dict in read_csv("trips.txt"):
trip_dict: dict
trip_id = trip_dict['trip_id']
route_id = trip_dict['route_id']
trip_id = f"{gtfs_code}-{trip_id}"
route_id = f"{gtfs_code}-{route_id}"
trip = Trip(
id=trip_id,
route_id=route_id,
service_id=f"{gtfs_code}-{trip_dict['service_id']}",
headsign=trip_dict.get('trip_headsign', ""),
short_name=trip_dict.get('trip_short_name', ""),
direction_id=trip_dict.get('direction_id', None) or None,
block_id=trip_dict.get('block_id', ""),
shape_id=trip_dict.get('shape_id', ""),
wheelchair_accessible=trip_dict.get('wheelchair_accessible', None),
bikes_allowed=trip_dict.get('bikes_allowed', None),
gtfs_feed=gtfs_feed,
)
trips.append(trip)
if len(trips) >= bulk_size and not dry_run:
Trip.objects.bulk_create(trips,
update_conflicts=True,
update_fields=['route_id', 'service_id', 'headsign', 'short_name',
'direction_id', 'block_id', 'shape_id',
'wheelchair_accessible', 'bikes_allowed', 'gtfs_feed'],
unique_fields=['id'])
trips.clear()
if trips and not dry_run:
Trip.objects.bulk_create(trips,
update_conflicts=True,
update_fields=['route_id', 'service_id', 'headsign', 'short_name',
'direction_id', 'block_id', 'shape_id',
'wheelchair_accessible', 'bikes_allowed', 'gtfs_feed'],
unique_fields=['id'])
trips.clear()
stop_times = []
for stop_time_dict in read_csv("stop_times.txt"):
stop_time_dict: dict
stop_id = stop_time_dict['stop_id']
stop_id = f"{gtfs_code}-{stop_id}"
trip_id = stop_time_dict['trip_id']
trip_id = f"{gtfs_code}-{trip_id}"
arr_time = stop_time_dict['arrival_time']
arr_h, arr_m, arr_s = map(int, arr_time.split(':'))
arr_time = arr_h * 3600 + arr_m * 60 + arr_s
dep_time = stop_time_dict['departure_time']
dep_h, dep_m, dep_s = map(int, dep_time.split(':'))
dep_time = dep_h * 3600 + dep_m * 60 + dep_s
pickup_type = stop_time_dict.get('pickup_type', PickupType.REGULAR)
drop_off_type = stop_time_dict.get('drop_off_type', PickupType.REGULAR)
# if stop_time_dict['stop_sequence'] == "1":
# # First stop
# drop_off_type = PickupType.NONE
# elif arr_time == dep_time:
# # Last stop
# pickup_type = PickupType.NONE
st = StopTime(
id=f"{gtfs_code}-{stop_time_dict['trip_id']}-{stop_time_dict['stop_id']}"
f"-{stop_time_dict['departure_time']}",
trip_id=trip_id,
arrival_time=timedelta(seconds=arr_time),
departure_time=timedelta(seconds=dep_time),
stop_id=stop_id,
stop_sequence=stop_time_dict['stop_sequence'],
stop_headsign=stop_time_dict.get('stop_headsign', ""),
pickup_type=pickup_type,
drop_off_type=drop_off_type,
timepoint=stop_time_dict.get('timepoint', None),
)
stop_times.append(st)
if len(stop_times) >= bulk_size and not dry_run:
StopTime.objects.bulk_create(stop_times,
update_conflicts=True,
update_fields=['stop_id', 'arrival_time', 'departure_time',
'stop_headsign', 'pickup_type',
'drop_off_type', 'timepoint'],
unique_fields=['id'])
stop_times.clear()
if stop_times and not dry_run:
StopTime.objects.bulk_create(stop_times,
update_conflicts=True,
update_fields=['stop_id', 'arrival_time', 'departure_time',
'stop_headsign', 'pickup_type',
'drop_off_type', 'timepoint'],
unique_fields=['id'])
stop_times.clear()
if "transfers.txt" in zipfile.namelist():
transfers = []
for transfer_dict in read_csv("transfers.txt"):
transfer_dict: dict
from_stop_id = transfer_dict['from_stop_id']
to_stop_id = transfer_dict['to_stop_id']
from_stop_id = f"{gtfs_code}-{from_stop_id}"
to_stop_id = f"{gtfs_code}-{to_stop_id}"
transfer = Transfer(
id=f"{transfer_dict['from_stop_id']}-{transfer_dict['to_stop_id']}",
from_stop_id=from_stop_id,
to_stop_id=to_stop_id,
transfer_type=transfer_dict['transfer_type'],
min_transfer_time=transfer_dict.get('min_transfer_time', 0) or 0,
)
transfers.append(transfer)
if len(transfers) >= bulk_size and not dry_run:
Transfer.objects.bulk_create(transfers,
update_conflicts=True,
update_fields=['transfer_type', 'min_transfer_time'],
unique_fields=['id'])
transfers.clear()
if transfers and not dry_run:
Transfer.objects.bulk_create(transfers,
update_conflicts=True,
update_fields=['transfer_type', 'min_transfer_time'],
unique_fields=['id'])
transfers.clear()
if "feed_info.txt" in zipfile.namelist() and not dry_run:
for feed_info_dict in read_csv("feed_info.txt"):
feed_info_dict: dict
FeedInfo.objects.update_or_create(
publisher_name=feed_info_dict['feed_publisher_name'],
gtfs_feed=gtfs_feed,
defaults=dict(
publisher_url=feed_info_dict['feed_publisher_url'],
lang=feed_info_dict['feed_lang'],
start_date=feed_info_dict.get('feed_start_date', datetime.now().date()),
end_date=feed_info_dict.get('feed_end_date', datetime.now().date()),
version=feed_info_dict.get('feed_version', 1),
)
)
if 'ETag' in resp.headers: if 'ETag' in resp.headers:
gtfs_feed.etag = resp.headers['ETag'] gtfs_feed.etag = resp.headers['ETag']
@ -394,3 +71,344 @@ class Command(BaseCommand):
gtfs_feed.last_modified = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z") \ gtfs_feed.last_modified = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z") \
.replace(tzinfo=ZoneInfo(last_modified.split(' ')[-1])) .replace(tzinfo=ZoneInfo(last_modified.split(' ')[-1]))
gtfs_feed.save() gtfs_feed.save()
def parse_gtfs(self, zip_dir: str, gtfs_feed: GTFSFeed, bulk_size: int, dry_run: bool, verbosity: int):
gtfs_code = gtfs_feed.code
def read_csv(filename):
with open(os.path.join(zip_dir, filename), 'r') as f:
reader = csv.DictReader(f)
reader.fieldnames = [field.replace('\ufeff', '').strip()
for field in reader.fieldnames]
iterator = tqdm(reader, desc=filename, unit=' rows') if verbosity >= 2 else reader
for row in iterator:
yield {k.strip(): v.strip() for k, v in row.items()}
agencies = []
for agency_dict in read_csv("agency.txt"):
agency_dict: dict
agency = Agency(
id=f"{gtfs_code}-{agency_dict['agency_id']}",
name=agency_dict['agency_name'],
url=agency_dict['agency_url'],
timezone=agency_dict['agency_timezone'],
lang=agency_dict.get('agency_lang', "fr"),
phone=agency_dict.get('agency_phone', ""),
email=agency_dict.get('agency_email', ""),
gtfs_feed_id=gtfs_code,
)
agencies.append(agency)
if agencies and not dry_run:
Agency.objects.bulk_create(agencies,
update_conflicts=True,
update_fields=['name', 'url', 'timezone', 'lang', 'phone', 'email',
'gtfs_feed'],
unique_fields=['id'])
agencies.clear()
stops = []
for stop_dict in read_csv("stops.txt"):
stop_dict: dict
stop_id = stop_dict['stop_id']
stop_id = f"{gtfs_code}-{stop_id}"
parent_station_id = stop_dict.get('parent_station', None)
parent_station_id = f"{gtfs_code}-{parent_station_id}" if parent_station_id else None
stop = Stop(
id=stop_id,
name=stop_dict['stop_name'],
desc=stop_dict.get('stop_desc', ""),
lat=stop_dict['stop_lat'],
lon=stop_dict['stop_lon'],
zone_id=stop_dict.get('zone_id', ""),
url=stop_dict.get('stop_url', ""),
location_type=stop_dict.get('location_type', 0) or 0,
parent_station_id=parent_station_id,
timezone=stop_dict.get('stop_timezone', ""),
wheelchair_boarding=stop_dict.get('wheelchair_boarding', 0),
level_id=stop_dict.get('level_id', ""),
platform_code=stop_dict.get('platform_code', ""),
gtfs_feed_id=gtfs_code,
)
stops.append(stop)
if stops and not dry_run:
Stop.objects.bulk_create(stops,
batch_size=bulk_size,
update_conflicts=True,
update_fields=['name', 'desc', 'lat', 'lon', 'zone_id', 'url',
'location_type', 'parent_station_id', 'timezone',
'wheelchair_boarding', 'level_id', 'platform_code',
'gtfs_feed'],
unique_fields=['id'])
stops.clear()
routes = []
for route_dict in read_csv("routes.txt"):
route_dict: dict
route_id = route_dict['route_id']
route_id = f"{gtfs_code}-{route_id}"
# Agency is optional there is only one
agency_id = route_dict.get('agency_id', "") or Agency.objects.get(gtfs_feed_id=gtfs_code)
route = Route(
id=route_id,
agency_id=f"{gtfs_code}-{agency_id}",
short_name=route_dict['route_short_name'],
long_name=route_dict['route_long_name'],
desc=route_dict.get('route_desc', ""),
type=route_dict['route_type'],
url=route_dict.get('route_url', ""),
color=route_dict.get('route_color', ""),
text_color=route_dict.get('route_text_color', ""),
gtfs_feed_id=gtfs_code,
)
routes.append(route)
if len(routes) >= bulk_size and not dry_run:
Route.objects.bulk_create(routes,
update_conflicts=True,
update_fields=['agency_id', 'short_name', 'long_name', 'desc',
'type', 'url', 'color', 'text_color',
'gtfs_feed'],
unique_fields=['id'])
routes.clear()
if routes and not dry_run:
Route.objects.bulk_create(routes,
update_conflicts=True,
update_fields=['agency_id', 'short_name', 'long_name', 'desc',
'type', 'url', 'color', 'text_color',
'gtfs_feed'],
unique_fields=['id'])
routes.clear()
start_time = 0
if verbosity >= 1:
self.stdout.write("Deleting old calendars, trips and stop times…")
start_time = time()
TripUpdate.objects.filter(trip__gtfs_feed_id=gtfs_code).delete()
StopTime.objects.filter(trip__gtfs_feed_id=gtfs_code)._raw_delete(StopTime.objects.db)
Trip.objects.filter(gtfs_feed_id=gtfs_code)._raw_delete(Trip.objects.db)
Calendar.objects.filter(gtfs_feed_id=gtfs_code).delete()
if verbosity >= 1:
end = time()
self.stdout.write(f"Done in {end - start_time:.2f} s")
calendars = {}
if os.path.exists(os.path.join(zip_dir, "calendar.txt")):
for calendar_dict in read_csv("calendar.txt"):
calendar_dict: dict
calendar = Calendar(
id=f"{gtfs_code}-{calendar_dict['service_id']}",
monday=calendar_dict['monday'],
tuesday=calendar_dict['tuesday'],
wednesday=calendar_dict['wednesday'],
thursday=calendar_dict['thursday'],
friday=calendar_dict['friday'],
saturday=calendar_dict['saturday'],
sunday=calendar_dict['sunday'],
start_date=calendar_dict['start_date'],
end_date=calendar_dict['end_date'],
gtfs_feed_id=gtfs_code,
)
calendars[calendar.id] = calendar
if len(calendars) >= bulk_size and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear()
if calendars and not dry_run:
Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
'friday', 'saturday', 'sunday', 'start_date',
'end_date', 'gtfs_feed'],
unique_fields=['id'])
calendars.clear()
calendar_dates = []
for calendar_date_dict in read_csv("calendar_dates.txt"):
calendar_date_dict: dict
calendar_date = CalendarDate(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
service_id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
date=calendar_date_dict['date'],
exception_type=calendar_date_dict['exception_type'],
)
calendar_dates.append(calendar_date)
if calendar_date.service_id not in calendars:
calendar = Calendar(
id=f"{gtfs_code}-{calendar_date_dict['service_id']}",
monday=False,
tuesday=False,
wednesday=False,
thursday=False,
friday=False,
saturday=False,
sunday=False,
start_date=calendar_date_dict['date'],
end_date=calendar_date_dict['date'],
gtfs_feed_id=gtfs_code,
)
calendars[calendar.id] = calendar
else:
calendar = calendars[f"{gtfs_code}-{calendar_date_dict['service_id']}"]
if calendar.start_date > calendar_date.date:
calendar.start_date = calendar_date.date
if calendar.end_date < calendar_date.date:
calendar.end_date = calendar_date.date
if calendar_dates and not dry_run:
Calendar.objects.bulk_create(calendars.values(),
batch_size=bulk_size,
update_conflicts=True,
update_fields=['start_date', 'end_date', 'gtfs_feed'],
unique_fields=['id'])
CalendarDate.objects.bulk_create(calendar_dates,
batch_size=bulk_size,
update_conflicts=True,
update_fields=['service_id', 'date', 'exception_type'],
unique_fields=['id'])
calendars.clear()
calendar_dates.clear()
trips = []
# start_time = time()
for trip_dict in read_csv("trips.txt"):
trip_dict: dict
trip_id = trip_dict['trip_id']
route_id = trip_dict['route_id']
trip_id = f"{gtfs_code}-{trip_id}"
route_id = f"{gtfs_code}-{route_id}"
trip = Trip(
id=trip_id,
route_id=route_id,
service_id=f"{gtfs_code}-{trip_dict['service_id']}",
headsign=trip_dict.get('trip_headsign', ""),
short_name=trip_dict.get('trip_short_name', ""),
direction_id=trip_dict.get('direction_id', None) or None,
block_id=trip_dict.get('block_id', ""),
shape_id=trip_dict.get('shape_id', ""),
wheelchair_accessible=trip_dict.get('wheelchair_accessible', None),
bikes_allowed=trip_dict.get('bikes_allowed', None),
gtfs_feed_id=gtfs_code,
)
trips.append(trip)
if len(trips) >= bulk_size and not dry_run:
# now = time()
# print(f"Elapsed time: {now - start_time:.3f}s, "
# f"{1000 * (now - start_time) / len(trips):.2f}ms per iteration")
# start_time = now
Trip.objects.bulk_create(trips)
# now = time()
# print(f"Elapsed time: {now - start_time:.3f}s to save")
# start_time = now
trips.clear()
if trips and not dry_run:
Trip.objects.bulk_create(trips)
trips.clear()
stop_times = []
# start_time = time()
for stop_time_dict in read_csv("stop_times.txt"):
stop_time_dict: dict
stop_id = stop_time_dict['stop_id']
stop_id = f"{gtfs_code}-{stop_id}"
trip_id = stop_time_dict['trip_id']
trip_id = f"{gtfs_code}-{trip_id}"
arr_time = stop_time_dict['arrival_time']
arr_h, arr_m, arr_s = map(int, arr_time.split(':'))
arr_time = arr_h * 3600 + arr_m * 60 + arr_s
dep_time = stop_time_dict['departure_time']
dep_h, dep_m, dep_s = map(int, dep_time.split(':'))
dep_time = dep_h * 3600 + dep_m * 60 + dep_s
pickup_type = stop_time_dict.get('pickup_type', PickupType.REGULAR)
drop_off_type = stop_time_dict.get('drop_off_type', PickupType.REGULAR)
# if stop_time_dict['stop_sequence'] == "1":
# # First stop
# drop_off_type = PickupType.NONE
# elif arr_time == dep_time:
# # Last stop
# pickup_type = PickupType.NONE
st = StopTime(
id=f"{gtfs_code}-{stop_time_dict['trip_id']}-{stop_time_dict['stop_id']}"
f"-{stop_time_dict['departure_time']}",
trip_id=trip_id,
arrival_time=timedelta(seconds=arr_time),
departure_time=timedelta(seconds=dep_time),
stop_id=stop_id,
stop_sequence=stop_time_dict['stop_sequence'],
stop_headsign=stop_time_dict.get('stop_headsign', ""),
pickup_type=pickup_type,
drop_off_type=drop_off_type,
timepoint=stop_time_dict.get('timepoint', None),
)
stop_times.append(st)
if len(stop_times) >= bulk_size and not dry_run:
# now = time()
# print(f"Elapsed time: {now - start_time:.3f}s, "
# f"{1000 * (now - start_time) / len(stop_times):.2f}ms per iteration")
# start_time = now
StopTime.objects.bulk_create(stop_times)
# now = time()
# print(f"Elapsed time: {now - start_time:.3f}s to save")
# start_time = now
stop_times.clear()
if stop_times and not dry_run:
StopTime.objects.bulk_create(stop_times)
stop_times.clear()
if os.path.exists(os.path.join(zip_dir, "transfers.txt")):
transfers = []
for transfer_dict in read_csv("transfers.txt"):
transfer_dict: dict
from_stop_id = transfer_dict['from_stop_id']
to_stop_id = transfer_dict['to_stop_id']
from_stop_id = f"{gtfs_code}-{from_stop_id}"
to_stop_id = f"{gtfs_code}-{to_stop_id}"
transfer = Transfer(
id=f"{gtfs_code}-{transfer_dict['from_stop_id']}-{transfer_dict['to_stop_id']}",
from_stop_id=from_stop_id,
to_stop_id=to_stop_id,
transfer_type=transfer_dict['transfer_type'],
min_transfer_time=transfer_dict.get('min_transfer_time', 0) or 0,
)
transfers.append(transfer)
if len(transfers) >= bulk_size and not dry_run:
Transfer.objects.bulk_create(transfers)
transfers.clear()
if transfers and not dry_run:
Transfer.objects.bulk_create(transfers)
transfers.clear()
if os.path.exists(os.path.join(zip_dir, "feed_info.txt")) and not dry_run:
for feed_info_dict in read_csv("feed_info.txt"):
feed_info_dict: dict
FeedInfo.objects.update_or_create(
publisher_name=feed_info_dict['feed_publisher_name'],
gtfs_feed_id=gtfs_code,
defaults=dict(
publisher_url=feed_info_dict['feed_publisher_url'],
lang=feed_info_dict['feed_lang'],
start_date=feed_info_dict.get('feed_start_date', datetime.now().date()),
end_date=feed_info_dict.get('feed_end_date', datetime.now().date()),
version=feed_info_dict.get('feed_version', 1),
)
)

View File

@ -0,0 +1,26 @@
# Generated by Django 5.0.6 on 2024-05-12 09:31
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("gtfs", "0001_initial"),
]
operations = [
migrations.AlterField(
model_name="stop",
name="parent_station",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="children",
to="gtfs.stop",
verbose_name="Parent station",
),
),
]

View File

@ -288,7 +288,7 @@ class Stop(models.Model):
parent_station = models.ForeignKey( parent_station = models.ForeignKey(
to="Stop", to="Stop",
on_delete=models.PROTECT, on_delete=models.SET_NULL,
verbose_name=_("Parent station"), verbose_name=_("Parent station"),
related_name="children", related_name="children",
blank=True, blank=True,