trainvel/sncfgtfs/management/commands/update_sncf_gtfs.py

405 lines
22 KiB
Python
Raw Normal View History

2024-01-27 09:43:59 +00:00
import csv
2024-01-27 10:43:01 +00:00
from datetime import datetime, timedelta
2024-01-27 09:43:59 +00:00
from io import BytesIO
from zipfile import ZipFile
import requests
from django.core.management import BaseCommand
from sncfgtfs.models import Agency, Calendar, CalendarDate, FeedInfo, Route, Stop, StopTime, Transfer, Trip
class Command(BaseCommand):
    """Management command that downloads the SNCF GTFS feeds and refreshes the local GTFS tables."""

    help = "Update the SNCF GTFS database."

    # GTFS zip archives to import, keyed by transport family
    # (TGV = high speed, IC = Intercités, TER = regional, TN = Transilien).
    # Commented entries are feeds that were considered but are currently disabled.
    GTFS_FEEDS = {
        "TGV": "https://eu.ftp.opendatasoft.com/sncf/gtfs/export_gtfs_voyages.zip",
        "IC": "https://eu.ftp.opendatasoft.com/sncf/gtfs/export-intercites-gtfs-last.zip",
        "TER": "https://eu.ftp.opendatasoft.com/sncf/gtfs/export-ter-gtfs-last.zip",
        "TN": "https://eu.ftp.opendatasoft.com/sncf/gtfs/transilien-gtfs.zip",
        # "ES": "https://www.data.gouv.fr/fr/datasets/r/9089b550-696e-4ae0-87b5-40ea55a14292",
        # "TI": "https://www.data.gouv.fr/fr/datasets/r/4d1dd21a-b061-47ac-9514-57ffcc09b4a5",
        # "RENFE": "https://ssl.renfe.com/gtransit/Fichero_AV_LD/google_transit.zip",
        # "OBB": "https://static.oebb.at/open-data/soll-fahrplan-gtfs/GTFS_OP_2024_obb.zip",
    }
def add_arguments(self, parser):
    """Declare the command-line options accepted by this command."""
    # Number of rows handed to each bulk_create call.
    parser.add_argument(
        '--bulk_size',
        type=int,
        default=1000,
        help="Number of objects to create in bulk.",
    )
    # Parse the feeds without writing anything to the database.
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help="Do not update the database, only print what would be done.",
    )
    # Import even when the feeds do not look newer than the stored version.
    parser.add_argument(
        '--force',
        '-f',
        action='store_true',
        help="Force the update of the database.",
    )
def handle(self, *args, **options):
    """Download every configured GTFS feed and upsert its content into the database.

    The update is skipped when none of the feeds reports a ``Last-Modified``
    date newer than the version stored in ``FeedInfo`` (unless ``--force`` is
    given).  With ``--dry-run`` the feeds are downloaded and parsed but
    nothing is written.
    """
    bulk_size = options['bulk_size']
    dry_run = options['dry_run']
    force = options['force']
    if dry_run:
        self.stdout.write(self.style.WARNING("Dry run mode activated."))

    # Version of the last import, recorded in the SNCF feed info row.
    # FIX: use filter().first() instead of get(); the original raised
    # DoesNotExist when FeedInfo rows existed but none was 'SNCF_default'.
    feed_info = FeedInfo.objects.filter(publisher_name='SNCF_default').first()
    last_update_date = feed_info.version if feed_info is not None else "1970-01-01"

    # Probe every feed with a HEAD request; if none is newer than the stored
    # version, there is nothing to do (unless forced).
    # NOTE(review): requests calls have no timeout — a stalled server hangs
    # the command; consider adding one.
    for url in self.GTFS_FEEDS.values():
        resp = requests.head(url)
        if "Last-Modified" not in resp.headers:
            continue
        last_modified = resp.headers["Last-Modified"]
        last_modified = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")
        if last_modified.date().isoformat() > last_update_date:
            break
    else:
        if not force:
            self.stdout.write(self.style.WARNING("Database already up-to-date."))
            return

    self.stdout.write("Updating database...")
    for transport_type, feed_url in self.GTFS_FEEDS.items():
        self.stdout.write(f"Downloading {transport_type} GTFS feed...")
        with ZipFile(BytesIO(requests.get(feed_url).content)) as zipfile:
            def read_file(filename):
                # Strip the UTF-8 BOM some SNCF exports carry, then split lines.
                lines = zipfile.read(filename).decode().replace('\ufeff', '').splitlines()
                return [line.strip() for line in lines]

            # --- agency.txt ------------------------------------------------
            agencies = []
            for agency_dict in csv.DictReader(read_file("agency.txt")):
                agency_dict: dict
                # The Eurostar feed ships foreign agencies too; keep ES/ER only.
                if transport_type == "ES" \
                        and agency_dict['agency_id'] != 'ES' and agency_dict['agency_id'] != 'ER':
                    continue
                agency = Agency(
                    id=agency_dict['agency_id'],
                    name=agency_dict['agency_name'],
                    url=agency_dict['agency_url'],
                    timezone=agency_dict['agency_timezone'],
                    lang=agency_dict.get('agency_lang', "fr"),
                    phone=agency_dict.get('agency_phone', ""),
                    email=agency_dict.get('agency_email', ""),
                )
                agencies.append(agency)
            if agencies and not dry_run:
                Agency.objects.bulk_create(agencies,
                                           update_conflicts=True,
                                           update_fields=['name', 'url', 'timezone', 'lang', 'phone', 'email'],
                                           unique_fields=['id'])
                agencies.clear()

            # --- stops.txt -------------------------------------------------
            stops = []
            for stop_dict in csv.DictReader(read_file("stops.txt")):
                stop_dict: dict
                stop_id = stop_dict['stop_id']
                # Foreign feeds may reuse ids; namespace them per feed.
                if transport_type in ["ES", "TI", "RENFE"]:
                    stop_id = f"{transport_type}-{stop_id}"
                stop = Stop(
                    id=stop_id,
                    name=stop_dict['stop_name'],
                    desc=stop_dict.get('stop_desc', ""),
                    lat=stop_dict['stop_lat'],
                    lon=stop_dict['stop_lon'],
                    zone_id=stop_dict.get('zone_id', ""),
                    url=stop_dict.get('stop_url', ""),
                    location_type=stop_dict.get('location_type', 1) or 1,
                    parent_station_id=stop_dict.get('parent_station', None) or None,
                    timezone=stop_dict.get('stop_timezone', ""),
                    wheelchair_boarding=stop_dict.get('wheelchair_boarding', 0),
                    level_id=stop_dict.get('level_id', ""),
                    platform_code=stop_dict.get('platform_code', ""),
                    transport_type=transport_type,
                )
                stops.append(stop)
            if stops and not dry_run:
                Stop.objects.bulk_create(stops,
                                         batch_size=bulk_size,
                                         update_conflicts=True,
                                         update_fields=['name', 'desc', 'lat', 'lon', 'zone_id', 'url',
                                                        'location_type', 'parent_station_id', 'timezone',
                                                        'wheelchair_boarding', 'level_id', 'platform_code',
                                                        'transport_type'],
                                         unique_fields=['id'])
                stops.clear()

            # --- routes.txt ------------------------------------------------
            routes = []
            for route_dict in csv.DictReader(read_file("routes.txt")):
                route_dict: dict
                route_id = route_dict['route_id']
                if transport_type == "TI":
                    route_id = f"{transport_type}-{route_id}"
                route = Route(
                    id=route_id,
                    agency_id=route_dict['agency_id'],
                    short_name=route_dict['route_short_name'],
                    long_name=route_dict['route_long_name'],
                    desc=route_dict.get('route_desc', ""),
                    type=route_dict['route_type'],
                    url=route_dict.get('route_url', ""),
                    color=route_dict.get('route_color', ""),
                    text_color=route_dict.get('route_text_color', ""),
                    transport_type=transport_type,
                )
                routes.append(route)
                if len(routes) >= bulk_size and not dry_run:
                    Route.objects.bulk_create(routes,
                                              update_conflicts=True,
                                              update_fields=['agency_id', 'short_name', 'long_name', 'desc',
                                                             'type', 'url', 'color', 'text_color',
                                                             'transport_type'],
                                              unique_fields=['id'])
                    routes.clear()
            if routes and not dry_run:
                Route.objects.bulk_create(routes,
                                          update_conflicts=True,
                                          update_fields=['agency_id', 'short_name', 'long_name', 'desc',
                                                         'type', 'url', 'color', 'text_color',
                                                         'transport_type'],
                                          unique_fields=['id'])
                routes.clear()

            # --- calendar.txt / calendar_dates.txt -------------------------
            # Calendars for this transport type are rebuilt from scratch:
            # calendar.txt gives the weekly patterns, calendar_dates.txt then
            # adjusts each service's start/end range with its exception dates.
            Calendar.objects.filter(transport_type=transport_type).delete()
            calendars = {}
            if "calendar.txt" in zipfile.namelist():
                for calendar_dict in csv.DictReader(read_file("calendar.txt")):
                    calendar_dict: dict
                    calendar = Calendar(
                        id=f"{transport_type}-{calendar_dict['service_id']}",
                        monday=calendar_dict['monday'],
                        tuesday=calendar_dict['tuesday'],
                        wednesday=calendar_dict['wednesday'],
                        thursday=calendar_dict['thursday'],
                        friday=calendar_dict['friday'],
                        saturday=calendar_dict['saturday'],
                        sunday=calendar_dict['sunday'],
                        start_date=calendar_dict['start_date'],
                        end_date=calendar_dict['end_date'],
                        transport_type=transport_type,
                    )
                    calendars[calendar.id] = calendar
                    if len(calendars) >= bulk_size and not dry_run:
                        Calendar.objects.bulk_create(calendars.values(),
                                                     update_conflicts=True,
                                                     update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
                                                                    'friday', 'saturday', 'sunday', 'start_date',
                                                                    'end_date', 'transport_type'],
                                                     unique_fields=['id'])
                        calendars.clear()
                if calendars and not dry_run:
                    Calendar.objects.bulk_create(calendars.values(), update_conflicts=True,
                                                 update_fields=['monday', 'tuesday', 'wednesday', 'thursday',
                                                                'friday', 'saturday', 'sunday', 'start_date',
                                                                'end_date', 'transport_type'],
                                                 unique_fields=['id'])
                    calendars.clear()

            calendar_dates = []
            for calendar_date_dict in csv.DictReader(read_file("calendar_dates.txt")):
                calendar_date_dict: dict
                calendar_date = CalendarDate(
                    id=f"{transport_type}-{calendar_date_dict['service_id']}-{calendar_date_dict['date']}",
                    service_id=f"{transport_type}-{calendar_date_dict['service_id']}",
                    date=calendar_date_dict['date'],
                    exception_type=calendar_date_dict['exception_type'],
                )
                calendar_dates.append(calendar_date)
                if calendar_date.service_id not in calendars:
                    # Service only known through calendar_dates.txt: create a
                    # stub calendar running on no weekday, spanning this date.
                    calendar = Calendar(
                        id=f"{transport_type}-{calendar_date_dict['service_id']}",
                        monday=False,
                        tuesday=False,
                        wednesday=False,
                        thursday=False,
                        friday=False,
                        saturday=False,
                        sunday=False,
                        start_date=calendar_date_dict['date'],
                        end_date=calendar_date_dict['date'],
                        transport_type=transport_type,
                    )
                    calendars[calendar.id] = calendar
                else:
                    # Widen the stub's date range to cover this exception date.
                    # Dates are YYYYMMDD strings, so string comparison is safe.
                    calendar = calendars[f"{transport_type}-{calendar_date_dict['service_id']}"]
                    if calendar.start_date > calendar_date.date:
                        calendar.start_date = calendar_date.date
                    if calendar.end_date < calendar_date.date:
                        calendar.end_date = calendar_date.date

            if calendar_dates and not dry_run:
                Calendar.objects.bulk_create(calendars.values(),
                                             batch_size=bulk_size,
                                             update_conflicts=True,
                                             update_fields=['start_date', 'end_date'],
                                             unique_fields=['id'])
                CalendarDate.objects.bulk_create(calendar_dates,
                                                 batch_size=bulk_size,
                                                 update_conflicts=True,
                                                 update_fields=['service_id', 'date', 'exception_type'],
                                                 unique_fields=['id'])
                calendars.clear()
                calendar_dates.clear()

            # --- trips.txt -------------------------------------------------
            trips = []
            for trip_dict in csv.DictReader(read_file("trips.txt")):
                trip_dict: dict
                trip_id = trip_dict['trip_id']
                route_id = trip_dict['route_id']
                if transport_type in ["TGV", "IC", "TER"]:
                    # SNCF trip ids embed a last-update timestamp after a colon.
                    trip_id, last_update = trip_id.split(':', 1)
                    last_update = datetime.fromisoformat(last_update)
                elif transport_type in ["ES", "RENFE"]:
                    trip_id = f"{transport_type}-{trip_id}"
                    last_update = None
                elif transport_type == "TI":
                    trip_id = f"{transport_type}-{trip_id}"
                    route_id = f"{transport_type}-{route_id}"
                    last_update = None
                else:
                    last_update = None
                trip = Trip(
                    id=trip_id,
                    route_id=route_id,
                    service_id=f"{transport_type}-{trip_dict['service_id']}",
                    headsign=trip_dict.get('trip_headsign', ""),
                    short_name=trip_dict.get('trip_short_name', ""),
                    direction_id=trip_dict.get('direction_id', None) or None,
                    block_id=trip_dict.get('block_id', ""),
                    shape_id=trip_dict.get('shape_id', ""),
                    wheelchair_accessible=trip_dict.get('wheelchair_accessible', None),
                    bikes_allowed=trip_dict.get('bikes_allowed', None),
                    last_update=last_update,
                )
                trips.append(trip)
                # NOTE(review): 'last_update' is deliberately absent from
                # update_fields below — confirm whether it should be refreshed
                # on conflict.
                if len(trips) >= bulk_size and not dry_run:
                    Trip.objects.bulk_create(trips,
                                             update_conflicts=True,
                                             update_fields=['route_id', 'service_id', 'headsign', 'short_name',
                                                            'direction_id', 'block_id', 'shape_id',
                                                            'wheelchair_accessible', 'bikes_allowed'],
                                             unique_fields=['id'])
                    trips.clear()
            if trips and not dry_run:
                Trip.objects.bulk_create(trips,
                                         update_conflicts=True,
                                         update_fields=['route_id', 'service_id', 'headsign', 'short_name',
                                                        'direction_id', 'block_id', 'shape_id',
                                                        'wheelchair_accessible', 'bikes_allowed'],
                                         unique_fields=['id'])
                trips.clear()

            # --- stop_times.txt --------------------------------------------
            stop_times = []
            for stop_time_dict in csv.DictReader(read_file("stop_times.txt")):
                stop_time_dict: dict
                stop_id = stop_time_dict['stop_id']
                if transport_type in ["ES", "TI", "RENFE"]:
                    stop_id = f"{transport_type}-{stop_id}"
                trip_id = stop_time_dict['trip_id']
                if transport_type in ["TGV", "IC", "TER"]:
                    trip_id = trip_id.split(':', 1)[0]
                elif transport_type in ["ES", "TI", "RENFE"]:
                    trip_id = f"{transport_type}-{trip_id}"
                # GTFS times may exceed 23:59:59 for after-midnight runs, so
                # parse them by hand into seconds rather than with datetime.
                arr_time = stop_time_dict['arrival_time']
                arr_h, arr_m, arr_s = map(int, arr_time.split(':'))
                arr_time = arr_h * 3600 + arr_m * 60 + arr_s
                dep_time = stop_time_dict['departure_time']
                dep_h, dep_m, dep_s = map(int, dep_time.split(':'))
                dep_time = dep_h * 3600 + dep_m * 60 + dep_s
                pickup_type = stop_time_dict.get('pickup_type', 0)
                drop_off_type = stop_time_dict.get('drop_off_type', 0)
                # Foreign feeds don't flag terminus stops; infer them from the
                # stop sequence (feeds differ on whether it starts at 0 or 1).
                if transport_type in ["ES", "RENFE", "OBB"]:
                    if stop_time_dict['stop_sequence'] == "1":
                        drop_off_type = 1
                    elif arr_time == dep_time:
                        pickup_type = 1
                elif transport_type == "TI":
                    if stop_time_dict['stop_sequence'] == "0":
                        drop_off_type = 1
                    elif arr_time == dep_time:
                        pickup_type = 1
                st = StopTime(
                    id=f"{trip_id}-{stop_id}-{stop_time_dict['departure_time']}",
                    trip_id=trip_id,
                    arrival_time=timedelta(seconds=arr_time),
                    departure_time=timedelta(seconds=dep_time),
                    stop_id=stop_id,
                    stop_sequence=stop_time_dict['stop_sequence'],
                    stop_headsign=stop_time_dict.get('stop_headsign', ""),
                    pickup_type=pickup_type,
                    drop_off_type=drop_off_type,
                    timepoint=stop_time_dict.get('timepoint', None),
                )
                stop_times.append(st)
                if len(stop_times) >= bulk_size and not dry_run:
                    StopTime.objects.bulk_create(stop_times,
                                                 update_conflicts=True,
                                                 update_fields=['stop_id', 'arrival_time', 'departure_time',
                                                                'stop_headsign', 'pickup_type',
                                                                'drop_off_type', 'timepoint'],
                                                 unique_fields=['id'])
                    stop_times.clear()
            if stop_times and not dry_run:
                StopTime.objects.bulk_create(stop_times,
                                             update_conflicts=True,
                                             update_fields=['stop_id', 'arrival_time', 'departure_time',
                                                            'stop_headsign', 'pickup_type',
                                                            'drop_off_type', 'timepoint'],
                                             unique_fields=['id'])
                stop_times.clear()

            # --- transfers.txt (optional) ----------------------------------
            if "transfers.txt" in zipfile.namelist():
                transfers = []
                for transfer_dict in csv.DictReader(read_file("transfers.txt")):
                    transfer_dict: dict
                    from_stop_id = transfer_dict['from_stop_id']
                    to_stop_id = transfer_dict['to_stop_id']
                    if transport_type in ["ES", "RENFE", "OBB"]:
                        from_stop_id = f"{transport_type}-{from_stop_id}"
                        to_stop_id = f"{transport_type}-{to_stop_id}"
                    transfer = Transfer(
                        id=f"{from_stop_id}-{to_stop_id}",
                        # FIX: use the namespaced ids computed above; the
                        # original passed the raw transfer_dict values, which
                        # dropped the transport-type prefix for ES/RENFE/OBB
                        # and produced dangling stop references.
                        from_stop_id=from_stop_id,
                        to_stop_id=to_stop_id,
                        transfer_type=transfer_dict['transfer_type'],
                        min_transfer_time=transfer_dict['min_transfer_time'],
                    )
                    transfers.append(transfer)
                    if len(transfers) >= bulk_size and not dry_run:
                        Transfer.objects.bulk_create(transfers,
                                                     update_conflicts=True,
                                                     update_fields=['transfer_type', 'min_transfer_time'],
                                                     unique_fields=['id'])
                        transfers.clear()
                if transfers and not dry_run:
                    Transfer.objects.bulk_create(transfers,
                                                 update_conflicts=True,
                                                 update_fields=['transfer_type', 'min_transfer_time'],
                                                 unique_fields=['id'])
                    transfers.clear()

            # --- feed_info.txt (optional) ----------------------------------
            # Records the feed version, which gates the next run's
            # up-to-date check at the top of this method.
            if "feed_info.txt" in zipfile.namelist() and not dry_run:
                for feed_info_dict in csv.DictReader(read_file("feed_info.txt")):
                    feed_info_dict: dict
                    FeedInfo.objects.update_or_create(
                        publisher_name=feed_info_dict['feed_publisher_name'],
                        defaults=dict(
                            publisher_url=feed_info_dict['feed_publisher_url'],
                            lang=feed_info_dict['feed_lang'],
                            start_date=feed_info_dict.get('feed_start_date', datetime.now().date()),
                            end_date=feed_info_dict.get('feed_end_date', datetime.now().date()),
                            version=feed_info_dict.get('feed_version', 1),
                        )
                    )