Compare commits

..

7 Commits

Author SHA1 Message Date
ae859e0db4 Put content in a container
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 15:14:57 +02:00
4332862419 Optimize computation
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 15:05:23 +02:00
2004155e51 Multiple changes
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 11:54:11 +02:00
089af1db75 Remove progress bar while searching routes 2023-02-19 18:01:16 +01:00
0f61b3bece Replace loading text by a spinner 2023-02-19 17:43:36 +01:00
06f836e13f Use local bootstrap5 files 2023-02-13 13:19:14 +01:00
9fbf4fb172 Add option to update dataset silently 2023-02-13 13:12:39 +01:00
3 changed files with 157 additions and 117 deletions

242
app.py
View File

@ -1,11 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from contextlib import nullcontext
import csv import csv
from datetime import date, datetime, time, timedelta from datetime import date, datetime, time, timedelta
import os import os
import json import json
from pytz import timezone from pytz import timezone
import requests import requests
from time import sleep
import click import click
from flask import Flask, render_template from flask import Flask, render_template
@ -59,54 +61,58 @@ class RouteQueue(db.Model):
@cli.command("update-dataset") @cli.command("update-dataset")
def update_dataset(): @click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def update_dataset(verbose: bool = False):
""" """
Query the latest version of the SNCF OpenData dataset, as a CSV file. Query the latest version of the SNCF OpenData dataset, as a CSV file.
""" """
try: resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/') content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip() content = content.replace('\r', '')
content = content.replace('\r', '') content = content.replace('" \n', '" \\n')
content = content.replace('" \n', '" \\n') content = content.replace('.\n', '.\\n')
content = content.replace('.\n', '.\\n') content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
content = content.replace('\n\n \nLa', '\\n\\n \\nLa') content = content.replace('\n"', '\\n"')
content = content.replace('\n"', '\\n"')
info = json.loads(content) info = json.loads(content)
modified_date = datetime.fromisoformat(info['dateModified']) modified_date = datetime.fromisoformat(info['dateModified'])
utc = timezone('UTC') utc = timezone('UTC')
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile( last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc) 'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
if last_modified < modified_date: if last_modified < modified_date:
if verbose:
print("Updating tgvmax.csv…") print("Updating tgvmax.csv…")
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp: with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
resp.raise_for_status() resp.raise_for_status()
with open('tgvmax.csv', 'wb') as f: with open('tgvmax.csv', 'wb') as f:
with tqdm(unit='io', unit_scale=True) as t: with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
for chunk in resp.iter_content(chunk_size=512 * 1024): for chunk in resp.iter_content(chunk_size=512 * 1024):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
if verbose:
t.update(len(chunk)) t.update(len(chunk))
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp())) os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
if verbose:
print("Done") print("Done")
else:
print("Last modification:", modified_date) if verbose:
except Exception as e: print("Last modification:", modified_date)
print("An error occured while updating tgvmax.csv") exit(2)
print(e)
@cli.command("parse-csv") @cli.command("parse-csv")
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.") @click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
def parse_trains(flush: bool = False): @click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def parse_trains(flush: bool = False, verbose: bool = False):
""" """
Parse the CSV file and store it to the database. Parse the CSV file and store it to the database.
""" """
if flush: if flush:
print("Flush database…") if verbose:
print("Flush database…")
db.session.query(Train).delete() db.session.query(Train).delete()
last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC')) last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
@ -114,8 +120,11 @@ def parse_trains(flush: bool = False):
with open('tgvmax.csv') as f: with open('tgvmax.csv') as f:
first_line = True first_line = True
already_seen = set() already_seen = set()
for line in tqdm(csv.reader(f, delimiter=';')): already_updated = set(x[0] for x in db.session.query(Train).filter(Train.last_modification > last_modification)\
.values(Train.id))
for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
if first_line: if first_line:
# Skip first line
first_line = False first_line = False
continue continue
@ -128,6 +137,10 @@ def parse_trains(flush: bool = False):
print("Duplicate:", train_id) print("Duplicate:", train_id)
continue continue
if train_id in already_updated:
# Already updated by the simulator
continue
train = Train( train = Train(
id=train_id, id=train_id,
day=date.fromisoformat(line[0]), day=date.fromisoformat(line[0]),
@ -155,57 +168,69 @@ def parse_trains(flush: bool = False):
db.session.commit() db.session.commit()
def find_routes(day: date, origin: str, destination: str | None): def find_routes(day: date | datetime, origin: str, destination: str | None,
trains = db.session.query(Train).filter_by(day=day, tgvmax=True).all() verbose: bool = False, min_dep: time = time(0, 0),
explored: dict | None = None):
if isinstance(day, datetime):
day = day.date()
if explored is None:
explored = {}
if origin not in explored:
explored[origin] = (min_dep, None)
valid_routes = []
max_dep = time(23, 59)
else:
max_dep, valid_routes = explored[origin]
if max_dep < min_dep:
# Already parsed these trains
return {destination: valid_routes}
explored[origin] = min_dep, None
trains = db.session.query(Train).filter_by(day=day, tgvmax=True, orig=origin)\
.filter(Train.dep >= min_dep, Train.dep < max_dep).all()
if not trains:
# No train in the requested interval
explored[origin] = (min_dep, valid_routes)
return {destination: valid_routes}
trains.sort(key=lambda train: train.dep) trains.sort(key=lambda train: train.dep)
db.session.commit()
# For better results later, fetch all trains from the origin or to the destination for train in (t := tqdm(trains, desc=origin) if verbose else trains):
# This is not exhaustive, but can be a good approximation if train.dest == destination:
queue_routes(day, origin=origin) # We hope that we have a direct train
if destination: valid_routes.append([train])
queue_routes(day, destination=destination) else:
if train.dest in explored and explored[train.dest][1] is None:
per_arr_explore = {} # This is a loop
valid_routes = []
for train in tqdm(trains):
if train.orig == origin:
# Update from the TGVMax simulator
queue_route(day, train.orig_iata, train.dest_iata)
it = [train]
if train.dest == destination:
# We hope that we have a direct train
valid_routes.append(it)
else:
per_arr_explore.setdefault(train.dest, [])
per_arr_explore[train.dest].append(it)
continue
for it in list(per_arr_explore.get(train.orig, [])):
if any(train.dest == tr.dest or train.dest == origin for tr in it):
# Avoid loops
continue continue
elif train.arr < min_dep:
# The train is not direct and arrives on the next day, we avoid that
continue
find_routes(day, train.dest, destination, verbose, train.arr, explored)
last_train = it[-1] # Filter unusable trains
valid_routes += [[train] + it for it in explored[train.dest][1] if it[0].dep >= train.arr]
if last_train.arr <= train.dep: explored[origin] = (min_dep, valid_routes)
# Update from the TGVMax simulator, this line can be useful later
queue_route(day, train.orig_iata, train.dest_iata)
new_it = it + [train] # Send queued trains to the database
if train.dest == destination: db.session.commit()
# Goal is achieved
valid_routes.append(new_it)
else:
per_arr_explore.setdefault(train.dest, [])
per_arr_explore[train.dest].append(new_it)
return {destination: valid_routes} if destination else per_arr_explore return {destination: valid_routes} if destination else {}
def queue_route(day: date | datetime, origin: str, destination: str, verbose: bool = False): # Don't use the decorator to keep the function callable
cli.command('find-routes')(click.argument('day', type=click.DateTime(formats=['%Y-%m-%d']))
(click.argument('origin', type=str)
(click.argument('destination', type=str, default=None)
(click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
(find_routes)))))
def queue_route(day: date | datetime, origin: str, destination: str, verbose: bool = False, autocommit: bool = True):
""" """
Fetch the TGVMax simulator to refresh data. Fetch the TGVMax simulator to refresh data.
@ -218,23 +243,14 @@ def queue_route(day: date | datetime, origin: str, destination: str, verbose: bo
if isinstance(day, datetime): if isinstance(day, datetime):
day = day.date() day = day.date()
query = db.session.query(RouteQueue).filter_by(day=day, origin=origin, destination=destination, response_time=None) query = db.session.query(RouteQueue).filter_by(day=day, origin=origin, destination=destination)\
.filter((RouteQueue.response_time == None) | (RouteQueue.expiration_time >= datetime.now(timezone('UTC'))))
if query.count(): if query.count():
if verbose:
print("Already queued")
return
query = db.session.query(RouteQueue).filter(RouteQueue.day == day,
RouteQueue.origin == origin,
RouteQueue.destination == destination,
RouteQueue.expiration_time >= datetime.now(timezone('UTC')))
if query.count():
if verbose:
print("Using recent value")
return return
db.session.add(RouteQueue(day=day, origin=origin, destination=destination)) db.session.add(RouteQueue(day=day, origin=origin, destination=destination))
db.session.commit() if autocommit:
db.session.commit()
# Don't use the decorator to keep the function callable # Don't use the decorator to keep the function callable
@ -246,17 +262,27 @@ cli.command('queue-route')(click.argument('day', type=click.DateTime(formats=['%
def queue_routes(day: date | datetime, origin: str | None = None, def queue_routes(day: date | datetime, origin: str | None = None,
destination: str | None = None, verbose: bool = False): destination: str | None = None, verbose: bool = False,
autocommit: bool = True):
if isinstance(day, datetime): if isinstance(day, datetime):
day = day.date() day = day.date()
valid_routes = set(db.session.query(RouteQueue).filter_by(day=day)\
.filter((RouteQueue.response_time == None) | (RouteQueue.expiration_time >= datetime.now(timezone('UTC'))))\
.values(RouteQueue.origin, RouteQueue.destination))
query = db.session.query(Train).filter((Train.day == day)) query = db.session.query(Train).filter((Train.day == day))
if origin: if origin:
query = query.filter((Train.orig_iata == origin) | (Train.orig == origin)) query = query.filter((Train.orig_iata == origin) | (Train.orig == origin))
if destination: if destination:
query = query.filter((Train.dest_iata == destination) | (Train.dest == destination)) query = query.filter((Train.dest_iata == destination) | (Train.dest == destination))
for train in query.all(): query = query.all()
queue_route(day, train.orig_iata, train.dest_iata, verbose) for train in (t := tqdm(query) if verbose else query):
if verbose:
t.set_description(f"{day}: {train.orig} --> {train.dest}")
if (train.orig_iata, train.dest_iata) not in valid_routes:
queue_route(day, train.orig_iata, train.dest_iata, verbose, autocommit)
valid_routes.add((train.orig_iata, train.dest_iata))
# Same as above # Same as above
@ -269,27 +295,38 @@ cli.command('queue-routes')(click.argument('day', type=click.DateTime(formats=['
@cli.command('process-queue', help="Process the waiting list to refresh from the simulator.") @cli.command('process-queue', help="Process the waiting list to refresh from the simulator.")
@click.argument('number', default=30, type=int) @click.argument('number', default=30, type=int)
def process_queue(number: int): @click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
queue = db.session.query(RouteQueue).filter_by(response_time=None).order_by(RouteQueue.queue_time) def process_queue(number: int, verbose: bool = False):
queue = db.session.query(RouteQueue).filter_by(response_time=None).order_by(RouteQueue.queue_time).all()
if number > 0: if number > 0:
queue = queue[:number] queue = queue[:number]
URL = "https://www.maxjeune-tgvinoui.sncf/api/public/refdata/search-freeplaces-proposals" URL = "https://www.maxjeune-tgvinoui.sncf/api/public/refdata/search-freeplaces-proposals"
for req in queue: if verbose:
query = db.session.query(Train).with_entities(Train.orig_iata, Train.orig).distinct()
iata_to_names = {k: v for (k, v) in query.all()}
for i, req in enumerate(t := tqdm(queue) if verbose else queue):
req: RouteQueue req: RouteQueue
resp = requests.post(URL, json={ if verbose:
'departureDateTime': req.day.isoformat(), t.set_description(f"{req.day:%d/%m/%Y}: {iata_to_names[req.origin]} --> {iata_to_names[req.destination]}")
'origin': req.origin,
'destination': req.destination, resp = None
}) while resp is None or resp.status_code == 429:
resp = requests.post(URL, json={
'departureDateTime': req.day.isoformat(),
'origin': req.origin,
'destination': req.destination,
})
if resp.status_code == 429:
sleep(1)
if resp.status_code == 404: if resp.status_code == 404:
# No travel found # No travel found
req.response_time = datetime.now() req.response_time = datetime.now()
req.expiration_time = datetime.now() + timedelta(hours=1) req.expiration_time = datetime.now() + timedelta(hours=3)
db.session.add(req) db.session.add(req)
db.session.commit()
continue continue
resp.raise_for_status() resp.raise_for_status()
@ -305,9 +342,11 @@ def process_queue(number: int):
for proposal in data['proposals']: for proposal in data['proposals']:
train = db.session.query(Train).filter_by(day=req.day, number=int(proposal['trainNumber']), train = db.session.query(Train).filter_by(day=req.day, number=int(proposal['trainNumber']),
orig_iata=req.origin, dest_iata=req.destination).first() orig_iata=proposal['origin']['rrCode'],
dest_iata=proposal['destination']['rrCode']).first()
if train is None: if train is None:
# In a city with multiple stations # In a city with multiple stations
print("ERROR")
print(proposal) print(proposal)
continue continue
train.tgvmax = True train.tgvmax = True
@ -316,7 +355,10 @@ def process_queue(number: int):
train.expiration_time = req.expiration_time train.expiration_time = req.expiration_time
db.session.add(train) db.session.add(train)
db.session.commit() if i % 50 == 0:
db.session.commit()
db.session.commit()
@app.get('/') @app.get('/')

1
static/bootstrap5 Symbolic link
View File

@ -0,0 +1 @@
/usr/share/javascript/bootstrap5/

View File

@ -2,53 +2,50 @@
<html lang="fr"> <html lang="fr">
<head> <head>
<title>Calculateur TGVMax</title> <title>Calculateur TGVMax</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" <link href="/static/bootstrap5/css/bootstrap.min.css" rel="stylesheet">
rel="stylesheet"
integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD"
crossorigin="anonymous">
</head> </head>
<body> <body>
<form id="form" action="#"> <main class="container">
<form id="form" action="#">
<datalist id="iataCodes"> <datalist id="iataCodes">
<option value="Chargement…"> <option value="Chargement…">
</datalist> </datalist>
<div class="mb-3"> <div class="mb-3">
<label for="origin" class="form-label">Origine :</label> <label for="origin" class="form-label">Origine :</label>
<input type="text" class="form-control" list="iataCodes" id="origin" <input type="text" class="form-control" list="iataCodes" id="origin"
placeholder="Origine…" aria-describedby="originHelp"> placeholder="Origine…" aria-describedby="originHelp">
<input type="hidden" class="form-control" id="originIata"> <input type="hidden" class="form-control" id="originIata">
<div id="originHelp" class="form-text">Le point de départ de votre trajet.</div> <div id="originHelp" class="form-text">Le point de départ de votre trajet.</div>
</div> </div>
<div class="mb-3"> <div class="mb-3">
<label for="destination" class="form-label">Destination :</label> <label for="destination" class="form-label">Destination :</label>
<input type="text" class="form-control" list="iataCodes" id="destination" <input type="text" class="form-control" list="iataCodes" id="destination"
placeholder="Destination…" aria-describedby="destinationHelp"> placeholder="Destination…" aria-describedby="destinationHelp">
<input type="hidden" class="form-control" id="destinationIata"> <input type="hidden" class="form-control" id="destinationIata">
<div id="destinationHelp" class="form-text">Le point d'arrivée de votre trajet.</div> <div id="destinationHelp" class="form-text">Le point d'arrivée de votre trajet.</div>
</div> </div>
<div class="mb-3"> <div class="mb-3">
<label for="day" class="form-label">Jour de départ :</label> <label for="day" class="form-label">Jour de départ :</label>
<input type="date" class="form-control" id="day" aria-describedby="dayHelp" <input type="date" class="form-control" id="day" aria-describedby="dayHelp"
min="{{ today }}" max="{{ max_day }}" value="{{ today }}"> min="{{ today }}" max="{{ max_day }}" value="{{ today }}">
<div id="dayHelp" class="form-text">Le jour de votre départ.</div> <div id="dayHelp" class="form-text">Le jour de votre départ.</div>
</div> </div>
<div class="mb-3"> <div class="mb-3">
<input type="submit" class="form-control btn btn-primary" value="Rechercher…"> <input type="submit" class="form-control btn btn-primary" value="Rechercher…">
</div> </div>
</form> </form>
<div id="result"></div> <div id="result"></div>
</main>
</body> </body>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js" <script src="/static/bootstrap5/js/bootstrap.bundle.min.js"></script>
integrity="sha384-w76AqPfDkMBDXo30jS1Sgez6pr3x5MlQ1ZAGC+nuZB+EYdgRZgiwxhTBTkF7CXvN"
crossorigin="anonymous"></script>
<script> <script>
fetch('/api/iata-codes/').then(res => res.json()).then(out => { fetch('/api/iata-codes/').then(res => res.json()).then(out => {
let datalist = document.getElementById('iataCodes') let datalist = document.getElementById('iataCodes')
datalist.innerHTML = '' datalist.innerHTML = ''
for (let iata in out.iata2name) { for (let name in out.name2iata) {
let name = out.iata2name[iata] let iata = out.name2iata[name]
let elem = document.createElement('option') let elem = document.createElement('option')
elem.value = name elem.value = name
elem.setAttribute('data-iata', iata) elem.setAttribute('data-iata', iata)
@ -77,7 +74,7 @@
let day_elem = document.getElementById('day') let day_elem = document.getElementById('day')
let result_elem = document.getElementById('result') let result_elem = document.getElementById('result')
document.getElementById('form').addEventListener('submit', () => { document.getElementById('form').addEventListener('submit', () => {
result_elem.innerHTML = 'Chargement…' result_elem.innerHTML = '<div class="spinner-border" role="status"><span class="visually-hidden">Chargement…</span></div>'
fetch('/api/routes/' + day_elem.value + '/' + origin_elem.value + '/' + (destination_elem.value || 'undefined') + '/') fetch('/api/routes/' + day_elem.value + '/' + origin_elem.value + '/' + (destination_elem.value || 'undefined') + '/')
.then(resp => resp.json()) .then(resp => resp.json())
.then(routes => { .then(routes => {
@ -138,7 +135,7 @@
let text = route[0].origin let text = route[0].origin
for (let train of route) { for (let train of route) {
text += " (" + train.departure + ") --> (" + train.arrival + ") " + train.destination + ", " text += " (" + train.departure + ") --> (" + train.arrival + ") " + train.destination + " "
} }
route_elem.textContent = text route_elem.textContent = text
} }