Compare commits

..

7 Commits

Author SHA1 Message Date
ae859e0db4 Put content in a container
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 15:14:57 +02:00
4332862419 Optimize computation
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 15:05:23 +02:00
2004155e51 Multiple changes
Signed-off-by: Emmy D'Anello <ynerant@emy.lu>
2023-04-25 11:54:11 +02:00
089af1db75 Remove progress bar while searching routes 2023-02-19 18:01:16 +01:00
0f61b3bece Replace loading text by a spinner 2023-02-19 17:43:36 +01:00
06f836e13f Use local bootstrap5 files 2023-02-13 13:19:14 +01:00
9fbf4fb172 Add option to update dataset silently 2023-02-13 13:12:39 +01:00
3 changed files with 157 additions and 117 deletions

242
app.py
View File

@ -1,11 +1,13 @@
#!/usr/bin/env python3
from contextlib import nullcontext
import csv
from datetime import date, datetime, time, timedelta
import os
import json
from pytz import timezone
import requests
from time import sleep
import click
from flask import Flask, render_template
@ -59,54 +61,58 @@ class RouteQueue(db.Model):
@cli.command("update-dataset")
def update_dataset():
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def update_dataset(verbose: bool = False):
"""
Query the latest version of the SNCF OpenData dataset, as a CSV file.
"""
try:
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
content = content.replace('\r', '')
content = content.replace('" \n', '" \\n')
content = content.replace('.\n', '.\\n')
content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
content = content.replace('\n"', '\\n"')
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
content = content.replace('\r', '')
content = content.replace('" \n', '" \\n')
content = content.replace('.\n', '.\\n')
content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
content = content.replace('\n"', '\\n"')
info = json.loads(content)
modified_date = datetime.fromisoformat(info['dateModified'])
info = json.loads(content)
modified_date = datetime.fromisoformat(info['dateModified'])
utc = timezone('UTC')
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
utc = timezone('UTC')
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
if last_modified < modified_date:
if last_modified < modified_date:
if verbose:
print("Updating tgvmax.csv…")
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
resp.raise_for_status()
with open('tgvmax.csv', 'wb') as f:
with tqdm(unit='io', unit_scale=True) as t:
for chunk in resp.iter_content(chunk_size=512 * 1024):
if chunk:
f.write(chunk)
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
resp.raise_for_status()
with open('tgvmax.csv', 'wb') as f:
with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
for chunk in resp.iter_content(chunk_size=512 * 1024):
if chunk:
f.write(chunk)
if verbose:
t.update(len(chunk))
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
if verbose:
print("Done")
print("Last modification:", modified_date)
except Exception as e:
print("An error occured while updating tgvmax.csv")
print(e)
else:
if verbose:
print("Last modification:", modified_date)
exit(2)
@cli.command("parse-csv")
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
def parse_trains(flush: bool = False):
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def parse_trains(flush: bool = False, verbose: bool = False):
"""
Parse the CSV file and store it to the database.
"""
if flush:
print("Flush database…")
if verbose:
print("Flush database…")
db.session.query(Train).delete()
last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
@ -114,8 +120,11 @@ def parse_trains(flush: bool = False):
with open('tgvmax.csv') as f:
first_line = True
already_seen = set()
for line in tqdm(csv.reader(f, delimiter=';')):
already_updated = set(x[0] for x in db.session.query(Train).filter(Train.last_modification > last_modification)\
.values(Train.id))
for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
if first_line:
# Skip first line
first_line = False
continue
@ -128,6 +137,10 @@ def parse_trains(flush: bool = False):
print("Duplicate:", train_id)
continue
if train_id in already_updated:
# Already updated by the simulator
continue
train = Train(
id=train_id,
day=date.fromisoformat(line[0]),
@ -155,57 +168,69 @@ def parse_trains(flush: bool = False):
db.session.commit()
def find_routes(day: date, origin: str, destination: str | None):
trains = db.session.query(Train).filter_by(day=day, tgvmax=True).all()
def find_routes(day: date | datetime, origin: str, destination: str | None,
verbose: bool = False, min_dep: time = time(0, 0),
explored: dict | None = None):
if isinstance(day, datetime):
day = day.date()
if explored is None:
explored = {}
if origin not in explored:
explored[origin] = (min_dep, None)
valid_routes = []
max_dep = time(23, 59)
else:
max_dep, valid_routes = explored[origin]
if max_dep < min_dep:
# Already parsed these trains
return {destination: valid_routes}
explored[origin] = min_dep, None
trains = db.session.query(Train).filter_by(day=day, tgvmax=True, orig=origin)\
.filter(Train.dep >= min_dep, Train.dep < max_dep).all()
if not trains:
# No train in the requested interval
explored[origin] = (min_dep, valid_routes)
return {destination: valid_routes}
trains.sort(key=lambda train: train.dep)
db.session.commit()
# For better results later, fetch all trains from the origin or to the destination
# This is not exhaustive, but can be a good approximation
queue_routes(day, origin=origin)
if destination:
queue_routes(day, destination=destination)
per_arr_explore = {}
valid_routes = []
for train in tqdm(trains):
if train.orig == origin:
# Update from the TGVMax simulator
queue_route(day, train.orig_iata, train.dest_iata)
it = [train]
if train.dest == destination:
# We hope that we have a direct train
valid_routes.append(it)
else:
per_arr_explore.setdefault(train.dest, [])
per_arr_explore[train.dest].append(it)
continue
for it in list(per_arr_explore.get(train.orig, [])):
if any(train.dest == tr.dest or train.dest == origin for tr in it):
# Avoid loops
for train in (t := tqdm(trains, desc=origin) if verbose else trains):
if train.dest == destination:
# We hope that we have a direct train
valid_routes.append([train])
else:
if train.dest in explored and explored[train.dest][1] is None:
# This is a loop
continue
elif train.arr < min_dep:
# The train is not direct and arrives on the next day, we avoid that
continue
find_routes(day, train.dest, destination, verbose, train.arr, explored)
last_train = it[-1]
# Filter unusable trains
valid_routes += [[train] + it for it in explored[train.dest][1] if it[0].dep >= train.arr]
if last_train.arr <= train.dep:
# Update from the TGVMax simulator, this line can be useful later
queue_route(day, train.orig_iata, train.dest_iata)
explored[origin] = (min_dep, valid_routes)
new_it = it + [train]
if train.dest == destination:
# Goal is achieved
valid_routes.append(new_it)
else:
per_arr_explore.setdefault(train.dest, [])
per_arr_explore[train.dest].append(new_it)
# Send queued trains to the database
db.session.commit()
return {destination: valid_routes} if destination else per_arr_explore
return {destination: valid_routes} if destination else {}
def queue_route(day: date | datetime, origin: str, destination: str, verbose: bool = False):
# Register find_routes as the `find-routes` CLI command.
# The click wrappers are applied as plain nested calls instead of decorators,
# and the resulting command object is not assigned to any name: the
# module-level name `find_routes` therefore stays bound to the undecorated
# function, which keeps it callable from other Python code.
# Command-line interface, from outermost to innermost wrapper:
#   day          positional, parsed by click.DateTime with the %Y-%m-%d format
#   origin       positional string
#   destination  positional string, declared with default=None
#   -v/--verbose boolean flag
# NOTE(review): the option's help text reads "Display errors.", while the
# visible part of find_routes uses `verbose` to enable a tqdm progress bar —
# confirm the intended wording.
cli.command('find-routes')(click.argument('day', type=click.DateTime(formats=['%Y-%m-%d']))
(click.argument('origin', type=str)
(click.argument('destination', type=str, default=None)
(click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
(find_routes)))))
def queue_route(day: date | datetime, origin: str, destination: str, verbose: bool = False, autocommit: bool = True):
"""
Fetch the TGVMax simulator to refresh data.
@ -218,23 +243,14 @@ def queue_route(day: date | datetime, origin: str, destination: str, verbose: bo
if isinstance(day, datetime):
day = day.date()
query = db.session.query(RouteQueue).filter_by(day=day, origin=origin, destination=destination, response_time=None)
query = db.session.query(RouteQueue).filter_by(day=day, origin=origin, destination=destination)\
.filter((RouteQueue.response_time == None) | (RouteQueue.expiration_time >= datetime.now(timezone('UTC'))))
if query.count():
if verbose:
print("Already queued")
return
query = db.session.query(RouteQueue).filter(RouteQueue.day == day,
RouteQueue.origin == origin,
RouteQueue.destination == destination,
RouteQueue.expiration_time >= datetime.now(timezone('UTC')))
if query.count():
if verbose:
print("Using recent value")
return
db.session.add(RouteQueue(day=day, origin=origin, destination=destination))
db.session.commit()
if autocommit:
db.session.commit()
# Don't use the decorator to keep the function callable
@ -246,17 +262,27 @@ cli.command('queue-route')(click.argument('day', type=click.DateTime(formats=['%
def queue_routes(day: date | datetime, origin: str | None = None,
destination: str | None = None, verbose: bool = False):
destination: str | None = None, verbose: bool = False,
autocommit: bool = True):
if isinstance(day, datetime):
day = day.date()
valid_routes = set(db.session.query(RouteQueue).filter_by(day=day)\
.filter((RouteQueue.response_time == None) | (RouteQueue.expiration_time >= datetime.now(timezone('UTC'))))\
.values(RouteQueue.origin, RouteQueue.destination))
query = db.session.query(Train).filter((Train.day == day))
if origin:
query = query.filter((Train.orig_iata == origin) | (Train.orig == origin))
if destination:
query = query.filter((Train.dest_iata == destination) | (Train.dest == destination))
for train in query.all():
queue_route(day, train.orig_iata, train.dest_iata, verbose)
query = query.all()
for train in (t := tqdm(query) if verbose else query):
if verbose:
t.set_description(f"{day}: {train.orig} --> {train.dest}")
if (train.orig_iata, train.dest_iata) not in valid_routes:
queue_route(day, train.orig_iata, train.dest_iata, verbose, autocommit)
valid_routes.add((train.orig_iata, train.dest_iata))
# Same as above
@ -269,27 +295,38 @@ cli.command('queue-routes')(click.argument('day', type=click.DateTime(formats=['
@cli.command('process-queue', help="Process the waiting list to refresh from the simulator.")
@click.argument('number', default=30, type=int)
def process_queue(number: int):
queue = db.session.query(RouteQueue).filter_by(response_time=None).order_by(RouteQueue.queue_time)
@click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
def process_queue(number: int, verbose: bool = False):
queue = db.session.query(RouteQueue).filter_by(response_time=None).order_by(RouteQueue.queue_time).all()
if number > 0:
queue = queue[:number]
URL = "https://www.maxjeune-tgvinoui.sncf/api/public/refdata/search-freeplaces-proposals"
for req in queue:
if verbose:
query = db.session.query(Train).with_entities(Train.orig_iata, Train.orig).distinct()
iata_to_names = {k: v for (k, v) in query.all()}
for i, req in enumerate(t := tqdm(queue) if verbose else queue):
req: RouteQueue
resp = requests.post(URL, json={
'departureDateTime': req.day.isoformat(),
'origin': req.origin,
'destination': req.destination,
})
if verbose:
t.set_description(f"{req.day:%d/%m/%Y}: {iata_to_names[req.origin]} --> {iata_to_names[req.destination]}")
resp = None
while resp is None or resp.status_code == 429:
resp = requests.post(URL, json={
'departureDateTime': req.day.isoformat(),
'origin': req.origin,
'destination': req.destination,
})
if resp.status_code == 429:
sleep(1)
if resp.status_code == 404:
# No travel found
req.response_time = datetime.now()
req.expiration_time = datetime.now() + timedelta(hours=1)
req.expiration_time = datetime.now() + timedelta(hours=3)
db.session.add(req)
db.session.commit()
continue
resp.raise_for_status()
@ -305,9 +342,11 @@ def process_queue(number: int):
for proposal in data['proposals']:
train = db.session.query(Train).filter_by(day=req.day, number=int(proposal['trainNumber']),
orig_iata=req.origin, dest_iata=req.destination).first()
orig_iata=proposal['origin']['rrCode'],
dest_iata=proposal['destination']['rrCode']).first()
if train is None:
# In a city with multiple stations
print("ERROR")
print(proposal)
continue
train.tgvmax = True
@ -316,7 +355,10 @@ def process_queue(number: int):
train.expiration_time = req.expiration_time
db.session.add(train)
db.session.commit()
if i % 50 == 0:
db.session.commit()
db.session.commit()
@app.get('/')

1
static/bootstrap5 Symbolic link
View File

@ -0,0 +1 @@
/usr/share/javascript/bootstrap5/

View File

@ -2,53 +2,50 @@
<html lang="fr">
<head>
<title>Calculateur TGVMax</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css"
rel="stylesheet"
integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD"
crossorigin="anonymous">
<link href="/static/bootstrap5/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
<form id="form" action="#">
<main class="container">
<form id="form" action="#">
<datalist id="iataCodes">
<option value="Chargement…">
</datalist>
<div class="mb-3">
<label for="origin" class="form-label">Origine :</label>
<input type="text" class="form-control" list="iataCodes" id="origin"
placeholder="Origine…" aria-describedby="originHelp">
placeholder="Origine…" aria-describedby="originHelp">
<input type="hidden" class="form-control" id="originIata">
<div id="originHelp" class="form-text">Le point de départ de votre trajet.</div>
</div>
<div class="mb-3">
<label for="destination" class="form-label">Destination :</label>
<input type="text" class="form-control" list="iataCodes" id="destination"
placeholder="Destination…" aria-describedby="destinationHelp">
placeholder="Destination…" aria-describedby="destinationHelp">
<input type="hidden" class="form-control" id="destinationIata">
<div id="destinationHelp" class="form-text">Le point d'arrivée de votre trajet.</div>
</div>
<div class="mb-3">
<label for="day" class="form-label">Jour de départ :</label>
<input type="date" class="form-control" id="day" aria-describedby="dayHelp"
min="{{ today }}" max="{{ max_day }}" value="{{ today }}">
min="{{ today }}" max="{{ max_day }}" value="{{ today }}">
<div id="dayHelp" class="form-text">Le jour de votre départ.</div>
</div>
<div class="mb-3">
<input type="submit" class="form-control btn btn-primary" value="Rechercher…">
</div>
</form>
<div id="result"></div>
</form>
<div id="result"></div>
</main>
</body>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"
integrity="sha384-w76AqPfDkMBDXo30jS1Sgez6pr3x5MlQ1ZAGC+nuZB+EYdgRZgiwxhTBTkF7CXvN"
crossorigin="anonymous"></script>
<script src="/static/bootstrap5/js/bootstrap.bundle.min.js"></script>
<script>
fetch('/api/iata-codes/').then(res => res.json()).then(out => {
let datalist = document.getElementById('iataCodes')
datalist.innerHTML = ''
for (let iata in out.iata2name) {
let name = out.iata2name[iata]
for (let name in out.name2iata) {
let iata = out.name2iata[name]
let elem = document.createElement('option')
elem.value = name
elem.setAttribute('data-iata', iata)
@ -77,7 +74,7 @@
let day_elem = document.getElementById('day')
let result_elem = document.getElementById('result')
document.getElementById('form').addEventListener('submit', () => {
result_elem.innerHTML = 'Chargement…'
result_elem.innerHTML = '<div class="spinner-border" role="status"><span class="visually-hidden">Chargement…</span></div>'
fetch('/api/routes/' + day_elem.value + '/' + origin_elem.value + '/' + (destination_elem.value || 'undefined') + '/')
.then(resp => resp.json())
.then(routes => {
@ -138,7 +135,7 @@
let text = route[0].origin
for (let train of route) {
text += " (" + train.departure + ") --> (" + train.arrival + ") " + train.destination + ", "
text += " (" + train.departure + ") --> (" + train.arrival + ") " + train.destination + " "
}
route_elem.textContent = text
}