Add option to update dataset silently
This commit is contained in:
parent
4f326626bf
commit
9fbf4fb172
67
app.py
67
app.py
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from contextlib import nullcontext
|
||||||
import csv
|
import csv
|
||||||
from datetime import date, datetime, time, timedelta
|
from datetime import date, datetime, time, timedelta
|
||||||
import os
|
import os
|
||||||
|
@ -59,54 +60,58 @@ class RouteQueue(db.Model):
|
||||||
|
|
||||||
|
|
||||||
@cli.command("update-dataset")
|
@cli.command("update-dataset")
|
||||||
def update_dataset():
|
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
|
||||||
|
def update_dataset(verbose: bool = False):
|
||||||
"""
|
"""
|
||||||
Query the latest version of the SNCF OpenData dataset, as a CSV file.
|
Query the latest version of the SNCF OpenData dataset, as a CSV file.
|
||||||
"""
|
"""
|
||||||
try:
|
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
|
||||||
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
|
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
|
||||||
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
|
content = content.replace('\r', '')
|
||||||
content = content.replace('\r', '')
|
content = content.replace('" \n', '" \\n')
|
||||||
content = content.replace('" \n', '" \\n')
|
content = content.replace('.\n', '.\\n')
|
||||||
content = content.replace('.\n', '.\\n')
|
content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
|
||||||
content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
|
content = content.replace('\n"', '\\n"')
|
||||||
content = content.replace('\n"', '\\n"')
|
|
||||||
|
|
||||||
info = json.loads(content)
|
info = json.loads(content)
|
||||||
modified_date = datetime.fromisoformat(info['dateModified'])
|
modified_date = datetime.fromisoformat(info['dateModified'])
|
||||||
|
|
||||||
utc = timezone('UTC')
|
utc = timezone('UTC')
|
||||||
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
|
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
|
||||||
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
|
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
|
||||||
|
|
||||||
if last_modified < modified_date:
|
if last_modified < modified_date:
|
||||||
|
if verbose:
|
||||||
print("Updating tgvmax.csv…")
|
print("Updating tgvmax.csv…")
|
||||||
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
|
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
with open('tgvmax.csv', 'wb') as f:
|
with open('tgvmax.csv', 'wb') as f:
|
||||||
with tqdm(unit='io', unit_scale=True) as t:
|
with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
|
||||||
for chunk in resp.iter_content(chunk_size=512 * 1024):
|
for chunk in resp.iter_content(chunk_size=512 * 1024):
|
||||||
if chunk:
|
if chunk:
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
|
if verbose:
|
||||||
t.update(len(chunk))
|
t.update(len(chunk))
|
||||||
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
|
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
|
||||||
|
if verbose:
|
||||||
print("Done")
|
print("Done")
|
||||||
|
else:
|
||||||
print("Last modification:", modified_date)
|
if verbose:
|
||||||
except Exception as e:
|
print("Last modification:", modified_date)
|
||||||
print("An error occured while updating tgvmax.csv")
|
exit(2)
|
||||||
print(e)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command("parse-csv")
|
@cli.command("parse-csv")
|
||||||
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
|
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
|
||||||
def parse_trains(flush: bool = False):
|
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
|
||||||
|
def parse_trains(flush: bool = False, verbose: bool = False):
|
||||||
"""
|
"""
|
||||||
Parse the CSV file and store it to the database.
|
Parse the CSV file and store it to the database.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if flush:
|
if flush:
|
||||||
print("Flush database…")
|
if verbose:
|
||||||
|
print("Flush database…")
|
||||||
db.session.query(Train).delete()
|
db.session.query(Train).delete()
|
||||||
|
|
||||||
last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
|
last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
|
||||||
|
@ -114,7 +119,7 @@ def parse_trains(flush: bool = False):
|
||||||
with open('tgvmax.csv') as f:
|
with open('tgvmax.csv') as f:
|
||||||
first_line = True
|
first_line = True
|
||||||
already_seen = set()
|
already_seen = set()
|
||||||
for line in tqdm(csv.reader(f, delimiter=';')):
|
for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
|
||||||
if first_line:
|
if first_line:
|
||||||
first_line = False
|
first_line = False
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in New Issue