From 9fbf4fb172c6409220d09974e278581c62fba7b1 Mon Sep 17 00:00:00 2001 From: Emmy D'Anello Date: Mon, 13 Feb 2023 13:12:39 +0100 Subject: [PATCH] Add option to update dataset silently --- app.py | 67 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/app.py b/app.py index 5913a53..7bc0848 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from contextlib import nullcontext import csv from datetime import date, datetime, time, timedelta import os @@ -59,54 +60,58 @@ class RouteQueue(db.Model): @cli.command("update-dataset") -def update_dataset(): +@click.option('--verbose', '-v', is_flag=True, help="Display errors.") +def update_dataset(verbose: bool = False): """ Query the latest version of the SNCF OpenData dataset, as a CSV file. """ - try: - resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/') - content = resp.content.decode().split('')[0].strip() - content = content.replace('\r', '') - content = content.replace('" \n', '" \\n') - content = content.replace('.\n', '.\\n') - content = content.replace('\n\n \nLa', '\\n\\n \\nLa') - content = content.replace('\n"', '\\n"') + resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/') + content = resp.content.decode().split('')[0].strip() + content = content.replace('\r', '') + content = content.replace('" \n', '" \\n') + content = content.replace('.\n', '.\\n') + content = content.replace('\n\n \nLa', '\\n\\n \\nLa') + content = content.replace('\n"', '\\n"') - info = json.loads(content) - modified_date = datetime.fromisoformat(info['dateModified']) + info = json.loads(content) + modified_date = datetime.fromisoformat(info['dateModified']) - utc = timezone('UTC') - last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile( - 'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc) + utc = timezone('UTC') + last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile( + 'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc) - if last_modified < modified_date: + if last_modified < modified_date: + if verbose: print("Updating tgvmax.csv…") - with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp: - resp.raise_for_status() - with open('tgvmax.csv', 'wb') as f: - with tqdm(unit='io', unit_scale=True) as t: - for chunk in resp.iter_content(chunk_size=512 * 1024): - if chunk: - f.write(chunk) + with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp: + resp.raise_for_status() + with open('tgvmax.csv', 'wb') as f: + with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t: + for chunk in resp.iter_content(chunk_size=512 * 1024): + if chunk: + f.write(chunk) + if verbose: t.update(len(chunk)) - os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp())) + os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp())) + if verbose: print("Done") - - print("Last modification:", modified_date) - except Exception as e: - print("An error occured while updating tgvmax.csv") - print(e) + else: + if verbose: + print("Last modification:", modified_date) + exit(2) @cli.command("parse-csv") @click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.") -def parse_trains(flush: bool = False): +@click.option('--verbose', '-v', is_flag=True, help="Display errors.") +def parse_trains(flush: bool = False, verbose: bool = False): """ Parse the CSV file and store it to the database. """ if flush: - print("Flush database…") + if verbose: + print("Flush database…") db.session.query(Train).delete() last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC')) @@ -114,7 +119,7 @@ def parse_trains(flush: bool = False): with open('tgvmax.csv') as f: first_line = True already_seen = set() - for line in tqdm(csv.reader(f, delimiter=';')): + for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')): if first_line: first_line = False continue