Add option to update dataset silently
This commit is contained in:
		
							
								
								
									
										67
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										67
									
								
								app.py
									
									
									
									
									
								
							@@ -1,5 +1,6 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
from contextlib import nullcontext
 | 
			
		||||
import csv
 | 
			
		||||
from datetime import date, datetime, time, timedelta
 | 
			
		||||
import os
 | 
			
		||||
@@ -59,54 +60,58 @@ class RouteQueue(db.Model):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@cli.command("update-dataset")
 | 
			
		||||
def update_dataset():
 | 
			
		||||
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
 | 
			
		||||
def update_dataset(verbose: bool = False):
 | 
			
		||||
    """
 | 
			
		||||
    Query the latest version of the SNCF OpenData dataset, as a CSV file.
 | 
			
		||||
    """
 | 
			
		||||
    try:
 | 
			
		||||
        resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
 | 
			
		||||
        content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
 | 
			
		||||
        content = content.replace('\r', '')
 | 
			
		||||
        content = content.replace('" \n', '" \\n')
 | 
			
		||||
        content = content.replace('.\n', '.\\n')
 | 
			
		||||
        content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
 | 
			
		||||
        content = content.replace('\n"', '\\n"')
 | 
			
		||||
    resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
 | 
			
		||||
    content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
 | 
			
		||||
    content = content.replace('\r', '')
 | 
			
		||||
    content = content.replace('" \n', '" \\n')
 | 
			
		||||
    content = content.replace('.\n', '.\\n')
 | 
			
		||||
    content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
 | 
			
		||||
    content = content.replace('\n"', '\\n"')
 | 
			
		||||
 | 
			
		||||
        info = json.loads(content)
 | 
			
		||||
        modified_date = datetime.fromisoformat(info['dateModified'])
 | 
			
		||||
    info = json.loads(content)
 | 
			
		||||
    modified_date = datetime.fromisoformat(info['dateModified'])
 | 
			
		||||
 | 
			
		||||
        utc = timezone('UTC')
 | 
			
		||||
        last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
 | 
			
		||||
            'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
 | 
			
		||||
    utc = timezone('UTC')
 | 
			
		||||
    last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
 | 
			
		||||
        'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
 | 
			
		||||
 | 
			
		||||
        if last_modified < modified_date:
 | 
			
		||||
    if last_modified < modified_date:
 | 
			
		||||
        if verbose:
 | 
			
		||||
            print("Updating tgvmax.csv…")
 | 
			
		||||
            with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
 | 
			
		||||
                resp.raise_for_status()
 | 
			
		||||
                with open('tgvmax.csv', 'wb') as f:
 | 
			
		||||
                    with tqdm(unit='io', unit_scale=True) as t:
 | 
			
		||||
                        for chunk in resp.iter_content(chunk_size=512 * 1024):
 | 
			
		||||
                            if chunk:
 | 
			
		||||
                                f.write(chunk)
 | 
			
		||||
        with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
 | 
			
		||||
            resp.raise_for_status()
 | 
			
		||||
            with open('tgvmax.csv', 'wb') as f:
 | 
			
		||||
                with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
 | 
			
		||||
                    for chunk in resp.iter_content(chunk_size=512 * 1024):
 | 
			
		||||
                        if chunk:
 | 
			
		||||
                            f.write(chunk)
 | 
			
		||||
                            if verbose:
 | 
			
		||||
                                t.update(len(chunk))
 | 
			
		||||
            os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
 | 
			
		||||
        os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
 | 
			
		||||
        if verbose:
 | 
			
		||||
            print("Done")
 | 
			
		||||
 | 
			
		||||
        print("Last modification:", modified_date)
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        print("An error occured while updating tgvmax.csv")
 | 
			
		||||
        print(e)
 | 
			
		||||
    else:
 | 
			
		||||
        if verbose:
 | 
			
		||||
            print("Last modification:", modified_date)
 | 
			
		||||
        exit(2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@cli.command("parse-csv")
 | 
			
		||||
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
 | 
			
		||||
def parse_trains(flush: bool = False):
 | 
			
		||||
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
 | 
			
		||||
def parse_trains(flush: bool = False, verbose: bool = False):
 | 
			
		||||
    """
 | 
			
		||||
    Parse the CSV file and store it to the database.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    if flush:
 | 
			
		||||
        print("Flush database…")
 | 
			
		||||
        if verbose:
 | 
			
		||||
            print("Flush database…")
 | 
			
		||||
        db.session.query(Train).delete()
 | 
			
		||||
 | 
			
		||||
    last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
 | 
			
		||||
@@ -114,7 +119,7 @@ def parse_trains(flush: bool = False):
 | 
			
		||||
    with open('tgvmax.csv') as f:
 | 
			
		||||
        first_line = True
 | 
			
		||||
        already_seen = set()
 | 
			
		||||
        for line in tqdm(csv.reader(f, delimiter=';')):
 | 
			
		||||
        for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
 | 
			
		||||
            if first_line:
 | 
			
		||||
                first_line = False
 | 
			
		||||
                continue
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user