Add option to update dataset silently
app.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+from contextlib import nullcontext
 import csv
 from datetime import date, datetime, time, timedelta
 import os
@@ -59,54 +60,58 @@ class RouteQueue(db.Model):
 
 
 @cli.command("update-dataset")
-def update_dataset():
+@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
+def update_dataset(verbose: bool = False):
     """
     Query the latest version of the SNCF OpenData dataset, as a CSV file.
     """
-    try:
-        resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
-        content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
-        content = content.replace('\r', '')
-        content = content.replace('" \n', '" \\n')
-        content = content.replace('.\n', '.\\n')
-        content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
-        content = content.replace('\n"', '\\n"')
+    resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
+    content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
+    content = content.replace('\r', '')
+    content = content.replace('" \n', '" \\n')
+    content = content.replace('.\n', '.\\n')
+    content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
+    content = content.replace('\n"', '\\n"')
 
-        info = json.loads(content)
-        modified_date = datetime.fromisoformat(info['dateModified'])
+    info = json.loads(content)
+    modified_date = datetime.fromisoformat(info['dateModified'])
 
-        utc = timezone('UTC')
-        last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
-            'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
+    utc = timezone('UTC')
+    last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
+        'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
 
-        if last_modified < modified_date:
+    if last_modified < modified_date:
+        if verbose:
             print("Updating tgvmax.csv…")
-            with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
-                resp.raise_for_status()
-                with open('tgvmax.csv', 'wb') as f:
-                    with tqdm(unit='io', unit_scale=True) as t:
-                        for chunk in resp.iter_content(chunk_size=512 * 1024):
-                            if chunk:
-                                f.write(chunk)
+        with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
+            resp.raise_for_status()
+            with open('tgvmax.csv', 'wb') as f:
+                with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
+                    for chunk in resp.iter_content(chunk_size=512 * 1024):
+                        if chunk:
+                            f.write(chunk)
+                            if verbose:
                                 t.update(len(chunk))
-            os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
+        os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
+        if verbose:
             print("Done")
-
-        print("Last modification:", modified_date)
-    except Exception as e:
-        print("An error occured while updating tgvmax.csv")
-        print(e)
+    else:
+        if verbose:
+            print("Last modification:", modified_date)
+        exit(2)
 
 
 @cli.command("parse-csv")
 @click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
-def parse_trains(flush: bool = False):
+@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
+def parse_trains(flush: bool = False, verbose: bool = False):
     """
     Parse the CSV file and store it to the database.
     """
 
     if flush:
-        print("Flush database…")
+        if verbose:
+            print("Flush database…")
         db.session.query(Train).delete()
 
     last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
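The key pattern in update_dataset is the optional progress bar: with --verbose off, contextlib.nullcontext() takes tqdm's place so the with-block keeps the same shape and nothing is written to the terminal. Below is a minimal, self-contained sketch of that pattern; the download() helper and its url/dest parameters are illustrative names, not part of app.py.

from contextlib import nullcontext

import requests
from tqdm import tqdm


def download(url: str, dest: str, verbose: bool = False) -> None:
    """Stream url to dest, showing a progress bar only when verbose. (Illustrative helper.)"""
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open(dest, 'wb') as f:
            # nullcontext() yields None, so only touch t when verbose is set
            with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
                for chunk in resp.iter_content(chunk_size=512 * 1024):
                    if chunk:
                        f.write(chunk)
                        if verbose:
                            t.update(len(chunk))

With the flag off, the command's only signal is its exit status: per the diff, it exits with code 2 when tgvmax.csv is already up to date.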
@@ -114,7 +119,7 @@ def parse_trains(flush: bool = False):
     with open('tgvmax.csv') as f:
         first_line = True
         already_seen = set()
-        for line in tqdm(csv.reader(f, delimiter=';')):
+        for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
             if first_line:
                 first_line = False
                 continue
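parse-csv silences its progress output differently: tqdm is swapped for an identity function around the CSV reader. A short sketch of that wrapper follows; the iter_rows() helper is illustrative and assumes the same semicolon-delimited tgvmax.csv file.

import csv

from tqdm import tqdm


def iter_rows(path: str = 'tgvmax.csv', verbose: bool = False):
    """Yield CSV rows, wrapping the reader in tqdm only when verbose. (Illustrative helper.)"""
    with open(path) as f:
        wrap = tqdm if verbose else (lambda x: x)  # identity function keeps output silent
        for line in wrap(csv.reader(f, delimiter=';')):
            yield line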