Skip to content

Instantly share code, notes, and snippets.

@Tristramg
Created March 30, 2020 12:33
Show Gist options
  • Save Tristramg/1c673329d14ea3584fc471947d2e50ce to your computer and use it in GitHub Desktop.
Save Tristramg/1c673329d14ea3584fc471947d2e50ce to your computer and use it in GitHub Desktop.
import csv
import json
import os
import shutil
import subprocess
import tempfile
import time
import urllib.request
# Path to the converter executable that is run on every GTFS resource.
tartare = "../tartare-tools/target/release/gtfs2netexfr"
# API endpoint returning the JSON list of datasets to process.
url = "https://transport.data.gouv.fr/api/datasets"

# For every GTFS resource listed by the API: download it, run the converter
# on it, and append one row to report.csv with the columns
#   publisher, dataset title, resource URL, GTFS size (MB),
#   zipped output size (MB, blank if conversion failed), duration (s).
# A resource that cannot be downloaded gets a row with the three
# measurement columns left blank instead of aborting the whole run.
with urllib.request.urlopen(url) as f, \
        open('report.csv', 'w', newline='', encoding='utf-8') as csvfile:
    datasets = json.loads(f.read().decode('utf-8'))
    report = csv.writer(csvfile)
    for dataset in datasets:
        publisher = dataset["publisher"]["name"]
        title = dataset["title"]
        print("Processing dataset “{}” ({})".format(title, publisher))
        for resource in dataset["resources"]:
            resource_url = resource.get("url")
            if resource.get("format") != "GTFS" or not resource_url:
                continue
            # A fresh scratch directory per resource holds the download, the
            # converter output and the archive; it is removed even when a
            # step raises, so a failure cannot poison later iterations or
            # later runs.
            with tempfile.TemporaryDirectory(prefix="tartare-") as workdir:
                gtfs = os.path.join(workdir, "gtfs.zip")
                out_dir = os.path.join(workdir, "out")
                try:
                    urllib.request.urlretrieve(resource_url, gtfs)
                except (OSError, ValueError) as err:
                    # URLError/HTTPError are OSError subclasses; ValueError
                    # is raised for malformed or unsupported URLs.
                    print("Download of {} failed: {}".format(resource_url, err))
                    report.writerow([publisher, title, resource_url, '', '', ''])
                    csvfile.flush()
                    continue
                gtfs_size = os.stat(gtfs).st_size / 1000 / 1000
                os.mkdir(out_dir)
                # perf_counter is monotonic, so the measured duration is not
                # affected by system clock adjustments.
                start = time.perf_counter()
                ret = subprocess.run([tartare, "--input", gtfs, "--output", out_dir, "--participant", publisher])
                duration = time.perf_counter() - start
                netex_size = ''
                if ret.returncode == 0:
                    # The archive is written next to (not inside) out_dir so
                    # it can never end up containing itself.
                    netex = shutil.make_archive(os.path.join(workdir, "netex"), "zip", root_dir=out_dir)
                    netex_size = os.stat(netex).st_size / 1000 / 1000
                report.writerow([publisher, title, resource_url, gtfs_size, netex_size, duration])
                # Flush after each row so a partial report survives an
                # interrupted run.
                csvfile.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment