Skip to content

Instantly share code, notes, and snippets.

@dogrdon
Last active November 7, 2016 16:21
Show Gist options
  • Save dogrdon/cd7b3648efae1cc1d6305690a677dc48 to your computer and use it in GitHub Desktop.
Save dogrdon/cd7b3648efae1cc1d6305690a677dc48 to your computer and use it in GitHub Desktop.
Getting only the official datasets in a Socrata portal (or anything that follows the data.json standard - https://project-open-data.cio.gov/v1.1/api/)
import csv
import sys

import requests
# Export the datasets listed in a data.json catalogue to a CSV file.
# Each row combines fields from the catalogue entry with the viewType and
# displayType returned by requesting the entry's own `identifier` URL.

# csv.writer wants byte strings on Python 2 and text on Python 3.
PY2 = sys.version_info[0] == 2


def _open_csv(path):
    """Open *path* for csv.writer in the mode this Python version needs."""
    if PY2:
        return open(path, 'wb')
    # newline='' lets the csv module control line endings itself.
    return open(path, 'w', newline='', encoding='utf-8')


def _cell(value):
    """Return *value* in the form csv.writer expects on this interpreter."""
    # Encoding on Python 3 would make csv write the bytes repr (b'...').
    return value.encode('utf-8') if PY2 else value


with _open_csv('./canonical_datasets_nycopen_07NOV2016.csv') as w:
    writer = csv.writer(w)
    # One header per column actually written below (twelve in total).
    writer.writerow(['access', 'issued', 'modified', 'publisher', 'title', 'location', 'identifier', 'id', 'theme', 'description', 'view_type', 'display_type'])
    data_location = 'https://nycopendata.socrata.com/data.json'
    res = requests.get(data_location)
    # Fail with the HTTP error rather than a confusing JSON/KeyError later.
    res.raise_for_status()
    data = res.json()
    datasets = data['dataset']
    for d in datasets:
        try:
            access = d['accessLevel']
            issued = d['issued']
            modified = d['modified']
            publisher = d['publisher']['name']
            title = d['title']
            location = d['landingPage']
            identifier = d['identifier']
            # The dataset id is the last path segment of the landing page URL.
            dataset_id = location.rsplit('/', 1)[-1]
            # 'theme' and 'description' are optional; default to empty.
            theme = ";".join(d.get('theme', []))
            description = d.get('description', '')
            # The identifier URL is fetched for per-dataset view details.
            dataset_res = requests.get(identifier)
            dataset_res.raise_for_status()
            dataset_details = dataset_res.json()
            view_type = dataset_details['viewType']
            display_type = dataset_details.get('displayType', '')
            row = [_cell(r) for r in [access, issued, modified, publisher, title, location, identifier, dataset_id, theme, description, view_type, display_type]]
            writer.writerow(row)
        except Exception as e:
            # Stop at the first failing dataset. Use .get() so a missing
            # 'identifier'/'title' cannot raise again inside this handler.
            sys.exit("ERROR on {0} - {1}: {2}".format(d.get('identifier'), d.get('title'), e))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment