Created
January 15, 2019 16:19
-
-
Save tom-montgomery/1d81aef984824f45cf82d2d6f6c16424 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gather_socrata_assets():
    """Gather dataset assets from Socrata for metadata generation.

    Queries the Socrata catalog API for the data.austintexas.gov domain and
    appends one dictionary per dataset to the module-level ``assets`` list.
    Dictionary keys are the field names required by DataCite and the values
    come from the Socrata catalog entry.

    Catalog entries whose type is not ``'dataset'`` are ignored. Datasets that
    are missing any of the required fields (including a domain metadata entry
    whose key contains ``'Department'``) are skipped and tallied in the
    module-level ``count``.

    Returns:
        The module-level ``assets`` list, extended with the gathered datasets.

    Raises:
        requests.RequestException: if the catalog request fails, times out,
            or returns an HTTP error status.
    """
    global count
    global assets
    # limits query to 2000 results
    url = 'http://api.us.socrata.com/api/catalog/v1?limit=2000&domains=data.austintexas.gov'
    # A timeout keeps a stalled connection from hanging the run forever, and
    # raise_for_status surfaces HTTP errors instead of a confusing KeyError
    # on 'results' further down.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    for item in response.json()['results']:
        try:
            resource = item['resource']
            if resource['type'] != 'dataset':
                continue
            # Retrieve the department name by looking for the metadata entry
            # with Department in its key; domain metadata field order varies.
            # Reset per item so a dataset without a department never inherits
            # the value found for the previous dataset.
            department = None
            for entry in item['classification']['domain_metadata']:
                if 'Department' in entry['key']:
                    department = entry['value']
            if department is None:
                count += 1
                continue
            # assemble dictionary and append to asset list
            assets.append({'socrata_4x4': resource['id'],
                           'name': resource['name'],
                           'department': department,
                           'type': resource['type'],
                           'year': resource['createdAt'].split('-')[0],
                           'permalink': item['permalink'],
                           'desc': resource['description']})
        except (KeyError, IndexError, TypeError):
            # Count assets that do not have the fields above. Missing dict
            # keys raise KeyError (not IndexError); a null where a list or
            # dict is expected raises TypeError.
            count += 1
    return assets
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment