Created
November 10, 2013 17:58
-
-
Save cclauss/7401534 to your computer and use it in GitHub Desktop.
austinWaterQuality
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Socrata Open Data datasets are cached locally and printed out. | |
Datasets published by City of Austin @ http://data.AustinTexas.gov | |
""" | |
import collections, contextlib, datetime, json, os.path, time | |
import urllib2 | |
rawData = False  # set to False for namedTuples to be printed

# Values substituted into fileNameFmt and urlFmt; 'dataSet' starts empty and
# is filled in for each dataset as the script iterates over dataSets.
fmtDict = dict(cityName='austintexas', dataFormat='json', dataSet=None)

# Dataset id --> positional record format.
# NOTE: this mapping is rebound by the assignment that follows it, so only
# the second mapping is in effect once the module has loaded.
dataSets = {
    'b6cd-bhbk': '{} {} {} {}',
    'fksj-fw68': '{:>7} {} {:<24} {}',
    'hh3n-3s7c': '{:>7} {} {} {}',
    'iuw2-kwij': '{} {} {:>9}',
    'uvma-gv9c': '{} {} {} {}',
}

# Dataset id --> record format using named fields.
dataSets = {
    'b6cd-bhbk': '{sample_site_no} {sample_date} {depth_in_meters:<4} {result:<5} {parameter}',
}

# Templates for the local cache file name, the download URL and the header
# line printed for each dataset.
fileNameFmt = '{dataSet}.{dataFormat}.py'
urlFmt = 'https://data.{cityName}.gov/api/views/{dataSet}/rows.{dataFormat}'
headerFmt = '{attribution} ({category}) - {id}: {name}'
def hoursAgo(inHoursAgo=24):
    """Return the epoch timestamp of the moment inHoursAgo hours before now.

    inHoursAgo -- number of hours to step back from the current time
                  (defaults to 24).

    The result is a float in the same units as time.time() (seconds).
    """
    secondsBack = datetime.timedelta(hours=inHoursAgo).total_seconds()
    now = time.time()
    return now - secondsBack
def fileMustBeRefreshed(inFileName, inMaxAgeHours=24):
    """Return True when the cached file is missing or too old to be reused.

    inFileName    -- path of the local cache file to inspect.
    inMaxAgeHours -- maximum acceptable age of the file in hours
                     (defaults to 24, the previously hard-coded value).

    Returns False only when inFileName is an existing regular file whose
    modification time is more recent than inMaxAgeHours hours ago.
    """
    if not os.path.isfile(inFileName):
        return True  # nothing cached yet (or the path is not a regular file)
    try:
        modifiedAt = os.path.getmtime(inFileName)
    except OSError:
        # The file vanished or became unreadable between the two checks;
        # treat it the same as a missing cache.
        return True
    return modifiedAt <= hoursAgo(inMaxAgeHours)
def getWebPageSource(inURL):
    """Open inURL and return everything read from the response.

    inURL -- the address to fetch with urllib2.urlopen().

    The response object is always closed before returning, including when
    reading from it raises.
    """
    response = urllib2.urlopen(inURL)
    with contextlib.closing(response):
        return response.read()
def getDataDict(inFileName):
    """Parse the JSON document stored in inFileName and return the result.

    inFileName -- path of a file containing JSON text.
    """
    with open(inFileName) as jsonFile:
        return json.load(jsonFile)
def dataRowNamedTuple(inDataDict):
    """Build the 'dataRow' namedtuple class describing one data record.

    inDataDict -- parsed dataset; its column descriptions are read from
                  inDataDict['meta']['view']['columns'].

    Only columns whose description contains a 'cachedContents' key become
    fields, in their original order; each field is named after the column's
    'fieldName' value.
    NOTE(review): presumably this filter drops bookkeeping columns that are
    not part of the published data -- confirm against a real response.
    """
    fieldNames = [column['fieldName']
                  for column in inDataDict['meta']['view']['columns']
                  if 'cachedContents' in column]
    return collections.namedtuple('dataRow', fieldNames)
# Main script: for every configured dataset (in sorted id order), refresh the
# local cache file when it is stale, then print a header for the dataset
# followed by one formatted line per data row.
for dataSet in sorted(dataSets):
    print('=' * 80)
    fmtDict['dataSet'] = dataSet
    fileName = fileNameFmt.format(**fmtDict)
    if fileMustBeRefreshed(fileName):
        theURL = urlFmt.format(**fmtDict)
        print('Writing {} --> {}'.format(theURL, fileName))
        # Download BEFORE opening the cache file: opening with 'w' truncates
        # it, so a failed download would otherwise leave an empty file with a
        # fresh timestamp that later runs would accept as a valid cache and
        # then fail to parse.
        pageSource = getWebPageSource(theURL)
        with open(fileName, 'w') as outFile:
            outFile.write(pageSource)
    dataDict = getDataDict(fileName)
    dataRow = dataRowNamedTuple(dataDict)
    dictView = dataDict['meta']['view']
    print(headerFmt.format(**dictView))
    print('> {description}'.format(**dictView))
    recordFmt = dataSets[dataSet]
    for theItem in dataDict['data']:
        # The first 8 entries of every row are skipped and the remainder is
        # mapped onto the dataRow fields by position, so the number of
        # remaining entries must equal the number of fields.
        # NOTE(review): presumably those 8 entries are per-row bookkeeping
        # values -- confirm against a real response.
        print(recordFmt.format(**dataRow(*theItem[8:])._asdict()))
# Debugging aid: import pprint; pprint.pprint(dataDict['data'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment