Skip to content

Instantly share code, notes, and snippets.

@marks
Created February 25, 2016 21:07
Show Gist options
  • Save marks/a75772155b82952968db to your computer and use it in GitHub Desktop.
Save marks/a75772155b82952968db to your computer and use it in GitHub Desktop.
SPSS and Stata code for importing a Socrata open dataset using SODA CSV APIs
* Requires official IBM SPSS extension to be installed [https://www.ibm.com/developerworks/community/wikis/home?lang=en#!/wiki/We70df3195ec8_4f95_9773_42e448fa9029/page/Downloads%20for%20IBM%C2%AE%20SPSS%C2%AE%20Statistics].
* Encoding: UTF-8.
BEGIN PROGRAM Python.
# Import python dependencies
import csv
import re
import urllib
import urllib2
# provided by the IBM SPSS Python integration, not the standard library
import spss
# Socrata SODA endpoint that serves the dataset as CSV
base_url = "https://mydata.iadb.org/resource/kbuu-6kud.csv"
# URL-encoded query string for the request;
# $limit caps the download at 1 million rows for now
params = urllib.urlencode([('$limit', 1000000)])
## BEGIN HELPER FUNCTION
# from https://andrewpwheeler.wordpress.com/2014/09/19/turning-data-from-python-into-spss-data/
def SPSSData(data,vars,types,name=None):
    """Create an SPSS dataset from Python data inside a single data step.

    data  -- iterable of rows; each row is converted to a list and appended
             as one case
    vars  -- variable names, in column order
    types -- variable types, parallel to vars (each pair is passed to
             varlist.append)
    name  -- passed straight through to spss.Dataset(name=...)

    Any exception raised while building the dataset propagates to the
    caller, but the data step is always ended first.
    """
    spss.StartDataStep()
    try:
        datasetObj = spss.Dataset(name=name)
        #appending variables to dataset (names paired with their formats)
        for var_name, var_type in zip(vars, types):
            datasetObj.varlist.append(var_name, var_type)
        #now the data
        for row in data:
            datasetObj.cases.append(list(row))
    finally:
        # end the data step even when an append fails, so a bad row does
        # not leave the data step open
        spss.EndDataStep()
## END HELPER FUNCTION
# Form request URL and read in as CSV.
# urllib2.urlopen raises HTTPError on a 4xx/5xx status, so an error page
# returned by the API is never parsed as if it were data.
response = urllib2.urlopen(base_url + "?%s" % params)
try:
    # Read the whole response: header row first, then the data rows
    rows_from_csv = list(csv.reader(response))
finally:
    response.close()
# Fail with a clear message instead of an IndexError on an empty response
if not rows_from_csv:
    raise ValueError("No CSV rows were returned by %s" % base_url)
# Variable names: the CSV header names with every non-word character removed
header_vars = [re.sub(r'\W+', '', column_name) for column_name in rows_from_csv[0]]
# Every column gets variable type 50 (a string of width 50 in the spss module)
header_types = [50] * len(header_vars)
# Use helper function to create the dataset and import the data.
# The helper defines the variables itself, so they are not also defined in a
# separate data step here; doing so only left a second, empty dataset behind.
SPSSData(data=rows_from_csv[1:],vars=header_vars,types=header_types,name=None)
END PROGRAM.
* Start with no dataset in memory
clear
* Download the dataset as CSV through the Socrata SODA API
* (%24 is the URL-encoded dollar sign of the limit parameter).
* The replace option overwrites the file left by an earlier run, so the
* script can be re-run without copy stopping because the file already exists.
copy "https://mydata.iadb.org/resource/kbuu-6kud.csv?%24limit=100000000" data_from_socrata.csv, replace
* Load the downloaded CSV file into memory
import delimited data_from_socrata.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment