Created
February 25, 2016 21:07
-
-
Save marks/a75772155b82952968db to your computer and use it in GitHub Desktop.
SPSS and Stata code for importing a Socrata open dataset using SODA CSV APIs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* Requires the official IBM SPSS Python extension to be installed [https://www.ibm.com/developerworks/community/wikis/home?lang=en#!/wiki/We70df3195ec8_4f95_9773_42e448fa9029/page/Downloads%20for%20IBM%C2%AE%20SPSS%C2%AE%20Statistics].
* Encoding: UTF-8. | |
BEGIN PROGRAM Python. | |
# Import python dependencies | |
import csv
import re
import urllib
import urllib2

import spss
# Socrata SODA endpoint that serves the dataset as CSV.
base_url = "https://mydata.iadb.org/resource/kbuu-6kud.csv"
# URL-encoded query string; '$limit' caps the download at 1 million rows for now.
# urlencode lives in the Python 2 `urllib` module (not `urllib2`), so `urllib`
# must be imported at the top of the program.
params = urllib.urlencode({'$limit': 1000000})
## BEGIN HELPER FUNCTION | |
# from https://andrewpwheeler.wordpress.com/2014/09/19/turning-data-from-python-into-spss-data/ | |
def SPSSData(data,vars,types,name=None):
    """Create an SPSS dataset from in-memory rows.

    data  -- iterable of rows; each row is converted to a list and appended
             to the dataset as one case.
    vars  -- variable names, one per column.
    types -- variable types, paired positionally with ``vars`` and passed as
             the second argument to ``varlist.append``.
    name  -- forwarded unchanged to ``spss.Dataset`` (default None).

    The data step is always closed, even when appending a variable or a case
    raises, so a failed import does not leave a data step open.
    """
    spss.StartDataStep()
    try:
        datasetObj = spss.Dataset(name=name)
        # Declare the variables first ...
        for var_name, var_type in zip(vars, types):
            datasetObj.varlist.append(var_name, var_type)
        # ... then append the data, one case per row.
        for row in data:
            datasetObj.cases.append(list(row))
    finally:
        # Without this, an exception above would skip EndDataStep entirely.
        spss.EndDataStep()
## END HELPER FUNCTION | |
# Request the CSV export and parse it completely while the connection is open.
response = urllib.urlopen(base_url + "?%s" % params)
try:
    rows_from_csv = list(csv.reader(response))
finally:
    # Release the HTTP connection even if parsing fails.
    response.close()

# Without a header row there is nothing to build variables from; fail with a
# clear message instead of an IndexError on rows_from_csv[0].
if not rows_from_csv:
    raise ValueError("No data returned from %s" % base_url)

# Strip every non-word character (anything other than letters, digits and
# underscore) from the CSV headers before using them as variable names.
header_vars = [re.sub(r'\W+', '', column_name) for column_name in rows_from_csv[0]]
# Every column is declared with type 50 (a string variable of width 50).
# NOTE(review): values longer than 50 characters presumably get truncated - confirm.
header_types = [50] * len(header_vars)

# Use the helper function to create the dataset and import the data. The
# variables are declared there, so no separate dataset is created here first
# (doing so only left behind an extra dataset with no cases).
SPSSData(data=rows_from_csv[1:],vars=header_vars,types=header_types,name=None)
END PROGRAM. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
* Start from an empty dataset in memory.
clear
* Download the CSV export from the Socrata endpoint; the URL-encoded limit
* parameter caps the number of rows returned. The replace option lets the
* script be re-run when data_from_socrata.csv already exists.
copy "https://mydata.iadb.org/resource/kbuu-6kud.csv?%24limit=100000000" data_from_socrata.csv, replace
* Load the downloaded file; import delimited assumes the .csv extension
* when the filename is given without one.
import delimited data_from_socrata
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment