Created
May 4, 2015 18:37
-
-
Save technickle/18d066d7278471cc38ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script parses a CSV download from
# https://data.ny.gov/Government-Finance/New-York-State-Locality-Hierarchy-with-Websites/55k6-h6qq
# and parses each row's URI to add path, hostname, and global top-level domain columns.
# The resulting file is written to "locals-processed.csv" in the current directory.
require 'uri'
require 'csv' # must be lowercase: `require 'CSV'` raises LoadError on case-sensitive filesystems

INPUT_FILE = 'locals.csv'
OUTPUT_FILE = 'locals-processed.csv'
# The URI is assumed to be found in the 9th column (0-based index 8).
WEBSITE_COLUMN = 8
# Value of the website cell that identifies the column header row.
WEBSITE_HEADER = 'Website'
# Headers of the three columns appended to every row.
NEW_HEADERS = %w[path host gTLD].freeze

# Derives the three extra column values for a single website cell.
#
# @param website [String, nil] raw cell value from the website column
# @return [Array<String>] `[path, host, gTLD]`; any part that cannot be
#   determined (missing cell, no host in the URI, unparseable URI) is ""
def uri_columns(website)
  text = website.to_s.strip
  return ['', '', ''] if text.empty?

  # Parse once and reuse the result for both path and host.
  uri = URI.parse(text)
  # host is nil for scheme-less entries such as "www.example.org";
  # to_s keeps the gTLD derivation from raising NoMethodError on nil.
  host = uri.host.to_s
  [uri.path.to_s, host, host.split('.').last.to_s]
rescue URI::InvalidURIError => e
  # One malformed entry should not abort the whole run: report it and
  # leave the derived columns blank for that row.
  warn "could not parse website #{website.inspect}: #{e.message}"
  ['', '', '']
end

# Appends the three derived columns (or their headers) to a row in place.
#
# @param row [Array] one CSV row; mutated by appending three values
# @return [Array] the same row
def add_uri_columns(row)
  cell = row[WEBSITE_COLUMN]
  # The header row is recognised by its website cell holding the header text.
  row.concat(cell == WEBSITE_HEADER ? NEW_HEADERS : uri_columns(cell))
end

# Load an array of (row) arrays from the input file.
localgovs = CSV.read(INPUT_FILE)

# Process each row of the array.
localgovs.each { |row| add_uri_columns(row) }

# Write the results to the output file.
CSV.open(OUTPUT_FILE, 'wb') do |csv|
  localgovs.each { |row| csv << row }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment