Skip to content

Instantly share code, notes, and snippets.

@alastaircoote
Last active December 17, 2015 23:08
Show Gist options
  • Save alastaircoote/5686805 to your computer and use it in GitHub Desktop.
Save alastaircoote/5686805 to your computer and use it in GitHub Desktop.
Quick script to parse NYC subway turnstile data
# As the following indicates, this script relies on the csv and async npm modules.
# It reads all the files placed inside a "csvs" directory, in the same directory as this file.
csv = require "csv"
fs = require "fs"
async = require "async"
# Flattens the turnstile CSV files found in ./csvs into a single ./output.csv.
# Each input row carries three identifying fields (C/A, UNIT, SCP) followed by
# up to eight DATE/TIME/DESC/ENTRIES/EXITS groups; every populated group becomes
# one output row of the form "ca","unit","scp","dt","desc","entries","exits".

# Number of DATE/TIME/DESC/ENTRIES/EXITS groups on each input row.
READINGS_PER_ROW = 8

# Year prefixed to every timestamp; only the first two "-"-separated parts of
# DATEn are read from the data.
# NOTE(review): files that are not from 2013 will get the wrong year - confirm
# the DATE layout and derive the year from the data if needed.
YEAR = "2013"

# Column names handed to the csv parser so that each record arrives as an
# object keyed by name: C/A, UNIT, SCP, then DATE1..EXITS1 up to DATE8..EXITS8.
columns = ["C/A", "UNIT", "SCP"]
for n in [1..READINGS_PER_ROW]
  columns.push "DATE#{n}", "TIME#{n}", "DESC#{n}", "ENTRIES#{n}", "EXITS#{n}"

# Field names of the output header row.
outputArray = ["ca", "unit", "scp", "dt", "desc", "entries", "exits"]

# Wraps every field in double quotes and joins them with commas.
quoteRow = (fields) -> '"' + fields.join('","') + '"'

files = fs.readdirSync "./csvs"

# createWriteStream opens with the "w" flag by default, which truncates an
# existing file, so no separate exists/delete step is needed.
str = fs.createWriteStream "./output.csv", {encoding: "utf8"}
str.write quoteRow(outputArray)

# Files are processed one at a time so that output rows stay grouped by file.
async.eachSeries files, (f, cb) ->
  console.log "Processing #{f}..."
  toAdd = []

  # Make sure the async callback fires exactly once, even if the parser emits
  # both "error" and "end" for the same file.
  finished = false
  finish = (err) ->
    return if finished
    finished = true
    cb err

  parser = csv().from.path("./csvs/" + f, {columns: columns})

  parser.on "record", (r) ->
    for i in [1..READINGS_PER_ROW]
      # Groups are filled left to right; the first empty DATE ends the row.
      break unless r["DATE#{i}"]
      dateSplit = r["DATE#{i}"].split("-")
      dt = "#{YEAR}/#{dateSplit[0]}/#{dateSplit[1]} " + r["TIME#{i}"]
      toAdd.push quoteRow [
        r["C/A"]
        r["UNIT"]
        r["SCP"]
        dt
        r["DESC#{i}"]
        # Explicit radix: the counters are zero-padded decimal strings.
        parseInt(r["ENTRIES#{i}"], 10)
        parseInt(r["EXITS#{i}"], 10)
      ]
    return

  parser.on "end", ->
    # Skip the write for files that produced no rows to avoid blank lines.
    if toAdd.length > 0 then str.write("\n" + toAdd.join("\n"))
    finish()

  # Without this handler a failed read would never call cb and the series
  # would stall with the output stream left open.
  parser.on "error", (err) ->
    finish err
, (err) ->
  if err
    console.error "Failed to process turnstile data: #{err.message or err}"
    process.exitCode = 1
  # end() flushes any buffered writes before closing the file.
  str.end()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment