Skip to content

Instantly share code, notes, and snippets.

@lawrencejones
Last active August 29, 2015 14:04
Show Gist options
  • Save lawrencejones/85dde80a50300e2a9709 to your computer and use it in GitHub Desktop.
Save lawrencejones/85dde80a50300e2a9709 to your computer and use it in GitHub Desktop.
Small CSV filtering script, experimenting with promises

CSV Processor

Small script to help process data from a CSV file. Also an excuse to mess around with promises.

The first part of the script is just helpers; skip to Start Filtering to see the body.

fs = require 'fs'
_  = require 'underscore'
Q  = require 'q'
StreamSplitter = require 'stream-splitter'

Given a single line of CSV text, builds a record object keyed by column name.

# Turns one comma-separated line into a record object.
# The cells are paired positionally with the fixed column names below;
# no quoting or escaping is interpreted, the line is split on every comma.
extractRow = (str) ->
  columns = ['noSearches', 'hr', 'dy', 'mn', 'yr', 'time']
  cells = str.split ','
  _.object columns, cells

Given the data csv file, returns a promise that is resolved with an array of records.

# Reads `file` line by line and returns a promise for the parsed records.
#
# file - path of the CSV file to read (defaults to './data.csv').
#
# The promise resolves with an array of record objects (one per non-blank
# line, built by extractRow) once the splitter reports 'done', and rejects
# with the error if either the file stream or the splitter fails.
readData = (file = './data.csv') ->
  deferred = Q.defer()
  records = []

  source = fs.createReadStream file, flags: 'r'
  # pipe() does not forward 'error' events from the source stream, so a
  # missing or unreadable file must be handled here or it would surface
  # as an unhandled 'error' event instead of a rejected promise.
  source.on 'error', (e) -> deferred.reject e

  splitter = source.pipe StreamSplitter '\n'
  # Ask the splitter to hand tokens back as strings.
  splitter.encoding = 'utf8'

  splitter.on 'token', (token) ->
    # Skip blank / whitespace-only lines, parse everything else.
    for line in token.split('\n') when not /^\s*$/.test(line)
      records.push extractRow(line)
    return

  splitter.on 'done', -> deferred.resolve records
  splitter.on 'error', (e) -> deferred.reject e

  deferred.promise

Reads a line from stdin.

# Prompts on stdout (when a message is given) and reads one chunk of
# input from stdin.
#
# msg - optional text written to stdout before waiting for input.
#
# Returns a promise resolved with the trimmed input string.
readline = (msg) ->
  deferred = Q.defer()
  process.stdout.write msg if msg?

  stdin = process.stdin
  stdin.resume()
  stdin.setEncoding 'utf8'
  # `once` so that a single 'data' event answers a single prompt.
  stdin.once 'data', (chunk) -> deferred.resolve chunk.trim()

  deferred.promise

Wrapper around readline that succeeds only when the user gives an affirmative answer; any other answer rejects with 'User declined'.

# Asks a yes/no question and succeeds only on an affirmative answer.
#
# prompt - either a prompt string handed to readline, or a promise (such
#          as the result of an earlier readline call) for the answer.
#
# Returns a promise resolved with the answer when it matches one of
# 1, t, true, y, yes, ok, okay (case-insensitive), and rejected with
# Error 'User declined' otherwise.
#
# The previous `readline.apply arguments` passed `arguments` as `this`
# and no actual arguments, so a prompt string was silently dropped and a
# second stdin listener was registered when a promise was passed in.
yn = (prompt) ->
  affirmative = /^(?:1|t(?:rue)?|y(?:es)?|ok(?:ay)?)$/i

  pending =
    if typeof prompt?.then is 'function'
      prompt
    else
      readline prompt

  pending.then (res) ->
    throw Error 'User declined' unless affirmative.test res
    res

Produces a function that generates an array filter method, which will filter all duplicate elements from an array of records given equality on the configured key (default 'time').

# Builds a de-duplicating filter for arrays of records.
#
# key - record property used to decide equality (defaults to 'time').
#
# Returns a function taking an array of records and returning an array
# with one record per distinct value of `key`; when several records
# share a value, the last one seen is kept.
filterDuplicates = (key = 'time') ->
  (records) ->
    latestByKey = {}
    for record in records
      latestByKey[record[key]] = record
    _.values latestByKey

Outputs readable records

# Logs the records followed by a one-line count summary, then hands the
# same array back so the function can sit in the middle of a promise chain.
printRecords = (records) ->
  console.log records
  summary = """\n
  Found #{records.length} records in the supplied data.\n"""
  console.log summary
  records

Converts an array of records to CSV string.

# Serialises an array of records to CSV text.
#
# records - array of flat objects; the keys of the first record become
#           the header row, and each record contributes one row of its
#           values joined by commas (no quoting or escaping is applied).
#
# Returns the CSV string without a trailing newline, or '' when there are
# no records (previously `records[0]` was undefined in that case, which
# older underscore versions reject with a TypeError in `_.keys`).
csvify = (records) ->
  return '' unless records?.length

  header = _.keys(records[0]).join ','
  rows = records.map (record) -> _.values(record).join ','
  [header].concat(rows).join '\n'

Offers user chance to write new csv file, returning write promise.

# Offers to write the records to a new CSV file.
#
# records - array of records passed to csvify.
#
# Returns a promise that always resolves: with undefined after a
# successful write, or with true when the user declines or the write
# fails (a failure is reported on stderr rather than silently dropped).
offerCsv = (records) ->

  writeCsv = (file) ->
    # readline resolves with '' (never undefined) when the user just
    # presses enter, so a parameter default would never kick in.
    file or= './output.csv'
    console.log "Writing to #{file}..."
    Q.nfcall fs.writeFile, file, csvify(records), 'utf8'

  handleFailure = (err) ->
    # Declining is an expected outcome; anything else is a real problem
    # the user should hear about.
    unless err?.message is 'User declined'
      console.error "Could not write CSV file: #{err}"
    true

  yn(readline 'Would you like to write a new CSV file? ')
    .then(-> readline '\nPlease enter filename: ')
    .then(writeCsv)
    .then(-> console.log 'Done!\n')
    .catch(handleFailure)

Start filtering

# Exit status reported when the pipeline finishes; flipped to 1 if any
# step fails so callers of the script can detect the failure.
exitCode = 0

# Logs the failure and remembers that the run did not succeed.
reportFailure = (err) ->
  exitCode = 1
  console.log err

# Runs after success or failure alike (Q passes no arguments to
# `finally` callbacks) and terminates the process.
shutdown = ->
  console.log 'Exiting.'
  process.exit exitCode

# Read the data, drop duplicate timestamps, show the result and offer to
# save it.
readData()
  .then(filterDuplicates 'time')
  .then(printRecords)
  .then(offerCsv)
  .catch(reportFailure)
  .finally(shutdown)
  .done()
{
"name": "csv-processing",
"version": "0.0.0",
"description": "Small csv filter script, promise experimenting",
"main": "filter.litcoffee",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"dependencies": {
"underscore": "*",
"q": "*",
"stream-splitter": "*"
},
"author": "Lawrence Jones",
"license": "MIT"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment