Last active
November 12, 2015 11:28
-
-
Save divergentdave/df9d3dcebe1f571a1d11 to your computer and use it in GitHub Desktop.
eCFR test harness for citation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
var Citation = require('../citation'); | |
var fs = require('fs'); | |
var sax = require('sax'); | |
var unzip = require('unzip2'); | |
/**
 * Build a sax stream that checks the section numbers of a CFR XML document
 * against the citation parser.
 *
 * For the text of every SECTNO element that is not nested inside a CONTENTS
 * element, the string "<title> CFR <text>" is passed to Citation.find().
 * Nothing is printed when exactly one citation comes back and its `match`
 * equals the input; every other outcome is reported with console.log.
 *
 * <title> is taken from the most recent TITLENUM text with 'Title ' removed.
 * If no TITLENUM text has been seen yet, it is still undefined.
 *
 * @returns {Object} the stream created by sax.createStream, with the
 *     opentag, closetag and text handlers attached; pipe XML into it.
 */
function parserFactory() {
  var stream = sax.createStream(true, {trim: true});
  // Nesting depth of CONTENTS elements; text is ignored while this is non-zero.
  var in_contents = 0;
  // Names of the currently open elements, innermost last. Tracked here rather
  // than read from sax's private `_parser` state, which is not part of its
  // documented API.
  var open_tags = [];
  var title;

  stream.on('opentag', function(node) {
    open_tags.push(node.name);
    if (node.name === 'CONTENTS') {
      in_contents++;
    }
  });

  stream.on('closetag', function(name) {
    open_tags.pop();
    if (name === 'CONTENTS') {
      in_contents--;
    }
  });

  stream.on('text', function(text) {
    if (in_contents !== 0) {
      return;
    }
    var tag_name = open_tags[open_tags.length - 1];
    if (tag_name === 'TITLENUM') {
      title = text.replace('Title ', '');
      return;
    }
    if (tag_name !== 'SECTNO') {
      return;
    }
    var input = title + ' CFR ' + text;
    var output = Citation.find(input).citations;
    if (output.length === 0) {
      console.log("Did not parse " + input);
    } else if (output.length > 1) {
      console.log("More than one match found for " + input);
      console.log(output);
    } else if (input !== output[0].match) {
      // Exactly one citation, but it does not span the whole input string.
      console.log("Incorrect match found for " + input);
    }
  });

  return stream;
}
// Command-line entry point: every argument is treated as the path of a zip
// archive. Each 'File' entry in an archive has its name printed and is piped
// through a fresh parser from parserFactory(); other entries are drained.
if (process.argv.length <= 2) {
  console.error("Usage: Download a bulk data zip file from http://www.gpo.gov/fdsys/bulkdata/CFR, then pass its name as a command line argument");
  // Report the misuse to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
}
process.argv.slice(2).forEach(function(zip_path) {
  fs.createReadStream(zip_path)
    // pipe() does not forward source errors, so a missing or unreadable
    // archive has to be caught on the read stream itself.
    .on('error', function(err) {
      console.error("Could not read " + zip_path + ": " + err.message);
      process.exitCode = 1;
    })
    .pipe(unzip.Parse())
    .on('entry', function(entry) {
      if (entry.type === 'File') {
        console.log(entry.path);
        entry.pipe(parserFactory());
      } else {
        // Unused entries must still be consumed so the unzip stream keeps going.
        entry.autodrain();
      }
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment