Skip to content

Instantly share code, notes, and snippets.

@au5ton
Created April 22, 2016 02:54
Show Gist options
  • Save au5ton/7075f9b1d48d708a4a5c28fd503c9fe1 to your computer and use it in GitHub Desktop.
Save au5ton/7075f9b1d48d708a4a5c28fd503c9fe1 to your computer and use it in GitHub Desktop.
Pumping out JSON objects
var fs = require('fs');
var bytes = require('bytes');
var sizeof = require('object-sizeof');
var readline = require('readline');
var exec = require('child_process').exec;
// Input file; each non-empty line is parsed as one JSON record.
var filePath = 'tpb2.json';

// Text received from the read stream that has not been split into lines yet.
var buf = '';

// Running tallies: records parsed, records whose subcategory or category is
// known, and lines handled so far (drives the progress display).
var total = 0;
var cat = 0;
var lineProgress = 0;

// Starts out as the file size in bytes (statSync also throws early if the
// file is missing); replaced by the `wc -l` line count before streaming starts.
var totalLines = fs.statSync(filePath).size;

// Distinct category / subcategory names in first-seen order.
var cats = [];
var subCats = [];

console.log('Analyzing...');
exec('wc -l '+filePath, function (error, results) {
totalLines = parseInt(results.trim().split(' ')[0]);
console.log(totalLines+' lines in file.');
console.log('Progress:');
// Read the file incrementally as UTF-8 text.
var stream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
stream.on('data', function(chunk) {
  // Append the chunk to the shared buffer, then drain every complete line
  // the buffer now contains.
  buf = buf + chunk.toString();
  pump();
});
// Once the stream is exhausted, count any final unterminated line and print
// the summary.
stream.on('end', function() {
  // A file that does not end in '\n' leaves its last record sitting in `buf`,
  // where pump() never sees it; hand it to processLine() so it is counted.
  if (buf.trim().length > 0) {
    processLine(buf);
  }
  buf = '';

  // Share of records with a known subcategory or category, as a real
  // percentage rounded to two decimals. An empty file reports 0 instead of
  // the NaN that 0/0 would produce.
  var percent = total > 0 ? Math.round(cat / total * 10000) / 100 : 0;
  console.log('Categorized: '+percent+'% ('+cat+'/'+total+')');
  console.log('Found categories: '+JSON.stringify(cats));
  console.log('Found subcategories: '+JSON.stringify(subCats));
});
/**
 * Drains every complete line from the shared `buf` string.
 *
 * Each non-empty run of text that ends in '\n' is passed to processLine()
 * (without the newline); empty lines are dropped. Whatever follows the last
 * newline stays in `buf` until more data arrives.
 */
function pump() {
  for (var newlineAt = buf.indexOf('\n'); newlineAt !== -1; newlineAt = buf.indexOf('\n')) {
    // A newline at index 0 means an empty line: nothing to hand off.
    if (newlineAt > 0) {
      processLine(buf.substring(0, newlineAt));
    }
    // Drop the consumed line together with its newline.
    buf = buf.substring(newlineAt + 1);
  }
}
/**
 * Tallies one line of the input file and refreshes the progress display.
 *
 * A trailing CR is stripped and empty lines are ignored. Otherwise the line
 * is parsed as JSON and counted in `total`. It is also counted in `cat` when
 * its 'Subcategory' is not 'Unknown' (the name is recorded in `subCats`), or
 * failing that when its 'Category' is not 'Unknown' (recorded in `cats`).
 * Note that a category is therefore only recorded for records whose
 * subcategory is 'Unknown'.
 *
 * @param {string} line - One line of text without its terminating '\n'.
 * @throws {SyntaxError} If the line is not valid JSON (from JSON.parse).
 */
function processLine(line) {
  // Drop a trailing CR (0x0D) so CRLF-terminated input parses like LF input.
  var text = line.charAt(line.length - 1) === '\r' ? line.slice(0, -1) : line;
  if (text.length === 0) {
    return;
  }

  var obj = JSON.parse(text);
  var subcategory = obj['Subcategory'];
  var category = obj['Category'];

  if (subcategory !== 'Unknown') {
    if (subCats.indexOf(subcategory) === -1) {
      subCats.push(subcategory);
    }
    cat++;
  } else if (category !== 'Unknown') {
    if (cats.indexOf(category) === -1) {
      cats.push(category);
    }
    cat++;
  }
  total++;
  lineProgress++;

  // Redraw the progress figure in place on the current terminal line.
  readline.clearLine(process.stdout, 0);
  readline.cursorTo(process.stdout, 0);
  // Truncate to three decimals numerically. parseInt() would stringify the
  // number first and misread exponent notation (parseInt(1e-7) === 1).
  var percent = Math.floor(lineProgress / totalLines * 100 * 1000) / 1000;
  process.stdout.write('[' + percent + '%]');
}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment