Skip to content

Instantly share code, notes, and snippets.

@gasp
Created July 19, 2012 18:09
Show Gist options
  • Save gasp/3145739 to your computer and use it in GitHub Desktop.
Save gasp/3145739 to your computer and use it in GitHub Desktop.
Run it with Node.js (`node split.js`).
//
// split.js
// parse large files by new lines.
// uses buffer for very long lines
//
// Created by gaspard on 2012-07-19.
// wtfpl
//
var fs = require('fs');
var file = __dirname + '/database.diff';
var stat = fs.statSync(file);
var line = 0; // index of the next output file (file + '-line<N>.txt')
// Shared parsing state:
//   size        - kept for backward compatibility; it no longer gates
//                 processing, because skipping chunks of <= size characters
//                 silently dropped data (typically the tail of the file)
//   lines       - complete lines extracted from the most recent chunk
//   superfluous - trailing partial line carried over to the next chunk
var buffer = {size:2048,lines:[],superfluous:''};
console.log(stat.size+"octets");

// Writes one complete line to its own output file and advances the counter.
// The write is synchronous on purpose: no file descriptor stays open and the
// read stream needs no pause()/resume() juggling for backpressure.
function writeLine(text){
    console.log(' + writing line '+line);
    console.log(' it contains '+text.length + ' bytes');
    // parseInt('777', 8) is the numeric form of the original '0777' mode.
    fs.writeFileSync(file + '-line'+line+'.txt', text, {flag:'w',mode:parseInt('777', 8)});
    line++;
}

var rs = fs.createReadStream(file); // read stream
rs.setEncoding('ascii');

// Each chunk is glued onto the partial line left by the previous chunk and
// then split on '\n'. Every piece except the last is a complete line; the
// last piece may continue in the next chunk, so it becomes the new carry.
rs.on('data',function(content){
    console.log('reading ' + content.length + 'bytes');
    console.log(' > new buffer iteration');
    var pieces = (buffer.superfluous + content).split('\n');
    // Replace (do not append to) the carry: the old carry is already part of
    // pieces[0], so appending would duplicate it into later lines.
    buffer.superfluous = pieces.pop();
    buffer.lines = pieces;
    console.log(buffer.lines.length + ' lines found');
    for(var i=0;i<buffer.lines.length;i++){
        writeLine(buffer.lines[i]);
    }
});

// Flush the final line when the file does not end with a newline.
rs.on('end',function(){
    if(buffer.superfluous.length>0){
        writeLine(buffer.superfluous);
        buffer.superfluous = '';
    }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment