Created
July 19, 2012 18:09
-
-
Save gasp/3145739 to your computer and use it in GitHub Desktop.
run it with nodejs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// split.js
// Splits a large file on newlines.
// Uses a carry-over buffer for lines that span several read chunks.
//
// Created by gaspard on 2012-07-19.
// wtfpl
//
var fs = require('fs');

// Source file to split; one output file per line is written next to it,
// named '<file>-line<N>.txt'.
var file = __dirname + '/database.diff';
var stat = fs.statSync(file);

// Index of the next line to write; also used in the output file name.
var line = 0;

// Splitting state shared between read chunks:
//   lines       - pieces of the chunk currently being processed
//   superfluous - trailing partial line of the previous chunk, i.e. text
//                 that has not been terminated by '\n' yet
//   size        - kept so the object keeps its original shape; it is no
//                 longer used as a threshold, because skipping chunks
//                 smaller than it silently dropped data
var buffer = {size:2048,lines:[],superfluous:''};

// Permissions requested for the generated files (octal 0777). Built with
// parseInt so the value is valid in both sloppy and strict mode.
var OUTPUT_MODE = parseInt('0777', 8);

console.log(stat.size+"octets");

/**
 * Writes one complete line to its own output file and advances the global
 * line counter.
 *
 * The write is synchronous on purpose: it guarantees that at most one output
 * file descriptor is open at any time, however many lines a chunk contains.
 *
 * @param {string} text - line content, without its trailing newline
 */
function writeLine(text){
  console.log(' + writing line '+line);
  console.log(' it contains '+text.length + ' bytes');
  fs.writeFileSync(file + '-line'+line+'.txt', text, {flag:'w',mode:OUTPUT_MODE});
  line++;
}

var rs = fs.createReadStream(file); // read stream
// NOTE: 'ascii' decoding clears the high bit of every byte, so non-ASCII
// input is not preserved verbatim. Kept as in the original script.
rs.setEncoding('ascii');

rs.on('data',function(content){
  console.log('reading ' + content.length + 'bytes');
  // Prepend the unfinished line left over from the previous chunk, so a
  // line spanning several chunks is reassembled before being written.
  buffer.lines = (buffer.superfluous + content).split('\n');
  console.log(buffer.lines.length + ' lines found');
  console.log(' > new buffer iteration');
  // The last piece is not newline-terminated yet: it REPLACES the carry-over
  // (appending to it would duplicate already-consumed text). When the chunk
  // holds no newline at all, this piece is the whole accumulated text.
  buffer.superfluous = buffer.lines.pop();
  for(var i=0;i<buffer.lines.length;i++){
    writeLine(buffer.lines[i]);
  }
});

rs.on('end',function(){
  // Flush a final line that has no trailing newline. An empty carry-over
  // means the file ended with '\n', so there is nothing left to write.
  if(buffer.superfluous.length>0){
    writeLine(buffer.superfluous);
  }
  buffer.superfluous = '';
  buffer.lines = [];
});

rs.on('error',function(err){
  // Report read failures explicitly instead of relying on the default
  // uncaught 'error' event crash.
  console.error('failed to read ' + file + ': ' + err.message);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment