Created
August 21, 2014 20:46
-
-
Save myndzi/384850ecddd39190e9c7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
var Transform = require('stream').Transform, | |
inherits = require('util').inherits; | |
var noBuf = new Buffer(0); | |
module.exports = factory; | |
function factory(start, end) { | |
var start = new BadMatchTable(start), | |
end = new BadMatchTable(end); | |
return function () { | |
return new SpliceStream(start, end); | |
} | |
} | |
function SpliceStream(start, end) { | |
Transform.call(this); | |
this.startTable = start; | |
this.endTable = end; | |
this.state = 0; | |
this.prevChunk = noBuf; | |
this.chunks = [ ]; | |
} | |
inherits(SpliceStream, Transform); | |
SpliceStream.prototype._transform = function (chunk, encoding, callback) { | |
this.searchChunk(chunk, 0, 0); | |
callback(); | |
} | |
SpliceStream.prototype.searchChunk = function (chunk, searchIdx) { | |
var match = 0; | |
//console.log('state', this.state, 'chunk:', chunk.toString()); | |
switch (this.state) { | |
// looking for start of data | |
case 0: | |
// pathological case -- stored chunk and current chunk together | |
// are shorter than search string: store their concatenation and | |
// wait for another chunk | |
if (this.startTable.length > chunk.length + this.prevChunk.length) { | |
//console.log('needle longer than haystack:', this.startTable.length, chunk.length + this.prevChunk.length); | |
this.prevChunk = Buffer.concat([this.prevChunk, chunk]); | |
break; | |
} | |
// look for our starting search pattern | |
match = this.search(chunk, this.startTable, searchIdx); | |
//console.log('search returned', match); | |
if (match === -1) { | |
// store the current chunk for searching back into in | |
// case the match spans a seam | |
this.prevChunk = chunk; | |
break; | |
} | |
// match points to start of match, we want to | |
// record data starting after match; adjust accordingly | |
searchIdx = match + this.startTable.length; | |
// ensure we don't search "backwards" any more | |
this.prevChunk = noBuf; | |
this.state++; | |
this.searchChunk(chunk, match + this.startTable.length); | |
break; | |
// recording all data, looking for end of data | |
case 1: | |
// look for our ending search pattern | |
//console.log('searching for end from', searchIdx); | |
//console.log('searching in', chunk.slice(searchIdx).toString()); | |
match = this.search(chunk, this.endTable, searchIdx); | |
//console.log('ending search returned', match); | |
// no match, record everything from searchIdx on | |
// searchIdx is 0 unless this chunk was also | |
// the chunk which the start pattern was found in | |
if (match === -1) { | |
this.chunks.push(chunk.slice(searchIdx)); | |
break; | |
} | |
// match, push the last of the data | |
this.chunks.push(chunk.slice(searchIdx, match)); | |
this.push(Buffer.concat(this.chunks)); | |
this.chunks.length = 0; | |
// go back to looking for a beginning | |
this.state--; | |
this.searchChunk(chunk, match + this.endTable.length); | |
// signal end of stream | |
//this.push(null); | |
break; | |
} | |
}; | |
SpliceStream.prototype.search = function (chunk, table, idx) { | |
//console.log('searching for', table.needle.toString());//, 'in', chunk.toString()); | |
var prevChunk = this.prevChunk, | |
needle = table.needle, | |
last = table.last, | |
length = table.length, | |
plen = prevChunk.length, | |
clen = chunk.length, | |
// whether to search back into 'this.prevChunk' or not | |
searchBack = (this.prevChunk !== noBuf); | |
var i = idx, n = 0, m = 0, f = 0, found = false; | |
// i = start point of reverse search | |
// n = current position in needle | |
// m = current position in chunk / prevChunk | |
// f = first matched character / bad match lookup index | |
if (!searchBack) { | |
// skip the first few characters | |
i = Math.max(i, last); | |
} | |
var toSearch; | |
//console.log('needle:', needle.toString()); | |
all: | |
while (i < clen && !found) { | |
// if we fail a match spanning the seam, toSearch | |
// can still be prevChunk when we reset the pointer | |
// for another try; make sure it starts in 'chunk' | |
toSearch = chunk; | |
//console.log('searching from chunk @', i, ':', toSearch.slice(i).toString()); | |
m = i; | |
n = last; | |
f = toSearch[m]; | |
// match characters backwards from end of needle | |
//console.log('start comparison', String.fromCharCode(toSearch[m]), String.fromCharCode(needle[n]), m, n); | |
while (toSearch[m--] === needle[n--]) { | |
//console.log('comparing', String.fromCharCode(toSearch[m+1]), String.fromCharCode(needle[n+1]), m+1, n+1); | |
if (n < 0) { | |
// we ran out of needle to compare, everything matched | |
//console.log('found a match from', m+1, n+1, 'in', toSearch.toString(), needle.toString()); | |
found = true; | |
break all; | |
} else if (m < 0) { | |
// we ran of the front of the current chunk | |
if (!searchBack) { break all; } | |
// we have a previous chunk to search back into | |
// set the chunk pointer to the end of that chunk | |
m += plen; | |
// and set the chunk we're searching in | |
toSearch = prevChunk; | |
} | |
} | |
// skip ahead as much as we can | |
i += table[f]; | |
} | |
// we can get here by completing | |
if (!found) { return -1; } | |
if (toSearch === prevChunk) { | |
// match started in previous chunk, but obviously | |
// ended in this one; adjust pointer back | |
m -= plen; | |
} | |
// return the offset | |
return m + 1; | |
}; | |
function BadMatchTable(needle) { | |
if (!Buffer.isBuffer(needle)) { | |
needle = new Buffer(needle); | |
} | |
var i, len = needle.length, last = len - 1; | |
for (i = 0; i < 256; i++) { | |
this[i] = len; | |
} | |
// do not calculate a value for the last character | |
for (i = 0; i < last; i++) { | |
this[needle[i]] = last - i; | |
} | |
this.needle = needle; | |
this.length = len; | |
this.last = last; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment