Skip to content

Instantly share code, notes, and snippets.

@myndzi
Created August 21, 2014 20:46
Show Gist options
  • Save myndzi/384850ecddd39190e9c7 to your computer and use it in GitHub Desktop.
Save myndzi/384850ecddd39190e9c7 to your computer and use it in GitHub Desktop.
'use strict';
var Transform = require('stream').Transform,
inherits = require('util').inherits;
var noBuf = new Buffer(0);
module.exports = factory;
function factory(start, end) {
var start = new BadMatchTable(start),
end = new BadMatchTable(end);
return function () {
return new SpliceStream(start, end);
}
}
function SpliceStream(start, end) {
Transform.call(this);
this.startTable = start;
this.endTable = end;
this.state = 0;
this.prevChunk = noBuf;
this.chunks = [ ];
}
inherits(SpliceStream, Transform);
SpliceStream.prototype._transform = function (chunk, encoding, callback) {
this.searchChunk(chunk, 0, 0);
callback();
}
SpliceStream.prototype.searchChunk = function (chunk, searchIdx) {
var match = 0;
//console.log('state', this.state, 'chunk:', chunk.toString());
switch (this.state) {
// looking for start of data
case 0:
// pathological case -- stored chunk and current chunk together
// are shorter than search string: store their concatenation and
// wait for another chunk
if (this.startTable.length > chunk.length + this.prevChunk.length) {
//console.log('needle longer than haystack:', this.startTable.length, chunk.length + this.prevChunk.length);
this.prevChunk = Buffer.concat([this.prevChunk, chunk]);
break;
}
// look for our starting search pattern
match = this.search(chunk, this.startTable, searchIdx);
//console.log('search returned', match);
if (match === -1) {
// store the current chunk for searching back into in
// case the match spans a seam
this.prevChunk = chunk;
break;
}
// match points to start of match, we want to
// record data starting after match; adjust accordingly
searchIdx = match + this.startTable.length;
// ensure we don't search "backwards" any more
this.prevChunk = noBuf;
this.state++;
this.searchChunk(chunk, match + this.startTable.length);
break;
// recording all data, looking for end of data
case 1:
// look for our ending search pattern
//console.log('searching for end from', searchIdx);
//console.log('searching in', chunk.slice(searchIdx).toString());
match = this.search(chunk, this.endTable, searchIdx);
//console.log('ending search returned', match);
// no match, record everything from searchIdx on
// searchIdx is 0 unless this chunk was also
// the chunk which the start pattern was found in
if (match === -1) {
this.chunks.push(chunk.slice(searchIdx));
break;
}
// match, push the last of the data
this.chunks.push(chunk.slice(searchIdx, match));
this.push(Buffer.concat(this.chunks));
this.chunks.length = 0;
// go back to looking for a beginning
this.state--;
this.searchChunk(chunk, match + this.endTable.length);
// signal end of stream
//this.push(null);
break;
}
};
SpliceStream.prototype.search = function (chunk, table, idx) {
//console.log('searching for', table.needle.toString());//, 'in', chunk.toString());
var prevChunk = this.prevChunk,
needle = table.needle,
last = table.last,
length = table.length,
plen = prevChunk.length,
clen = chunk.length,
// whether to search back into 'this.prevChunk' or not
searchBack = (this.prevChunk !== noBuf);
var i = idx, n = 0, m = 0, f = 0, found = false;
// i = start point of reverse search
// n = current position in needle
// m = current position in chunk / prevChunk
// f = first matched character / bad match lookup index
if (!searchBack) {
// skip the first few characters
i = Math.max(i, last);
}
var toSearch;
//console.log('needle:', needle.toString());
all:
while (i < clen && !found) {
// if we fail a match spanning the seam, toSearch
// can still be prevChunk when we reset the pointer
// for another try; make sure it starts in 'chunk'
toSearch = chunk;
//console.log('searching from chunk @', i, ':', toSearch.slice(i).toString());
m = i;
n = last;
f = toSearch[m];
// match characters backwards from end of needle
//console.log('start comparison', String.fromCharCode(toSearch[m]), String.fromCharCode(needle[n]), m, n);
while (toSearch[m--] === needle[n--]) {
//console.log('comparing', String.fromCharCode(toSearch[m+1]), String.fromCharCode(needle[n+1]), m+1, n+1);
if (n < 0) {
// we ran out of needle to compare, everything matched
//console.log('found a match from', m+1, n+1, 'in', toSearch.toString(), needle.toString());
found = true;
break all;
} else if (m < 0) {
// we ran of the front of the current chunk
if (!searchBack) { break all; }
// we have a previous chunk to search back into
// set the chunk pointer to the end of that chunk
m += plen;
// and set the chunk we're searching in
toSearch = prevChunk;
}
}
// skip ahead as much as we can
i += table[f];
}
// we can get here by completing
if (!found) { return -1; }
if (toSearch === prevChunk) {
// match started in previous chunk, but obviously
// ended in this one; adjust pointer back
m -= plen;
}
// return the offset
return m + 1;
};
function BadMatchTable(needle) {
if (!Buffer.isBuffer(needle)) {
needle = new Buffer(needle);
}
var i, len = needle.length, last = len - 1;
for (i = 0; i < 256; i++) {
this[i] = len;
}
// do not calculate a value for the last character
for (i = 0; i < last; i++) {
this[needle[i]] = last - i;
}
this.needle = needle;
this.length = len;
this.last = last;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment