Last active
August 29, 2015 14:21
-
-
Save Gooseus/f499ae90889694965c89 to your computer and use it in GitHub Desktop.
Finding Frequent Pairs in Lists of Items
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
/***** | |
Node Pair Stream | |
by Shawn Marincas | |
Consumes a CSV as a stream of item sets and outputs lines of comma-separated pairs of items that appear on the specified number of lines | |
- input : Stream input source, either a filename or defaults to STDIN for pipes | |
- threshold : Number of lines a pairs must appear on before being emitted | |
******/ | |
var input = input = process.argv[2] ? require("fs").createReadStream(process.argv[2]) : process.stdin, | |
threshold = process.argv[3] ? parseInt(process.argv[3],10) : 50, | |
// our line emitter, sweet built-in node module I didn't know about until today | |
reader = require("readline").createInterface({ | |
input: input, | |
terminal: false | |
}), | |
// holds ours pair counts | |
counters = {}; | |
// our main algorithm | |
function accumulate_pairs(set) { | |
var size = set.length, | |
i = 0, | |
j = 0; | |
if(size>1) { | |
for(; i<size; i++) { | |
for(; j<size; j++) { | |
// skip if the artists are the same | |
if(set[i]==set[j]) { | |
continue; | |
} | |
// do a quick alphabetical check to avoid counting reversed pairs | |
var pair = set[i] > set[j] ? set[i]+','+set[j] : set[j]+','+set[i]; | |
// either create our pair, or increment it; | |
counters[pair] = counters[pair] ? counters[pair]+1 : 1; | |
// if we've hit our threshold, we can emit it, it's ok | |
if(counters[pair] == threshold) { | |
// write the pair to stdout | |
process.stdout.write(pair+'\n'); | |
} | |
} | |
} | |
} | |
} | |
// called for each line emitted by text stream | |
function consume_line(line) { | |
var current = line.toString().split(","); | |
accumulate_pairs(current); | |
} | |
// listen to consume emitted lines and exit process with closed stream | |
reader.on("line", consume_line); | |
reader.on("close", process.exit); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment