Skip to content

Instantly share code, notes, and snippets.

@Snack-X
Created March 6, 2014 14:13
Show Gist options
  • Save Snack-X/9390580 to your computer and use it in GitHub Desktop.
Save Snack-X/9390580 to your computer and use it in GitHub Desktop.
var fs = require("fs");
/**
 * Truncate a number toward negative infinity at four decimal places.
 *
 * @param {number} i - Value to truncate.
 * @returns {number} `i` with everything past the fourth decimal floored away.
 */
function to_4(i) {
  var scaled = i * 10000;
  var floored = Math.floor(scaled);
  return floored / 10000;
}
// Variables
// Character -> occurrence count. A prototype-less object is used as the
// dictionary (instead of an Array) so digit characters are ordinary keys
// rather than array indices and no inherited property can shadow a lookup.
// It is still read with bracket access and enumerated with for...in.
var CHAR_FREQ = Object.create(null);
// Number of log lines that matched MESSAGE_R.
var TOTAL_LINE = 0;
// Number of characters counted across all matched message bodies.
var TOTAL_CHAR = 0;
// Number of counted characters that matched KOR_R.
var TOTAL_KOR_CHAR = 0;
// Number of counted characters that did not match KOR_R but matched ENG_R.
var TOTAL_ENG_CHAR = 0;
// Wall-clock processing time in seconds; filled in after the log walk.
var TOTAL_TIME = 0;
// Number of log files read.
var TOTAL_FILE = 0;
// Matches one chat message line. Capture groups:
//   1 - the two digits before the colon (presumably the hour of an HH:MM
//       timestamp; confirm against the log format)
//   2 - the text between "<" and ">" after an optional ~&@%+ prefix and spaces
//   3 - an optional "| " separator
//   4 - the message body (the only group the script reads)
var MESSAGE_R = /([0-9]{2}):[0-9]{2} <[~&@%+]? *(.*?)> (\| )?(.*)/;
// Hangul compatibility jamo (U+3131-U+3163) and Hangul syllables (U+AC00-U+D7A3).
var KOR_R = /[ㄱ-ㅣ가-힣]+/;
// ASCII digits and letters; note that digits count as "english" here.
var ENG_R = /[0-9A-Za-z]+/;
// --------------------------------------------------
// Read every log file of every channel folder and tally the characters of
// each message line into CHAR_FREQ and the TOTAL_* counters.
console.log("Log process start.");
var START_TIME = Date.now();
// IRC log root folder
var log_folder_list = fs.readdirSync("/home/irclog/");
// All channels
// Arrays are walked with index loops: for...in yields string keys and would
// also visit any enumerable property added to Array.prototype.
for(var folder = 0 ; folder < log_folder_list.length ; folder++) {
  var directory = "/home/irclog/" + log_folder_list[folder] + "/";
  var log_file_list = fs.readdirSync("/home/irclog/" + log_folder_list[folder]);
  // All files
  for(var file = 0 ; file < log_file_list.length ; file++) {
    var log_file = log_file_list[file];
    var log_content = fs.readFileSync(directory + log_file, "utf8");
    var log_lines = log_content.split("\n");
    TOTAL_FILE += 1;
    for(var l = 0 ; l < log_lines.length ; l++) {
      var line = log_lines[l];
      // This line is message
      var msg_match = line.match(MESSAGE_R);
      if(!msg_match) continue;
      var message = msg_match[4];
      TOTAL_LINE += 1;
      for(var i = 0 ; i < message.length ; i++) {
        var symbol = message[i];
        // A character outside the BMP is stored as a high + low surrogate
        // pair; keep the pair together so it is counted once as one character
        // instead of as two unprintable halves.
        var unit = message.charCodeAt(i);
        if(unit >= 0xD800 && unit <= 0xDBFF && i + 1 < message.length) {
          var low = message.charCodeAt(i + 1);
          if(low >= 0xDC00 && low <= 0xDFFF) {
            symbol += message[i + 1];
            i += 1;
          }
        }
        // Counted per character (not per UTF-16 unit) so the total stays the
        // exact sum of all CHAR_FREQ entries.
        TOTAL_CHAR += 1;
        if(CHAR_FREQ[symbol] === undefined) CHAR_FREQ[symbol] = 1;
        else CHAR_FREQ[symbol] += 1;
        if(KOR_R.test(symbol)) TOTAL_KOR_CHAR += 1;
        else if(ENG_R.test(symbol)) TOTAL_ENG_CHAR += 1;
      }
    }
    // Progress report every 250 files.
    if(TOTAL_FILE % 250 === 0) console.log(TOTAL_FILE + " files processed.");
  }
}
var END_TIME = Date.now();
// Date.now() is in milliseconds; store seconds.
TOTAL_TIME = (END_TIME - START_TIME) / 1000;
console.log("Log process done.");
// Sorting
// Flatten the frequency table into [character, count] pairs, then order the
// pairs from the most frequent character to the least frequent one.
var CHAR_TUPLE = [];
for(var k in CHAR_FREQ) {
  CHAR_TUPLE[CHAR_TUPLE.length] = [k, CHAR_FREQ[k]];
}
CHAR_TUPLE.sort(function(left, right) {
  var left_count = left[1];
  var right_count = right[1];
  return right_count - left_count;
});
// Output message
/**
 * Build the header lines shared by all three reports.
 *
 * @param {string} char_total_line - The report-specific character total line.
 * @returns {string[]} A fresh array of the six header lines.
 */
function report_header(char_total_line) {
  var header = [];
  header.push("Total files - " + TOTAL_FILE);
  header.push("Total messages - " + TOTAL_LINE);
  header.push(char_total_line);
  header.push("Time took to process - " + TOTAL_TIME + " second(s)");
  header.push("--------------------------------------------------");
  header.push("Rank\tChar\tFreq\t\t%\tC%");
  return header;
}
// One header per report; the ranking rows are appended to these arrays later.
var output = report_header("Total characters - " + TOTAL_CHAR);
var output_kor = report_header("Total korean characters - " + TOTAL_KOR_CHAR);
var output_eng = report_header("Total english characters - " + TOTAL_ENG_CHAR);
// Rank reports
/**
 * Append one ranking row per selected character to a report and write the
 * report to disk.
 *
 * Rows are taken from CHAR_TUPLE in its current order. Each row holds the
 * rank, the character, its count, its share of `total` in percent, and the
 * running cumulative share, with both percentages truncated by to_4.
 *
 * @param {string} file_name - Path handed to fs.writeFileSync.
 * @param {string[]} lines - Header lines; ranking rows are pushed onto this array.
 * @param {number} total - Denominator for the percentage columns.
 * @param {RegExp} [filter] - When given, only characters matching it are ranked.
 */
function write_rank(file_name, lines, total, filter) {
  var rank = 1;
  var crate = 0;
  // Index loop: CHAR_TUPLE is an array, so for...in is avoided.
  for(var idx = 0 ; idx < CHAR_TUPLE.length ; idx++) {
    var character = CHAR_TUPLE[idx][0];
    var freq = CHAR_TUPLE[idx][1];
    if(filter && !filter.test(character)) continue;
    // Declared locally; the original assigned an undeclared `rate`, which
    // creates an implicit global and throws in strict mode.
    var rate = (freq / total) * 100;
    crate += rate;
    lines.push([
      rank, "\t"
    , character, "\t"
    , freq, "\t\t"
    , to_4(rate), "\t"
    , to_4(crate)
    ].join(""));
    rank += 1;
  }
  fs.writeFileSync(file_name, lines.join("\n"));
  console.log(file_name + " generated.");
}
// Total character rank
write_rank("output.txt", output, TOTAL_CHAR);
// Korean character rank
write_rank("output_kor.txt", output_kor, TOTAL_KOR_CHAR, KOR_R);
// English character rank
write_rank("output_eng.txt", output_eng, TOTAL_ENG_CHAR, ENG_R);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment