Created
March 6, 2014 14:13
-
-
Save Snack-X/9390580 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require("fs");
var path = require("path");
/**
 * Truncates a number to at most four decimal places.
 *
 * The value is scaled by 10000, floored, and scaled back, so the result is
 * rounded toward negative infinity rather than to the nearest value.
 *
 * @param {number} i - Value to truncate.
 * @returns {number} `i` floored at the fourth decimal place.
 */
function to_4(i) {
  var scaled = i * 10000;
  return Math.floor(scaled) / 10000;
}
// Variables

// Character -> number of occurrences. This is a prototype-less dictionary
// rather than an Array: keys are characters (including digits), and an Array
// would turn digit keys into indices and expose built-ins such as "length".
var CHAR_FREQ = Object.create(null);

// Running totals, filled in while the log files are scanned.
var TOTAL_LINE = 0;      // lines that matched MESSAGE_R
var TOTAL_CHAR = 0;      // characters in all matched message texts
var TOTAL_KOR_CHAR = 0;  // characters matching KOR_R
var TOTAL_ENG_CHAR = 0;  // characters matching ENG_R (and not KOR_R)
var TOTAL_TIME = 0;      // processing time in seconds
var TOTAL_FILE = 0;      // log files read

// One chat message line: "HH:MM <[prefix]name> [| ]text".
//   group 1 - two-digit number before the colon
//   group 2 - text between "<" and ">" after an optional ~&@%+ prefix
//             (presumably the nickname - confirm against the log format)
//   group 3 - optional "| " marker
//   group 4 - the message text that gets counted
var MESSAGE_R = /([0-9]{2}):[0-9]{2} <[~&@%+]? *(.*?)> (\| )?(.*)/;

// Hangul compatibility jamo (ㄱ-ㅣ) and precomposed Hangul syllables (가-힣).
var KOR_R = /[ㄱ-ㅣ가-힣]+/;

// ASCII digits and letters.
var ENG_R = /[0-9A-Za-z]+/;
// --------------------------------------------------
// Scan every file of every channel folder under the log root and accumulate
// the per-character frequencies and the running totals.
console.log("Log process start.");
var START_TIME = Date.now();

// IRC log root folder; each entry in it is read as a folder of log files.
var LOG_ROOT = "/home/irclog/";
var log_folder_list = fs.readdirSync(LOG_ROOT);

// Splits a string into whole code points: a surrogate pair stays together as
// one character instead of being counted as two broken halves.
var CODE_POINT_R = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g;

// All channels
for(var folder = 0 ; folder < log_folder_list.length ; folder++) {
  var directory = path.join(LOG_ROOT, log_folder_list[folder]);
  var log_file_list = fs.readdirSync(directory);

  // All files
  for(var file = 0 ; file < log_file_list.length ; file++) {
    var log_file = log_file_list[file];
    var log_content = fs.readFileSync(path.join(directory, log_file), "utf8");
    var log_lines = log_content.split("\n");
    TOTAL_FILE += 1;

    for(var l = 0 ; l < log_lines.length ; l++) {
      var line = log_lines[l];

      // Only lines that look like a chat message are counted.
      var msg_match = line.match(MESSAGE_R);
      if(!msg_match) continue;

      var message = msg_match[4];
      // match() yields null for an empty message, hence the fallback.
      var characters = message.match(CODE_POINT_R) || [];

      TOTAL_LINE += 1;
      TOTAL_CHAR += characters.length;

      for(var i = 0 ; i < characters.length ; i++) {
        var character = characters[i];

        if(CHAR_FREQ[character] === undefined) CHAR_FREQ[character] = 1;
        else CHAR_FREQ[character] += 1;

        if(KOR_R.test(character)) TOTAL_KOR_CHAR += 1;
        else if(ENG_R.test(character)) TOTAL_ENG_CHAR += 1;
      }
    }

    // Progress report every 250 files.
    if(TOTAL_FILE % 250 === 0) console.log(TOTAL_FILE + " files processed.");
  }
}

var END_TIME = Date.now();
TOTAL_TIME = (END_TIME - START_TIME) / 1000; // milliseconds -> seconds
console.log("Log process done.");
// Sorting: turn the frequency table into [character, count] pairs and order
// them from the most frequent character to the least frequent one.
var CHAR_TUPLE = Object.keys(CHAR_FREQ).map(function(character) {
  return [character, CHAR_FREQ[character]];
});
CHAR_TUPLE.sort(function(left, right) {
  return right[1] - left[1];
});
// Output message

/**
 * Builds the header lines shared by all three report files.
 *
 * @param {string} summary_line - The report-specific "Total ... characters"
 *   line placed third in the header.
 * @returns {string[]} Header lines, ending with the table column titles.
 */
function build_header(summary_line) {
  return [
    "Total files - " + TOTAL_FILE,
    "Total messages - " + TOTAL_LINE,
    summary_line,
    "Time took to process - " + TOTAL_TIME + " second(s)",
    "--------------------------------------------------",
    "Rank\tChar\tFreq\t\t%\tC%"
  ];
}

var output = build_header("Total characters - " + TOTAL_CHAR);
var output_kor = build_header("Total korean characters - " + TOTAL_KOR_CHAR);
var output_eng = build_header("Total english characters - " + TOTAL_ENG_CHAR);
/**
 * Appends one ranking row per character of CHAR_TUPLE to `lines`.
 *
 * Each row is "rank<TAB>char<TAB>freq<TAB><TAB>%<TAB>cumulative %", with both
 * percentages truncated by to_4(). CHAR_TUPLE is already sorted by descending
 * frequency, so ranks are handed out in iteration order.
 *
 * @param {string[]} lines - Report lines to append to (modified in place).
 * @param {number} total - Denominator used for the percentage columns.
 * @param {?RegExp} char_filter - When given, only characters it matches are
 *   ranked; pass null to rank every character.
 * @returns {string[]} The same `lines` array.
 */
function append_rank_rows(lines, total, char_filter) {
  var rank = 1;
  var crate = 0; // cumulative percentage of the rows emitted so far

  for(var idx = 0 ; idx < CHAR_TUPLE.length ; idx++) {
    var character = CHAR_TUPLE[idx][0];
    var frequency = CHAR_TUPLE[idx][1];

    if(char_filter && !char_filter.test(character)) continue;

    // Declared locally: the bare assignment used before created an implicit
    // global, which throws under strict mode / ES modules.
    var rate = (frequency / total) * 100;
    crate += rate;

    lines.push([
      rank, "\t",
      character, "\t",
      frequency, "\t\t",
      to_4(rate), "\t",
      to_4(crate)
    ].join(""));

    rank += 1;
  }

  return lines;
}

// Total character rank
append_rank_rows(output, TOTAL_CHAR, null);
fs.writeFileSync("output.txt", output.join("\n"));
console.log("output.txt generated.");

// Korean character rank
append_rank_rows(output_kor, TOTAL_KOR_CHAR, KOR_R);
fs.writeFileSync("output_kor.txt", output_kor.join("\n"));
console.log("output_kor.txt generated.");

// English character rank
append_rank_rows(output_eng, TOTAL_ENG_CHAR, ENG_R);
fs.writeFileSync("output_eng.txt", output_eng.join("\n"));
console.log("output_eng.txt generated.");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment