-
-
Save lancejpollard/11241722 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var test = | |
[ | |
['label0', 100, 200, 300, 400, 500], | |
['label0', 200, 100, 400, 300, 500], | |
['label1', 100, 100, 100, 100, 100], | |
['label1', 100, 100, 100, 100, 100], | |
['label2', 0, 0, 0, 0, 0], | |
['label2', 0, 1, 1, 0, 0], | |
]; | |
var real = require('fs').readFileSync('train.csv', 'utf8'); | |
real = real.split('\n'); | |
real.splice(0, 1); // lop off headers | |
real.splice(real.length - 1, 1); // lop off empty trailer | |
real = real.splice(0, 1000); // shorten train | |
for (var r = 0; r < real.length; r++) { | |
real[r] = real[r].split(',') | |
for (var v = 0; v < real[r].length; v++) { | |
real[r][v] = parseInt(real[r][v]); | |
} | |
} | |
var real_test = require('fs').readFileSync('test.csv', 'utf8'); | |
real_test = real_test.split('\n'); | |
real_test.splice(0, 1); // lop off headers | |
real_test.splice(real_test.length - 1, 1); // lop off empty trailer | |
for (var r = 0; r < real_test.length; r++) { | |
real_test[r] = real_test[r].split(',') | |
for (var v = 0; v < real_test[r].length; v++) { | |
real_test[r][v] = parseInt(real_test[r][v]); | |
} | |
} | |
function train (set, exit_tolerance) { | |
// set vector size to first vector length | |
var vector_size = set[0].length - 1; // less one for label | |
// collect cases by labels | |
var cases = {}; | |
for (var s = 0; s < set.length; s++) { | |
// remove the label from this case | |
var label = set[s].splice(0, 1)[0]; | |
// if we dont already have this label create it | |
if (!cases.hasOwnProperty(label)) cases[label] = []; | |
// add this case to the label | |
cases[label].push(set[s]); | |
} | |
// create the initial program and rating | |
var t = new Date().getTime(); | |
var program = ''; | |
var program_rating = rate_program_on_cases(program, cases); | |
console.log(new Date().getTime() - t); | |
// track fails for logging | |
var fails = 0; | |
// genetic loop while program rating is less than exit tolerance | |
while (program_rating > exit_tolerance) { | |
// create and rate mutant program | |
var mutant_program = create_mutant_program(program, vector_size); | |
var mutant_program_rating = rate_program_on_cases(mutant_program, cases); | |
// if mutant program is better than original program | |
if (mutant_program_rating < program_rating) { | |
// log success | |
console.log('(' + program + ')' + program_rating + '->(' + mutant_program + ')' + mutant_program_rating); | |
// replace program and rating with mutants | |
program = mutant_program; | |
program_rating = mutant_program_rating; | |
} else { | |
// log failure | |
fails++; | |
if (fails == 100000) { | |
console.log('(' + program + ')' + program_rating + '-(' + mutant_program + ')' + mutant_program_rating); | |
fails = 0; | |
} | |
} | |
} | |
// exit tolerance reached! | |
// create scores per label | |
var scores_per_label = {}; | |
// loop through each label in cases | |
for (var label in cases) { | |
// add label to scores per label | |
scores_per_label[label] = []; | |
// loop through each case in this label | |
for (var c = 0; c < cases[label].length; c++) { | |
// get the score for this case, add it to the scores for this label | |
scores_per_label[label].push(run_program_on_case(program, cases[label][c])); | |
} | |
} | |
// return scores per label and program | |
return [scores_per_label, program, cases]; | |
}; | |
function run_multi (program, multi_set, score_set) { | |
// create an array for answers | |
var answers = []; | |
// loop through each set in the multi_set | |
for (var m = 0; m < multi_set.length; m++) { | |
// calculate this set's answer | |
answers.push(run_one(program, multi_set[m], score_set)); | |
} | |
// return answers | |
return answers; | |
}; | |
function run_one (program, set, score_set) { | |
// first we run the program on our set to label | |
var score = run_program_on_case(program, set); | |
// next we find KNN neighbours | |
var confidence = []; | |
// loop through all the labels in our score set | |
for (var label in score_set) { | |
// loop through all score_set cases for this label | |
for (var c = 0; c < score_set[label].length; c++) { | |
// find the KNN distance and add it to the confidence | |
confidence.push([label, Math.abs(score_set[label][c] - score)]); | |
} | |
} | |
// sort the confidence by nearest KNN | |
confidence.sort(function (a, b) { | |
return a[1] - b[1]; | |
}); | |
// knn number (median over knn) | |
var knn = 10; | |
var occ = {}; | |
// loop through top knn confidence to find most occuring (median) | |
for (var c = 0; c < knn && c < confidence.length; c++) { | |
// if we dont already have this label in occurences make it | |
if (!occ.hasOwnProperty(confidence[c][0])) occ[confidence[c][0]] = 0; | |
// increment label occurence | |
occ[confidence[c][0]]++; | |
// NOTE | |
// this incrementation could be weighted mariokart style | |
// maybe a golden ratio? fib? | |
} | |
// find highest occurence | |
var highest = null; | |
var at = null; | |
// loop through all labels in occurences | |
for (var label in occ) { | |
// if we dont yet have a highest or if this label is greater than the highest | |
if (highest == null || occ[label] > at) { | |
// update the highest | |
highest = label; | |
at = occ[label]; | |
} | |
} | |
// return the knn highest | |
return highest; | |
}; | |
function create_mutant_program (program, vector_size) { | |
// create a copy to be the mutant | |
var mutant = '' + program; | |
// mutations made | |
var mutations_made = 0; | |
// make mutation passes till enough stick | |
while (mutant == program || mutations_made < 2) { | |
// randomly decide to add or remove | |
if (mutant.length == 0 || Math.random() > 0.5) { | |
// add gene | |
// randomly choose a simple gene or a numbered gene | |
if (Math.random() > 0.5) { | |
// simple gene | |
// randomly choose a simple gene | |
var simple_genes = 'ajklmnopq'; | |
var gene = simple_genes[Math.floor(Math.random() * simple_genes.length)]; | |
// inject gene at end | |
mutant += gene; | |
// increment mutation counter | |
mutations_made++; | |
} else { | |
// numbered gene | |
// randomly choose a numbered gene | |
var numbered_genes = 'bcdefghi'; | |
var gene = numbered_genes[Math.floor(Math.random() * numbered_genes.length)]; | |
// randomly choose a vector x | |
var x = '' + Math.floor(Math.random() * vector_size); | |
// inject gene at end with vector x | |
mutant += gene + x; | |
// increment mutation counter | |
mutations_made++; | |
} | |
} else { | |
// remove gene | |
// choose a target zone | |
var target = Math.floor(Math.random() * mutant.length); | |
// if our target zone is a number retry mutation | |
if ('0123456789'.indexOf(mutant[target]) != -1) continue; | |
// we hit a letter, find the end of this command | |
var target_end = target; | |
// keep pushing the target end until it reaches the end of command | |
do { | |
// push target end further | |
target_end++; | |
// until we reach over the length (command at end of program) | |
// and until the target end is not on a number | |
} while (target_end < mutant.length && '0123456789'.indexOf(mutant[target_end]) != -1); | |
// remove target to target end | |
mutant = mutant.substr(0, target) + mutant.substr(target, target_end); | |
// increment mutation counter | |
mutations_made++; | |
} | |
} | |
// return the new mutant | |
return mutant; | |
}; | |
function rate_program_on_cases (program, cases) { | |
// create score per label | |
var scores_per_label = {}; | |
// loop through each label in cases | |
for (var label in cases) { | |
// add label to scores per label | |
scores_per_label[label] = []; | |
// loop through each case in this label | |
for (var c = 0; c < cases[label].length; c++) { | |
// get the score for this case, add it to the scores for this label | |
scores_per_label[label].push(run_program_on_case(program, cases[label][c])); | |
} | |
} | |
// create a rating | |
var rating = 0; | |
// a rating represents how close each alike case is, | |
// and how far apart each different case is | |
// sum of distance to alike cases / sum of distance to different case | |
// create register for ratios | |
var ratios = []; | |
// loop through each label left | |
for (var label_l in scores_per_label) { | |
// loop through each case left | |
for (var case_l = 0; case_l < scores_per_label[label_l].length; case_l++) { | |
// create registers for sums | |
var sum_alike = 0; | |
var sum_different = 0; | |
// loop through each label right | |
for (var label_r in scores_per_label) { | |
// loop through each case right | |
for (var case_r = 0; case_r < scores_per_label[label_r].length; case_r++) { | |
// calculate distance | |
var distance = Math.abs(scores_per_label[label_r][case_r] - scores_per_label[label_l][case_l]); | |
// if these cases have the same label | |
if (label_l == label_r) { | |
// add the distance to alike | |
sum_alike += distance; | |
// otherwise they have different labels | |
} else { | |
// add the distance to different | |
sum_different += distance; | |
} | |
} | |
} | |
// calculate and push ratio | |
ratios.push(sum_alike / sum_different); | |
// check for NaN replace with inf | |
if (isNaN(ratios[ratios.length - 1])) ratios[ratios.length - 1] = Infinity; | |
} | |
} | |
// average ratios | |
var ratio_sum = 0; | |
for (var r = 0; r < ratios.length; r++) { | |
ratio_sum += ratios[r]; | |
} | |
ratio_sum /= ratios.length; | |
return ratio_sum; | |
}; | |
function run_program_on_case (program, set) { | |
// copy the program to run it | |
var runner = program + ''; | |
// create the register and the final score | |
var register = 0; | |
var score = 0; | |
// walk through program one step at time | |
while (runner.length > 0) { | |
//console.log('walk with runner(' + runner + ')'); | |
// splice off command at head | |
var command = runner.substr(0, 1); | |
runner = runner.substr(1); | |
//console.log('walk after command(' + runner + ')'); | |
// if there are numerals (vector X reference) trailling command | |
// then grab vector x | |
var vector_x = ''; | |
// while we have a number at the head | |
while (runner.length > 0 && '0123456789'.indexOf(runner[0]) != -1) { | |
// append numeral to vector x and splice from head | |
vector_x += runner.substr(0, 1); | |
runner = runner.substr(1); | |
} | |
//console.log('walk after num(' + runner + ')'); | |
// parse our numeral if we have one | |
if (vector_x.length > 0) vector_x = parseInt(vector_x); | |
// switch based on command | |
switch (command) { | |
case 'a': // register = 0 | |
register = 0; | |
break; | |
case 'b': // register = register + vector x | |
register = register + set[vector_x]; | |
break; | |
case 'c': // register = register - vector x | |
register = register - set[vector_x]; | |
break; | |
case 'd': // register = vector x - register | |
register = set[vector_x] - register; | |
break; | |
case 'e': // register = register * vector x | |
register = register * set[vector_x]; | |
break; | |
case 'f': // register = register / vector x | |
register = register / set[vector_x]; | |
break; | |
case 'g': // register = vector x / register | |
register = set[vector_x] / register; | |
break; | |
case 'h': // register = register % vector x | |
register = register % set[vector_x]; | |
break; | |
case 'i': // register = vector x % register | |
register = set[vector_x] % register; | |
break; | |
case 'j': // score = score + register | |
score = score + register; | |
break; | |
case 'k': // score = score - register | |
score = score - register; | |
break; | |
case 'l': // score = register - score | |
score = register - score; | |
break; | |
case 'm': // score = score * register | |
score = score * register; | |
break; | |
case 'n': // score = score / register | |
score = score / register; | |
break; | |
case 'o': // score = register / score | |
score = register / score; | |
break; | |
case 'p': // score = score % register | |
score = score % register; | |
break; | |
case 'q': // score = register % score | |
score = register % score; | |
break; | |
} | |
//console.log('command(' + command + '),vector_x(' + vector_x + '),set[vector_x](' + set[vector_x] + '),reg(' + register + '),score(' + score + ')'); | |
} | |
//console.log('ran(' + program + '),set(' + set +'),score(' + score + ')'); | |
// return final score | |
return score; | |
}; | |
var score_set_and_prog = train(real, 0.08); | |
// test on a trainer | |
//console.log(run_one(score_set_and_prog[1], score_set_and_prog[2]['0'][0], score_set_and_prog[0])); | |
// run real test, save to readFileSync | |
console.log('running real test...'); | |
var answers = run_multi(score_set_and_prog[1], real_test, score_set_and_prog[0]); | |
// number answers | |
for (var a = 0; a < answers.length; a++) { | |
answers[a] = (a + 1) + ',' + answers[a]; | |
} | |
// seperate by newlines | |
answers = answers.join('\n'); | |
// prepend header | |
answers = 'ImageId,Label\n' + answers; | |
require('fs').writeFileSync('answer' + new Date().getTime() + '.csv', answers, 'utf8'); | |
console.log('done!'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment