Last active
December 26, 2015 01:59
-
-
Save Alrecenk/7074866 to your computer and use it in GitHub Desktop.
Pseudocode for a naive implementation of a decision tree learning algorithm.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int splitvariable=-1; // split on this variable | |
double splitvalue ;//split at this value | |
// total positives and negatives used for leaf node probabilities | |
int totalpositives,totalnegatives ; | |
Datapoint trainingdata[]; //the training data in this node | |
treenode leftnode,rightnode;//This node's children if it's a branch | |
//splits this node greedily using approximate information gain | |
public void split(){ | |
double bestscore = Maxvalue ;//lower is better so default is very high number | |
for(int k=0;k<inputs;k++){ | |
Sort(trainingdata, k);//sort on kth input | |
//start with everything on the right and nothing on the left | |
int Lp=0,Ln=0,Rp=totalpositive,Rn=totalnegative; | |
for(int j=0;j<trainingdata.length-1;j++){//can't split after the last data point | |
if(trainingdata[j].outputclass){ | |
Lp++;//a positive just passed from the right side to the left side | |
Rp--; | |
}else{ | |
Ln++;//a negative just passed from the right side to the left side | |
Rn--; | |
} | |
//score by a parabola approximating information gain | |
double score = Lp * Ln / (double)(Lp + Ln) + Rp * Rn / (double)(Rp + Rn); | |
if(score < bestscore){//save the best scoring split | |
bestscore = score ; | |
splitvariable = k ; | |
//split right between the two datapoints | |
splitvalue = 0.5*(trainingdata[j].input[k] + trainingdata[j].input[k+1]); | |
bestLp=Lp;//store the total counts that child node will want | |
bestLn=Ln; | |
} | |
} | |
} | |
//separate the data into the left and right sets | |
Datapoint[][] splitdata = splitdata(trainingdata, splitvariable, splitvalue); | |
//make new nodes which could themselves be split | |
leftnode = new treenode(splitdata[0]); | |
rightnode = new treenode(splitnode[1]); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment