Skip to content

Instantly share code, notes, and snippets.

@Alrecenk
Last active December 26, 2015 01:59
Show Gist options
  • Save Alrecenk/7074866 to your computer and use it in GitHub Desktop.
Save Alrecenk/7074866 to your computer and use it in GitHub Desktop.
Pseudocode for a naive implementation of a decision tree learning algorithm.
int splitvariable=-1; // split on this variable
double splitvalue ;//split at this value
// total positives and negatives used for leaf node probabilities
int totalpositives,totalnegatives ;
Datapoint trainingdata[]; //the training data in this node
treenode leftnode,rightnode;//This node's children if it's a branch
//splits this node greedily using approximate information gain
public void split(){
double bestscore = Maxvalue ;//lower is better so default is very high number
for(int k=0;k<inputs;k++){
Sort(trainingdata, k);//sort on kth input
//start with everything on the right and nothing on the left
int Lp=0,Ln=0,Rp=totalpositive,Rn=totalnegative;
for(int j=0;j<trainingdata.length-1;j++){//can't split after the last data point
if(trainingdata[j].outputclass){
Lp++;//a positive just passed from the right side to the left side
Rp--;
}else{
Ln++;//a negative just passed from the right side to the left side
Rn--;
}
//score by a parabola approximating information gain
double score = Lp * Ln / (double)(Lp + Ln) + Rp * Rn / (double)(Rp + Rn);
if(score < bestscore){//save the best scoring split
bestscore = score ;
splitvariable = k ;
//split right between the two datapoints
splitvalue = 0.5*(trainingdata[j].input[k] + trainingdata[j].input[k+1]);
bestLp=Lp;//store the total counts that child node will want
bestLn=Ln;
}
}
}
//separate the data into the left and right sets
Datapoint[][] splitdata = splitdata(trainingdata, splitvariable, splitvalue);
//make new nodes which could themselves be split
leftnode = new treenode(splitdata[0]);
rightnode = new treenode(splitnode[1]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment