Alrecenk · December 26, 2015 01:59
diff --git a/pseudonaivetreelearn.java b/pseudonaivetreelearn.java
 int splitvariable=-1; // split on this variable
 double splitvalue ;//split at this value
 // total positives and negatives used for leaf node probabilities
 int totalpositives,totalnegatives ; 
 Datapoint trainingdata[]; //the training data in this node
 treenode leftnode,rightnode;//This node's children if it's a branch

 //splits this node greedily using approximate information gain
 public void split(){
  double bestscore = Maxvalue ;//lower is better so default is very high number
  for(int k=0;k<inputs;k++){
    Sort(trainingdata, k);//sort on kth input
    //start with everything on the right and nothing on the left
    int Lp=0,Ln=0,Rp=totalpositive,Rn=totalnegative;
    for(int j=0;j<trainingdata.length-1;j++){//can't split after the last data point
      if(trainingdata[j].outputclass){
        Lp++;//a positive just passed from the right side to the left side
        Rp--;
      }else{
        Ln++;//a negative just passed from the right side to the left side
        Rn--;
      }
      //score by a parabola approximating information gain
      double score = Lp * Ln / (double)(Lp + Ln) + Rp * Rn / (double)(Rp + Rn);
      if(score < bestscore){//save the best scoring split
        bestscore = score ;
        splitvariable = k ;
        //split right between the two datapoints
        splitvalue = 0.5*(trainingdata[j].input[k] + trainingdata[j].input[k+1]);
        bestLp=Lp;//store the total counts that child node will want
        bestLn=Ln;
      }
    }
  }
  //separate the data into the left and right sets
  Datapoint[][] splitdata = splitdata(trainingdata, splitvariable, splitvalue);
  //make new nodes which could themselves be split
  leftnode = new treenode(splitdata[0]);
  rightnode = new treenode(splitnode[1]);
 }
	int splitvariable=-1; // split on this variable
	double splitvalue ;//split at this value
	// total positives and negatives used for leaf node probabilities
	int totalpositives,totalnegatives ;
	Datapoint trainingdata[]; //the training data in this node
	treenode leftnode,rightnode;//This node's children if it's a branch

	//splits this node greedily using approximate information gain
	public void split(){
	double bestscore = Maxvalue ;//lower is better so default is very high number
	for(int k=0;k<inputs;k++){
	Sort(trainingdata, k);//sort on kth input
	//start with everything on the right and nothing on the left
	int Lp=0,Ln=0,Rp=totalpositive,Rn=totalnegative;
	for(int j=0;j<trainingdata.length-1;j++){//can't split after the last data point
	if(trainingdata[j].outputclass){
	Lp++;//a positive just passed from the right side to the left side
	Rp--;
	}else{
	Ln++;//a negative just passed from the right side to the left side
	Rn--;
	}
	//score by a parabola approximating information gain
	double score = Lp * Ln / (double)(Lp + Ln) + Rp * Rn / (double)(Rp + Rn);
	if(score < bestscore){//save the best scoring split
	bestscore = score ;
	splitvariable = k ;
	//split right between the two datapoints
	splitvalue = 0.5*(trainingdata[j].input[k] + trainingdata[j].input[k+1]);
	bestLp=Lp;//store the total counts that child node will want
	bestLn=Ln;
	}
	}
	}
	//separate the data into the left and right sets
	Datapoint[][] splitdata = splitdata(trainingdata, splitvariable, splitvalue);
	//make new nodes which could themselves be split
	leftnode = new treenode(splitdata[0]);
	rightnode = new treenode(splitnode[1]);
	}