Protonk · March 2, 2011 04:59
diff --git a/Bootstrap truncated.R b/Bootstrap truncated.R
 ####Playing with Truncated Normals####

 set.seed(42)
 x<-rnorm(200,0,4)
 y<-x[x>-2]

 plot(density(y),xlim=c(-16,16),main="Truncated Normal Distribution",xlab="x")
 lines(density(x),col=2)
 segments(x0=-2,y0=0,y1=0.15,lty=2,lwd=2)
 legend(6,0.125,legend=c("Original","Truncated","Truncation\nPoint"),col=c(2,1,1),lty=c(1,1,2));

 #Plot CDF because we will need the original CDF for the Inverse Mills Ratio
 plot(ecdf(x),col=2,main="Empirical CDF")
 lines(ecdf(y))
 legend(-10,0.8,legend=c("Original","Truncated"),col=c(2,1),lty=c(1,1))

 #Create 1000 Random normal variates and sample repeatedly
 x.source<-rnorm(1000,0,4)
 s.sample<-matrix(0,200,100)
 for(i in 1:100) {s.sample[,i]<-sample(x.source,200,replace=TRUE)}

 #Trunc Sum is our row means for various outputs from the bootstrap
 trunc.sum<-matrix(0,200,4)
 trunc.sum[,1]<-rowMeans(s.sample)
 for(i in 1:200) {trunc.sum[i,2]<-mean(s.sample[i,s.sample[i,]>-2])}

 #Alpha is basically a scale factor.  It gives us a scale free measure of the
 #truncation point
 alpha<-matrix(0,200,1)
 for (i in 1:200) {alpha[i]<-(-2-trunc.sum[i,1])/sd(s.sample[i,])}

 #This is the whole reason I created the file.  I didn't know of the pdf/cdf used
 #for the Inverse Mills ratio were standardized or not.  The original sd was 4,
 #standardized would be 1
 lambda4<-dnorm(alpha,0,4)/(1-pnorm(alpha,0,4))
 lambda1<-dnorm(alpha,0,1)/(1-pnorm(alpha,0,1))
 #Expected value of truncated distribution for each
 for (i in 1:200) {trunc.sum[i,3]<-trunc.sum[i,1]+sd(s.sample[i,])*lambda4[i]}
 for (i in 1:200) {trunc.sum[i,4]<-trunc.sum[i,1]+sd(s.sample[i,])*lambda1[i]}
 colnames(trunc.sum)<-c("Orig.mean","trunc.mean","sd4.mean","sd1.mean")

 #did this mostly because it looks purdy
 plot(density(s.sample[1,]),xlim=c(-20,20),type="n",main="100 Random Samples",xlab="x",ylim=c(0,0.225))
 for (i in 2:100) {lines(density(s.sample[i,]),lwd=0.2)}
 for (i in 1:100) {lines(density(s.sample[i,s.sample[i,]>-2]),lwd=0.2,col=2)};
 #Here is the meat of it.  I didn't do variance but because delta follows 
 #mechanically from lambda I didn't need to.
 plot(density(trunc.sum[,4]),type="l",xlim=c(-1.2,3.5),main="Comparison of Bootstraped Truncated Means",xlab="Conditional Mean")
 lines(density(trunc.sum[,2]),col=3)
 lines(density(trunc.sum[,3]),col=4)
 trunc.leg<-c("True Truncated Mean","Computed Mean\n with Standardization\n","Computed Mean\n without Standardization")
 legend(x=-1.25,y=1.39,legend=trunc.leg,col=c(1,3,4),lty=c(1,1,1),cex=0.65,bty="n");
	####Playing with Truncated Normals####

	set.seed(42)
	x<-rnorm(200,0,4)
	y<-x[x>-2]

	plot(density(y),xlim=c(-16,16),main="Truncated Normal Distribution",xlab="x")
	lines(density(x),col=2)
	segments(x0=-2,y0=0,y1=0.15,lty=2,lwd=2)
	legend(6,0.125,legend=c("Original","Truncated","Truncation\nPoint"),col=c(2,1,1),lty=c(1,1,2));

	#Plot CDF because we will need the original CDF for the Inverse Mills Ratio
	plot(ecdf(x),col=2,main="Empirical CDF")
	lines(ecdf(y))
	legend(-10,0.8,legend=c("Original","Truncated"),col=c(2,1),lty=c(1,1))

	#Create 1000 Random normal variates and sample repeatedly
	x.source<-rnorm(1000,0,4)
	s.sample<-matrix(0,200,100)
	for(i in 1:100) {s.sample[,i]<-sample(x.source,200,replace=TRUE)}

	#Trunc Sum is our row means for various outputs from the bootstrap
	trunc.sum<-matrix(0,200,4)
	trunc.sum[,1]<-rowMeans(s.sample)
	for(i in 1:200) {trunc.sum[i,2]<-mean(s.sample[i,s.sample[i,]>-2])}

	#Alpha is basically a scale factor. It gives us a scale free measure of the
	#truncation point
	alpha<-matrix(0,200,1)
	for (i in 1:200) {alpha[i]<-(-2-trunc.sum[i,1])/sd(s.sample[i,])}

	#This is the whole reason I created the file. I didn't know of the pdf/cdf used
	#for the Inverse Mills ratio were standardized or not. The original sd was 4,
	#standardized would be 1
	lambda4<-dnorm(alpha,0,4)/(1-pnorm(alpha,0,4))
	lambda1<-dnorm(alpha,0,1)/(1-pnorm(alpha,0,1))
	#Expected value of truncated distribution for each
	for (i in 1:200) {trunc.sum[i,3]<-trunc.sum[i,1]+sd(s.sample[i,])*lambda4[i]}
	for (i in 1:200) {trunc.sum[i,4]<-trunc.sum[i,1]+sd(s.sample[i,])*lambda1[i]}
	colnames(trunc.sum)<-c("Orig.mean","trunc.mean","sd4.mean","sd1.mean")

	#did this mostly because it looks purdy
	plot(density(s.sample[1,]),xlim=c(-20,20),type="n",main="100 Random Samples",xlab="x",ylim=c(0,0.225))
	for (i in 2:100) {lines(density(s.sample[i,]),lwd=0.2)}
	for (i in 1:100) {lines(density(s.sample[i,s.sample[i,]>-2]),lwd=0.2,col=2)};
	#Here is the meat of it. I didn't do variance but because delta follows
	#mechanically from lambda I didn't need to.
	plot(density(trunc.sum[,4]),type="l",xlim=c(-1.2,3.5),main="Comparison of Bootstraped Truncated Means",xlab="Conditional Mean")
	lines(density(trunc.sum[,2]),col=3)
	lines(density(trunc.sum[,3]),col=4)
	trunc.leg<-c("True Truncated Mean","Computed Mean\n with Standardization\n","Computed Mean\n without Standardization")
	legend(x=-1.25,y=1.39,legend=trunc.leg,col=c(1,3,4),lty=c(1,1,1),cex=0.65,bty="n");
No results found