nullren · December 22, 2015 14:49
diff --git a/fake_population.R b/fake_population.R
 # Generate two clustered 2-D point clouds ("blue" and "red"), fit a
 # least-squares plane to their 0/1 labels, draw the line where that plane
 # equals 0.5, and shade the plot background with a k-nearest-neighbour
 # classification of a regular grid.
 # (rmvnorm would probably also work for generating the clusters.)

 nCenters <- 2        # cluster centers per color
 nPointsToGen <- 100  # points generated per color
 nNearNeighbor <- 5   # neighbours consulted by the classifier

 # Cluster centers: one row per center. Columns 1:2 are the (x, y) of a
 # blue center, columns 3:4 the (x, y) of a red center.
 m <- matrix(rnorm(4 * nCenters), nrow = nCenters, ncol = 4)

 # Pick a center at random (with replacement) for every blue point (sb)
 # and every red point (sr).
 sb <- sample(nrow(m), nPointsToGen, replace = TRUE)
 sr <- sample(nrow(m), nPointsToGen, replace = TRUE)

 # Scatter every point around its chosen center with sd = 0.2.
 # The means are laid out row by row (point 1: blue x, blue y, red x, red y;
 # point 2: ...), so the random draws happen in the same order as an
 # element-by-element loop over rows and columns would make them.
 centerXY <- cbind(m[sb, 1:2, drop = FALSE], m[sr, 3:4, drop = FALSE])
 d <- matrix(
   rnorm(length(centerXY), mean = as.vector(t(centerXY)), sd = 0.2),
   nrow = nPointsToGen, ncol = 4, byrow = TRUE
 )

 # One row per point: (x, y, 1, value, color). The constant 1 is the
 # intercept column of the plane Z = aX + bY + c; value is the target Z
 # (0 for blue, 1 for red).
 colNames <- c("x", "y", "1", "value", "color")
 blue <- data.frame(d[, 1], d[, 2], 1, 0, "blue", stringsAsFactors = FALSE)
 red <- data.frame(d[, 3], d[, 4], 1, 1, "red", stringsAsFactors = FALSE)
 colnames(blue) <- colnames(red) <- colNames

 # Stack both colors into one data.frame. A plain row-bind is all that is
 # needed; row order is irrelevant to the plot, the fit and the classifier.
 points <- rbind(blue, red)

 # plot them
 plot(points$x, points$y, col = as.character(points$color))

 # Blue circles represent Z = 0 and red circles Z = 1. Fit a plane to these
 # points by least squares, then draw the line where Z = .5; it should
 # approximately "separate" the red and blue circles.
 X <- as.matrix(points[, 1:3])
 Y <- as.matrix(points[, 4])
 # Solve the normal equations (X'X) B = X'Y directly instead of forming
 # the explicit inverse of X'X.
 B <- solve(crossprod(X), crossprod(X, Y))

 # Equation of the line y = b x + a:
 #   .5 = x B_1 + y B_2 + B_3, solved for y
 #   y  = (-B_1/B_2) x + ((.5 - B_3)/B_2)
 abline(a = (.5 - B[3, 1]) / B[2, 1], b = -B[1, 1] / B[2, 1])

 # Nearest-neighbour classifier: cover the bounding box of the data with a
 # 0.1-spaced grid and color each grid cell by the majority label of its
 # N nearest points.
 ys <- seq(min(points$y), max(points$y), .1)
 xs <- seq(min(points$x), max(points$x), .1)
 N <- nNearNeighbor

 # y varies fastest, so cells are visited column by column (x outer,
 # y inner) and the flat result fills hm in its natural column-major order.
 cells <- expand.grid(y = ys, x = xs)
 redShare <- vapply(seq_len(nrow(cells)), function(k) {
   # squared Euclidean distance from this cell to every data point
   dist2 <- (points$x - cells$x[k])^2 + (points$y - cells$y[k])^2
   # N closest points; ties are broken by x, then y
   nearest <- head(order(dist2, points$x, points$y), n = N)
   mean(points$value[nearest])
 }, numeric(1))

 # hm[i, j] is the predicted class (0 = blue, 1 = red) at (xs[j], ys[i]).
 hm <- matrix(as.numeric(redShare > .5), nrow = length(ys), ncol = length(xs))

 # Shade every cell with a translucent square of its predicted color.
 # graphics::points is spelled out because `points` is also the data.frame.
 shade <- ifelse(
   redShare > .5,
   rgb(255, 0, 0, 50, maxColorValue = 255),
   rgb(0, 0, 255, 50, maxColorValue = 255)
 )
 graphics::points(cells$x, cells$y, col = shade, pch = 15)
	# Generate two clustered 2-D point clouds ("blue" and "red"), fit a
	# least-squares plane to their 0/1 labels, draw the line where that plane
	# equals 0.5, and shade the plot background with a k-nearest-neighbour
	# classification of a regular grid.
	# (rmvnorm would probably also work for generating the clusters.)

	nCenters <- 2        # cluster centers per color
	nPointsToGen <- 100  # points generated per color
	nNearNeighbor <- 5   # neighbours consulted by the classifier

	# Cluster centers: one row per center. Columns 1:2 are the (x, y) of a
	# blue center, columns 3:4 the (x, y) of a red center.
	m <- matrix(rnorm(4 * nCenters), nrow = nCenters, ncol = 4)

	# Pick a center at random (with replacement) for every blue point (sb)
	# and every red point (sr).
	sb <- sample(nrow(m), nPointsToGen, replace = TRUE)
	sr <- sample(nrow(m), nPointsToGen, replace = TRUE)

	# Scatter every point around its chosen center with sd = 0.2.
	# The means are laid out row by row (point 1: blue x, blue y, red x, red y;
	# point 2: ...), so the random draws happen in the same order as an
	# element-by-element loop over rows and columns would make them.
	centerXY <- cbind(m[sb, 1:2, drop = FALSE], m[sr, 3:4, drop = FALSE])
	d <- matrix(
	  rnorm(length(centerXY), mean = as.vector(t(centerXY)), sd = 0.2),
	  nrow = nPointsToGen, ncol = 4, byrow = TRUE
	)

	# One row per point: (x, y, 1, value, color). The constant 1 is the
	# intercept column of the plane Z = aX + bY + c; value is the target Z
	# (0 for blue, 1 for red).
	colNames <- c("x", "y", "1", "value", "color")
	blue <- data.frame(d[, 1], d[, 2], 1, 0, "blue", stringsAsFactors = FALSE)
	red <- data.frame(d[, 3], d[, 4], 1, 1, "red", stringsAsFactors = FALSE)
	colnames(blue) <- colnames(red) <- colNames

	# Stack both colors into one data.frame. A plain row-bind is all that is
	# needed; row order is irrelevant to the plot, the fit and the classifier.
	points <- rbind(blue, red)

	# plot them
	plot(points$x, points$y, col = as.character(points$color))

	# Blue circles represent Z = 0 and red circles Z = 1. Fit a plane to these
	# points by least squares, then draw the line where Z = .5; it should
	# approximately "separate" the red and blue circles.
	X <- as.matrix(points[, 1:3])
	Y <- as.matrix(points[, 4])
	# Least squares needs matrix products (%*%), not the modulo operator (%%).
	# Solve the normal equations (X'X) B = X'Y directly instead of forming
	# the explicit inverse of X'X.
	B <- solve(crossprod(X), crossprod(X, Y))

	# Equation of the line y = b x + a:
	#   .5 = x B_1 + y B_2 + B_3, solved for y
	#   y  = (-B_1/B_2) x + ((.5 - B_3)/B_2)
	abline(a = (.5 - B[3, 1]) / B[2, 1], b = -B[1, 1] / B[2, 1])

	# Nearest-neighbour classifier: cover the bounding box of the data with a
	# 0.1-spaced grid and color each grid cell by the majority label of its
	# N nearest points.
	ys <- seq(min(points$y), max(points$y), .1)
	xs <- seq(min(points$x), max(points$x), .1)
	N <- nNearNeighbor

	# y varies fastest, so cells are visited column by column (x outer,
	# y inner) and the flat result fills hm in its natural column-major order.
	cells <- expand.grid(y = ys, x = xs)
	redShare <- vapply(seq_len(nrow(cells)), function(k) {
	  # squared Euclidean distance from this cell to every data point
	  dist2 <- (points$x - cells$x[k])^2 + (points$y - cells$y[k])^2
	  # N closest points; ties are broken by x, then y
	  nearest <- head(order(dist2, points$x, points$y), n = N)
	  mean(points$value[nearest])
	}, numeric(1))

	# hm[i, j] is the predicted class (0 = blue, 1 = red) at (xs[j], ys[i]).
	hm <- matrix(as.numeric(redShare > .5), nrow = length(ys), ncol = length(xs))

	# Shade every cell with a translucent square of its predicted color.
	# graphics::points is spelled out because `points` is also the data.frame.
	shade <- ifelse(
	  redShare > .5,
	  rgb(255, 0, 0, 50, maxColorValue = 255),
	  rgb(0, 0, 255, 50, maxColorValue = 255)
	)
	graphics::points(cells$x, cells$y, col = shade, pch = 15)
No results found