ajdamico · April 1, 2011 15:38
diff --git a/furman intro to r lecture.R b/furman intro to r lecture.R
 #three reasons to use R-

 #it's free

 #it's open source- package system

 #it's a programming language for statistics.

 #store the whole numbers one through five in a vector named x
 x <- 1:5

 #typing an object's name by itself prints its contents
 x

 #how many elements x holds
 length(x)

 #what kind of object x is - the colon operator produces an integer vector
 class(x)

 #TRUE - integer vectors count as numeric
 is.numeric(x)

 #the answer to a test can itself be stored: y now holds the single value TRUE
 y <- is.numeric(x)

 #FALSE - y holds a logical value, not a number
 is.numeric(y)

 #TRUE
 is.logical(y)

 #build a small data frame: three rows, one text column and one numeric column
 w <- data.frame( name_of_person=c("betty","fred","sammy") , high_fiving_ability=c(50,50,100) )

 w

 class(w)

 #FALSE - a data frame as a whole is not a numeric vector
 is.numeric(w)

 #square brackets take [ row , column ] - leaving one side blank keeps everything on that side
 #second row, every column
 w[2,]

 #second column, every row - the next three lines all pull out the same column
 w[,2]

 w$high_fiving_ability

 w[,"high_fiving_ability"]

 #third row, second column
 w[3,2]

 #second row, third column - w only has two columns, so this stops with "undefined columns selected"
 #try() still shows that error message but lets the rest of the script keep running under source() or Rscript
 try( w[2,3] )

 #TRUE - a single column pulled out by itself is a numeric vector
 is.numeric(w[,2])

 #FALSE - a single row is still a data frame
 is.numeric(w[2,])

 #dimensions of the table
 nrow(w)

 ncol(w)

 #column names (names and colnames agree for a data frame) and row names
 names(w)

 rownames(w)

 colnames(w)

 #the vector of names can be indexed like any other vector
 names(w)[1]

 names(w)[2]

 #length of one column equals the number of rows
 length(w[,2])

 #rows two and three of w, every column
 z <- w[2:3,]

 z

 #z is overwritten here - it now holds the row count of w instead of a data frame
 z <- nrow(w)

 #write the console history to disk - only attempted in an interactive session, since no console history exists in a batch run
 #NOTE(review): the path points at the original author's machine - change it before running elsewhere
 if ( interactive() ){
 	savehistory("C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")
 }

 #make a 5 x 3 data table called m - 5 rows and 3 columns.  the three columns should be a person's name, sex (0 for male, 1 for female), and the person's height in inches

 #then take their average height

 #then isolate the data table into another data table - n - of only the females

 #--together we're going to tack on the person's height in centimeters

 #use the transform function
 #and use m[,"cm"] <- m[,"inches"] * 2.54


 #the tax class 1 & 2/3/4 data comes from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
 #TC.csv is read from the current working directory
 x <- read.csv( file = "TC.csv" )

 #number of records for each value of BORO
 table( x$BORO )

 class( x )

 #first few records
 head( x )

 #only the records where EASE is not an empty string
 y <- subset( x , subset = EASE != "" )

 #mean of CUR_FV_T within each BORO
 tapply( X = x$CUR_FV_T , INDEX = x$BORO , FUN = mean )

 summary( object = x$CUR_FV_T )

 #only the records with CUR_FV_T under 2000000 and GR_SQFT under 10000
 z <- subset( x , subset = CUR_FV_T < 2000000 & GR_SQFT < 10000 )

 #scatterplot of the two columns, then one box of CUR_FV_T per BORO
 plot( z$CUR_FV_T , z$GR_SQFT )

 boxplot( z$CUR_FV_T ~ z$BORO )


 #add TXCL_1 - the first character of each record's TXCL code
 x <- transform( x , TXCL_1 = substr( TXCL , start = 1 , stop = 1 ) )

 #cross-tabulate the original code against the recoded one to confirm the recode worked
 table( x$TXCL , x$TXCL_1 )

 #count the properties in each tax class, 1 through 4
 for ( j in seq_len( 4 ) ){
 	print( with( x , sum( TXCL_1 == j , na.rm = TRUE ) ) )
 }

 #the same counts, this time looping over whichever classes occur in the data and printing each class before its count
 for ( j in unique( x$TXCL_1 ) ){
 	print( j )
 	print( with( x , sum( TXCL_1 == j , na.rm = TRUE ) ) )
 }

 #for loop to create new table
 #one row per 100,000-wide band of CUR_FV_T: value_increment is the top of the band, average_year_built is the mean YRB inside it
 #the table is created at its full size up front - data.frame( value_increment = NULL , average_year_built = NULL ) drops both NULL columns and gives a 0 x 0 table that has to grow on every pass
 date_built <- data.frame( value_increment = (1:20)*100000 , average_year_built = NA_real_ )
 for ( i in seq_len( nrow( date_built ) ) ){
 	#records inside the current band, leaving out any with YRB of 1800 or earlier
 	z <- subset( x , CUR_FV_T >= (i-1)*100000 & CUR_FV_T < i*100000 & YRB > 1800 )
 	#a band with no records gives NaN here
 	date_built[i,"average_year_built"] <- mean(z$YRB)
 }

 #glm
 #model CUR_FV_T on BORO, GR_SQFT and TXCL - factor() makes BORO and TXCL categorical predictors
 #the columns are supplied through data = x rather than attach(x): attach() leaves a copy of every column on the search path, adds another copy each time the script is re-run, and lets workspace objects with the same name mask the columns
 glm( CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL) , data = x )

 #download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
 #merge on other data sets
 library(gdata)
 #skip=4 passes over the first four lines of the sheet - presumably a title banner above the column headers, confirm against the file
 queens <- read.xls("rollingsales_queens.xls",skip=4)

 #rename the first column of queens to BORO so it can serve as a merge key alongside BLOCK and LOT
 names(queens)[1] <- "BORO"
 #all.y keeps every row of queens whether or not x has a match
 #TRUE is spelled out because T is an ordinary variable that can be reassigned
 TC_queens <- merge( x , queens , by=c("BORO","BLOCK","LOT") , all.y=TRUE )

 #compare the row counts before and after the merge
 nrow(queens)
 nrow(TC_queens)

 #sql
 #sqldf runs a sql statement against data frames, referring to them by name inside the query text
 library(sqldf)
 #BORO / BLOCK / LOT combinations within BORO 4 that occur on more than one row of x, along with how many rows each occurs on
 a <- sqldf("select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1")
 #one row for each distinct value of BORO in x
 unique_boroughs <- sqldf("select distinct BORO from x")
	#three reasons to use R-

	#it's free

	#it's open source- package system

	#it's a programming language for statistics.

	#store the whole numbers one through five in a vector named x
	x <- 1:5

	#typing an object's name by itself prints its contents
	x

	#how many elements x holds
	length(x)

	#what kind of object x is - the colon operator produces an integer vector
	class(x)

	#TRUE - integer vectors count as numeric
	is.numeric(x)

	#the answer to a test can itself be stored: y now holds the single value TRUE
	y <- is.numeric(x)

	#FALSE - y holds a logical value, not a number
	is.numeric(y)

	#TRUE
	is.logical(y)

	#build a small data frame: three rows, one text column and one numeric column
	w <- data.frame( name_of_person=c("betty","fred","sammy") , high_fiving_ability=c(50,50,100) )

	w

	class(w)

	#FALSE - a data frame as a whole is not a numeric vector
	is.numeric(w)

	#square brackets take [ row , column ] - leaving one side blank keeps everything on that side
	#second row, every column
	w[2,]

	#second column, every row - the next three lines all pull out the same column
	w[,2]

	w$high_fiving_ability

	w[,"high_fiving_ability"]

	#third row, second column
	w[3,2]

	#second row, third column - w only has two columns, so this stops with "undefined columns selected"
	#try() still shows that error message but lets the rest of the script keep running under source() or Rscript
	try( w[2,3] )

	#TRUE - a single column pulled out by itself is a numeric vector
	is.numeric(w[,2])

	#FALSE - a single row is still a data frame
	is.numeric(w[2,])

	#dimensions of the table
	nrow(w)

	ncol(w)

	#column names (names and colnames agree for a data frame) and row names
	names(w)

	rownames(w)

	colnames(w)

	#the vector of names can be indexed like any other vector
	names(w)[1]

	names(w)[2]

	#length of one column equals the number of rows
	length(w[,2])

	#rows two and three of w, every column
	z <- w[2:3,]

	z

	#z is overwritten here - it now holds the row count of w instead of a data frame
	z <- nrow(w)

	#write the console history to disk - only attempted in an interactive session, since no console history exists in a batch run
	#NOTE(review): the path points at the original author's machine - change it before running elsewhere
	if ( interactive() ){
		savehistory("C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")
	}

	#make a 5 x 3 data table called m - 5 rows and 3 columns. the three columns should be a person's name, sex (0 for male, 1 for female), and the person's height in inches

	#then take their average height

	#then isolate the data table into another data table - n - of only the females

	#--together we're going to tack on the person's height in centimeters

	#use the transform function
	#and use m[,"cm"] <- m[,"inches"] * 2.54


	#the tax class 1 & 2/3/4 data comes from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
	#TC.csv is read from the current working directory
	x <- read.csv( file = "TC.csv" )

	#number of records for each value of BORO
	table( x$BORO )

	class( x )

	#first few records
	head( x )

	#only the records where EASE is not an empty string
	y <- subset( x , subset = EASE != "" )

	#mean of CUR_FV_T within each BORO
	tapply( X = x$CUR_FV_T , INDEX = x$BORO , FUN = mean )

	summary( object = x$CUR_FV_T )

	#only the records with CUR_FV_T under 2000000 and GR_SQFT under 10000
	z <- subset( x , subset = CUR_FV_T < 2000000 & GR_SQFT < 10000 )

	#scatterplot of the two columns, then one box of CUR_FV_T per BORO
	plot( z$CUR_FV_T , z$GR_SQFT )

	boxplot( z$CUR_FV_T ~ z$BORO )


	#add TXCL_1 - the first character of each record's TXCL code
	x <- transform( x , TXCL_1 = substr( TXCL , start = 1 , stop = 1 ) )

	#cross-tabulate the original code against the recoded one to confirm the recode worked
	table( x$TXCL , x$TXCL_1 )

	#count the properties in each tax class, 1 through 4
	for ( j in seq_len( 4 ) ){
		print( with( x , sum( TXCL_1 == j , na.rm = TRUE ) ) )
	}

	#the same counts, this time looping over whichever classes occur in the data and printing each class before its count
	for ( j in unique( x$TXCL_1 ) ){
		print( j )
		print( with( x , sum( TXCL_1 == j , na.rm = TRUE ) ) )
	}

	#for loop to create new table
	#one row per 100,000-wide band of CUR_FV_T: value_increment is the top of the band, average_year_built is the mean YRB inside it
	#the table is created at its full size up front - data.frame( value_increment = NULL , average_year_built = NULL ) drops both NULL columns and gives a 0 x 0 table that has to grow on every pass
	date_built <- data.frame( value_increment = (1:20)*100000 , average_year_built = NA_real_ )
	for ( i in seq_len( nrow( date_built ) ) ){
		#records inside the current band, leaving out any with YRB of 1800 or earlier
		#the multiplication signs are required - (i-1)100000 and i100000 do not parse
		z <- subset( x , CUR_FV_T >= (i-1)*100000 & CUR_FV_T < i*100000 & YRB > 1800 )
		#a band with no records gives NaN here
		date_built[i,"average_year_built"] <- mean(z$YRB)
	}

	#glm
	#model CUR_FV_T on BORO, GR_SQFT and TXCL - factor() makes BORO and TXCL categorical predictors
	#the columns are supplied through data = x rather than attach(x): attach() leaves a copy of every column on the search path, adds another copy each time the script is re-run, and lets workspace objects with the same name mask the columns
	glm( CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL) , data = x )

	#download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
	#merge on other data sets
	library(gdata)
	#skip=4 passes over the first four lines of the sheet - presumably a title banner above the column headers, confirm against the file
	queens <- read.xls("rollingsales_queens.xls",skip=4)

	#rename the first column of queens to BORO so it can serve as a merge key alongside BLOCK and LOT
	names(queens)[1] <- "BORO"
	#all.y keeps every row of queens whether or not x has a match
	#TRUE is spelled out because T is an ordinary variable that can be reassigned
	TC_queens <- merge( x , queens , by=c("BORO","BLOCK","LOT") , all.y=TRUE )

	#compare the row counts before and after the merge
	nrow(queens)
	nrow(TC_queens)

	#sql
	#sqldf runs a sql statement against data frames, referring to them by name inside the query text
	library(sqldf)
	#BORO / BLOCK / LOT combinations within BORO 4 that occur on more than one row of x, along with how many rows each occurs on
	a <- sqldf("select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1")
	#one row for each distinct value of BORO in x
	unique_boroughs <- sqldf("select distinct BORO from x")