#introduction to r lecture notes - furman center for real estate and urban policy
#github gist ajdamico/898355 - created April 1, 2011 15:38
#three reasons to use R:
#  - it's free
#  - it's open source, with a package system
#  - it's a programming language for statistics

#build an integer vector holding 1 through 5, then inspect its contents,
#length, class, and whether it counts as numeric
x <- 1:5
x
length(x)
class(x)
is.numeric(x)

#store the result of the numeric check; that result is itself a logical
#value, so it is not numeric but it is logical
y <- is.numeric(x)
is.numeric(y)
is.logical(y)
#build a small data frame: one row per person, two columns
w <- data.frame(
  name_of_person = c("betty", "fred", "sammy"),
  high_fiving_ability = c(50, 50, 100)
)
w
class(w)
#the data frame as a whole is not numeric, even though one column is
is.numeric(w)

#indexing is [row, column]; leaving one side blank selects everything
w[2, ]
w[, 2]
#three equivalent ways to pull out the second column
w$high_fiving_ability
w[, "high_fiving_ability"]
w[3, 2]
#w has only two columns, so column 3 fails with "undefined columns
#selected"; try() displays that error without halting a sourced script
try(w[2, 3])
#a single column is a numeric vector; a single row is still a data frame
is.numeric(w[, 2])
is.numeric(w[2, ])

#dimensions and labels
nrow(w)
ncol(w)
names(w)
rownames(w)
colnames(w)
names(w)[1]
names(w)[2]
length(w[, 2])

#z first holds rows two and three of w, then is overwritten with the
#row count of w
z <- w[2:3, ]
z
z <- nrow(w)

#command history only exists in an interactive console, so skip the save
#when the script is run in batch mode
#NOTE(review): the path is specific to the original author's windows
#machine - change it before running elsewhere
if (interactive()) {
  savehistory("C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")
}
#exercise: make a data frame named m with 5 rows and 3 columns. the three columns should be a person's name, sex (0 for male, 1 for female), and the person's height in inches
#then take their average height
#then subset m into another data frame - n - containing only the females
#--together we're going to tack on the person's height in centimeters
#use the transform function
#and use m[,"cm"] <- m[,"inches"] * 2.54
#download tax class 1 & 2/3/4 data from
#http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
#and save it as TC.csv in the working directory

#note: x, y and z are reused here and replace the toy objects made earlier
x <- read.csv("TC.csv")

#first look: record count per BORO value, object class, first few rows
table(x$BORO)
class(x)
head(x)

#keep only the records whose EASE field is not blank
y <- subset(x, EASE != "")

#mean of CUR_FV_T within each BORO, then its overall distribution
#NOTE(review): CUR_FV_T is presumably the current total full value -
#confirm against the data dictionary
tapply(x$CUR_FV_T, x$BORO, mean)
summary(x$CUR_FV_T)

#restrict to records with CUR_FV_T under 2,000,000 and GR_SQFT under
#10,000 before plotting, then plot the two against each other and
#CUR_FV_T by BORO
z <- subset(x, CUR_FV_T < 2000000 & GR_SQFT < 10000)
plot(z$CUR_FV_T, z$GR_SQFT)
boxplot(z$CUR_FV_T ~ z$BORO)
#for loops to count properties by tax class
#first recode: TXCL_1 holds just the first character of TXCL
x <- transform(x, TXCL_1 = substr(TXCL, 1, 1))

#check tax class recoding worked properly
table(x$TXCL, x$TXCL_1)

#print the number of properties by tax class, 1-4
for (j in 1:4) {
  print(nrow(subset(x, TXCL_1 == j)))
}

#same loop, but driven by the values actually present in TXCL_1 instead
#of a hard-coded 1-4, printing each value before its count
for (j in unique(x$TXCL_1)) {
  print(j)
  print(nrow(subset(x, TXCL_1 == j)))
}
#for loop to create new table
#one row per 100,000-wide band of CUR_FV_T (20 bands, 0 up to 2,000,000):
#value_increment is the band's upper bound and average_year_built is the
#mean YRB of the records falling in that band
bin_width <- 100000
n_bins <- 20

#allocate every row up front rather than growing an empty data frame one
#row at a time inside the loop
date_built <- data.frame(
  value_increment = seq_len(n_bins) * bin_width,
  average_year_built = NA_real_
)

for (i in seq_len(n_bins)) {
  #records with CUR_FV_T in [ (i-1)*bin_width , i*bin_width )
  #NOTE(review): YRB > 1800 presumably drops missing/placeholder years -
  #confirm against the data dictionary
  z <- subset(
    x,
    CUR_FV_T >= (i - 1) * bin_width & CUR_FV_T < i * bin_width & YRB > 1800
  )
  #an empty band yields NaN here
  date_built[i, "average_year_built"] <- mean(z$YRB)
}
#glm
#model CUR_FV_T on BORO and TXCL (both treated as categorical) plus
#GR_SQFT. data = x tells glm() to look the variables up inside x, so x
#never has to be attach()ed to the search path. with no family argument
#glm() uses its default gaussian family
glm(CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL), data = x)
#download rolling sales data from
#http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
#merge on other data sets
library(gdata)

#NOTE(review): skip = 4 presumably jumps over title rows above the real
#header in the spreadsheet - confirm against the downloaded file
queens <- read.xls("rollingsales_queens.xls", skip = 4)

#rename the first column to BORO so it matches the merge key used below
names(queens)[1] <- "BORO"

#all.y = TRUE keeps every row of queens, including those with no matching
#BORO/BLOCK/LOT in x
TC_queens <- merge(x, queens, by = c("BORO", "BLOCK", "LOT"), all.y = TRUE)

#compare row counts before and after the merge
nrow(queens)
nrow(TC_queens)
#sql
library(sqldf)

#BORO/BLOCK/LOT combinations that appear more than once in x, restricted
#to BORO 4, with the number of occurrences in the count column
a <- sqldf("select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1")

#the distinct BORO values present in x
unique_boroughs <- sqldf("select distinct BORO from x")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment