xiaodaigh · January 22, 2014 12:26
diff --git a/intro.r b/intro.r
 # reading and writing files
 # press ctrl+enter to send
 iris
 fix(iris) # opens iris in an editor

 # row.names = FALSE keeps the row numbers out of the file, so reading it
 # back does not add an extra "X" column
 # NOTE: the c:/temp folder must already exist
 write.csv(iris, "c:/temp/iris.csv", row.names = FALSE)
 iris2 <- read.csv("c:/temp/iris.csv")
 nrow(iris2)

 # get help
 ?write.csv
 # www.rdocumentation.org

 # set row names
 # One name per row: a letter prefix followed by the row number, which keeps
 # the names unique. seq() yields fractional positions that `[` truncates,
 # so the letters are spread evenly over the rows. The numeric suffix is
 # sized from the data (not hard-coded) so there is exactly one name per row.
 rownames(iris2) <- paste0(
   letters[seq(1, 26, length.out = nrow(iris2))],
   seq_len(nrow(iris2))
 )

 # extract by rownames
 iris2["a1", "Sepal.Length"]
 iris2[c("a1", "a6"), "Sepal.Length"]

 # install packages -- needs internet
 # only download when the package is not already installed
 if (!requireNamespace("Hmisc", quietly = TRUE)) {
   install.packages("Hmisc")
 }

 # sequences
 # x holds the integers 1 to 100; y lays them out column by column in 10 columns
 x <- seq_len(100)
 y <- matrix(x, ncol = 10)

 # single elements: 10th entry of x, then row 1 / column 9 of y
 x[10]
 y[1, 9]

 # whole-vector summaries
 length(x)
 sum(x)
 mean(x)
 cumsum(x)

 # binning numbers into intervals, and building a factor
 d <- 500 + 1000 * rnorm(1000)
 pd <- pretty(d) # evenly spaced "round" break points covering the range of d
 cpd <- cut(d, pd) # factor recording which interval each value of d falls in
 levels(cpd)
 ff <- c("a", "b", "c")
 ff.f <- factor(ff)

 # data coercion
 a <- c("1.1", "2")
 a.nu <- as.double(a) # character -> numeric
 a.c <- as.character(a.nu) # numeric -> character

 # arrays
 # adding the number 3 to a character vector stores it as the string "3"
 a <- append(a, 3)
 print(a)
 # negative positions drop elements: first the leading one ...
 a <- a[-1]
 print(a)
 # ... then the first two of what is left
 a <- a[-(1:2)]
 print(a)

 # TRUE FALSE indexing
 xx <- c(1, 2, 3)
 print(xx)
 # a logical vector keeps the positions marked TRUE; the mask has one entry
 # per element of xx, so it is applied to xx (here: keeps the first element)
 x.tf <- xx[c(TRUE, FALSE, FALSE)]
 # a condition produces such a mask: keep the elements of x that are not above 90
 x1 <- x[!(x > 90)]

 # data.frame
 iris2 <- rbind(iris, iris) # same as SAS append
 # Build the extra columns with one row per row of iris2, so cbind() lines
 # them up directly instead of silently recycling a shorter data frame.
 rn <- data.frame(
   random = rnorm(nrow(iris2)),
   another.var = seq_len(nrow(iris2))
 )
 iris2.r <- cbind(iris2, rn)

 # sql
 # only download when the package is not already installed
 if (!requireNamespace("sqldf", quietly = TRUE)) {
   install.packages("sqldf")
 }
 # library() stops with an error if the package is missing, whereas
 # require() only returns FALSE and lets the script carry on
 library(sqldf)
 # NOTE: dataframe1 and dataframe2 are placeholder names -- replace them with
 # data frames that exist in the workspace before running the query
 merged.data <- sqldf("select * from dataframe1, dataframe2")

 # lists
 # a list can mix element types: a single number, a numeric vector, a data frame
 ll <- list(
   a = 1,
   b = rnorm(1000),
   c = iris
 )
 str(ll)

 # simple r things
 # quick looks at the built-in iris data frame
 summary(iris) # summary of every column
 str(iris) # structure of iris
 names(iris) # column names
 # histograms of two of the columns
 hist(iris$Sepal.Length)
 hist(iris$Sepal.Width)
 boxplot(iris$Sepal.Length)
 # Petal.Length plotted against row position, then a lowess smooth of the
 # same values added on top of that plot
 plot(iris$Petal.Length)
 lines(lowess(1:nrow(iris),iris$Petal.Length))

 # subsetting
 ## select only 2 columns of iris
 ## column names are case-sensitive, so they must match names(iris) exactly
 iris_2cols <- iris[c("Sepal.Length", "Sepal.Width")]


 # freqs
 # number of rows for each species
 table(iris$Species)
 # similar to by statement in SAS: apply a function within each species.
 # The argument expressions are left exactly as written because by() uses
 # them to label its output.
 mean.by <- by(iris$Petal.Length, iris$Species, mean)
 by(iris$Petal.Length, iris$Species, max)

 # Return the first element of x, using single-bracket indexing so the
 # result has the same type as x.
 first.one <- function(x) {
   first.pos <- 1L
   x[first.pos]
 }

 by(iris$Petal.Length,iris$Species,first.one)

 # Attach each row's species mean to iris2. The means are looked up by
 # species name taken from iris2 itself, so there is exactly one value per
 # row of iris2 and nothing depends on factor codes or on recycling.
 iris2$mean.by <- as.vector(mean.by[as.character(iris2$Species)])

 # Return the three smallest followed by the three largest values of x.
 # sort() drops NAs, so the positions are taken from the sorted vector itself
 # rather than from length(x); head()/tail() also cope with inputs that have
 # fewer than three values (including none).
 first.3.and.last.3 <- function(x) {
   sorted <- sort(x)
   c(head(sorted, 3), tail(sorted, 3))
 }

 # three smallest and three largest petal lengths within each species
 by(iris$Petal.Length,iris$Species,first.3.and.last.3)
	# reading and writing files
	# press ctrl+enter to send
	iris
	fix(iris) # opens iris in an editor

	# row.names = FALSE keeps the row numbers out of the file, so reading it
	# back does not add an extra "X" column
	# NOTE: the c:/temp folder must already exist
	write.csv(iris, "c:/temp/iris.csv", row.names = FALSE)
	iris2 <- read.csv("c:/temp/iris.csv")
	nrow(iris2)

	# get help
	?write.csv
	# www.rdocumentation.org

	# set row names
	# One name per row: a letter prefix followed by the row number, which keeps
	# the names unique. seq() yields fractional positions that `[` truncates,
	# so the letters are spread evenly over the rows. The numeric suffix is
	# sized from the data (not hard-coded) so there is exactly one name per row.
	rownames(iris2) <- paste0(
		letters[seq(1, 26, length.out = nrow(iris2))],
		seq_len(nrow(iris2))
	)

	# extract by rownames
	iris2["a1", "Sepal.Length"]
	iris2[c("a1", "a6"), "Sepal.Length"]

	# install packages -- needs internet
	# only download when the package is not already installed
	if (!requireNamespace("Hmisc", quietly = TRUE)) {
		install.packages("Hmisc")
	}

	# sequences
	# x holds the integers 1 to 100; y lays them out column by column in 10 columns
	x <- seq_len(100)
	y <- matrix(x, ncol = 10)

	# single elements: 10th entry of x, then row 1 / column 9 of y
	x[10]
	y[1, 9]

	# whole-vector summaries
	length(x)
	sum(x)
	mean(x)
	cumsum(x)

	# binning numbers into intervals, and building a factor
	d <- 500 + 1000 * rnorm(1000)
	pd <- pretty(d) # evenly spaced "round" break points covering the range of d
	cpd <- cut(d, pd) # factor recording which interval each value of d falls in
	levels(cpd)
	ff <- c("a", "b", "c")
	ff.f <- factor(ff)

	# data coercion
	a <- c("1.1", "2")
	a.nu <- as.double(a) # character -> numeric
	a.c <- as.character(a.nu) # numeric -> character

	# arrays
	# adding the number 3 to a character vector stores it as the string "3"
	a <- append(a, 3)
	print(a)
	# negative positions drop elements: first the leading one ...
	a <- a[-1]
	print(a)
	# ... then the first two of what is left
	a <- a[-(1:2)]
	print(a)

	# TRUE FALSE indexing
	xx <- c(1, 2, 3)
	print(xx)
	# a logical vector keeps the positions marked TRUE; the mask has one entry
	# per element of xx, so it is applied to xx (here: keeps the first element)
	x.tf <- xx[c(TRUE, FALSE, FALSE)]
	# a condition produces such a mask: keep the elements of x that are not above 90
	x1 <- x[!(x > 90)]

	# data.frame
	iris2 <- rbind(iris, iris) # same as SAS append
	# Build the extra columns with one row per row of iris2, so cbind() lines
	# them up directly instead of silently recycling a shorter data frame.
	rn <- data.frame(
		random = rnorm(nrow(iris2)),
		another.var = seq_len(nrow(iris2))
	)
	iris2.r <- cbind(iris2, rn)

	# sql
	# only download when the package is not already installed
	if (!requireNamespace("sqldf", quietly = TRUE)) {
		install.packages("sqldf")
	}
	# library() stops with an error if the package is missing, whereas
	# require() only returns FALSE and lets the script carry on
	library(sqldf)
	# NOTE: dataframe1 and dataframe2 are placeholder names -- replace them with
	# data frames that exist in the workspace before running the query
	merged.data <- sqldf("select * from dataframe1, dataframe2")

	# lists
	# a list can mix element types: a single number, a numeric vector, a data frame
	ll <- list(
		a = 1,
		b = rnorm(1000),
		c = iris
	)
	str(ll)

	# simple r things
	# quick looks at the built-in iris data frame
	summary(iris) # summary of every column
	str(iris) # structure of iris
	names(iris) # column names
	# histograms of two of the columns
	hist(iris$Sepal.Length)
	hist(iris$Sepal.Width)
	boxplot(iris$Sepal.Length)
	# Petal.Length plotted against row position, then a lowess smooth of the
	# same values added on top of that plot
	plot(iris$Petal.Length)
	lines(lowess(1:nrow(iris),iris$Petal.Length))

	# subsetting
	## select only 2 columns of iris
	## column names are case-sensitive, so they must match names(iris) exactly
	iris_2cols <- iris[c("Sepal.Length", "Sepal.Width")]


	# freqs
	# number of rows for each species
	table(iris$Species)
	# similar to by statement in SAS: apply a function within each species.
	# The argument expressions are left exactly as written because by() uses
	# them to label its output.
	mean.by <- by(iris$Petal.Length, iris$Species, mean)
	by(iris$Petal.Length, iris$Species, max)

	# Return the first element of x, using single-bracket indexing so the
	# result has the same type as x.
	first.one <- function(x) {
		first.pos <- 1L
		x[first.pos]
	}

	by(iris$Petal.Length,iris$Species,first.one)

	# Attach each row's species mean to iris2. The means are looked up by
	# species name taken from iris2 itself, so there is exactly one value per
	# row of iris2 and nothing depends on factor codes or on recycling.
	iris2$mean.by <- as.vector(mean.by[as.character(iris2$Species)])

	# Return the three smallest followed by the three largest values of x.
	# sort() drops NAs, so the positions are taken from the sorted vector itself
	# rather than from length(x); head()/tail() also cope with inputs that have
	# fewer than three values (including none).
	first.3.and.last.3 <- function(x) {
		sorted <- sort(x)
		c(head(sorted, 3), tail(sorted, 3))
	}

	# three smallest and three largest petal lengths within each species
	by(iris$Petal.Length,iris$Species,first.3.and.last.3)
No results found