Skip to content

Instantly share code, notes, and snippets.

@geofferyzh
Created April 16, 2012 20:54
Show Gist options
  • Save geofferyzh/2401442 to your computer and use it in GitHub Desktop.
Save geofferyzh/2401442 to your computer and use it in GitHub Desktop.
R in Action - R Data Manipulation - Subsetting data
#################################################
## Subsetting Dataset ##
#################################################
####################
# Keeping variables
####################
# Every method below keeps the same five survey items, q1..q5, from the
# `leadership` data frame.

# method 1 -- by column position
# The positions are looked up by name rather than hard-coded, so this method
# selects exactly the same columns as the name-based methods below.
q_cols <- match(paste0("q", 1:5), names(leadership))
newdata <- leadership[, q_cols]

# method 2 -- by an explicit vector of column names
myvars <- c("q1", "q2", "q3", "q4", "q5")
newdata <- leadership[myvars]

# method 3 -- by generated column names ("q1", ..., "q5")
myvars <- paste0("q", 1:5)
newdata <- leadership[myvars]

# method 4 -- using the subset() function
# `select` accepts either numeric positions or a range of bare column names.
newdata <- subset(leadership, select = q_cols)
newdata <- subset(leadership, select = q1:q5)
####################
# Dropping variables
####################
# method 1 -- flag the unwanted columns by name, then keep the rest
# `myvars` is a logical vector, TRUE where the column name is q3 or q4.
myvars <- names(leadership) %in% c("q3", "q4")
myvars
newdata <- leadership[!myvars]

# method 2 -- negative positions exclude columns (positions must be known)
# NOTE(review): confirm that columns 7 and 8 are the ones meant to be
# dropped; method 1 above drops q3 and q4 by name.
newdata <- leadership[-c(7, 8)]

# Assigning NULL removes q3 and q4 from the leadership data frame itself.
# It is left commented out so that the rest of the code in this file,
# which still uses those columns, keeps working:
#
# leadership$q3 <- leadership$q4 <- NULL
########################
# Filtering Observations
########################
# method 1 -- by row position (first three rows)
newdata <- leadership[1:3, ]

# method 2 -- by a logical condition
# which() converts the logical vector to row numbers and discards NA, so
# rows whose gender or age is missing are dropped rather than returned as
# all-NA rows.
newdata <- leadership[which(leadership$gender == "M" &
                              leadership$age > 30), ]
newdata

# method 3 -- same filter, referring to the columns by bare name
# with() evaluates the condition inside the data frame for this one
# expression only, so nothing is left on the search path (which is what
# happens if code between attach() and detach() fails).
newdata <- leadership[with(leadership, which(gender == "M" & age > 30)), ]

# method 4 -- using the subset() function
newdata <- subset(leadership, age >= 35 | age < 24)

# Selecting observations based on dates
# The date column is parsed as month/day/two-digit-year; this overwrites
# leadership$date in place.
leadership$date <- as.Date(leadership$date, format = "%m/%d/%y")
startdate <- as.Date("2009-01-01")
enddate <- as.Date("2009-10-31")
# which() keeps rows with a missing date out of the result; indexing with
# the raw logical vector would return an all-NA row for each of them.
newdata <- leadership[which(leadership$date >= startdate &
                              leadership$date <= enddate), ]
########################
# The subset() function
########################
# subset() filters rows and picks columns in one call; column names are
# written bare in both the condition and `select`.

# Rows with age under 24 or at least 35; keep q1 through q4.
newdata <- subset(
  leadership,
  age < 24 | age >= 35,
  select = c(q1, q2, q3, q4)
)

# Men older than 25; keep every column from gender through q4.
newdata <- subset(
  leadership,
  age > 25 & gender == "M",
  select = gender:q4
)
#######################################
# Using SQL to manipulate data frames
#######################################
# sqldf() runs a SQL statement against data frames referenced by name in
# the query (here the built-in mtcars data set).
library(sqldf)

# Cars with a single carburetor, ordered by mpg; row.names = TRUE is passed
# so the row names are carried through to the result.
newdf <- sqldf(
  x = "select * from mtcars where carb=1 order by mpg",
  row.names = TRUE
)

# Mean mpg and displacement per gear count, for 4- and 6-cylinder cars.
# The query text continues at column 0 so the string is unchanged.
newdf <- sqldf(
  x = "select avg(mpg) as avg_mpg, avg(disp) as avg_disp,
gear from mtcars where cyl in (4, 6) group by gear"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment