Created
February 21, 2017 15:38
-
-
Save BioSciEconomist/a77f5ccf5b618e1ebeab2bbb6c0ea74c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------
# |PROGRAM NAME: R basic data manipulation
# |DATE: 2/20/17
# |CREATED BY: MATT BOGARD
# |PROJECT FILE:
# |----------------------------------------------------------------
# | PURPOSE: BASIC DATA MANAGEMENT AND STATS IN R
# |----------------------------------------------------------------
# create some toy data
# One row per plot: four numeric yield columns (GARST, PIO, MYC, DEK), a
# numeric plot id (PLOT), and two "Y"/"N" trait flags (BT, RR).
GARST <- c(150, 140, 145, 137, 141, 145, 149, 153, 157, 161)
PIO <- c(160, 150, 146, 138, 142, 146, 150, 154, 158, 162)
MYC <- c(137, 148, 151, 139, 143, 120, 115, 136, 130, 129)
DEK <- c(150, 149, 145, 140, 144, 148, 152, 156, 160, 164)
PLOT <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
BT <- c("Y", "Y", "N", "N", "N", "N", "Y", "N", "Y", "Y")
RR <- c("Y", "N", "Y", "N", "N", "N", "N", "Y", "Y", "N")

# stringsAsFactors = FALSE keeps BT and RR as character vectors on every R
# version (before R 4.0 the default silently converted them to factors).
yield_data <- data.frame(
  GARST, PIO, MYC, DEK, PLOT, BT, RR,
  stringsAsFactors = FALSE
)
#---------------------------
# subsetting data
#---------------------------

# subset data via variable selection: keep only the GARST and PIO columns
# (single-bracket indexing by name returns a data frame)
my_hybrids <- yield_data[c("GARST", "PIO")]
print(my_hybrids)

# subset based on variable values: rows where GARST is 150 and PIO is 160
high_yields <- yield_data[yield_data$GARST == 150 & yield_data$PIO == 160, ]
print(high_yields)

# rows carrying both traits (BT and RR both "Y")
stacked_traits <- yield_data[yield_data$BT == "Y" & yield_data$RR == "Y", ]
# print() explicitly: a bare object name is not auto-printed when the script
# is run through source() with the default echo = FALSE
print(stacked_traits)
#--------------------------------------
# creating and adding new variables
#--------------------------------------

# per-plot difference between the GARST and PIO columns, stored as a new
# column on yield_data (negative when PIO is larger than GARST)
yield_data$d_grst_pio <- with(yield_data, GARST - PIO)
#----------------------------------
# conditional processing
#----------------------------------

# Classify each plot by how many traits it carries:
#   BT and RR both "Y"  -> "Stacked Trait"
#   exactly one is "Y"  -> "Single Trait"
#   neither is "Y"      -> "Non-GMO"
# The labels carry no trailing padding, so tests such as
# GMO == "Single Trait" match; R strings do not need a fixed width.
# The inner condition uses | because the both-"Y" case has already been
# taken by the outer ifelse().
yield_data$GMO <- with(
  yield_data,
  ifelse(
    BT == "Y" & RR == "Y",
    "Stacked Trait",
    ifelse(BT == "Y" | RR == "Y", "Single Trait", "Non-GMO")
  )
)
#-----------------------------------------
# stacking and merging data
#-----------------------------------------

# create two data sets to stack: plots 1-5 and plots 6-10
top <- yield_data[yield_data$PLOT <= 5, ]
bottom <- yield_data[yield_data$PLOT > 5, ]

# rbind() appends the rows of bottom beneath the rows of top
stack <- rbind(top, bottom)

# create two separate data sources to join; both keep PLOT as the key
hybrid <- yield_data[c("GARST", "PLOT")]
traits <- yield_data[c("GMO", "PLOT")]

# join this data on PLOT as a key (merge() defaults to an inner join and
# returns the rows sorted by the key)
hybrid_traits <- merge(hybrid, traits, by = "PLOT")
#-------------------------------------
# sorting data
#-------------------------------------

# sort in descending order by prefacing a numeric column with (-)
hybrid_traits <- hybrid_traits[order(-hybrid_traits$PLOT), ]

# sort ascending by trait (GMO), then by descending GARST within each trait;
# this replaces the ordering produced by the previous statement
hybrid_traits <- hybrid_traits[
  order(hybrid_traits$GMO, -hybrid_traits$GARST),
]
R basic data manipulation.txt | |
Displaying R basic data manipulation.txt. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment