Created
May 26, 2014 13:54
-
-
Save digdeep/b3bf591a2d5e8e026b87 to your computer and use it in GitHub Desktop.
Functions for working with data in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load every CSV file found in one directory.
## Replace the placeholder path with the directory that holds your files.
setwd("/YOUR/WORKING/DIRECTORY")

## Shared reader: comma-separated text whose first row holds the column names.
read_one <- function(files) {
  read.table(files, header = TRUE, sep = ",")
}

## ---- Variant 1: every file in the working directory ----
## NOTE: list.files() with no pattern returns ALL files, not only CSVs.
file_list <- list.files()
## for individual files: a list with one data frame per file
dataset <- lapply(file_list, FUN = read_one)
## If your CSV column structure is same across all csv's
dataset <- do.call("rbind", lapply(file_list, FUN = read_one))

## ---- Variant 2: only files whose name ends in ".csv" ----
## `pattern` is a regular expression, not a shell glob, so the extension is
## escaped and anchored to the end of the file name.
temp <- list.files(pattern = "\\.csv$")
## for individual files
dataset <- lapply(temp, FUN = read_one)
dataset[[1]] ## for specific files of interest ([[ ]] yields the data frame), OR
## If your CSV column structure is same across all csv's bind them all into 1 file
dataset <- do.call("rbind", lapply(temp, FUN = read_one))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Summarise missing and distinct values for each column of a data frame
#'
#' @param df A data frame. A data frame with zero columns yields a zero-row
#'   summary instead of an error.
#' @param include.nan If `TRUE`, also report the count (`nNan`) and ratio
#'   (`rNan`) of `NaN` values per column.
#' @return A data frame with one row per column of `df` (row names are the
#'   column names of `df`) and the columns:
#'   `col` (column position), `Count` (number of rows in `df`),
#'   `nNA` / `rNA` (count and ratio of values for which `is.na()` is `TRUE`;
#'   this includes `NaN`), optionally `nNan` / `rNan`, and
#'   `nUnique` / `rUnique` (count and ratio of distinct values, where `NA`
#'   counts as a value). Ratios are truncated, not rounded, to four decimals.
#'   When `df` has zero rows the ratios are `NaN` (0 / 0).
NAsummary <- function(df, include.nan = FALSE) {
  n_rows <- nrow(df)
  n_cols <- ncol(df)

  # Truncate (rather than round) a ratio to four decimal places.
  trunc4 <- function(ratio) trunc(ratio * 10000) / 10000

  # seq_len()/rep()/vapply() keep every column at length `n_cols`, so a
  # zero-column input produces an empty summary rather than failing.
  newdf <- data.frame(
    col = seq_len(n_cols),
    Count = rep(n_rows, n_cols),
    nNA = vapply(df, function(x) sum(is.na(x)), integer(1))
  )
  newdf$rNA <- trunc4(newdf$nNA / newdf$Count)

  if (include.nan) {
    newdf$nNan <- vapply(df, function(x) sum(is.nan(x)), integer(1))
    newdf$rNan <- trunc4(newdf$nNan / newdf$Count)
  }

  newdf$nUnique <- vapply(df, function(x) length(unique(x)), integer(1))
  newdf$rUnique <- trunc4(newdf$nUnique / newdf$Count)

  rownames(newdf) <- colnames(df)
  newdf
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Fourth-spread of a numeric vector
#'
#' Computes the distance between the upper and lower fourths (hinges) of `x`.
#' Each fourth is the average of the two order statistics that bracket the
#' depth `(floor((n + 1) / 2) + 1) / 2`, counted from the bottom and from the
#' top of the sorted data.
#'
#' @param x A numeric vector. Missing values are ignored.
#' @return A 1 x 1 numeric matrix holding the spread (the matrix shape comes
#'   from the `%*%` product and is kept for backward compatibility). The value
#'   is `NA` when `x` has no non-missing values.
spread <- function(x) {
  # sort() drops NA values, so the depths below must be derived from the
  # length of the sorted vector; using the raw length would index past the
  # end of the data whenever x contains NAs.
  sorted <- sort(x)
  n <- length(sorted)
  if (n == 0L) {
    return(matrix(NA_real_, nrow = 1L, ncol = 1L))
  }

  n.med <- (n + 1) / 2
  n.fourth <- (floor(n.med) + 1) / 2

  # Order statistics bracketing the lower fourth, then the upper fourth.
  y <- sorted[c(
    floor(n.fourth), ceiling(n.fourth),
    floor(n + 1 - n.fourth), ceiling(n + 1 - n.fourth)
  )]

  # (upper pair sum - lower pair sum) / 2 == upper fourth - lower fourth.
  y %*% c(-1, -1, 1, 1) / 2
}
# Spread-vs-level analysis of the series `x`, grouped into blocks of 12
# consecutive observations.
# NOTE(review): `x` must already exist; the grouping and the "+ 2010" axis
# labels presumably assume monthly data starting in January 2010 -- confirm.

# Zero-based block index: observations 1-12 -> 0, 13-24 -> 1, ...
# seq_along() (unlike 1:length(x)) stays correct when x is empty.
years <- floor((seq_along(x) - 1) / 12)
z <- split(x, years)
year_labels <- (min(years):max(years)) + 2010
boxplot(z, names = year_labels, ylab = "y")

# Spread vs Level Plot
# Per-block median (level) and fourth-spread, compared on the log-log scale.
z.med <- unlist(lapply(z, median))
z.spread <- unlist(lapply(z, spread))
fit <- lm(log(z.spread) ~ log(z.med))
plot(log(z.med), log(z.spread),
  xlab = "Log Level", ylab = "Log Spread",
  main = "Spread vs. Level Plot"
)
abline(fit, lwd = 2, col = "Red")

# LAMBDA
# Power for re-expression: one minus the slope of the fitted line.
lambda <- 1 - coef(fit)[2]
boxplot(lapply(z, function(u) u^lambda),
  names = year_labels,
  ylab = paste0("y^", round(lambda, 2)),
  main = "Boxplots of Re-expressed Values"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment