Skip to content

Instantly share code, notes, and snippets.

@morenoh149
Created October 18, 2012 21:41
Show Gist options
  • Save morenoh149/3914917 to your computer and use it in GitHub Desktop.
Save morenoh149/3914917 to your computer and use it in GitHub Desktop.
How do you write an ECDF function for an arbitrary dataset?
#!/usr/bin/env Rscript
# Simulate `n` payments and print, in text form, the empirical CDF of the
# binary `is_card_present` flag for two groups of payments: those below $100
# and those above $100.
#
# Args:
#   n: Number of payments to simulate; a single non-negative whole number
#      (default 100).
#
# Returns:
#   NULL, invisibly. The function is called for its printed output.
#
# Notes:
#   * Payments of exactly 100 fall into neither group (strict comparisons).
#   * The data is random; call set.seed() beforehand for reproducible output.
#   * A group with no payments prints NaN below the 100th percentile.
squareCDF <- function(n = 100) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)

  # Create random data. seq_len() is used instead of seq(1, n) because
  # seq(1, 0) counts down to c(1, 0), which made n = 0 fail in data.frame().
  user_id <- seq_len(n)
  payment_id <- seq_len(n) + 100L
  payment_amount <- rnorm(n, mean = 100, sd = 50)
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  is_card_present <- sample(0:1, n, replace = TRUE)
  data <- data.frame(user_id, payment_id, payment_amount, is_card_present)

  # Split the card-present flags by payment size.
  flags_less <- data$is_card_present[data$payment_amount < 100]
  flags_more <- data$is_card_present[data$payment_amount > 100]

  # ECDF of a 0/1 vector evaluated at x = percentile / 100. Below x = 1 only
  # the zeros lie at or below x, so the value is the share of zeros; at x = 1
  # every observation is included, so the value is 1. Vectorised over
  # `percentile`. An empty `dataPassed` gives NaN (0 / 0) below 100.
  cp <- function(percentile, dataPassed) {
    share_zero <- sum(dataPassed == 0) / length(dataPassed)
    ifelse(percentile < 100, share_zero, 1)
  }

  # Print one ECDF table: a title, a column header, then one quoted
  # "percentile" "value" pair per line for percentiles 1 to 100.
  print_ecdf <- function(title, flags) {
    print(title)
    print("percentile % cp")
    percentiles <- 1:100
    rows <- sprintf("\"%d\" \"%f\"\n", percentiles, cp(percentiles, flags))
    cat(rows, sep = "")
  }

  # Print ECDFs in text format.
  print_ecdf("Users who processed less than $100", flags_less)
  print_ecdf("Users who processed more than $100", flags_more)

  # Plot ECDFs in R (disabled). stats::ecdf() sorts and steps the data itself.
  # par(mfrow = c(1, 2))
  # plot(ecdf(flags_less))
  # plot(ecdf(flags_more))

  invisible(NULL)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment