Created
June 17, 2015 07:54
-
-
Save whilo/ed2db43d8bc0aa7849bb to your computer and use it in GitHub Desktop.
Some R examples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
click1 <- function(){ | |
x <- runif(1);y <- runif(1) | |
plot(x = x, y = y, xlim = c(0, 1), ylim = c(0, 1), | |
main = "Bitte auf den Punkt klicken", | |
xlab = '', ylab = '', | |
axes = FALSE, frame.plot = TRUE) | |
clicktime <- system.time(xyclick <- locator(1)) | |
c(timestamp = Sys.time(), | |
x = x, y = y, | |
xclick = xyclick$x, yclick = xyclick$y, | |
tclick = clicktime[3]) | |
} | |
# 1 click wegwerfen, 4-5 einfuehrungsclicks | |
# lm modell, laufende nummer des clicks muss aufgenommen werden | |
# besserer vergleich als qqplot ist linearisieren | |
# vergleich gerade | |
click <- function(count) { | |
# 5 einfuehrungsklicks | |
initClicks <- Map(function(i) {c(index=i, click1())}, 0:5) | |
#clicks <- Map(function(i) { c(i, click1())}, 1:count) | |
clicks <- list() | |
lastClick <- tail(initClicks,1)[[1]] # truely elegant ... | |
prevX <- lastClick["x"] | |
prevY <- lastClick["y"] | |
for (i in 1:count) { | |
click <- c(index=i, click1()) | |
xclick <- click["xclick"] | |
yclick <- click["yclick"] | |
click <- c(click, dist=(click["x"] - xclick)**2 + (click["y"] - yclick)**2) | |
click <- c(click, way=(xclick-prevX)**2 + (yclick - prevY)**2) | |
clicks <- append(clicks, list(click)) | |
prevX <<- xclick | |
prevY <<- yclick | |
} | |
clicks <- do.call(rbind.data.frame, clicks) | |
colnames(clicks) <- c("index", "timestamp", "x", "y", "xclick", "yclick", "tclick", "dist", "way") | |
clicks | |
} | |
# wie viele clicks sinnvoll fuer lm? ermuedung, test auf signifikanz? | |
# rerun experiment | |
write.table(click(20), "click20r") | |
write.table(click(20), "click20l") | |
write.table(click(20), "click20r2") | |
write.table(click(20), "click20l2") | |
# read stored data | |
right <- rbind(read.table("click20r"), read.table("click20r2")) | |
left <- rbind(read.table("click20l"), read.table("click20l2")) | |
# not what is difference between left and right? | |
# 0-hypothesis: are identical distributed | |
# alternative | |
# shift? in linear model | |
# x_i = mu + e_i i=1,...,n1=n_x | |
# y_i = mu + a + e_i i=1,.., n_2=n_y | |
# t-test detects shift alternative | |
# alternative | |
# different distribution function | |
# KS test between ecdfs, continuous cdfs necessary (?) | |
# appending of lists and dataframes | |
stack(list(left=left$tclick, right=right$tclick)) | |
# idea, compare time / way, should be normal distributed? t-test for significance | |
?sample | |
sample(1:12) # permutation | |
wilcox.test( right$tclick, left$tclick ) # stetigkeitskorrektur | |
require(coin) | |
wilcox_test(right$tclick, left$tclick ) | |
hist(right$tclick / right$way) | |
# empirical cdf, shift between left and right clearly visible | |
plot(ecdf(left$tclick), col="green") | |
lines(ecdf(right$tclick), col="red") | |
legend(x=1,y=1,legend=c("left", "right"), fill=c("green", "red")) | |
ks.test(left$tclick, right$tclick) | |
# p-value: 0.001 (that dists are the same) | |
# t-test: | |
# empirical estimator for mean of two groups | |
# simple case, both same variance, estimate variance | |
t.test(left$tclick, right$tclick) | |
t.test(left$tclick, right$tclick, var.equal=TRUE) | |
# more degrees of freedom | |
# linear model: (only if normal distributed) | |
# qqplot comparison of time to click | |
qqplot(right$tclick, left$tclick) | |
ppplot(right$tclick, distf=rnorm) | |
rlm <- lm(formula= right$tclick ~ right$way + right$index + right$dist) | |
llm <- lm(formula= left$tclick ~ left$way + left$index + left$dist) | |
require(graphics) | |
par(mfrow = c(2,2)) | |
plot(rlm) | |
par(mfrow = c(2,2)) | |
plot(llm) | |
# monte carlo confidence bounds for theoretical distribution | |
# qqplot 300 beobachtungen | |
# genauigkeit, laenge des weges, anzahl der clicks | |
unif50 <- runif(50) | |
unif100 <- runif(100) | |
norm50 <- rnorm(50) | |
norm100 <- rnorm(100) | |
lognorm50 <- exp(rnorm(50)) | |
lognorm100 <- exp( rnorm(100)) | |
oldpar <- par(mfrow = c(2, 3)) | |
plot(ecdf(unif50), pch = "[") | |
plot(ecdf(norm50), pch = "[") | |
plot(ecdf(lognorm50), pch = "[") | |
plot(ecdf(unif100), pch = "[") | |
plot(ecdf(norm100), pch = "[") | |
plot(ecdf(lognorm100), pch = "[") | |
par(oldpar) | |
oldpar <- par(mfrow = c(2, 3)) | |
qqnorm(unif50, main = "Normal Q-Q unif50") | |
qqnorm(norm50, main = "Normal Q-Q norm50") | |
qqnorm(lognorm50, main = "Normal Q-Q lognorm50") | |
qqnorm(unif100, main = "Normal Q-Q unif100") | |
qqnorm(norm100, main = "Normal Q-Q norm100") | |
qqnorm(lognorm100, main = "Normal Q-Q lognorm100") | |
par(oldpar) | |
tq99 <- function(n, df=1) { | |
t100 <- rt(n, df) | |
q <- quantile(t100, probs = seq(0.01, 0.99, 0.98)) | |
st100 <- sort(t100) | |
ust100 <- st100[q["1%"]<st100] | |
cst100 <- ust100[q["99%"]>ust100] | |
cst100 | |
} | |
# TODO why 19? Aufgabe 1.10: abstract P(T(X0) > T(Xi)) | |
ppplot <- function(x, y=NULL, samps = 19, distf=rnorm) { | |
if(is.null(y)) { | |
y <- sort(distf(length(x))) #(1:length(x))/length(x) | |
} else { | |
y <- sort(y) | |
} | |
plot(sort(x), y, xlab = substitute(x), ylab = expression(F[n]), | |
main = "Verteilungsfunktion mit Monte-Carlo-Band", | |
type = "s") | |
mtext(paste(samps, "Monte-Carlo-Stichproben"), side = 3) | |
samples <- matrix(distf(length(x)* samps), nrow = length(x), ncol = samps) | |
samples <- apply(samples, 2, sort) | |
envelope <- t(apply(samples, 1, range)) | |
lines(envelope[, 1], y, type = "s", col = "red"); | |
lines(envelope[, 2], y, type = "s", col = "red") | |
} | |
ppplot(norm100, tq99(100)) | |
ppplot(rnorm(300), rnorm(300)) | |
ks.test(rnorm(300), runif(300)) | |
# chisq vs. ks | |
# https://stackoverflow.com/questions/21655653/r-chi-square-goodness-of-fit-for-random-numbers-generated | |
set.seed(1) | |
df <- data.frame(y = rexp(1000), | |
z = rcauchy(1000, 100, 100)) | |
ks.test(df$y,"pexp") | |
# One-sample Kolmogorov-Smirnov test | |
# | |
# data: df$y | |
# D = 0.0387, p-value = 0.1001 | |
# alternative hypothesis: two-sided | |
ks.test(df$z,"pcauchy",100,100) | |
# One-sample Kolmogorov-Smirnov test | |
# | |
# data: df$z | |
# D = 0.0296, p-value = 0.3455 | |
# alternative hypothesis: two-sided | |
breaks <- c(seq(0,10,by=1)) | |
O <- table(cut(df$y,breaks=breaks)) | |
p <- diff(pexp(breaks)) | |
chisq.test(O,p=p, rescale.p=T) | |
# Chi-squared test for given probabilities | |
# | |
# data: O | |
# X-squared = 7.9911, df = 9, p-value = 0.535 | |
par(mfrow=c(1,2)) | |
plot(qexp(seq(0,1,0.01)),quantile(df$y,seq(0,1,0.01)), | |
main="Q-Q Plot",ylab="df$Y", xlab="Exponential", | |
xlim=c(0,5),ylim=c(0,5)) | |
plot(qcauchy(seq(0,.99,0.01),100,100),quantile(df$z,seq(0,.99,0.01)), | |
main="Q-Q Plot",ylab="df$Z",xlab="Cauchy", | |
xlim=c(-1000,1000),ylim=c(-1000,1000)) | |
# lm | |
x <- 1:100 | |
err <- rnorm(100, mean = 0, sd = 10) | |
y <- 2.5*x + err | |
lm(y ~ x) | |
lmres <- lm(y ~ x) | |
plot(x, y) | |
abline(lmres) | |
attach(hills) | |
scotlm<-lm(time ~ dist + climb) | |
attach(scotlm) | |
oldpar <- par(mfrow=c(1,2)) | |
plot(fitted.values, residuals,xlim=c(0,200), main="Scottish Hill Races", pch=19, cex.lab = 1.5) | |
text.id(fitted.values, residuals, c(7,11,18,31,33,35, 16,14), row.names(hills), cex.id=1.0) | |
sres<-stdres(scotlm) | |
plot(fitted.values,sres,xlim=c(0,200), main="Scottish Hill Races", | |
ylab="MASS::stdres", pch=19, cex.lab = 1.5) | |
text.id(fitted.values,sres, c(7,11,18,31,33,35, 16,14), row.names(hills), cex.id=1.0) | |
par(oldpar) | |
oldpar <- par(mfrow=c(1,2)) | |
scotdffits <-dffits(scotlm) | |
plot(fitted.values, scotdffits,ylab="DFFITS",xlim=c(0,200), | |
main="Scottish Hill Races", pch=19, cex.lab = 1.5) | |
text.id(fitted.values, scotdffits, c(7,11, 18,31,33,35), row.names(hills), cex.id=1.2) | |
scotcooks <-cooks.distance(scotlm) | |
plot(fitted.values, scotcooks,ylab="Cooks",xlim=c(0,200), | |
main="Scottish Hill Races", pch=19, cex.lab = 1.5) | |
text.id(fitted.values, scotcooks, c(7,11, 18), row.names(hills), cex.id=1.2) | |
par(oldpar) | |
library(forward) | |
fwdhills <- fwdlm(time ~dist + climb, data=hills) | |
summary(hills[,1:3]) | |
# Guete Simulation p.126 | |
# task view | |
foo <- Map(function(e) { t.test(exp(rnorm(1000)), exp(rnorm(1000)))$p.value }, 1:1000) | |
length(foo[foo<0.05]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment