Skip to content

Instantly share code, notes, and snippets.

@mickeypash
Created March 4, 2014 23:11
Show Gist options
  • Save mickeypash/9357721 to your computer and use it in GitHub Desktop.
Save mickeypash/9357721 to your computer and use it in GitHub Desktop.
One-sample t-tests of the proportion of fixations for PComp and Unrl at 800ms bins?
## T-test
# Proportion-of-fixation analysis: builds per-participant fixation
# proportions inside/outside a 200-1000ms window and compares PComp vs.
# Unrl with t-tests (see the end of the script).
library(plyr)
# Load preprocessed objects. Assumed to provide `tinfo4` (per-trial info)
# and `dat4` (per-response fixation observations) -- NOTE(review): confirm
# against the preprocessing script that produced preprocessed.RData.
load(file="preprocessed.RData")
# Work on descriptively named copies of the loaded objects
trials <- tinfo4
responses <- dat4
# Give the positional columns the names used downstream
# (column 4 -> "Start", column 6 -> "End").
# NOTE(review): positional renaming assumes a fixed column layout in
# `tinfo4` -- verify against the preprocessing output.
names(trials)[c(4, 6)] <- c("Start", "End")
# Re-express End as an offset from Start (relative onset)
trials$End <- trials$End - trials$Start
# Drop columns that are not needed by the analysis below
trials[["Start"]] <- NULL
trials[["Onset"]] <- NULL
trials[["Choice"]] <- NULL
trials[["X"]] <- NULL
responses[["X"]] <- NULL
# Merge in the actual observations; all=TRUE keeps trials without
# responses and responses without trial info (an outer join on RespID)
all.data <- merge(trials, responses, by="RespID", all=TRUE)
# Compute time relative to start.
# NOTE(review): this adds the (already Start-adjusted) End to ms, so `ms`
# is presumably recorded relative to End -- confirm against the logger.
all.data <- transform(all.data, ms = End + ms)
# Group each session/trial and sort them internally by ms so the shift
# below pairs each response with the next one in time
all.data <- all.data[with(all.data, order(SessionID, RespID, ms)),]
# Find the ending time for each response (i.e. the starting time of the
# next response or the end of the trial): within each (SessionID, RespID)
# group, `to` is `ms` shifted up one position (tail(ms, -1)) with the
# trial end (head(End, 1)) filling the final slot.
all.data <- ddply(all.data, c(.(SessionID), .(RespID)), transform, to=c(tail(ms,-1), head(End,1)))
# Keep only the columns needed for the interval analysis
all.data <- all.data[ ,c("SessionID", "RespID", "ms", "to", "AOI", "ItemID")]
# Clamp a numeric vector into the closed interval [from, to].
# Used below to measure each observation's overlap with the
# 200-1000ms analysis window.
window <- function(xs, from, to) pmin(pmax(xs, from), to)
all.data <- transform(all.data, in.interval = window(to, 200, 1000) - window(ms, 200, 1000))
# Flag, per RespID, the observation with the highest "in.interval".
# When no observation overlaps the window at all (max == 0) nothing is
# flagged, so responses entirely outside 200-1000ms contribute no
# "is.most" row.
all.data <- ddply( all.data, .(RespID), transform
, is.most = max(in.interval) > 0 & in.interval == max(in.interval) )
# Check whether you have to break ties; this errors if any RespID has two
# observations flagged (i.e. tied for the maximal overlap)
stopifnot(all(!duplicated(subset(all.data, is.most, RespID))))
# Here we compute, separately, the proportions for the 200-1000ms
# responses and the non 200-1000ms responses, for each participant.
# The function returns a table, so ddply spreads it into one column per
# AOI level; proportions within each (SessionID, is.most) cell sum to 1.
per.participant.props <- ddply( all.data, c(.(SessionID), .(is.most))
, function(xs) table(xs$AOI)/nrow(xs) )
# Fill missing data with NA: expand.grid enumerates every
# (SessionID, is.most) combination, so the outer merge keeps a row (with
# NA proportions) even for participants with no observations in a cell.
per.participant.props <-
merge(expand.grid( SessionID=unique(per.participant.props$SessionID)
, is.most=c(TRUE, FALSE) ), per.participant.props, all=TRUE)
# Comparing the different proportion measures to each other.
# Welch two-sample t-test: mean proportion of PComp fixations vs. mean
# proportion of Unrl fixations among the 200-1000ms responses.
# NOTE(review): both vectors come from the same participants, so a paired
# test may be more appropriate here -- confirm the intended design.
with( subset(per.participant.props, is.most) # Extract the 200-1000ms responses
, t.test(PComp, Unrl) ) # Compare mean for PComp with mean for Unrl
# Paired t-test: per participant, proportion of PComp among PComp+Unrl
# observations inside vs. outside the 200-1000ms interval.
# Fixed: use TRUE (not the reassignable alias T) and pass the data
# vectors positionally before the named argument.
# NOTE(review): pairing relies on the two subsets being row-aligned by
# SessionID after the expand.grid merge -- verify the ordering holds.
t.test( # Proportion of PComp among PComp+Unrl observations within 200-1000ms
with(subset(per.participant.props, is.most), PComp/(PComp+Unrl))
# Proportion of PComp among PComp+Unrl observations outside 200-1000ms
, with(subset(per.participant.props, !is.most), PComp/(PComp+Unrl))
, paired = TRUE )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment