Skip to content

Instantly share code, notes, and snippets.

@mickeypash
Created February 24, 2014 06:27
Show Gist options
  • Save mickeypash/9182870 to your computer and use it in GitHub Desktop.
Save mickeypash/9182870 to your computer and use it in GitHub Desktop.
Preprocessing script for raw data from the Lex app.
library(XML)
# Keep strings as character vectors. This is set *before* the first file is
# read so that it applies to every read.csv()/data.frame() call in the script
# (on R < 4.0 the default would otherwise turn strings into factors).
options(stringsAsFactors = FALSE)

# Audio timing table. The script uses its `Wavfile` column here and its
# `Onset_CW` column later on.
atime <- read.csv("audiotimings.csv")

# The item number is taken from characters 2-3 of the wav file name.
atime$ItemID <- as.numeric(substr(atime$Wavfile, 2, 3))
# Build the path of one of a session's text files.
#
# x:   record (list or data frame row) with a `base` field holding the file
#      name stem.
# ext: optional middle extension (e.g. "begin"). When NULL it is dropped by
#      paste(), giving "<mypath>/<base>.txt".
#
# The directory is read from the global variable `mypath`.
# Returns "<mypath>/<base>.<ext>.txt" as a character vector.
getfile <- function(x, ext=NULL) {
  stem <- paste(mypath, x$base, sep = "/")
  paste(stem, ext, "txt", sep = ".")
}
# Load the per-trial timing records of one session.
#
# x:       one-row record with fields `SessionID` and `base` (see getfile()).
# ntrials: number of rows the "select" file must contain for the session to
#          be kept. Defaults to 96, the value that used to be hard-coded.
#
# Reads "<base>.begin.txt", "<base>.select.txt" and "<base>.sync.txt".
# Returns a data frame with one row per trial and the columns
#   SessionID - x$SessionID
#   ItemID    - column 3 of the begin file
#   t0        - column 1 of the begin file
#   t1        - column 1 of the select file
#   sync      - column 1 of the sync file
#   wav       - first three characters of column 3 of the sync file + ".mp3"
#   Choice    - column 3 of the select file
# A zero-row data frame with the same column names is returned when the
# select file cannot be read or does not have exactly `ntrials` rows.
# A begin or sync file that cannot be read is still an error.
loadit <- function(x, ntrials = 96) {
  # Zero-row placeholder so that rbind() over sessions simply skips this one.
  empty <- data.frame(SessionID = NA, ItemID = NA, t0 = NA, t1 = NA,
                      sync = NA, wav = NA, Choice = NA)[-1, ]

  t0 <- read.table(getfile(x, "begin"))

  # Best effort: a session whose select file cannot be read is dropped
  # rather than aborting the whole run.
  t1 <- tryCatch(read.table(getfile(x, "select")),
                 error = function(e) NULL)
  if (is.null(t1) || nrow(t1) != ntrials) {
    return(empty)
  }

  t2 <- read.table(getfile(x, "sync"))
  data.frame(SessionID = x$SessionID, ItemID = t0$V3, t0 = t0$V1, t1 = t1$V1,
             sync = t2$V1, wav = paste(substr(t2$V3, 1, 3), "mp3", sep = "."),
             Choice = t1$V3)
}
# Load the sample stream ("<base>.dat.txt") of one session.
#
# x: one-row record with fields `SessionID` and `base` (see getfile()).
#
# Returns a data frame with the columns
#   SessionID - x$SessionID
#   Msec      - column 1 of the dat file
#   Loc       - location code 0..3 built from the signs of two columns:
#               2 when column 4 is > 0, plus 1 when column 3 is > 0.
loaddat <- function(x) {
  samples <- read.table(getfile(x, "dat"))
  v4_positive <- samples$V4 > 0
  v3_positive <- samples$V3 > 0
  data.frame(SessionID = x$SessionID,
             Msec = samples$V1,
             Loc = v4_positive * 2 + v3_positive)
}
# Per-item picture locations parsed from XML; the columns aoi0..aoi3 are
# used below, one per location code 0..3.
itmloc <- xmlToDataFrame("items.xml")

# Per-item picture roles (headerless CSV, columns named here).
itm <- read.csv(file = "items.csv", header = FALSE)
colnames(itm) <- c("ItemID", "CfgID", "Targ", "PComp", "SComp", "Unr")

# Long format: one row per item and role, `Res` holding the picture resource.
itm2 <- with(itm,
             rbind(data.frame(ItemID = ItemID, AOI = "Targ", Res = Targ),
                   data.frame(ItemID = ItemID, AOI = "PComp", Res = PComp),
                   data.frame(ItemID = ItemID, AOI = "SComp", Res = SComp),
                   data.frame(ItemID = ItemID, AOI = "Unrl", Res = Unr)))
itm2 <- itm2[order(itm2$ItemID, itm2$AOI), ]

# Long format: one row per item and location. Items are numbered by their
# row position in the XML table; seq_len() keeps this safe for an empty
# table, where 1:nrow() would yield c(1, 0).
itmloc2 <- with(itmloc,
                rbind(data.frame(ItemID = seq_len(nrow(itmloc)), Loc = 0, Res = aoi0),
                      data.frame(ItemID = seq_len(nrow(itmloc)), Loc = 1, Res = aoi1),
                      data.frame(ItemID = seq_len(nrow(itmloc)), Loc = 2, Res = aoi2),
                      data.frame(ItemID = seq_len(nrow(itmloc)), Loc = 3, Res = aoi3)))

# Join role and location on the shared columns (ItemID, Res), giving the
# role (AOI) shown at each location (Loc) for every item.
itmall <- merge(itm2, itmloc2)
itmall <- itmall[order(itmall$ItemID, itmall$AOI), ]

# Only items with ItemID <= 48 are kept (the same cut-off is applied to the
# trial records further down).
itmall <- subset(itmall, ItemID <= 48)
# Directory holding the raw per-session files; getfile() reads this global.
mypath <- "response"

# A session is identified by its base file "<base>.txt"; its companion files
# are "<base>.begin.txt", "<base>.select.txt", "<base>.sync.txt" and
# "<base>.dat.txt" (see getfile()).
# The former pattern "[^\\.begin|\\.dat|\\.sync]\\.txt$" was a negated
# character class, i.e. it only rejected names whose last character before
# ".txt" was one of the listed letters. The companion suffixes are now
# excluded explicitly.
txtfiles <- list.files(mypath, "\\.txt$")
is_companion <- grepl("\\.(begin|dat|sync|select)\\.txt$", txtfiles)
basefiles <- sub("\\.txt$", "", txtfiles[!is_companion])
if (length(basefiles) == 0) {
  stop("no session base files (*.txt) found in '", mypath, "'", call. = FALSE)
}

# The session id is taken from characters 2-8 of the base name.
sessIDs <- as.numeric(substr(basefiles, 2, 8))
todo <- data.frame(SessionID = sessIDs,
                   base = basefiles, stringsAsFactors = FALSE)

# Trial records of all sessions; sessions rejected by loadit() contribute
# zero rows. Only items with ItemID <= 48 are kept.
tinfo <- subset(do.call("rbind", by(todo, todo$SessionID, loadit)), ItemID <= 48)

# Load the sample streams only for sessions that produced trial records.
todo2 <- merge(todo, data.frame(SessionID = unique(tinfo$SessionID)))
dat <- do.call("rbind", by(todo2, todo2$SessionID, loaddat))
# Give every trial record a unique running id. seq_len() is safe when there
# are no records, where 1:nrow() would yield c(1, 0).
tinfo$RespID <- seq_len(nrow(tinfo))

# For every response, collect the samples that fall inside its [t0, t1]
# window and tag them with the response id. `dat` holds the samples of all
# sessions, so the window is restricted to the response's own session;
# filtering on time alone would also pick up samples of other sessions
# whose timestamps happen to overlap.
dat2 <- do.call("rbind", by(tinfo, tinfo$RespID, function(x) {
  ff <- subset(dat, SessionID == x$SessionID & Msec >= x$t0 & Msec <= x$t1)
  # rep() keeps the assignment valid when the window contains no samples.
  ff$RespID <- rep(x$RespID, nrow(ff))
  ff
}))
rownames(dat2) <- NULL

# Append one extra sample per response at time t1 whose location is the
# recorded choice, then sort by response and time.
dat2a <- rbind(dat2,
               data.frame(SessionID = tinfo$SessionID, Msec = tinfo$t1,
                          Loc = tinfo$Choice, RespID = tinfo$RespID))
dat2a <- dat2a[order(dat2a$RespID, dat2a$Msec), ]
# Attach each item's audio offset (Onset_CW) to its responses and turn it
# into an onset on the trial time line: Onset = sync + Onset_CW.
tinfo.atime <- merge(
  tinfo[, c("RespID", "ItemID", "sync")],
  atime[, c("ItemID", "Onset_CW")]
)
tinfo.atime[["Onset"]] <- tinfo.atime[["sync"]] + tinfo.atime[["Onset_CW"]]

# Bring the onset back onto the full trial records (joined on RespID).
tinfo2 <- merge(tinfo.atime[, c("RespID", "Onset")], tinfo)

# Translate the chosen location code into the role (AOI) shown there.
tinfo3 <- merge(
  tinfo2,
  itmall[, c("ItemID", "AOI", "Loc")],
  by.x = c("ItemID", "Choice"),
  by.y = c("ItemID", "Loc")
)

# Final trial table, ordered by response; the AOI column is reported under
# the name "Choice".
tinfo4 <- tinfo3[
  order(tinfo3[["RespID"]]),
  c("SessionID", "RespID", "ItemID", "t0", "Onset", "t1", "AOI")
]
names(tinfo4)[names(tinfo4) == "AOI"] <- "Choice"
row.names(tinfo4) <- NULL
# Attach item id and onset to every sample (joined on RespID) and express
# the sample time relative to the onset.
dat3 <- merge(
  dat2a,
  tinfo.atime[, c("RespID", "ItemID", "Onset")]
)
dat3[["ms"]] <- dat3[["Msec"]] - dat3[["Onset"]]

# Translate each sample's location code into the role (AOI) shown there
# (joined on ItemID and Loc), then keep response, relative time and role,
# sorted by response and time.
dat4 <- merge(
  dat3[, c("RespID", "ms", "ItemID", "Loc")],
  itmall[, c("ItemID", "Loc", "AOI")]
)
dat4 <- dat4[order(dat4[["RespID"]], dat4[["ms"]]), c("RespID", "ms", "AOI")]
row.names(dat4) <- NULL

# Persist both result tables.
save(list = c("tinfo4", "dat4"), file = "preprocessed.RData")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment