Skip to content

Instantly share code, notes, and snippets.

@dcoeurjo
Created February 22, 2024 08:40
Show Gist options
  • Save dcoeurjo/aa455ff855c2d774b2e814e325e48c2b to your computer and use it in GitHub Desktop.
Save dcoeurjo/aa455ff855c2d774b2e814e325e48c2b to your computer and use it in GitHub Desktop.
> library(ggplot2)
> library(reshape2)
> library(glue)
> library(dplyr)
> library(ggpubr)
> # Seed the accumulator tables for raw events and per-trial time summaries from
> # tab-separated dummy files; rows from each logfile are appended to them below.
> fullTimeData <- read.table(
+   "logfiles/log/_globalTimeDummy.txt",
+   header = TRUE,
+   sep = "\t",
+   fill = TRUE,
+   blank.lines.skip = TRUE,
+   as.is = TRUE
+ )
> summaryTimeData <- read.table(
+   "logfiles/log/_summaryTimeDummy.txt",
+   header = TRUE,
+   sep = "\t",
+   fill = TRUE,
+   blank.lines.skip = TRUE,
+   as.is = TRUE
+ )
Warning message:
In read.table("logfiles/log/_summaryTimeDummy.txt", header = TRUE, :
incomplete final line found by readTableHeader on 'logfiles/log/_summaryTimeDummy.txt'
>
> # Seed the accumulator table for per-trial error counts from its tab-separated dummy file.
> fullErrorData <- read.table("logfiles/log/_globalErrorDummy.txt", header = TRUE, sep = "\t",
+                             fill = TRUE, blank.lines.skip = TRUE, as.is = TRUE)
Warning message:
In read.table("logfiles/log/_globalErrorDummy.txt", header = TRUE, :
incomplete final line found by readTableHeader on 'logfiles/log/_globalErrorDummy.txt'
>
>
> # Collect the paths of all CSV logfiles to process
> files <- Sys.glob("logfiles/log/*.csv")
> print("Reading logfiles...")
[1] "Reading logfiles..."
>
> for (file in files){
+
+   # Parse one logfile and append its events, completion time and error
+   # counts to the three global tables.
+   data <- read.table(file,header=TRUE,sep=",",fill=TRUE,blank.lines.skip=TRUE,as.is=TRUE)
+
+   # Keep the last two parsed rows of the logfile.
+   # NOTE(review): the earlier comment here spoke of removing the last (error)
+   # line, but tail(data, 2) keeps that line together with the row before it --
+   # confirm which of the two was intended.
+   events <- tail(data, 2)
+
+   # The trial duration is the Time value of the row whose Lable column is "End"
+   endTime <- as.numeric(events[events$Lable=="End","Time"])
+   if (length(endTime) == 0) {
+     # fail with the offending file name instead of an opaque data.frame() error
+     stop("No \"End\" event found in the last two rows of ", file, call. = FALSE)
+   }
+
+   # Re-read the end of the file with explicit column names so the error counts
+   # can be addressed by name. skip = nrow(data) skips that many lines from the
+   # top -- presumably the header plus every row but the last; this assumes one
+   # row per line and no blank lines (TODO confirm against the log format).
+   s <- nrow(data)
+   lastline <- read.table(file,header=FALSE,sep=",",skip=s,col.names=c("V1","TruePositives","V2","TrueNegatives","V3","FalseNegatives","V4","FalsePositives"))
+
+   # Trial identifiers are taken from fixed cells of the second data row
+   participantId <- data[2,2]
+   techniqueId <- data[2,4]
+   datasetId <- data[2,3]
+   repetitionId <- data[2,5]
+
+   # one row holding the timing information of this trial
+   summaryTime <- data.frame(
+     ParticipantID = participantId,
+     TechniqueID = techniqueId,
+     DatasetID = datasetId,
+     RepetitionID = repetitionId,
+     Time = endTime
+   )
+
+   # one row holding the error counts, each divided by the factor below
+   div <- 1000 #the division factor
+   error <- data.frame(
+     ParticipantID = participantId,
+     TechniqueID = techniqueId,
+     DatasetID = datasetId,
+     RepetitionID = repetitionId,
+     TP = lastline$TruePositives / div,
+     TN = lastline$TrueNegatives / div,
+     FP = lastline$FalsePositives / div,
+     FN = lastline$FalseNegatives / div
+   )
+
+   #add the data from this logfile to the global tables
+   fullTimeData <- rbind(fullTimeData,events)
+   fullErrorData <- rbind(fullErrorData,error)
+   summaryTimeData <- rbind(summaryTimeData,summaryTime)
+
+ }
>
> print("done reading logfiles. Now combining and preparing the data")
[1] "done reading logfiles. Now combining and preparing the data"
>
> # Post-processing of the accumulated tables
>
> # 1: copy Time into TimeInS, the column the box plots read (no unit conversion is applied)
> summaryTimeData[["TimeInS"]] <- summaryTimeData[["Time"]]
>
> # 2: log-transform the times before they are averaged
> summaryTimeData[["LogTime"]] <- log(summaryTimeData[["TimeInS"]])
>
> # 3: treat the ids as categorical
> idColumns <- c("TechniqueID", "RepetitionID")
> summaryTimeData[idColumns] <- lapply(summaryTimeData[idColumns], factor)
> fullErrorData[["TechniqueID"]] <- factor(fullErrorData[["TechniqueID"]])
> fullTimeData[["TechniqueID"]] <- factor(fullTimeData[["TechniqueID"]])
>
>
>
> ###################################################
>
>
> createErrorStats <- function(errorDataSubset,filenamePrefix){
+   # Compute per-trial Precision, Recall, F1 and MCC from the TP/TN/FP/FN
+   # columns, report the mean F1 and MCC per technique with bootstrap
+   # confidence intervals, and write the result tables (CSV) and plots (PDF).
+   #
+   # errorDataSubset: data frame with ParticipantID, TechniqueID, DatasetID,
+   #                  RepetitionID, TP, TN, FP and FN columns.
+   # filenamePrefix:  string prepended to the name of every file written.
+   # Returns the value of the last dev.off() call.
+   #
+   # bootstrapMeanCI() and formatCI() are not defined in this listing; each
+   # bootstrapMeanCI() result is used as the three rows mean / lower / upper.
+   # Exactly four techniques are expected (columns T0..T3 below).
+
+   techniqueLabels <- c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine")
+
+   # Run draw() with a PDF device open and close that device even when draw()
+   # fails, so that later plots cannot end up in a stale file.
+   plotToPdf <- function(fileSuffix, draw, ...) {
+     pdf(file=paste0(filenamePrefix, fileSuffix), ...)
+     deviceOpen <- TRUE
+     on.exit(if (deviceOpen) dev.off(), add = TRUE)
+     draw()
+     deviceOpen <- FALSE
+     dev.off()
+   }
+
+   ############## Calculate error stats ########################
+   tp <- errorDataSubset$TP
+   tn <- errorDataSubset$TN
+   fp <- errorDataSubset$FP
+   fn <- errorDataSubset$FN
+   errorDataSubset$Precision <- tp / (tp + fp)
+   errorDataSubset$Recall <- tp / (tp + fn)
+   errorDataSubset$F1 <- 2 * (errorDataSubset$Precision * errorDataSubset$Recall) / (errorDataSubset$Precision + errorDataSubset$Recall)
+   errorDataSubset$MCC <- ((tp * tn) - (fp * fn)) /
+     sqrt((tp + fp)*(tp + fn)*(tn+fp)*(tn+fn))
+
+   # Missing scores (e.g. from a zero denominator) count as 0 in the averages;
+   # the distribution plots further down still receive the unreplaced values.
+   e <- errorDataSubset
+   e[is.na(e)] <- 0
+
+   errorMelt <- melt(e,id=c("ParticipantID","TechniqueID","DatasetID","RepetitionID"),measure.vars=c("F1","MCC"))
+   errorPerParticipant <- as.data.frame(acast(errorMelt,ParticipantID ~ TechniqueID ~ variable,mean))
+   #Selection technique: 0 MeTaPoint, 1 MeTaBrush, 2 MeTaPaint, 3 BaseLine
+   colnames(errorPerParticipant) <- c("T0_F1","T1_F1","T2_F1","T3_F1","T0_MCC","T1_MCC","T2_MCC","T3_MCC")
+
+   # Bootstrap the mean of one metric for techniques 0..3 (reporting each one
+   # as it is computed) and assemble the result table.
+   summariseMetric <- function(metric) {
+     cis <- lapply(0:3, function(techniqueId) {
+       ci <- bootstrapMeanCI(errorPerParticipant[[paste0("T", techniqueId, "_", metric)]])
+       cat("The mean ", metric, " error rate for technique ", techniqueId, " is ", formatCI(ci, ""), ", ", sep = "")
+       cat("\n")
+       ci
+     })
+     resultTable <- data.frame(cis[[1]], cis[[2]], cis[[3]], cis[[4]])
+     colnames(resultTable) <- techniqueLabels
+     row.names(resultTable) <- c(paste0("mean_", metric), "lowerBound_CI", "upperBound_CI")
+     resultTable
+   }
+
+   F1resultTable <- summariseMetric("F1")
+   cat("F1 Table\n")
+   print(F1resultTable)
+   write.table(F1resultTable, paste0(filenamePrefix, "Means_F1.csv"), sep=",")
+
+   MCCresultTable <- summariseMetric("MCC")
+   cat("-------------------------------------\n")
+   cat("MCC Table\n")
+   print(MCCresultTable)
+   write.table(MCCresultTable, paste0(filenamePrefix, "Means_MCC.csv"), sep=",")
+
+   plotToPdf("F1Distribution.pdf", function() F1Distribution(errorDataSubset))
+   plotToPdf("MCCDistribution.pdf", function() MCCDistribution(errorDataSubset))
+   plotToPdf("barChartF1.pdf", function() barChartF1(F1resultTable), width=8, height=2)
+   plotToPdf("barChartMCC.pdf", function() barChartMCC(MCCresultTable), width=8, height=2)
+
+ }
>
> ############## Calculate time stats #########################
>
> createTimeStats <- function(summaryTimeDataSubset,filenamePrefix){
+   # Report the mean completion time per technique and the ratios between
+   # techniques, each with a confidence interval, and write the result tables
+   # (CSV) and plots (PDF).
+   #
+   # summaryTimeDataSubset: data frame with ParticipantID, TechniqueID,
+   #                        DatasetID, RepetitionID and LogTime columns (the
+   #                        box plots additionally read TimeInS).
+   # filenamePrefix:        string prepended to the name of every file written.
+   #
+   # All intervals are computed on LogTime and mapped back with exp(), so a
+   # difference of log times is reported as a ratio. exactMeanCI() and
+   # formatCI() are not defined in this listing. Exactly four techniques are
+   # expected (columns T0..T3 below).
+
+   ciRowNames <- c("mean_time","lowerBound_CI","upperBound_CI")
+
+   # Run draw() with a PDF device open and close that device even when draw()
+   # fails, so that later plots cannot end up in a stale file.
+   plotToPdf <- function(fileSuffix, draw, ...) {
+     pdf(file=paste0(filenamePrefix, fileSuffix), ...)
+     deviceOpen <- TRUE
+     on.exit(if (deviceOpen) dev.off(), add = TRUE)
+     draw()
+     deviceOpen <- FALSE
+     dev.off()
+   }
+
+   # Bind a list of interval vectors into a table with one column per entry
+   ciTable <- function(cis, columnLabels) {
+     tbl <- do.call(data.frame, unname(cis))
+     colnames(tbl) <- columnLabels
+     row.names(tbl) <- ciRowNames
+     tbl
+   }
+
+   timeMelt <- melt(summaryTimeDataSubset,id=c("ParticipantID","TechniqueID","DatasetID","RepetitionID"),measure.vars=c("LogTime"))
+   participantPerTechnique <- as.data.frame(acast(timeMelt,ParticipantID ~ TechniqueID ~ variable,mean))
+   colnames(participantPerTechnique) <- c("T0","T1","T2","T3")
+
+   # Interval of the ratio for each (numerator, denominator) pair of columns
+   ratioCIs <- function(pairs) {
+     lapply(pairs, function(pair) {
+       exp(exactMeanCI(participantPerTechnique[[pair[1]]] - participantPerTechnique[[pair[2]]]))
+     })
+   }
+
+   #now on to the confidence intervals
+   techniqueCIs <- lapply(0:3, function(techniqueId) {
+     ci <- exp(exactMeanCI(participantPerTechnique[[paste0("T", techniqueId)]]))
+     cat("The mean task completion time for technique ", techniqueId, " is ", formatCI(ci, "s"), ". ", sep = "")
+     cat("\n")
+     ci
+   })
+   resultTable <- ciTable(techniqueCIs, c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine"))
+
+   cat("Time Table\n")
+   print(resultTable)
+
+   write.table(resultTable, paste0(filenamePrefix, "Means_time.csv"), sep=",")
+   # NOTE(review): this call draws on whatever graphics device is active before
+   # any PDF is opened; the same chart is written to barChartTime.pdf below --
+   # confirm that this extra drawing is wanted.
+   barChartTime(resultTable)
+
+   plotToPdf("boxplotTime.pdf", function() boxplotTime(summaryTimeDataSubset))
+   plotToPdf("logTimeDistribution.pdf", function() logTimeDistribution(summaryTimeDataSubset))
+   plotToPdf("boxplotTimePerDataset.pdf", function() boxplotTimePerDataset(summaryTimeDataSubset))
+   plotToPdf("barChartTime.pdf", function() barChartTime(resultTable), width=8, height=2)
+
+   cat("Calculating differences\n")
+   #now plot the differences:
+   selectedPairs <- list(c("T0","T2"), c("T1","T0"), c("T1","T2"), c("T3","T1"))
+   resultTableDifferences <- ciTable(
+     ratioCIs(selectedPairs),
+     c("MeTaPoint/MeTaPaint","MeTaBrush/MeTaPoint","MeTaBrush/MeTaPaint","Baseline/MeTaBrush")
+   )
+
+   cat("Time Table Differences\n")
+   print(resultTableDifferences)
+
+   plotToPdf("barChartTimeDatasetsDifference.pdf", function() barChartTimeDifference(resultTableDifferences), width=8, height=2)
+
+   write.table(resultTableDifferences, paste0(filenamePrefix, "Ratios_time.csv"), sep=",")
+
+   # every pairwise comparison of ("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine")
+   allPairs <- list(
+     c("T0","T1"), #MeTaPoint - MeTaBrush
+     c("T0","T2"), #MeTaPoint - MeTaPaint
+     c("T0","T3"), #MeTaPoint - BaseLine
+     c("T1","T2"), #MeTaBrush - MeTaPaint
+     c("T1","T3"), #MeTaBrush - BaseLine
+     c("T2","T3")  #MeTaPaint - BaseLine
+   )
+   resultTableDifferences <- ciTable(
+     ratioCIs(allPairs),
+     c("MeTaPoint/MeTaBrush","MeTaPoint/MeTaPaint","MeTaPoint/BaseLine","MeTaBrush/MeTaPaint","MeTaBrush/BaseLine","MeTaPaint/BaseLine")
+   )
+
+   plotToPdf("barChartTimeDatasetsDifference2.pdf", function() barChartTimeDifference2(resultTableDifferences), width=8, height=3)
+
+   write.table(resultTableDifferences, paste0(filenamePrefix, "Ratios_time2.csv"), sep=",")
+
+ }
>
>
>
> ##############PLOTTING CODE BELOW
>
>
>
> require(grid)
>
> barChartMCC <- function(MCCresultTable){
+   # Horizontal bar chart of the mean MCC per technique with its confidence
+   # interval, printed to the active graphics device.
+   #
+   # MCCresultTable: table with rows mean_MCC, lowerBound_CI, upperBound_CI and
+   #                 one column per technique (four are expected).
+   ciData <- as.data.frame(t(MCCresultTable))
+
+   # distances from the mean to the upper and lower interval bounds
+   ciData[["CI2"]] <- ciData[["upperBound_CI"]] - ciData[["mean_MCC"]]
+   ciData[["CI1"]] <- ciData[["mean_MCC"]] - ciData[["lowerBound_CI"]]
+
+   # axis position of each technique
+   ciData[["technique"]] <- factor(c(0,1,2,3))
+
+   plainTheme <- theme(
+     panel.background = element_rect(fill = 'white', colour = 'white'),
+     axis.title=element_text(size = rel(1.2), colour = "black"),
+     axis.text=element_text(size = rel(1.2), colour = "black"),
+     panel.grid.major = element_line(colour = "#DDDDDD"),
+     panel.grid.major.y = element_blank(),
+     panel.grid.minor.y = element_blank()
+   )
+   intervals <- geom_errorbar(aes(ymin=mean_MCC-CI1, ymax=mean_MCC+CI2), width=0, size = 1.1)
+   techniqueAxis <- scale_x_discrete(name="",breaks=c("0","1","2","3"),labels=c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine"))
+
+   chart <- ggplot(ciData, aes(x=technique, y=mean_MCC)) +
+     geom_bar(stat="identity",fill = I("#CCCCCC")) +
+     intervals +
+     labs(x = "", y = "MCC score") +
+     techniqueAxis +
+     coord_flip() +
+     plainTheme +
+     geom_point(size=4, colour="black") # dots
+
+   print(chart)
+ }
>
> barChartF1 <- function(F1resultTable){
+   # Horizontal bar chart of the mean F1 score per technique with its
+   # confidence interval, printed to the active graphics device.
+   #
+   # F1resultTable: table with rows mean_F1, lowerBound_CI, upperBound_CI and
+   #                one column per technique (four are expected).
+   ciData <- as.data.frame(t(F1resultTable))
+
+   # distances from the mean to the upper and lower interval bounds
+   ciData[["CI2"]] <- ciData[["upperBound_CI"]] - ciData[["mean_F1"]]
+   ciData[["CI1"]] <- ciData[["mean_F1"]] - ciData[["lowerBound_CI"]]
+
+   # axis position of each technique
+   ciData[["technique"]] <- factor(c(0,1,2,3))
+
+   plainTheme <- theme(
+     panel.background = element_rect(fill = 'white', colour = 'white'),
+     axis.title=element_text(size = rel(1.2), colour = "black"),
+     axis.text=element_text(size = rel(1.2), colour = "black"),
+     panel.grid.major = element_line(colour = "#DDDDDD"),
+     panel.grid.major.y = element_blank(),
+     panel.grid.minor.y = element_blank()
+   )
+   intervals <- geom_errorbar(aes(ymin=mean_F1-CI1, ymax=mean_F1+CI2), width=0, size = 1.1)
+   techniqueAxis <- scale_x_discrete(name="",breaks=c("0","1","2","3"),labels=c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine"))
+
+   chart <- ggplot(ciData, aes(x=technique, y=mean_F1)) +
+     geom_bar(stat="identity",fill = I("#CCCCCC")) +
+     intervals +
+     labs(x = "", y = "F1 score") +
+     techniqueAxis +
+     coord_flip() +
+     plainTheme +
+     geom_point(size=4, colour="black") # dots
+
+   print(chart)
+ }
>
>
> barChartTimeDifference <- function(resultTable){
+   # Dot-and-interval chart of completion-time ratios with a reference line at
+   # 1, printed to the active graphics device.
+   #
+   # resultTable: table with rows mean_time, lowerBound_CI, upperBound_CI and
+   #              one column per comparison. The column names are used as the
+   #              axis labels, so each label always names the ratio that is
+   #              actually plotted (the previous hard-coded labels inverted
+   #              three of the four ratios built by createTimeStats).
+   print("Creating difference time table")
+   tr <- as.data.frame(t(resultTable))
+
+   # one axis position per comparison, in table order
+   positions <- as.character(seq_len(nrow(tr)) - 1)
+   tr$technique <- factor(positions, levels = positions)
+   comparisonLabels <- rownames(tr)
+
+   # Values outside the scale limits are dropped by ggplot2, so widen the
+   # default 0.5-3 range whenever a mean or interval bound falls outside it.
+   plotted <- c(tr$mean_time, tr$lowerBound_CI, tr$upperBound_CI)
+   yLimits <- range(c(0.5, 3, plotted[is.finite(plotted)]))
+
+   g <- ggplot(tr, aes(x=technique, y=mean_time)) +
+     geom_errorbar(aes(ymin=lowerBound_CI, ymax=upperBound_CI),
+                   width=0, # Width of the error bars
+                   size = 1.1
+     ) +
+     labs(x = "", y = "Ratio between completion times",title="no effect") +
+     scale_x_discrete(name="",breaks=positions,labels=comparisonLabels) +
+     scale_y_continuous(limits = yLimits) +
+     coord_flip() +
+     # NOTE(review): the title offset looks hand-tuned, presumably to sit above
+     # the line at 1 with the default limits -- re-check when limits widen.
+     theme(plot.title=element_text(hjust=.245),panel.background = element_rect(fill = 'white', colour = 'white'),axis.title=element_text(size = rel(1.2), colour = "black"),axis.text=element_text(size = rel(1.2), colour = "black"),panel.grid.major = element_line(colour = "#DDDDDD"),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank())+
+     geom_point(size=4, colour="black") + # dots
+     geom_hline(yintercept = 1)
+
+   print(g)
+ }
>
> barChartTimeDifference2 <- function(resultTable){
+   # Dot-and-interval chart of completion-time ratios with a reference line at
+   # 1, printed to the active graphics device.
+   #
+   # resultTable: table with rows mean_time, lowerBound_CI, upperBound_CI and
+   #              one column per comparison; the column names are used as the
+   #              axis labels.
+   print("Creating difference time table")
+   tr <- as.data.frame(t(resultTable))
+
+   # one axis position per comparison, in table order
+   positions <- as.character(seq_len(nrow(tr)) - 1)
+   tr$technique <- factor(positions, levels = positions)
+   comparisonLabels <- rownames(tr)
+
+   # Values outside the scale limits are dropped by ggplot2; with the fixed
+   # 1-10 range every ratio below 1 vanished from the chart. Widen the default
+   # range whenever a mean or interval bound falls outside it.
+   plotted <- c(tr$mean_time, tr$lowerBound_CI, tr$upperBound_CI)
+   yLimits <- range(c(1, 10, plotted[is.finite(plotted)]))
+
+   g <- ggplot(tr, aes(x=technique, y=mean_time)) +
+     geom_errorbar(aes(ymin=lowerBound_CI, ymax=upperBound_CI),
+                   width=0, # Width of the error bars
+                   size = 1.1
+     ) +
+     labs(x = "", y = "Ratio between completion times",title="no effect") +
+     scale_x_discrete(name="",breaks=positions,labels=comparisonLabels) +
+     scale_y_continuous(limits = yLimits) +
+     coord_flip() +
+     theme(plot.title=element_text(hjust=.5),panel.background = element_rect(fill = 'white', colour = 'white'),axis.title=element_text(size = rel(1.2), colour = "black"),axis.text=element_text(size = rel(1.2), colour = "black"),panel.grid.major = element_line(colour = "#DDDDDD"),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank())+
+     geom_point(size=4, colour="black") + # dots
+     geom_hline(yintercept = 1)
+
+   print(g)
+ }
> barChartTime <- function(resultTable){
+   # Horizontal bar chart of the mean completion time per technique with its
+   # confidence interval, printed to the active graphics device.
+   #
+   # resultTable: table with rows mean_time, lowerBound_CI, upperBound_CI and
+   #              one column per technique (four are expected).
+   tr <- as.data.frame(t(resultTable))
+
+   #add a technique column
+   tr$technique <- factor(c(0,1,2,3))
+
+   # Values outside the scale limits are dropped by ggplot2, which removed
+   # whole bars and intervals above 60. Keep 0-60 as the default range and
+   # widen it whenever a mean or interval bound falls outside it.
+   plotted <- c(tr$mean_time, tr$lowerBound_CI, tr$upperBound_CI)
+   yLimits <- range(c(0, 60, plotted[is.finite(plotted)]))
+
+   g <- ggplot(tr, aes(x=technique, y=mean_time)) +
+     geom_bar(stat="identity",fill = I("#CCCCCC")) +
+     geom_errorbar(aes(ymin=lowerBound_CI, ymax=upperBound_CI),
+                   width=0, # Width of the error bars
+                   size = 1.1
+     ) +
+     labs(x = "", y = "Completion time (in seconds)") +
+     scale_y_continuous(limits = yLimits) +
+     scale_x_discrete(name="",breaks=c("0","1","2","3"),labels=c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine")) +
+     coord_flip() +
+     theme(panel.background = element_rect(fill = 'white', colour = 'white'),axis.title=element_text(size = rel(1.2), colour = "black"),axis.text=element_text(size = rel(1.2), colour = "black"),panel.grid.major = element_line(colour = "#DDDDDD"),panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank())+
+     geom_point(size=4, colour="black") # dots
+
+   print(g)
+ }
>
> barChartTimeDatasets <- function(resultTable){
+   # Horizontal bar chart of the mean completion time per dataset with its
+   # confidence interval, printed to the active graphics device.
+   #
+   # resultTable: table with rows mean_time, lowerBound_CI, upperBound_CI and
+   #              one column per dataset (four are expected).
+   ciData <- as.data.frame(t(resultTable))
+
+   # distances from the mean to the upper and lower interval bounds
+   ciData[["CI2"]] <- ciData[["upperBound_CI"]] - ciData[["mean_time"]]
+   ciData[["CI1"]] <- ciData[["mean_time"]] - ciData[["lowerBound_CI"]]
+
+   # axis position of each dataset (stored in a column named technique)
+   ciData[["technique"]] <- factor(c(4,5,6,7))
+
+   plainTheme <- theme(
+     panel.background = element_rect(fill = 'white', colour = 'white'),
+     axis.title=element_text(size = rel(1.2), colour = "black"),
+     axis.text=element_text(size = rel(1.2), colour = "black"),
+     panel.grid.major = element_line(colour = "#DDDDDD"),
+     panel.grid.major.y = element_blank(),
+     panel.grid.minor.y = element_blank()
+   )
+   intervals <- geom_errorbar(aes(ymin=mean_time-CI1, ymax=mean_time+CI2), width=0, size = 1.1)
+   datasetAxis <- scale_x_discrete(name="",breaks=c("4","5","6","7"),labels=c("Clusters","Shell","Rings","Simulation"))
+
+   chart <- ggplot(ciData, aes(x=technique, y=mean_time)) +
+     geom_bar(stat="identity",fill = I("#CCCCCC")) +
+     intervals +
+     labs(x = "", y = "Completion time (in seconds)") +
+     datasetAxis +
+     coord_flip() +
+     plainTheme +
+     geom_point(size=4, colour="black") # dots
+
+   print(chart)
+ }
>
>
>
> boxplotTime <- function(summaryTimeDataSubset){
+   # Box plot of TimeInS per technique, printed to the active graphics device.
+   #
+   # summaryTimeDataSubset: data frame with TechniqueID and TimeInS columns.
+   perTechnique <- aes(x=as.factor(TechniqueID),y=TimeInS,fill=as.factor(TechniqueID))
+   techniqueAxis <- scale_x_discrete(name="",breaks=c("0","1","2","3"),labels=c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine"))
+
+   chart <- ggplot(summaryTimeDataSubset, perTechnique) +
+     geom_boxplot() +
+     labs(x = "Technique", y = "Time in s") +
+     techniqueAxis
+   print(chart)
+ }
>
> boxplotTimePerDataset <- function(summaryTimeDataSubset){
+   # Box plot of TimeInS per technique with one facet row per DatasetID,
+   # printed to the active graphics device.
+   #
+   # summaryTimeDataSubset: data frame with TechniqueID, DatasetID and TimeInS
+   #                        columns.
+   techniqueBreaks <- c("0","1","2","3")
+   techniqueNames <- c("MeTaPoint","MeTaBrush","MeTaPaint","BaseLine")
+   perTechnique <- aes(x=as.factor(TechniqueID),y=TimeInS,fill=as.factor(TechniqueID))
+
+   chart <- ggplot(summaryTimeDataSubset, perTechnique) +
+     geom_boxplot() +
+     labs(x = "Technique", y = "Time in s") +
+     scale_fill_discrete(name="Technique",breaks=techniqueBreaks,labels=techniqueNames) +
+     scale_x_discrete(name="",breaks=techniqueBreaks,labels=techniqueNames) +
+     facet_grid(DatasetID~.)
+
+   print(chart)
+ }
>
>
> logTimeDistribution <- function(summaryTimeDataSubset){
+   # Histogram of LogTime with one facet column per TechniqueID, printed to the
+   # active graphics device.
+   # Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
+   g <- ggplot(summaryTimeDataSubset, aes(x = LogTime)) +
+     geom_histogram() +
+     facet_grid(. ~ TechniqueID)
+   print(g)
+ }
>
> F1Distribution <- function(errorDataSubset){
+   # Histogram of F1 with one facet column per TechniqueID, printed to the
+   # active graphics device.
+   # Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
+   g <- ggplot(errorDataSubset, aes(x = F1)) +
+     geom_histogram() +
+     facet_grid(. ~ TechniqueID)
+   print(g)
+ }
>
> MCCDistribution <- function(errorDataSubset){
+   # Histogram of MCC with one facet column per TechniqueID, printed to the
+   # active graphics device.
+   # Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
+   g <- ggplot(errorDataSubset, aes(x = MCC)) +
+     geom_histogram() +
+     facet_grid(. ~ TechniqueID)
+   print(g)
+ }
>
>
> ##############take just a subset of repetitions
> #full data for 0123dataset, repetitions 2 and 3
>
> cat("****************************************************\n")
****************************************************
> cat("Preparing time data for 0123 datasets\n")
Preparing time data for 0123 datasets
> # RepetitionID was converted to a factor above; as.numeric() on a factor returns its
> # level codes (always >= 1), so go through as.character() to compare the recorded ids.
> summaryTimeDataSubset <- summaryTimeData[ which(as.numeric(as.character(summaryTimeData$RepetitionID)) > 0 & as.numeric(summaryTimeData$DatasetID) != 4), ]
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_0123Datasets_rep23")
The mean task completion time for technique 0 is 16s, 95% CI [14, 19].
The mean task completion time for technique 1 is 36s, 95% CI [32, 41].
The mean task completion time for technique 2 is 15s, 95% CI [13, 17].
The mean task completion time for technique 3 is 45s, 95% CI [37, 54].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 16.47652 36.27165 14.76685 44.84460
lowerBound_CI 14.20704 31.78323 12.57151 37.16813
upperBound_CI 19.10854 41.39393 17.34555 54.10652
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 1.115778 2.201415 2.456289 1.236354
lowerBound_CI 0.948664 1.985654 2.131199 1.060207
upperBound_CI 1.312329 2.440620 2.830968 1.441767
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning message:
Removed 4 rows containing missing values (`geom_point()`).
> cat("Preparing error data for 0123 datasets\n")
Preparing error data for 0123 datasets
> # Build the whole mask from fullErrorData, the table being subset (the dataset
> # condition previously read summaryTimeData$DatasetID).
> errorDataSubset <- fullErrorData[ which(as.numeric(fullErrorData$RepetitionID) > 0 & as.numeric(fullErrorData$DatasetID) != 4), ]
> createErrorStats(errorDataSubset,"resultFiles/log/error_0123Datasets_rep23_")
The mean F1 error rate for technique 0 is 0.97, 95% CI [0.97, 0.98],
The mean F1 error rate for technique 1 is 0.97, 95% CI [0.96, 0.97],
The mean F1 error rate for technique 2 is 0.98, 95% CI [0.97, 0.98],
The mean F1 error rate for technique 3 is 0.93, 95% CI [0.87, 0.95],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.9746273 0.9677603 0.9777824 0.9332369
lowerBound_CI 0.9662269 0.9603070 0.9682937 0.8731106
upperBound_CI 0.9802500 0.9731983 0.9835586 0.9533580
The mean MCC error rate for technique 0 is 0.96, 95% CI [0.94, 0.97],
The mean MCC error rate for technique 1 is 0.94, 95% CI [0.93, 0.95],
The mean MCC error rate for technique 2 is 0.96, 95% CI [0.95, 0.97],
The mean MCC error rate for technique 3 is 0.9, 95% CI [0.84, 0.92],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.9563775 0.9447154 0.9648347 0.8975993
lowerBound_CI 0.9412768 0.9318410 0.9520117 0.8395053
upperBound_CI 0.9661155 0.9536499 0.9733197 0.9212028
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
>
> cat("****************************************************\n")
****************************************************
> #dataset0 only, rep 2 and 3
> cat("Preparing time data for dataset 0\n")
Preparing time data for dataset 0
> # RepetitionID is a factor here; convert via as.character() so the recorded ids are
> # compared instead of the factor's level codes (which are always >= 1).
> summaryTimeDataSubset <- subset(summaryTimeData, as.numeric(as.character(RepetitionID)) > 0 & DatasetID == "0")
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_Dataset0_rep23")
The mean task completion time for technique 0 is 11s, 95% CI [9.6, 13].
The mean task completion time for technique 1 is 43s, 95% CI [36, 50].
The mean task completion time for technique 2 is 10s, 95% CI [8.9, 12].
The mean task completion time for technique 3 is 38s, 95% CI [30, 47].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 11.234040 42.51230 10.319820 37.73508
lowerBound_CI 9.607241 35.93389 8.942534 30.36139
upperBound_CI 13.136306 50.29501 11.909229 46.89956
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 1.0885887 3.784239 4.119480 0.8876273
lowerBound_CI 0.9189657 3.273564 3.402231 0.7156512
upperBound_CI 1.2895209 4.374580 4.987939 1.1009305
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning messages:
1: Removed 2 rows containing missing values (`geom_point()`).
2: Removed 3 rows containing missing values (`geom_point()`).
>
> cat("Preparing error data for dataset 0\n")
Preparing error data for dataset 0
> errorDataSubset <- subset(fullErrorData, as.numeric(fullErrorData$RepetitionID) >0 & DatasetID == "0")
> createErrorStats(errorDataSubset,"resultFiles/log/error_Dataset0_rep23_")
The mean F1 error rate for technique 0 is 0.96, 95% CI [0.94, 0.97],
The mean F1 error rate for technique 1 is 0.93, 95% CI [0.9, 0.94],
The mean F1 error rate for technique 2 is 0.97, 95% CI [0.93, 0.98],
The mean F1 error rate for technique 3 is 0.88, 95% CI [0.84, 0.89],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.9590493 0.9270611 0.9660168 0.8800038
lowerBound_CI 0.9418043 0.9027299 0.9349776 0.8407339
upperBound_CI 0.9696680 0.9439996 0.9765545 0.8938426
The mean MCC error rate for technique 0 is 0.94, 95% CI [0.91, 0.95],
The mean MCC error rate for technique 1 is 0.89, 95% CI [0.85, 0.92],
The mean MCC error rate for technique 2 is 0.95, 95% CI [0.92, 0.97],
The mean MCC error rate for technique 3 is 0.82, 95% CI [0.78, 0.84],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.9389912 0.8890753 0.9530727 0.8229392
lowerBound_CI 0.9137707 0.8516795 0.9229414 0.7808991
upperBound_CI 0.9545730 0.9150251 0.9655429 0.8390761
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
>
> cat("****************************************************\n")
****************************************************
> #dataset1 only, rep 2 and 3
> cat("Preparing time data for dataset 1\n")
Preparing time data for dataset 1
> # RepetitionID is a factor here; convert via as.character() so the recorded ids are
> # compared instead of the factor's level codes (which are always >= 1).
> summaryTimeDataSubset <- subset(summaryTimeData, as.numeric(as.character(RepetitionID)) > 0 & DatasetID == "1")
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_Dataset1_rep23")
The mean task completion time for technique 0 is 38s, 95% CI [33, 45].
The mean task completion time for technique 1 is 17s, 95% CI [14, 21].
The mean task completion time for technique 2 is 40s, 95% CI [32, 51].
The mean task completion time for technique 3 is 27s, 95% CI [22, 34].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 38.42534 17.48602 40.30461 27.07517
lowerBound_CI 32.50994 14.27802 32.12587 21.60071
upperBound_CI 45.41709 21.41481 50.56551 33.93706
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 0.9533734 0.4550649 0.4338468 1.548389
lowerBound_CI 0.7630997 0.3766999 0.3499448 1.250651
upperBound_CI 1.1910906 0.5497321 0.5378649 1.917009
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning messages:
1: Removed 2 rows containing missing values (`geom_point()`).
2: Removed 3 rows containing missing values (`geom_point()`).
> cat("Preparing error data for dataset 1\n")
Preparing error data for dataset 1
> errorDataSubset <- subset(fullErrorData, as.numeric(fullErrorData$RepetitionID) > 0 & DatasetID == "1")
> createErrorStats(errorDataSubset,"resultFiles/log/error_Dataset1_rep23_")
The mean F1 error rate for technique 0 is 0.97, 95% CI [0.96, 0.98],
The mean F1 error rate for technique 1 is 0.98, 95% CI [0.96, 0.98],
The mean F1 error rate for technique 2 is 0.96, 95% CI [0.94, 0.97],
The mean F1 error rate for technique 3 is 0.96, 95% CI [0.93, 0.98],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.9707562 0.9775593 0.9628212 0.9621260
lowerBound_CI 0.9620775 0.9620477 0.9419986 0.9327366
upperBound_CI 0.9773438 0.9823700 0.9733258 0.9752487
The mean MCC error rate for technique 0 is 0.96, 95% CI [0.94, 0.97],
The mean MCC error rate for technique 1 is 0.97, 95% CI [0.95, 0.97],
The mean MCC error rate for technique 2 is 0.95, 95% CI [0.92, 0.96],
The mean MCC error rate for technique 3 is 0.95, 95% CI [0.91, 0.96],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.9561037 0.9667628 0.9473610 0.9469257
lowerBound_CI 0.9435234 0.9492606 0.9235591 0.9123719
upperBound_CI 0.9657208 0.9729559 0.9609445 0.9627480
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
>
> cat("****************************************************\n")
****************************************************
> #dataset2 only rep 2 and 3
> cat("Preparing time data for dataset 2\n")
Preparing time data for dataset 2
> # RepetitionID is a factor here; convert via as.character() so the recorded ids are
> # compared instead of the factor's level codes (which are always >= 1).
> summaryTimeDataSubset <- subset(summaryTimeData, as.numeric(as.character(RepetitionID)) > 0 & DatasetID == "2")
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_Dataset2_rep23")
The mean task completion time for technique 0 is 12s, 95% CI [9.6, 15].
The mean task completion time for technique 1 is 58s, 95% CI [50, 68].
The mean task completion time for technique 2 is 8.8s, 95% CI [6.8, 11].
The mean task completion time for technique 3 is 62s, 95% CI [51, 76].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 11.921083 58.10646 8.819390 62.42886
lowerBound_CI 9.596287 49.95824 6.830928 51.01266
upperBound_CI 14.809082 67.58366 11.386688 76.39990
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 1.351690 4.874260 6.588489 1.0743876
lowerBound_CI 1.027441 3.976319 5.166861 0.8856797
upperBound_CI 1.778269 5.974976 8.401269 1.3033026
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning messages:
1: Removed 1 rows containing missing values (`position_stack()`).
2: Removed 1 rows containing missing values (`geom_point()`).
3: Removed 1 rows containing missing values (`position_stack()`).
4: Removed 1 rows containing missing values (`geom_point()`).
5: Removed 2 rows containing missing values (`geom_point()`).
6: Removed 4 rows containing missing values (`geom_point()`).
> cat("Preparing error data for dataset 2\n")
Preparing error data for dataset 2
> errorDataSubset <- subset(fullErrorData, as.numeric(fullErrorData$RepetitionID) > 0 & DatasetID == "2")
> createErrorStats(errorDataSubset,"resultFiles/log/error_Dataset2_rep23_")
The mean F1 error rate for technique 0 is 0.98, 95% CI [0.96, 0.99],
The mean F1 error rate for technique 1 is 0.98, 95% CI [0.97, 0.98],
The mean F1 error rate for technique 2 is 0.99, 95% CI [0.98, 1],
The mean F1 error rate for technique 3 is 0.93, 95% CI [0.83, 0.96],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.9777804 0.9753291 0.9924353 0.9267291
lowerBound_CI 0.9648629 0.9670917 0.9790958 0.8279360
upperBound_CI 0.9862533 0.9797882 0.9970057 0.9602651
The mean MCC error rate for technique 0 is 0.95, 95% CI [0.93, 0.97],
The mean MCC error rate for technique 1 is 0.95, 95% CI [0.93, 0.96],
The mean MCC error rate for technique 2 is 0.99, 95% CI [0.96, 0.99],
The mean MCC error rate for technique 3 is 0.88, 95% CI [0.8, 0.92],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.9549114 0.9463741 0.9850742 0.8839042
lowerBound_CI 0.9288535 0.9296403 0.9623657 0.7965292
upperBound_CI 0.9716384 0.9552447 0.9936823 0.9201530
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
>
> cat("****************************************************\n")
****************************************************
> # dataset 3, repetitions 2 and 3 only
> cat("Preparing time data for dataset 3\n")
Preparing time data for dataset 3
> summaryTimeDataSubset <- subset(summaryTimeData, as.numeric(RepetitionID) > 0 & DatasetID == "3")
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_Dataset3_rep23")
The mean task completion time for technique 0 is 14s, 95% CI [12, 17].
The mean task completion time for technique 1 is 40s, 95% CI [35, 46].
The mean task completion time for technique 2 is 13s, 95% CI [10, 16].
The mean task completion time for technique 3 is 63s, 95% CI [52, 78].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 14.32164 40.07185 12.96242 63.40725
lowerBound_CI 11.83146 34.86478 10.41062 51.55290
upperBound_CI 17.33592 46.05659 16.13971 77.98744
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 1.104858 2.797993 3.091386 1.582339
lowerBound_CI 0.863637 2.371570 2.442836 1.321666
upperBound_CI 1.413455 3.301091 3.912120 1.894425
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning messages:
1: Removed 1 rows containing missing values (`position_stack()`).
2: Removed 1 rows containing missing values (`geom_point()`).
3: Removed 1 rows containing missing values (`position_stack()`).
4: Removed 1 rows containing missing values (`geom_point()`).
5: Removed 1 rows containing missing values (`geom_point()`).
6: Removed 4 rows containing missing values (`geom_point()`).
> cat("Preparing error data for dataset 3\n")
Preparing error data for dataset 3
> errorDataSubset <- subset(fullErrorData, as.numeric(fullErrorData$RepetitionID) > 0 & DatasetID == "3")
> createErrorStats(errorDataSubset,"resultFiles/log/error_Dataset3_rep23_")
The mean F1 error rate for technique 0 is 0.99, 95% CI [0.99, 0.99],
The mean F1 error rate for technique 1 is 0.99, 95% CI [0.98, 0.99],
The mean F1 error rate for technique 2 is 0.99, 95% CI [0.98, 0.99],
The mean F1 error rate for technique 3 is 0.96, 95% CI [0.88, 0.99],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.9909232 0.9910919 0.9898562 0.9640887
lowerBound_CI 0.9851747 0.9834049 0.9796517 0.8816865
upperBound_CI 0.9943344 0.9949417 0.9941096 0.9858528
The mean MCC error rate for technique 0 is 0.98, 95% CI [0.96, 0.98],
The mean MCC error rate for technique 1 is 0.98, 95% CI [0.96, 0.99],
The mean MCC error rate for technique 2 is 0.97, 95% CI [0.95, 0.98],
The mean MCC error rate for technique 3 is 0.94, 95% CI [0.86, 0.97],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.9755036 0.9766492 0.9738307 0.9366281
lowerBound_CI 0.9605727 0.9590015 0.9529581 0.8606405
upperBound_CI 0.9845124 0.9861798 0.9843749 0.9656255
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
>
> cat("****************************************************\n")
****************************************************
> # dataset 4, repetitions 2 and 3 only
> cat("Preparing time data for dataset 4\n")
Preparing time data for dataset 4
> summaryTimeDataSubset <- subset(summaryTimeData, as.numeric(RepetitionID) > 0 & DatasetID == "4")
> createTimeStats(summaryTimeDataSubset,"resultFiles/log/time_Dataset4_rep23")
The mean task completion time for technique 0 is 44s, 95% CI [35, 54].
The mean task completion time for technique 1 is 33s, 95% CI [29, 39].
The mean task completion time for technique 2 is 39s, 95% CI [33, 47].
The mean task completion time for technique 3 is 30s, 95% CI [24, 37].
Time Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_time 43.61194 33.17069 39.46743 29.65762
lowerBound_CI 35.14155 28.52374 32.99601 23.99341
upperBound_CI 54.12400 38.57469 47.20807 36.65901
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Calculating differences
Time Table Differences
MeTaPoint/MeTaPaint MeTaBrush/MeTaPoint MeTaBrush/MeTaPaint Baseline/MeTaBrush
mean_time 1.1050109 0.7605874 0.8404573 0.8940912
lowerBound_CI 0.8705881 0.6071222 0.7030542 0.6914476
upperBound_CI 1.4025566 0.9528446 1.0047142 1.1561239
[1] "Creating difference time table"
[1] "Creating difference time table"
Warning message:
Removed 1 rows containing missing values (`geom_point()`).
> cat("Preparing error data for dataset 4\n")
Preparing error data for dataset 4
> errorDataSubset <- subset(fullErrorData, as.numeric(fullErrorData$RepetitionID) > 0 & DatasetID == "4")
> createErrorStats(errorDataSubset,"resultFiles/log/error_Dataset4_rep23_")
The mean F1 error rate for technique 0 is 0.69, 95% CI [0.66, 0.72],
The mean F1 error rate for technique 1 is 0.88, 95% CI [0.86, 0.9],
The mean F1 error rate for technique 2 is 0.66, 95% CI [0.59, 0.7],
The mean F1 error rate for technique 3 is 0.69, 95% CI [0.64, 0.72],
F1 Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_F1 0.6927067 0.8795200 0.6626327 0.6929146
lowerBound_CI 0.6585458 0.8563541 0.5935287 0.6424356
upperBound_CI 0.7238649 0.9008686 0.6993268 0.7238361
The mean MCC error rate for technique 0 is 0.69, 95% CI [0.66, 0.72],
The mean MCC error rate for technique 1 is 0.88, 95% CI [0.86, 0.9],
The mean MCC error rate for technique 2 is 0.67, 95% CI [0.6, 0.7],
The mean MCC error rate for technique 3 is 0.7, 95% CI [0.66, 0.73],
-------------------------------------
MCC Table
MeTaPoint MeTaBrush MeTaPaint BaseLine
mean_MCC 0.6917300 0.8817258 0.6684004 0.7048650
lowerBound_CI 0.6592496 0.8598267 0.6035547 0.6638349
upperBound_CI 0.7228861 0.9024512 0.7037986 0.7314277
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
RStudioGD
2
Warning message:
Removed 1 rows containing non-finite values (`stat_bin()`).
>
> # a = subset(summaryTimeData, as.numeric(RepetitionID) > 0 & DatasetID == "0" & TechniqueID=="0")
> # ggdensity(a$Time,
> # main = "Density plot of task completion time",
> # xlab = "Time")
> #
> # ggqqplot(a$Time)
>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment