Index: /reasoner/measures/script.r
===================================================================
--- /reasoner/measures/script.r	(revision 241)
+++ /reasoner/measures/script.r	(revision 242)
@@ -3,4 +3,5 @@
 require(data.table)
 require(stringr)
+require(dbplyr)
 
 #setup
@@ -13,7 +14,11 @@
 #names of files produced during measurement
 filenames <- c('measurements-sse.tsv', 'measurements-rtvil.tsv', 'measurements-scenarios.tsv', 'measurements-scenarioVariants.tsv', 'measurements-scaleLog.tsv')
+#tags in data to be skipped
 skipTags <- c("CompoundInit")
-#folder <- 'benchmark-results-old-1.3.0-20180826/auto/1'
-#folder <- 'benchmark-results-old-1.1.0-20180818/auto/1'
+#tag replacement (happens before skipping)
+replaceTags <- data.frame(
+  c("REASONING 1", "REASONING 2", "SCENARIO-INC",           "SCENARIO-INST",       "IREASONING"), 
+  c("REASONING",   "REASONING",   "SCENARIO (incremental)", "SCENARIO (runtime)",  "REASONING (incremental)"))
+names(replaceTags) <- c("orig", "subst")
 
 #http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
@@ -48,4 +53,20 @@
 }
 
+#trims a given string x, i.e., removes leading and trailing whitespace (regex \s) via gsub
+my.trim <- function (x) gsub("^\\s+|\\s+$", "", x)
+
+# returns the replacement (column subst of replaceTags) for the trimmed tag if exactly
+# one row of replaceTags has it as orig; otherwise returns the trimmed tag itself.
+# uses base R only, so it does not rely on filter/select verbs being attached.
+my.actualTag = function(tag) {
+  tmp <- my.trim(tag)
+  # which() drops NA comparisons, so an NA tag never matches and is returned as is
+  hits <- which(replaceTags[["orig"]] == tmp)
+  if (length(hits) == 1) {
+    return (as.character(replaceTags[["subst"]][hits]))
+  }
+  return (tmp)
+}
+
 #reads a file produced by the MeasurementCollector of EASy-Producer
 # in given folder
@@ -61,5 +82,5 @@
 my.readFile = function(folder, filename, run=0, skip=skipRuns, statCols=numCols, skipT=skipTags) {
   if (run > 0)  
-    tmp.folder <- paste(folder, 'auto', run, sep='/')
+    tmp.folder <- paste(folder, run, sep='/')
   else
     tmp.folder <- folder
@@ -72,36 +93,38 @@
   tmp.resTable <- tmp.resTable[,URI:=factor(unlist(lapply(URI, my.shortenURI)))]
   tmp.resTable <- tmp.resTable[,model.name:=factor(model.name)]
-  tmp.resTable <- tmp.resTable[,tag:=factor(tag)]
+  tmp.resTable <- tmp.resTable[,tag:=factor(unlist(lapply(tag, my.actualTag)))]
   tmp.resTable <- tmp.resTable[,caller:=factor(caller)]
   if (length(skipT) > 0) {
     tmp.resTable <- tmp.resTable[!(tag %in% skipTags)]
   }
-  tmp.resTable <- tmp.resTable[,as.list(unlist(lapply(.SD, my.summary))), list(URI, tag, model.name, MODEL_COMPLEXITY), .SDcols=statCols]
   return (tmp.resTable)
 }
 
-my.readData = function(folder, repetitions=fullRepetitions) {
+#read all data for an experiment run.
+# folder the folder to read from
+# repetitions number of repetitions, 1 -> read files directly in folder, >1 -> read files from sub-folders named 1...n
+#  default -> global fullRepetitions
+#obtains the data, applies the (aggregated) statistics, returns one full table over all experiments
+my.readData = function(folder, repetitions=fullRepetitions, skip=skipRuns, statCols=numCols, skipT=skipTags) {
   tmp.groupedTables <- list()
   tmp.count <- 0;
   for (f in 1:length(filenames)) {
-    #if (repetitions == 1) {
-    #  
-    #} else {
-    #  
-    #}
     tmp.resTables <- list()
-    for (run in 1:fullRepetitions){
-      #read the data
-      tmp.resTable <- my.readFile(folder, filenames[f], run) 
-      #and "append" to list
-      tmp.resTables[[run]] <- tmp.resTable
-    }  
-    #stack all tables for one-step analysis
-    tmp.table<-rbindlist(tmp.resTables)
-    #group stacked table by URI, tag and modelName, turn all cols in numCols/groups into table view .SD and apply summary function 
+    if (repetitions == 1) {
+        tmp.table <- my.readFile(folder, filenames[f], repetitions, skip, statCols, skipT) 
+    } else {
+      for (run in 1:fullRepetitions){
+          #read the data
+          tmp.resTable <- my.readFile(folder, filenames[f], run, skip, statCols, skipT) 
+          #and "append" to list
+          tmp.resTables[[run]] <- tmp.resTable
+      }  
+      #stack all tables for one-step analysis
+      tmp.table<-rbindlist(tmp.resTables)
+    }
+    #group table by URI, tag and modelName, turn all cols in numCols/groups into table view .SD and apply summary function 
     #unpack result of summary so that names are taken over in colum names
-
-    #match(numCols, names(data))    
-    tmp.table <- tmp.table[,as.list(unlist(lapply(.SD, my.summary))), list(URI, tag, model.name, MODEL_COMPLEXITY), .SDcols=numCols]
+    #see P. Tuma, "Do we teach the right statistics?": for some forms of statistics, aggregation shall be ok
+    tmp.table <- tmp.table[,as.list(unlist(lapply(.SD, my.summary))), list(URI, tag, model.name, MODEL_COMPLEXITY), .SDcols=statCols]
     tmp.groupedTables[[f]] <- tmp.table
   }
@@ -124,5 +147,5 @@
 # URI as label (for interactive use ggplotly(tooltip=c("label")) after plot(gg)) and tags as values.
 # default settings can be changed through data$... or column names as strings
-my.createDiagram = function(data, yCol, yColName, yBarCol, diagramTitle="", xCol="MODEL_COMPLEXITY", xColName="model complexity", labelCol="URI", colorCol="tag", colorLegendName="") {
+my.createErrorBarDiagram = function(data, yCol, yColName, yBarCol, diagramTitle="", xCol="MODEL_COMPLEXITY", xColName="model complexity", labelCol="URI", colorCol="tag", colorLegendName="") {
   size <- length(data)
   bar.width = size * 2 # unclear, with seems to be correlated to the size, stackoverflow.com/questions/19420903/wideth-of-error-bars-in-ggplot2
@@ -134,6 +157,7 @@
 }
 
-data <- my.readFile('W:/offlineFiles/EASy-concepts/reasoner/measures/new', 'measurements-sse.tsv')
-gg <- my.createDiagram(data, data$REASONER_REASONING_TIME.mean, "mean reasoning time", data$REASONER_REASONING_TIME.ci, diagramTitle="Reasoning time")
+#data <- my.readFile('W:/offlineFiles/EASy-concepts/reasoner/measures/new', 'measurements-sse.tsv')
+data <- my.readData('W:/offlineFiles/EASy-concepts/reasoner/measures/benchmark-results-new-1.3.0-20190215', 1)
+gg <- my.createErrorBarDiagram(data, data$REASONER_REASONING_TIME.mean, "mean reasoning time", data$REASONER_REASONING_TIME.ci, diagramTitle="Reasoning time")