Index: /reasoner/measures/script.r
===================================================================
--- /reasoner/measures/script.r	(revision 234)
+++ /reasoner/measures/script.r	(revision 234)
@@ -0,0 +1,160 @@
# Load plotting and data-handling libraries.
# library() stops with an error if a package is missing; require() only
# returns FALSE, which would let the script fail much later and less clearly.
library(ggplot2)
library(plotly)
library(data.table)
library(stringr)

# Setup ----
# how many initial in-experiment runs (out of 10) shall be skipped (warm-up)
skipRuns <- 3
# maximum number of full repetitions (entire experiments)
fullRepetitions <- 5
# numerical columns in data for which statistics shall be calculated
numCols <- c("RESPONSETIME_AUTO", "REASONER_EVALUATION_TIME", "REASONER_INSTANCE_CREATION_TIME", "REASONER_REASONING_TIME", "REASONER_TRANSLATION_TIME")
# names of files produced during measurement
filenames <- c('measurements-sse.tsv', 'measurements-rtvil.tsv', 'measurements-scenarios.tsv', 'measurements-scenarioVariants.tsv', 'measurements-scaleLog.tsv')
# measurement tags excluded from the analysis
skipTags <- c("CompoundInit")
#folder <- 'benchmark-results-old-1.3.0-20180826/auto/1'
#folder <- 'benchmark-results-old-1.1.0-20180818/auto/1'
+
+#http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
+
+# na-tolerant length function
# NA-tolerant length: with na.rm = TRUE, counts only the non-missing
# entries of x; otherwise behaves exactly like length(x).
my.lengthNA <- function (x, na.rm=FALSE) {
  if (na.rm) {
    return(sum(!is.na(x)))
  }
  length(x)
}
+
+# statistical summary over x, returns mean, median, standard deviation, standard error and confidence interval  
# Statistical summary over x: returns mean, median, standard deviation,
# standard error and confidence-interval half-width (ci).
# FIX: all statistics now use na.rm = TRUE, consistent with the NA-tolerant
# observation count n; previously n excluded NAs while mean/median/sd did
# not, so any NA in x poisoned the statistics yet se was scaled by the
# NA-excluding n.
my.summary = function(x, conf.interval=.95) {
  # number of non-NA observations (NA-tolerant count)
  n <- sum(!is.na(x))
  res <- list(mean = mean(x, na.rm = TRUE),
              median = median(x, na.rm = TRUE),
              sd = sd(x, na.rm = TRUE),
              se = 0, ci = 0)
  res$se <- res$sd / sqrt(n)

  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- ifelse(n > 1, qt(conf.interval/2 + .5, n-1), 0) # 0 does not matter -> res$se anyway NA (sd of <2 values is NA)
  res$ci <- res$se * ciMult
  # see also https://www.cyclismo.org/tutorial/R/confidence.html (Section 9.2)
  return(res)
}
+
+# shortens a given URI to the path segments including and after testdata (also including one path segment before testdata)
# Shortens a given URI to the path segments including and after "testdata"
# (also keeping one path segment before testdata); URIs without a testdata
# segment are returned unchanged.
# FIX: the original used str_match(...)[1,2] and was therefore only correct
# for a single URI despite the vectorized ifelse(); base sub() is fully
# vectorized and, on no match, returns the input unchanged — the same
# fallback the original ifelse implemented. This also drops the stringr
# dependency for this function.
my.shortenURI = function(uri) {
  # greedy ".*/" anchors the capture at the last possible match, i.e. the
  # segment immediately before "testdata"
  sub(".*/([^/]*/testdata/.*)", "\\1", uri)
}
+
+#reads a file produced by the MeasurementCollector of EASy-Producer
+# in given folder
+# using given filename (infix is constructed from run)
+# actual run count for complete experiment repetition
+# skipping warmup runs (default global skipRuns)
+# statistical colums (default global numCols)
+# tags to be skipped (default global skipTags)
+#shortens URIs using my.shortenURI
+#factors model.name, URI, tag, caller
+#applies my.summary for statistical summaries on numCols
+#projects to URI, tag, model.name, MODEL_COMPLEXITY and statistics of numCols
# Reads a file produced by the MeasurementCollector of EASy-Producer.
#   folder   - base folder of the measurements
#   filename - file name within the (run-specific) folder
#   run      - run number of a complete experiment repetition; if > 0 data
#              is read from <folder>/auto/<run>/<filename>, else from <folder>
#   skip     - number of initial warm-up runs to drop (default global skipRuns)
#   statCols - numerical columns to summarize (default global numCols)
#   skipT    - tags to be excluded (default global skipTags)
# Shortens URIs using my.shortenURI, factors model.name/URI/tag/caller and
# applies my.summary per (URI, tag, model.name, MODEL_COMPLEXITY) group,
# projecting onto the group columns plus the statistics of statCols.
my.readFile = function(folder, filename, run=0, skip=skipRuns, statCols=numCols, skipT=skipTags) {
  if (run > 0)  
    tmp.folder <- paste(folder, 'auto', run, sep='/')
  else
    tmp.folder <- folder
  # read the data, use header as column names, turn them into real
  # identifiers (check.names), avoid reading strings as factors;
  # dec="," because the collector writes decimal commas
  tmp.resTable <- as.data.table(read.table(file = paste(tmp.folder, filename, sep='/'), sep = '\t', header = TRUE, dec=",", check.names=TRUE, stringsAsFactors=FALSE)) 
  # preprocess, only runs later than the given warm-up count shall remain
  tmp.resTable <- tmp.resTable[runCount > skip]
  # preprocess, shorten URIs for interactive display;
  # we must not have factors on URI while shortening, but do need factors
  # on later operations as index
  tmp.resTable <- tmp.resTable[,URI:=factor(unlist(lapply(URI, my.shortenURI)))]
  tmp.resTable <- tmp.resTable[,model.name:=factor(model.name)]
  tmp.resTable <- tmp.resTable[,tag:=factor(tag)]
  tmp.resTable <- tmp.resTable[,caller:=factor(caller)]
  if (length(skipT) > 0) {
    # BUG FIX: filter by the skipT parameter; previously the global
    # skipTags was used here, silently ignoring the argument
    tmp.resTable <- tmp.resTable[!(tag %in% skipT)]
  }
  # group by URI/tag/model name/complexity, apply my.summary to each
  # statistics column; unlist so the summary names become column names
  tmp.resTable <- tmp.resTable[,as.list(unlist(lapply(.SD, my.summary))), list(URI, tag, model.name, MODEL_COMPLEXITY), .SDcols=statCols]
  return (tmp.resTable)
}
+
# Reads and aggregates all measurement files over all full experiment
# repetitions in the given folder (auto/<run> sub-folders).
#   folder      - base folder containing the auto/<run> sub-folders
#   repetitions - number of full repetitions to read
#                 (default: global fullRepetitions)
# BUG FIX: the repetitions parameter was accepted but ignored; the loop was
# hard-wired to the global fullRepetitions. Defaulting to fullRepetitions
# keeps the observed behavior of existing default calls while honoring an
# explicitly passed value.
my.readData = function(folder, repetitions=fullRepetitions) {
  tmp.groupedTables <- list()
  for (f in seq_along(filenames)) {
    tmp.resTables <- list()
    for (run in seq_len(repetitions)) {
      # read the data of this repetition and "append" to the list
      tmp.resTables[[run]] <- my.readFile(folder, filenames[f], run)
    }
    # stack all tables of one file for one-step analysis
    tmp.table <- rbindlist(tmp.resTables)
    # group the stacked table by URI, tag, model name and complexity, turn
    # all numCols into table view .SD and apply the summary function;
    # unpack the summary result so its names become column names
    tmp.table <- tmp.table[,as.list(unlist(lapply(.SD, my.summary))), list(URI, tag, model.name, MODEL_COMPLEXITY), .SDcols=numCols]
    tmp.groupedTables[[f]] <- tmp.table
  }
  # stack the per-file grouped tables into one result
  data <- rbindlist(tmp.groupedTables)
  return (data)
}
+
+#return col in data, i.e., either access it by name if col is a string or return col directly
+#(assuming that col is then something like data$col)
# Resolve col against data: a character col is treated as a column name and
# looked up in data; anything else (e.g. a vector passed as data$col) is
# returned as-is.
my.get = function(data, col) {
  if (is.character(col)) data[[col]] else col
}
+
+# draws a diagram for data using xCol (default MODEL_COMPLEXITY), y-axis and error-bar y values as given
+# URI as label (for interactive use ggplotly(tooltip=c("label")) after plot(gg)) and tags as values.
+# default settings can be changed through data$... or column names as strings
# Draws a scatter diagram with error bars for data.
#   yCol, yBarCol - y values and error-bar half-widths
#   xCol          - x values (default column "MODEL_COMPLEXITY")
#   labelCol      - hover-label column; for interactive use call
#                   ggplotly(tooltip=c("label")) after plot(gg)
#   colorCol      - column used for point colors (default "tag")
# All *Col settings accept either a column name string or a vector such as
# data$... (resolved via my.get).
my.createDiagram = function(data, yCol, yColName, yBarCol, diagramTitle="", xCol="MODEL_COMPLEXITY", xColName="model complexity", labelCol="URI", colorCol="tag", colorLegendName="") {
  size <- length(data)
  # error-bar width seems to be correlated with the data size, exact
  # relation unclear; see stackoverflow.com/questions/19420903
  bar.width <- size * 2
  y <- my.get(data, yCol)
  bar <- my.get(data, yBarCol)
  # BUG FIX: use the labelCol/colorCol parameters; previously the bare
  # symbols URI and tag were passed to my.get, ignoring the arguments
  gg <- ggplot(data, aes(x=my.get(data, xCol), y=y, label=my.get(data, labelCol), color=my.get(data, colorCol))) + 
    geom_errorbar(aes(ymin=y-bar, ymax=y+bar), width=bar.width) + 
    geom_point() +
    labs(x=xColName, y=yColName, title = diagramTitle, color=colorLegendName)
  return(gg)
}
+
# Example driver: read a single measurement file and build the reasoning-time
# diagram (display with plot(gg), or ggplotly(tooltip=c("label")) for
# interactive tooltips).
# NOTE(review): hard-coded local Windows path — adjust per machine; consider
# making this configurable.
data <- my.readFile('W:/offlineFiles/EASy-concepts/reasoner/measures/new', 'measurements-sse.tsv')
gg <- my.createDiagram(data, data$REASONER_REASONING_TIME.mean, "mean reasoning time", data$REASONER_REASONING_TIME.ci, diagramTitle="Reasoning time")
+
+
+#library(data.table)
+#t[, print(.SD), list(URI)]
+#t1 <- t[,sapply(.SD,my.summary), list(URI, tag), .SDcols=c("RESPONSETIME_AUTO")]
+#t1 <- t[,sapply(.SD,my.summary), list(URI, tag), .SDcols=c("RESPONSETIME_AUTO", "REASONER_EVALUATION_TIME", "REASONER_INSTANCE_CREATION_TIME", "REASONER_REASONING_TIME", "REASONER_TRANSLATION_TIME")]
+
+#data <- my.readData('W:/offlineFiles/EASy-concepts/reasoner/measures/benchmark-results-new-1.3.0-20180826')
+
+
+#sse <- read.table(file = paste(folder, 'measurements-sse.tsv', sep='/'), sep = '\t', header = TRUE, dec=",")
+#rtvil <- read.table(file = paste(folder, 'measurements-rtvil.tsv', sep='/'), sep = '\t', header = TRUE, dec=",")
+#scenarios <- read.table(file = paste(folder, 'measurements-scenarios.tsv', sep='/'), sep = '\t', header = TRUE, dec=",")
+#scenarioVars <- read.table(file = paste(folder, 'measurements-scenarioVariants.tsv', sep='/'), sep = '\t', header = TRUE, dec=",")
+#scaleLog <- read.table(file = paste(folder, 'measurements-scaleLog.tsv', sep='/'), sep = '\t', header = TRUE, dec=",")
+
+#data <- rbind(sse, rtvil, scenarios, scenarioVars, scaleLog)
+#reasoning <- data[data$tag == "REASONING", ]
+
+#gg<-ggplot(data, aes(x=MODEL_CONSTRAINT_COMPLEXITY,y=REASONER_REASONING_TIME,label=model.name,color=tag))+geom_point()
+#gg<-ggplot(data, aes(x=MODEL_CONSTRAINT_COMPLEXITY+MODEL_VARIABLES,y=REASONER_REASONING_TIME,label=caller,color=tag))+geom_point()
+#gg<-ggplot(data, aes(x=MODEL_COMPLEXITY,y=REASONER_REASONING_TIME,label=model.name,color=tag))+geom_point()
+#plot(gg)
+
+#gg<-ggplot(data, aes(x=MODEL_COMPLEXITY,y=REASONER_REASONING_TIME.mean,label=URI,color=tag)) + 
+#  geom_errorbar(aes(ymin=REASONER_REASONING_TIME.mean-REASONER_REASONING_TIME.ci, ymax=REASONER_REASONING_TIME.mean+REASONER_REASONING_TIME.ci), size=1, width=50) + 
+#  geom_point()
+#ggplotly(tooltip=c("label"))
