---
title: "SOSM Experiments"
output: html_notebook
---
# Load Dependencies
```{r}
library(dplyr)      # provides %>% and the namespaced group_by/summarise used in the AUC calculations
library(plyr)       # loaded after dplyr so that ddply(..., summarise, ...) keeps plyr's summarise
library(ggplot2)
library(effsize)
library(ggpubr)
library(xtable)
library(ggrepel)
library(DescTools)
library(caret)
library(reshape2)
```
# Inference experiment
Read the inference results from the `*inference.csv` files.
```{r}
inferenceData <- NULL
for(i in list.files(pattern="*inference.csv")){
  if(file.size(i) == 0)
    next
  x <- read.csv(i)
  print(i)
  colnames(x) <- c("label","algo","seed","tail","data","strategy","Confidence","belief","uncertainty","probability","predictedAccept","accept","states","transitions")
  # Treat UNDEFINED predictions as rejections.
  if(nrow(x[x$predictedAccept=="UNDEFINED",])>0)
    x[x$predictedAccept=="UNDEFINED",]$predictedAccept <- "REJECT"
  # Record whether the model's prediction matches the ground truth.
  x$CorrectPrediction <- rep(FALSE,nrow(x))
  x[x$accept==x$predictedAccept,]$CorrectPrediction <- TRUE
  x$CorrectPrediction <- as.factor(x$CorrectPrediction)
  if(is.null(inferenceData)){
    inferenceData <- x
  } else {
    inferenceData <- rbind(inferenceData,x)
  }
}
# Restrict the analysis to models inferred with the J48 learner and strip the
# trailing "_inference" suffix (10 characters) from the labels.
inferenceData <- inferenceData[inferenceData$algo=="J48",]
inferenceData$label <- substring(inferenceData$label,0,nchar(inferenceData$label)-10)
```
# For the appendix - establishing the effect of the alpha parameter
We establish which confidence level leads to the greatest effect size.
```{r}
summarisedAccept <- ddply(inferenceData[inferenceData$predictedAccept=="ACCEPT",],c("label", "Confidence"),summarise,
                          Uncertainty=round(VD.A(uncertainty ~ CorrectPrediction)$estimate,2),Belief=round(VD.A(belief ~ CorrectPrediction)$estimate,2),
                          tp=sum(predictedAccept=="ACCEPT" & accept=="ACCEPT"),tn=sum(predictedAccept!="ACCEPT" & accept!="ACCEPT"),
                          fp=sum(predictedAccept=="ACCEPT" & accept!="ACCEPT"),fn=sum(predictedAccept!="ACCEPT" & accept=="ACCEPT"),
                          precision=tp/(tp+fp), recall=tp/(tp+fn), specificity=tn/(tn+fp),
                          F=(2*precision*recall)/(precision+recall), BCR=(2*recall*specificity)/(recall+specificity))
summarisedReject <- ddply(inferenceData[inferenceData$predictedAccept=="REJECT",],c("label", "Confidence"),summarise,
                          Uncertainty=round(VD.A(uncertainty ~ CorrectPrediction)$estimate,2),Belief=round(VD.A(belief ~ CorrectPrediction)$estimate,2),
                          tp=sum(predictedAccept=="ACCEPT" & accept=="ACCEPT"),tn=sum(predictedAccept!="ACCEPT" & accept!="ACCEPT"),
                          fp=sum(predictedAccept=="ACCEPT" & accept!="ACCEPT"),fn=sum(predictedAccept!="ACCEPT" & accept=="ACCEPT"),
                          precision=tp/(tp+fp), recall=tp/(tp+fn), specificity=tn/(tn+fp),
                          F=(2*precision*recall)/(precision+recall), BCR=(2*recall*specificity)/(recall+specificity))
summarisedAccept <- melt(summarisedAccept,id.vars=c("label","Confidence"), measure.vars=c("Belief","Uncertainty"))
summarisedAccept$AcceptReject <- rep("Accept",nrow(summarisedAccept))
summarisedReject <- melt(summarisedReject,id.vars=c("label","Confidence"), measure.vars=c("Belief","Uncertainty"))
summarisedReject$AcceptReject <- rep("Reject",nrow(summarisedReject))
summarised <- rbind(summarisedAccept,summarisedReject)
confidencePlot <- ggplot(summarised,aes(x=factor(Confidence),y=value, fill = AcceptReject)) + geom_boxplot() + ylab("Vargha Delaney A12") + xlab("Confidence")+ ylim(c(0,1)) + facet_wrap(~variable, ncol=1)
ggsave(confidencePlot,filename="../figures/confidences.png",width=6,height=7,units="in")
```
Sequences that are correctly accepted by an inferred model yield a lower level of uncertainty (i.e. a higher level of certainty) than sequences that are incorrectly accepted. However, for rejected sequences the median A score for uncertainty is around 0.5, which means that correctly rejected sequences cannot be usefully discriminated from incorrectly rejected ones on this basis.
Incorrectly rejected sequences can nonetheless be differentiated from correctly rejected ones in terms of their belief value: the effect size for belief shows that correctly rejected sequences tend to yield a significantly higher belief value than incorrectly rejected ones. Similarly, correctly accepted sequences tend to yield a significantly lower belief value than incorrectly accepted ones.
This can be explained by the fact that the underlying model has over-generalised.
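As a reminder of how these effect sizes read (this is the standard formulation of the statistic, not something specific to our scripts): the Vargha-Delaney $\hat{A}_{12}$ estimates the probability that a value drawn from one group exceeds a value drawn from the other, so a value of 0.5 indicates that the two groups (here, correct versus incorrect predictions) cannot be discriminated at all. For group sizes $m$ and $n$, with $R_1$ the rank sum of the first group:

$$\hat{A}_{12} = \frac{1}{mn}\left(R_1 - \frac{m(m+1)}{2}\right)$$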
We now carry out pair-wise Wilcoxon tests (with Bonferroni correction) to establish whether there is a significant difference between the effect sizes obtained for different alpha (confidence) values.
```{r}
pairwise.wilcox.test(x=summarised[summarised$AcceptReject=="Accept"&summarised$variable=="Belief",]$value, g=as.factor(summarised[summarised$AcceptReject=="Accept"&summarised$variable=="Belief",]$Confidence),p.adj="bonf")
pairwise.wilcox.test(x=summarised[summarised$AcceptReject=="Accept"&summarised$variable=="Uncertainty",]$value, g=as.factor(summarised[summarised$AcceptReject=="Accept"&summarised$variable=="Uncertainty",]$Confidence),p.adj="bonf")
pairwise.wilcox.test(x=summarised[summarised$AcceptReject=="Reject"&summarised$variable=="Belief",]$value, g=as.factor(summarised[summarised$AcceptReject=="Reject"&summarised$variable=="Belief",]$Confidence),p.adj="bonf")
pairwise.wilcox.test(x=summarised[summarised$AcceptReject=="Reject"&summarised$variable=="Uncertainty",]$value, g=as.factor(summarised[summarised$AcceptReject=="Reject"&summarised$variable=="Uncertainty",]$Confidence),p.adj="bonf")
```
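For reference, the Bonferroni adjustment requested via `p.adj="bonf"` simply multiplies each raw p-value by the number of pairwise comparisons $m$, capping the result at 1:

$$p_{\text{adj}} = \min(1,\; m \cdot p)$$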
# RQ1.1 - the A12 effect sizes for Belief and Uncertainty
We now plot the A12 effect size for Belief and Uncertainty for each subject system.
```{r}
summarisedAccept <- ddply(inferenceData[inferenceData$Confidence==2000&inferenceData$predictedAccept=="ACCEPT",],c("label"),summarise,
                          Uncertainty=round(VD.A(uncertainty ~ CorrectPrediction)$estimate,2),Belief=round(VD.A(belief ~ CorrectPrediction)$estimate,2))
summarisedReject <- ddply(inferenceData[inferenceData$Confidence==2000&inferenceData$predictedAccept=="REJECT",],c("label"),summarise,
                          Uncertainty=round(VD.A(uncertainty ~ CorrectPrediction)$estimate,2),Belief=round(VD.A(belief ~ CorrectPrediction)$estimate,2))
summarisedAccept$id <- 1:nrow(summarisedAccept)
summarisedReject$id <- 1:nrow(summarisedReject)
# Melt to long format, recording whether each row comes from the accepted or rejected subset.
summarisedAcceptLong <- melt(summarisedAccept,id.vars=c("label","id"))
summarisedAcceptLong$AcceptReject <- rep("Accept",nrow(summarisedAcceptLong))
summarisedRejectLong <- melt(summarisedReject,id.vars=c("label","id"))
summarisedRejectLong$AcceptReject <- rep("Reject",nrow(summarisedRejectLong))
summarised_bound <- rbind(summarisedAcceptLong,summarisedRejectLong)
# The dashed lines at +/-0.21 mark the conventional threshold for a "large" A12 effect.
effectPlot <- ggplot(summarised_bound,aes(x=label,y=value-0.5,fill=AcceptReject)) +
  geom_bar(stat="identity",position="dodge") + facet_wrap(~variable,ncol=1) +
  scale_y_continuous(breaks=seq(-0.5,0.5, by=0.25)) + ylab("Vargha Delaney A12 - 0.5") +
  theme(axis.text.x = element_text(angle=90, hjust=1,size=8), legend.position="top") +
  geom_hline(yintercept=0.21, linetype="dashed") + geom_hline(yintercept=-0.21, linetype="dashed") +
  scale_fill_discrete(name = "Accept or Reject") + xlab("Model")
ggsave(effectPlot,filename="../figures/effectPlot.png",width=6,height=6,units="in")
```
# RQ1.2 - assessing the impact of the learner on correcting predictions made by the inferred model
```{r}
results <- NULL
for(label in unique(inferenceData$label)){
  # 10-fold cross-validation, retaining the held-out predictions.
  cvCtrl <- trainControl(method = "cv", number = 10, savePredictions=TRUE)
  print(label)
  currentData <- inferenceData[inferenceData$Confidence==2000&inferenceData$label==label,]
  if(nrow(currentData[currentData$predictedAccept=="UNDEFINED",])>0){
    currentData[currentData$predictedAccept=="UNDEFINED",]$predictedAccept <- "REJECT"
  }
  currentData$predictedAccept <- as.factor(currentData$predictedAccept)
  currentData$accept <- as.factor(currentData$accept)
  # Train a random forest to predict the true accept/reject outcome from the
  # belief and uncertainty values together with the inferred model's own prediction.
  fit <- train(accept ~ uncertainty + belief + predictedAccept,
               data=currentData,
               na.action=na.omit,
               method='rf',
               trControl = cvCtrl)
  # Confusion-matrix metrics before (pre) and after (post) the correcting classifier.
  preML <- confusionMatrix(data=currentData$predictedAccept, reference=currentData$accept)
  postML <- confusionMatrix(data=fit$pred$pred, reference=fit$pred$obs)
  newResult <- data.frame(name=label,
                          prePrecision=preML$byClass[["Precision"]], preRecall=preML$byClass[["Recall"]],
                          preF1=preML$byClass[["F1"]], preSensitivity=preML$byClass[["Sensitivity"]],
                          preSpecificity=preML$byClass[["Specificity"]],
                          preHBCR=((2*preML$byClass[["Sensitivity"]]*preML$byClass[["Specificity"]])/(preML$byClass[["Sensitivity"]] + preML$byClass[["Specificity"]])),
                          postPrecision=postML$byClass[["Precision"]], postRecall=postML$byClass[["Recall"]],
                          postF1=postML$byClass[["F1"]], postSensitivity=postML$byClass[["Sensitivity"]],
                          postSpecificity=postML$byClass[["Specificity"]],
                          postHBCR=((2*postML$byClass[["Sensitivity"]]*postML$byClass[["Specificity"]])/(postML$byClass[["Sensitivity"]] + postML$byClass[["Specificity"]])))
  if(is.null(results)){
    results <- newResult
  } else {
    results <- rbind(results,newResult)
  }
  print(nrow(results))
}
```
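The `preHBCR` and `postHBCR` columns computed above are the harmonic mean of sensitivity and specificity, i.e. the balanced classification rate (BCR) plotted in the next chunk:

$$\mathrm{BCR} = \frac{2 \cdot \mathrm{Sensitivity} \cdot \mathrm{Specificity}}{\mathrm{Sensitivity} + \mathrm{Specificity}}$$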
```{r}
pre <- results[,c(1:7)]
post <- results[,c(1,8:13)]
pre$prePost <- rep("No classifier",nrow(pre))
post$prePost <- rep("With Classifier",nrow(post))
colnames(pre) <- c("name","Precision","Recall","F","Sensitivity","Specificity","BCR","prePost")
colnames(post) <- c("name","Precision","Recall","F","Sensitivity","Specificity","BCR","prePost")
toVisualise2 <- rbind(post,pre)
FPlot <- ggplot(toVisualise2,aes(x=prePost,y=F)) + geom_boxplot() + xlab("Correcting Classifier")
BCRPlot <- ggplot(toVisualise2,aes(x=prePost,y=BCR)) + geom_boxplot()+ xlab("Correcting Classifier")
ggsave(FPlot,filename="../figures/rq4FBox.png",width=6,height=5,units="in")
ggsave(BCRPlot,filename="../figures/rq4BCRBox.png",width=6,height=5,units="in")
```
```{r}
finalResults <- results[,c(1,5,6,7,11,12,13)]
colnames(finalResults) <- c("Model", "Sens", "Spec", "BCR","Sens(C)", "Spec(C)","BCR(C)")
print(xtable(finalResults), include.rownames=FALSE)
```
# Test prioritisation experiment
```{r}
# Read the four prioritisation treatments for a given subject and combine them
# into a single long-format data frame.
readAndSummarise <- function(Name){
  quic_rand <- read.csv(paste(c(Name,"_prioritise_random.csv"),collapse=""))
  quic_sl <- read.csv(paste(c(Name,"_prioritise_prioritised.csv"),collapse=""))
  quic_coverage <- read.csv(paste(c(Name,"_prioritise_coverage.csv"),collapse=""))
  quic_infoDist <- read.csv(paste(c(Name,"_prioritise_gower.csv"),collapse=""))
  randRows <- nrow(quic_rand)
  slRows <- nrow(quic_sl)
  coverageRows <- nrow(quic_coverage)
  infoDistRows <- nrow(quic_infoDist)
  quic_rand$treatment <- rep("rand",randRows)
  quic_sl$treatment <- rep("sl",slRows)
  quic_coverage$treatment <- rep("coverage",coverageRows)
  quic_infoDist$treatment <- rep("information",infoDistRows)
  colnames(quic_sl) <- c("seed","iteration","mutation","treatment")
  colnames(quic_rand) <- c("seed","iteration","mutation","treatment")
  colnames(quic_coverage) <- c("seed","iteration","mutation","treatment")
  colnames(quic_infoDist) <- c("seed","iteration","mutation","treatment")
  quic <- rbind(quic_rand,quic_sl,quic_coverage,quic_infoDist)
  colnames(quic) <- c("seed","iteration","mutation","treatment")
  quic$Name <- rep(Name,nrow(quic))
  return(quic)
}
prioritisation <- NULL
for(i in list.files(pattern="*random.csv")){
  # Strip the "_prioritise_random.csv" suffix (22 characters) to recover the subject name.
  i <- substring(i,0,nchar(i)-22)
  print(i)
  if(is.null(prioritisation)){
    prioritisation <- readAndSummarise(i)
  } else {
    prioritisation <- rbind(prioritisation,readAndSummarise(i))
  }
}
```
```{r}
linePlots <- function(quic){
  # Summarise the mutation score per iteration and treatment; "sdlow"/"sdhigh" are
  # in fact the lower and upper quartiles used for the ribbons.
  summarised <- ddply(quic,c("iteration","treatment","Name"),summarise,mean=mean(mutation),sdlow=quantile(mutation,prob=0.25),sdhigh=quantile(mutation,prob=0.75))
  return(summarised)
}
plotData <- linePlots(prioritisation)
mutations <- ggplot(plotData,aes(x=iteration,y=mean,colour=treatment)) + geom_line() +
  geom_ribbon(aes(ymin=sdlow, ymax = sdhigh,fill=treatment),alpha=0.1) + facet_wrap(~Name,ncol=6)
ggsave(mutations,filename="../figures/mutations_12_9.png",width=49,height=20,units="in")
plotData$treatment <- as.factor(plotData$treatment)
levels(plotData$treatment) <- c("Coverage", "Info. Dist.", "Random", "SOSM")
mutationsKeyb <- ggplot(plotData[plotData$Name=="keyb",],aes(x=iteration,y=mean,colour=treatment)) + geom_line() +
  geom_ribbon(aes(ymin=sdlow, ymax = sdhigh,fill=treatment),alpha=0.3) + labs(fill='Heuristic') +
  scale_color_discrete(guide='none') + xlab("Iteration") + ylab("APFD")
ggsave(mutationsKeyb,filename="../figures/keybMutations.png",width=6,height=3.5,units="in")
```
AUC calculations
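As a reading aid for the chunk below: `aucNormal` is the area under the observed mutation-score curve, normalised by the area under an idealised curve that sits at the maximum mutation score over the same iterations (both areas are computed with `DescTools::AUC`, which defaults to the trapezoidal rule). Each curve is truncated at the first iteration at which the score reaches the given `Percentage` of its maximum, with `Percentage=1.2` standing for the untruncated curve. Roughly,

$$\text{aucNormal} = \frac{\int_{0}^{T} m(t)\,dt}{T \cdot m_{\max}}$$

where $m(t)$ is the mutation score at iteration $t$, $m_{\max}$ is the maximum score reached, and $T$ is the truncation point.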
```{r}
readAndCalculateAUC <- function(Name){
  quic_rand <- read.csv(paste(c(Name,"_prioritise_random.csv"),collapse=""))
  quic_sl <- read.csv(paste(c(Name,"_prioritise_prioritised.csv"),collapse=""))
  quic_coverage <- read.csv(paste(c(Name,"_prioritise_coverage.csv"),collapse=""))
  quic_infoDist <- read.csv(paste(c(Name,"_prioritise_gower.csv"),collapse=""))
  randRows <- nrow(quic_rand)
  slRows <- nrow(quic_sl)
  coverageRows <- nrow(quic_coverage)
  infoDistRows <- nrow(quic_infoDist)
  quic_rand$treatment <- rep("rand",randRows)
  quic_sl$treatment <- rep("sl",slRows)
  quic_coverage$treatment <- rep("coverage",coverageRows)
  quic_infoDist$treatment <- rep("gower",infoDistRows)
  colnames(quic_sl) <- c("seed","iteration","mutation","treatment")
  colnames(quic_rand) <- c("seed","iteration","mutation","treatment")
  colnames(quic_coverage) <- c("seed","iteration","mutation","treatment")
  colnames(quic_infoDist) <- c("seed","iteration","mutation","treatment")
  #quic_sl <- quic_sl[which(quic_sl$seed==0 & quic_sl$iteration==0):nrow(quic_sl),]
  #quic_rand <- quic_rand[which(quic_rand$seed==0 & quic_rand$iteration==0):nrow(quic_rand),]
  #quic_coverage <- quic_coverage[which(quic_coverage$seed==0 & quic_coverage$iteration==0):nrow(quic_coverage),]
  #quic_infoDist <- quic_infoDist[which(quic_infoDist$seed==0 & quic_infoDist$iteration==0):nrow(quic_infoDist),]
  quic <- rbind(quic_rand,quic_sl,quic_coverage,quic_infoDist)
  colnames(quic) <- c("seed","iteration","mutation","treatment")
  summarised <- NULL
  summarised <- quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=0.25, index=which(mutation>=mean(mean(maxMut)*Percentage))[1],aucactual=AUC(iteration[1:index],mutation[1:index]),aucMax = AUC(iteration[1:index],rep(mean(maxMut),index)),aucNormal = aucactual / aucMax)
  summarised <- rbind(summarised,quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=0.5, index=which(mutation>=mean(mean(maxMut)*Percentage))[1],aucactual=AUC(iteration[1:index],mutation[1:index]),aucMax = AUC(iteration[1:index],rep(mean(maxMut),index)),aucNormal = aucactual / aucMax))
  summarised <- rbind(summarised,quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=0.75, index=which(mutation>=mean(mean(maxMut)*Percentage))[1],aucactual=AUC(iteration[1:index],mutation[1:index]),aucMax = AUC(iteration[1:index],rep(mean(maxMut),index)),aucNormal = aucactual / aucMax))
  summarised <- rbind(summarised,quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=0.95, index=which(mutation>=mean(mean(maxMut)*Percentage))[1],aucactual=AUC(iteration[1:index],mutation[1:index]),aucMax = AUC(iteration[1:index],rep(mean(maxMut),index)),aucNormal = aucactual / aucMax))
  summarised <- rbind(summarised,quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=1, index=which(mutation>=mean(mean(maxMut)*Percentage))[1],aucactual=AUC(iteration[1:index],mutation[1:index]),aucMax = AUC(iteration[1:index],rep(mean(maxMut),index)),aucNormal = aucactual / aucMax))
  summarised <- rbind(summarised,quic %>% dplyr::group_by(seed,treatment) %>% dplyr::summarise(maxMut = max(mutation), Percentage=1.2, index=length(iteration),aucactual=AUC(iteration,mutation),aucMax = AUC(iteration,rep(mean(maxMut),index)),aucNormal = aucactual / aucMax))
  summarised$Name <- rep(Name,nrow(summarised))
  return(summarised)
}
aucs <- NULL
for(i in list.files(pattern="*random.csv")){
  i <- substring(i,0,nchar(i)-22)
  print(i)
  if(is.null(aucs)){
    aucs <- readAndCalculateAUC(i)
  } else {
    aucs <- rbind(aucs,readAndCalculateAUC(i))
  }
}
summary <- ddply(aucs,c("treatment","Name","Percentage"),summarise,meanAUC=mean(aucNormal, na.rm=TRUE),meanIndex=mean(index, na.rm=TRUE))
ranks <- ddply(summary,c("Name", "Percentage"),transform,rnk=rank(meanIndex,ties.method="average"), rnkAUC = rank(1-meanAUC,ties.method="average"))
ranks$treatment <- as.factor(ranks$treatment)
levels(ranks$treatment) <- c("Coverage", "Info. Dist.", "Random", "SOSM")
summary$treatment <- as.factor(summary$treatment)
levels(summary$treatment) <- c("Coverage", "Info. Dist.", "Random", "SOSM")
winners <- ggplot(ranks[ranks$rnkAUC==1 & ranks$Percentage == 1.2,],aes(x=treatment)) + geom_bar(position="dodge") + ylab("") + xlab("")
aucBox <- ggplot(summary[summary$Percentage==1.20,],aes(x=treatment,y=meanAUC)) + geom_boxplot() + ylab("APFD") + xlab("")
ggsave(winners,filename="../figures/winners_prioritise.png",width=6,height=3.5,units="in")
ggsave(aucBox,filename="../figures/auc_boxes.png",width=6,height=3.5,units="in")
```