In [24]:
import operator
import sys

# --- Configuration -----------------------------------------------------------
# COMMAND is only used as a prefix for error messages.
COMMAND = "drawgraph"
# startSize and batchsize are kept as strings because they are spliced into
# the results file name; they are converted with float() where numbers are needed.
startSize = "550"
batchsize = "110"
fileName = "run.out.learner."+startSize+"."+batchsize+".t"

# --- Load the results file ----------------------------------------------------
# Every non-empty line must look like "<key>\t<value>", where <key> consists of
# exactly four space-separated fields and <value> parses as a float.
#   data[key]        : sum of all values seen for that key
#   counts[key]      : number of values seen for that key
#   experiments[...] : set-like dict of the first two key fields joined by " "
data = {}
counts = {}
experiments = {}
# The context manager closes the file even when sys.exit() aborts the loop.
with open(fileName,"r") as inFile:
    for line in inFile:
        line = line.rstrip()
        # Tolerate blank lines (e.g. a trailing empty line) instead of aborting.
        if line == "": continue
        fields = line.split("\t")
        if len(fields) != 2: sys.exit(COMMAND+": unexpected input line (TAB): "+line)
        key,value = fields
        # Validate the key before touching the accumulators so that a malformed
        # line never leaves a half-registered entry behind.
        fields = key.split(" ")
        if len(fields) != 4: sys.exit(COMMAND+": unexpected input line (SPACE): "+line)
        if key not in data:
            data[key] = float(value)
            counts[key] = 1
        else:
            data[key] += float(value)
            counts[key] += 1
        experiments[fields[0]+" "+fields[1]] = True

# For every experiment code, remember the largest training-set size seen and the
# average score (sum / count) measured at that size. While doing so, track the
# best "smart" experiment: among all experiments other than the baselines
# "r", "t" and "t-R", prefer a larger size, and at equal-or-larger size a
# higher score.
finalValues = {}
maxSize = {}
bestSmartExp = ""
bestSmartExpScore = 0.0
bestSmartExpSize = 0
for key in data:
    # Key layout: "<exp> <stepSize> <totalSize> <dummy>".
    # NOTE: stepSize intentionally stays a top-level name; the plotting cell
    # reads it after this loop has finished.
    exp,stepSize,totalSize,dummy = key.split()
    size = int(totalSize)
    # Only a strictly larger size replaces the value stored for this experiment.
    if exp in maxSize and size <= maxSize[exp]:
        continue
    maxSize[exp] = size
    finalValues[exp] = data[key]/counts[key]
    # Baseline strategies never compete for the "best smart" slot.
    if exp == "r" or exp == "t" or exp == "t-R":
        continue
    betterAtSameOrLargerSize = finalValues[exp] > bestSmartExpScore and size >= bestSmartExpSize
    if betterAtSameOrLargerSize or size > bestSmartExpSize:
        bestSmartExpScore = finalValues[exp]
        bestSmartExpSize = size
        bestSmartExp = exp
# Experiment codes ordered from the highest to the lowest final score.
sortedExps = list(finalValues)
sortedExps.sort(key=finalValues.get,reverse=True)

In [25]:
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import numpy as np
import math
import re
# Human-readable legend labels for the experiment codes found in the data keys.
labels = { "e":"Highest entropy","c":"Lowest confidence","l":"Longest text","m":"Smallest margin",\
           "r":"Random selection","t":"Sequential","t-R":"Reversed sequential"}
# Divisor that converts a training-set size into the x-axis unit.
# NOTE(review): presumably one percent of a 55,029-item data set - confirm.
sizePerPercent = 550.29

# Collect the (x, averaged score) points of every experiment in a single pass
# over data instead of rescanning the whole dict once per experiment.
pointsPerExp = {}
for key in data:
    fields = key.split(" ")
    point = (float(fields[2])/sizePerPercent,data[key]/counts[key])
    pointsPerExp.setdefault(fields[0],[]).append(point)

plt.figure()
for exp in sortedExps:
    # Sort by x so the curve is drawn left to right even when the input file
    # did not list the sizes in increasing order.
    points = sorted(pointsPerExp.get(exp,[]))
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    # Highlight the random baseline (red) and the best smart strategy (black);
    # everything else is drawn in translucent gray.
    if exp == "r":
        lineStyle = {"color":"red"}
    elif exp == bestSmartExp:
        lineStyle = {"color":"black"}
    else:
        lineStyle = {"color":"gray","alpha":0.5}
    # Fall back to the raw experiment code when no label is defined, rather
    # than failing with a KeyError on an unknown strategy.
    plt.plot(x,y,'-',label=labels.get(exp,exp),**lineStyle)

plt.legend(frameon=False,fontsize="x-small",loc="upper left")
# NOTE: despite its name, fig holds the current Axes (plt.gca()), not a Figure.
fig = plt.gca()
fig.set_xscale("log")
fig.set_xlabel("Training data size")
fig.set_ylabel("Accuracy")
# tick_params expects booleans. The string 'off' is truthy, so passing it
# switches the tick marks ON in current matplotlib releases.
fig.tick_params(top=False, bottom=False, left=False, right=False)
for spine in fig.spines.values(): spine.set_visible(False)

# Place nbrOfTicks x ticks evenly spaced on the log scale between startSize and
# startSize + 10*batchsize, then convert each one to the x-axis unit rounded to
# one decimal (55.029 is a tenth of the 550.29 divisor used for the plotted x values).
nbrOfTicks = 4
logStart = math.log(float(startSize))
logEnd = math.log(float(startSize)+10*float(batchsize))
xticks = []
for i in range(0,nbrOfTicks):
    rawTick = 0.5+math.exp(logStart+i*(logEnd-logStart)/(nbrOfTicks-1))
    # Divide by 10.0 so the result keeps its decimal regardless of division semantics.
    xticks.append(int(0.5+rawTick/55.029)/10.0)
fig.set_xticks(xticks)
# Use a dedicated name so the legend-label dict called labels is not clobbered.
xTickLabels = [str(tick)+"%" for tick in xticks]
fig.set_xticklabels(xTickLabels)
fig.minorticks_off()
# NOTE(review): stepSize is whatever the last key parsed by the loop in the
# previous cell left behind - confirm it is the intended value for the title.
fig.set_title("Start size: "+str(startSize)+"; step size: "+str(stepSize))
# Build the output name by replacing every dot in the input file name; a plain
# str.replace avoids the invalid "\." escape of the former non-raw regex.
fileName = fileName.replace(".","-")

# Tick label texts are only filled in once the figure has been rendered, so
# draw explicitly before reading them (fig is the Axes created by plt.gca()).
fig.figure.canvas.draw()
yTickLabels = [item.get_text()+"%" for item in fig.get_yticklabels()]
fig.set_yticklabels(yTickLabels)

# Save once, after all labels are final, and only then show the figure:
# some backends discard the figure on show(), which would produce an empty PDF.
fig.figure.savefig(fileName+".pdf")
plt.show()



In [ ]: