In [1]:
research_directory = %pwd
tools_script = research_directory + '/Tools.ipynb'
%run $tools_script
display_width(90)
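# Tools.ipynb is assumed to provide the plotting and benchmarking helpers used below
# (BenchData, combineResults, myStyle, makeLegend, drawLegend, finishPlot, savePlot,
# display_width, colors, fig_width) together with the required imports (numpy as np,
# matplotlib.pyplot as plt, pickle, itertools, and the tom library).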
In [ ]:
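# Presentation figures: one panel per benchmark process, showing the evaluation score
# of spectrally learned models as a function of training sequence length.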
def set_presentation_data():
pdata = {}
for oomName in BenchData.OOMs + BenchData.IOOOMs + BenchData.realWorldData:
pdata[oomName] = {}
pdata[oomName]['xlabels'] = ['', '', '', '$10^4$', '$10^5$', '$10^6$', '']
if oomName == 'RANDOM4_7':
pdata[oomName]['xlabels'] = ['', '', '$10^3$', '$10^4$', '$10^5$', '']
elif oomName == 'RANDOM27_32':
pdata[oomName]['xlabels'] = ['', '', '$10^5$', '$10^6$', '$10^7$', '']
for oomName in BenchData.OOMs + BenchData.realWorldData:
ylim = BenchData.info[oomName]['ylim']
pdata[oomName]['ylabels'] = list(map(lambda x: '%0.1f' % x, np.arange(0, 5, 0.1)))
for oomName in BenchData.IOOOMs:
pdata[oomName]['ylabels'] = ['', '', '$10^{-5}$', '$10^{-4}$', '$10^{-3}$', '$10^{-2}$']
return pdata
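# make_presentation_demo: plots the precomputed results from resultsWords.p for all
# synthetic OOM and IO-OOM benchmarks, using words selected by length <= L and the
# true process rank as target dimension (cf. the legend drawn in the last subplot).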
def make_presentation_demo():
pdata = set_presentation_data()
with open(research_directory + '/results/resultsWords.p', 'rb') as f:
res = pickle.load(f)['res']
avres = combineResults(res)
plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
ax = bd.initPlot()
style = myStyle(lw = 0.8, color = 'purple', ls = 0)
plt.plot(bd.trainLengths(), avres[oomName][(0, bd.info['lenX'], 1, 0)][bd.dim()]['SPEC'][None], **style)
ax.grid(**myStyle(lw=0.3,color='brown', ls=1))
ax.set_xticklabels(pdata[oomName]['xlabels'],y=0.085, color=colors['brown'])
ax.set_yticklabels(pdata[oomName]['ylabels'],x = (0.17 if oomName in BenchData.IOOOMs else 0.12), color=colors['brown'])
plt.subplot(4,3,12)
line, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple', ls = 0))
makeLegend([line],['Spectral learning with\n indicative and character-\n istic words selected by\n length $\le L$ and with\n process rank as target\n dimension'])
finishPlot()
savePlot('presentationDemo')
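# make_presentation_demo_rw: unlike the synthetic benchmarks above, the real-world
# results are computed on the fly; for each training length a model is estimated via
# tom.learn (dimension_estimate + model_estimate) and scored with BenchData.evaluate.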
def make_presentation_demo_rw(seed_for_spectral_norm_sampling=123456789):
np.random.seed(seed_for_spectral_norm_sampling)
pdata = set_presentation_data()
plt.figure(figsize=(fig_width, 1/3*fig_width), dpi=144)
res = {}
for oomName in BenchData.realWorldData:
res[oomName] = []
bd = BenchData(oomName)
train = bd.getSequence()
data = tom.Data(train.sub(0))
for tl in bd.trainLengths():
print('.', end='', flush=True)
data.sequence = train.sub(tl)
lenX = 5 if oomName == 'ECOLI' else 2
data.X = [0, lenX, 1, 0]
data.Y = data.X
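# Estimate a suitable target dimension from the data statistics, then learn a model
# of that dimension (both provided by the tom library's learn module).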
dim = tom.learn.dimension_estimate(data)
oom = tom.learn.model_estimate(data, dim)
res[oomName].append(bd.evaluate(oom))
print()
bd = BenchData(oomName)
ax = bd.initPlot()
style = myStyle(lw = 0.8, color = 'purple', ls = 0)
plt.plot(bd.trainLengths(), res[oomName], **style)
ax.grid(**myStyle(lw=0.3,color='brown', ls=1))
ax.set_xticklabels(pdata[oomName]['xlabels'], y=0.09, color=colors['brown'])
for i, direction in enumerate(['top', 'left', 'bottom', 'right']):
ax.spines[direction].set_linewidth(0.5)
if oomName == 'ECOLI':
ax.set_yticks(bd.info['ylim'])
ax.set_yticklabels(['1.93', '2.0'], color=colors['brown'])
else:
ax.set_yticklabels(20* [''] + list(map(lambda x: '%0.1f' % x, np.arange(2.0, 3.5, 0.1))), x=0.12, color=colors['brown'])
plt.subplot(1,3,3)
line, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple', ls = 0))
makeLegend([line], ['Spectral learning with\n indicative and charac-\n teristic words selected\n by length $\le L$ and\nwith estimated target\n dimensions'], row=1)
finishPlot()
savePlot('presentationDemoRW')
make_presentation_demo()
make_presentation_demo_rw() # ~ 1:30 min
In [ ]:
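# Word-selection comparison: for plain spectral learning ('SPEC') and weighted
# spectral learning ('GLS'), compare indicative/characteristic words chosen by exact
# length L, by length <= L, as the Z_max most frequent words, or by a minimum
# frequency threshold O_min (cf. the legends drawn in the last subplot).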
def make_words_plot(algo):
reg = None if algo == 'SPEC' else (2,3)
plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
Σₒ, Σᵢ = bd.nO(), max(1, bd.nU())
Σ = Σₒ * Σᵢ
ax = bd.initPlot()
if algo == 'GLS':
lenX = int(np.log(1200)/np.log(Σ))
plt.plot(bd.trainLengths(), avres[oomName][(0,lenX,1,0)][bd.dim()]['SPEC'][None], **myStyle(lw = 0.5, color='grey', ls = 0))
else:
plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()]['GLS'][(2,3)], **myStyle(lw = 0.5, color='grey', ls = 0))
wordSettings = avres[oomName].keys()
style = myStyle(lw = 1.5, color = 'purple', ls = 0)
plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()][algo][reg], **style)
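# Overlay the remaining word settings; the colours follow the legend below:
# green = exact length L (wS[0] == wS[1] > 0), red = length <= L (only wS[1] > 0),
# blue = Z_max most frequent words, while the frequency >= O_min setting (purple)
# was already drawn above and is skipped here via lw = 0.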
for wS in wordSettings:
if wS[0] == wS[1] > 0:
style = myStyle(lw = 1.2 if wS[0] == max([s[0] for s in wordSettings]) else 0.5, color = 'green', ls = 2)
elif wS[1] > 0:
style = myStyle(lw = 1.2 if wS[1] == max([s[1] for s in wordSettings if s[0] == 0]) else 0.5, color = 'red', ls = 3)
elif wS[2] == 'o_min':
style = myStyle(lw = 0, color = 'purple', ls = 0)
else:
style = myStyle(lw = 1.2 if wS[3] == 1024 else 0.5, color = 'blue', ls = 1)
plt.plot(bd.trainLengths(), avres[oomName][wS][bd.dim()][algo][reg], **style)
#style = myStyle(lw = 0.2, color = 'green', ls = 0)
#plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()][algo][reg], **style)
plt.subplot(4,3,12)
extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
exact, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'green', ls = 2))
less, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'red', ls = 3))
most, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'blue', ls = 1))
omin, = plt.plot([0],[0], **myStyle(lw = 1.5, color = 'purple', ls = 0))
other, = plt.plot([0],[0], **myStyle(lw = 0.5, color = 'grey', ls = 0))
if algo == 'SPEC':
makeLegend([extra, exact, less, most, omin, extra, extra, other],
['\\underline{words selected by}:', 'length $L$', 'length $\le L$','$Z_{\max}$ most frequent', 'frequency $\ge O_{\min}$',
'','\\underline{for comparison}:','Weighted learning with\n words selected by \n frequency $\ge O_{\min}$'],
title='\\underline{Spectral learning with}')
else:
makeLegend([extra, exact, less, most, omin, extra, extra, other],
['\\underline{words selected by}:', 'length $L$', 'length $\le L$','$Z_{\max}$ most frequent', 'frequency $\ge O_{\min}$',
'','\\underline{for comparison}:','Spectral learning with\n words selected by \n length $\le L$'],
title='\\underline{Weighted learning with}')
finishPlot()
savePlot('words' + algo)
# Load the precomputed results from file and plot
with open(research_directory + '/results/resultsWords.p', 'rb') as f:
res = pickle.load(f)['res']
avres = combineResults(res)
del res
make_words_plot('SPEC')
make_words_plot('GLS')
del avres
In [ ]:
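# Target-dimension study: how model quality depends on the chosen target dimension
# (below, at, or above the process rank), and how several data-driven dimension
# estimators compare to the resulting range of suitable dimensions.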
with open(research_directory + '/results/resultsDim.p', 'rb') as f:
res = pickle.load(f)['res']
avres = combineResults(res)
with open(research_directory + '/results/resultsDimEstimation.p', 'rb') as f:
dimestres = pickle.load(f)['res']
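# optimalityCutoff: for each benchmark, the best evaluation achievable over all tried
# dimensions, widened by a tolerance margin; this serves as the "optimality threshold"
# below which a target dimension is considered suitable.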
def optimalityCutoff(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide = None):
reg = None if algo == 'SPEC' else (2,3)
optCutoff = {}
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
wS = list(wordSetting)
if wS[0] == 'lenX': wS[0] = bd.info['lenX']
if wS[1] == 'lenX': wS[1] = bd.info['lenX']
wS = tuple(wS)
if bd.dim() < 12:
dimSettings = list(range(1, bd.dim() + 1)) + [bd.dim() + 1, bd.dim() + 3, bd.dim() + 10]
else:
dimSettings = [1,2,4,8,12,16,20,23,26,28,30,31,32,33,35,42]
if ide is None:
optres = np.min([avres[oomName][wS][d][algo][reg] for d in dimSettings], axis=0)
else:
optres = np.min([res[ide][oomName][wS][d][algo][reg] for d in dimSettings], axis=0)
if oomName in BenchData.OOMs:
optres += 0.25 * (optres - bd.info['entropies'][4])
else:
optres += 0.5 * optres
optCutoff[oomName] = optres
return optCutoff
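# optimalDimensions: for each benchmark and training length, the smallest ('low') and
# largest ('high') dimension whose (interpolated) evaluation stays below the
# optimality cutoff, i.e. the range of suitable target dimensions.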
def optimalDimensions(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide = None):
reg = None if algo == 'SPEC' else (2,3)
optCutoff = optimalityCutoff(algo, wordSetting, ide)
optDim = {}
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
wS = list(wordSetting)
if wS[0] == 'lenX': wS[0] = bd.info['lenX']
if wS[1] == 'lenX': wS[1] = bd.info['lenX']
wS = tuple(wS)
if ide is None:
interpolatedRes = {d : np.array(avres[oomName][wS][d][algo][reg]) for d in avres[oomName][wS].keys()}
else:
interpolatedRes = {d : np.array(res[ide][oomName][wS][d][algo][reg]) for d in avres[oomName][wS].keys()}
if oomName in BenchData.OOMs:
interpolatedRes[bd.dim()+2] = (interpolatedRes[bd.dim()+1] + interpolatedRes[bd.dim()+3]) / 2
for i in range(4, 10):
interpolatedRes[bd.dim()+i] = (10-i)/7 * interpolatedRes[bd.dim()+3] + (i-3)/7 * interpolatedRes[bd.dim()+10]
else:
interpolatedRes[bd.dim()+2] = (interpolatedRes[bd.dim()+1] * interpolatedRes[bd.dim()+3])**0.5
for i in range(4, 10):
interpolatedRes[bd.dim()+i] = interpolatedRes[bd.dim()+3]**((10-i)/7) * interpolatedRes[bd.dim()+10]**((i-3)/7)
optDim[oomName] = {'low': [42] * len(bd.trainLengths()), 'high': [1] * len(bd.trainLengths())}
for tli, tl in enumerate(bd.trainLengths()):
for d in interpolatedRes.keys():
if interpolatedRes[d][tli] < optCutoff[oomName][tli]:
optDim[oomName]['low'][tli] = min(optDim[oomName]['low'][tli], d)
optDim[oomName]['high'][tli] = max(optDim[oomName]['high'][tli], d)
return optDim
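# make_dimensions_plot: evaluation vs. training length for a range of target
# dimensions (blue: below, purple: equal to, red: above the process rank), together
# with the optimality threshold (green) from optimalityCutoff.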
def make_dimensions_plot(algo, wordSetting):
optimality_cutoff = optimalityCutoff(algo, wordSetting)
reg = None if algo == 'SPEC' else (2,3)
plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
bd.initPlot()
wS = list(wordSetting)
if wS[0] == 'lenX': wS[0] = bd.info['lenX']
if wS[1] == 'lenX': wS[1] = bd.info['lenX']
wS = tuple(wS)
if bd.dim() < 12:
dimSettings = list(range(1, bd.dim() + 1)) + [bd.dim() + 1, bd.dim() + 3, bd.dim() + 10]
else:
dimSettings = [1,2,4,8,12,16,20,23,26,28,30,31,32,33,35,42]
for dim in dimSettings:
if dim < bd.dim():
dim_style = myStyle(lw = 0.8, color = 'blue', ls = 1)
elif dim > bd.dim():
dim_style = myStyle(lw = 0.8, color = 'red', ls = 2)
else:
dim_style = myStyle(lw = 1.2, color = 'purple', ls = 0)
plt.plot(bd.trainLengths(), avres[oomName][wS][dim][algo][reg], **dim_style)
plt.plot(bd.trainLengths(), avres[oomName][wS][bd.dim()][algo][reg], **myStyle(lw = 1.2, color = 'purple', ls = 0))
if optimality_cutoff is not None:
plt.plot(bd.trainLengths(), optimality_cutoff[oomName], **myStyle(lw = 0.6, color = 'green', ls = 0))
plt.subplot(4,3,12)
extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
d, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple'))
low, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'blue', ls = 1))
high, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'red', ls = 2))
tolerance, = plt.plot([0],[0], **myStyle(lw = 0.6, color = 'green', ls = 0))
if algo == 'SPEC':
makeLegend([low, d, high, extra, tolerance],
['$<$ process rank', '$=$ process rank', '$>$ process rank', '', "``optimality threshold''"],
title='\\underline{Spectral learning for words}\n\\underline{selected by length $\le L$}\n\\underline{and target dimension}:')
else:
makeLegend([low, d, high, extra, tolerance],
['$<$ process rank', '$=$ process rank', '$>$ process rank', '', "``optimality threshold''"],
title='\\underline{Weighted learning for words}\n\\underline{selected by frequency $\ge O_{\min}$}\n\\underline{and target dimension}:')
finishPlot()
savePlot('dim' + algo)
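# make_dim_estimation_plot: compares several dimension estimates (the Frobenius-based,
# spectral, gap/'elbow', and relative criteria labelled in the legend below) with the
# band of suitable dimensions obtained from optimalDimensions.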
def make_dim_estimation_plot(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide=None, pqrs = ((1,1,(2,0)), (0,0))):
plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
if ide is None:
optDim = combineResults([optimalDimensions(algo, wordSetting, idee) for idee in range(3)])
else:
optDim = optimalDimensions(algo, wordSetting, ide)
for oomName in BenchData.OOMs + BenchData.IOOOMs:
bd = BenchData(oomName)
wS = list(wordSetting)
wS = tuple(wS)
ax = bd.initPlot()
ax.set_yscale('linear')
ax.set_yticks(np.arange(0, 43, 1))
ax.set_yticklabels([])
ax.set_ylim((0,bd.dim()+5))
dc = (bd.dim()+10) / 250
lfilled = ax.fill_between(bd.trainLengths(), np.array(optDim[oomName]['low'])-2*dc, np.array(optDim[oomName]['high'])+2*dc, color = 1 - 0.333 * (1 - np.array(colors['lightgreen'])))
lrank = ax.axhline(bd.dim(), **myStyle(lw = 0.5, color = 'brown', ls=4))
lfrob, lspec, lgap, lrel = [None, None], [None, None], [None, None], [None, None]  # legend line handles, filled per pqr setting below
for pqrls, pqr in enumerate(pqrs):
style = myStyle(color = 'blue', ls = 3, lw=0.4 if pqrls else 1.2)
if ide is None:
frob = np.average([[dimestres[idee][oomName][tl][wS][pqr]['frob'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
else:
frob = np.array([dimestres[ide][oomName][tl][wS][pqr]['frob'][0] for tl in bd.trainLengths()])
lfrob[pqrls], = ax.plot(bd.trainLengths(), frob, **style)
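# Post-hoc 'elbow' estimate: starting from the Frobenius-based estimate f, scan up to
# 100 positions further through the singular value spectrum s and keep the index d
# with the largest relative gap s[d-1]^2 * s[d+1]^2 / s[d]^4; the result is stored
# back into dimestres under the key 'frob_elbow'.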
for tl in bd.trainLengths():
for idee in range(3):
s = dimestres[idee][oomName][tl][wS][pqr]['spectrum']
f = (dimestres[idee][oomName][tl][wS][pqr]['frob'][0])
e = d = f
d2 = s[d-1]**2 * s[d+1]**2 / s[d]**4
while e < min(len(s)-3, f+100) and e - d < 100:
e += 1
e2 = s[e-1]**2 * s[e+1]**2 / s[e]**4
if e2 > d2:
d = e
d2 = e2
dimestres[idee][oomName][tl][wS][pqr]['frob_elbow'] = d
style = myStyle(color = 'red', ls = 1, lw=0.5 if pqrls else 1.2)
if ide is None:
frob_elbow = np.average([[dimestres[idee][oomName][tl][wS][pqr]['frob_elbow'] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
else:
frob_elbow = np.array([dimestres[ide][oomName][tl][wS][pqr]['frob_elbow'] for tl in bd.trainLengths()])
lgap[pqrls], = ax.plot(bd.trainLengths(), frob_elbow + 2*dc, **style)
style = myStyle(color = 'purple', ls = 2, lw=0.4 if pqrls else 1.2)
if ide is None:
mid_spec = np.average([[dimestres[idee][oomName][tl][wS][pqr]['mid_spec'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
else:
mid_spec = np.array([dimestres[ide][oomName][tl][wS][pqr]['mid_spec'][0] for tl in bd.trainLengths()])
lspec[pqrls], = ax.plot(bd.trainLengths(), mid_spec + dc, **style)
style = myStyle(color = 'green', ls = 0, lw=0.4 if pqrls else 1.2)
if ide is None:
relative = np.average([[dimestres[idee][oomName][tl][wS][pqr]['relative'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
else:
relative = np.array([dimestres[ide][oomName][tl][wS][pqr]['relative'][0] for tl in bd.trainLengths()])
lrel[pqrls], = ax.plot(bd.trainLengths(), relative-dc, **style)
plt.subplot(4,3,12)
title = '\\underline{Dimension estimation from}\n\\underline{$\hat{F}_{Y,X}$ with words selected}\n' + ('\\underline{by frequency $\ge O_{\min}$}\n\\underline{for weighted learning}' if algo == 'GLS' else '\\underline{by length $\le L$}\n\\underline{for spectral learning}')
extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
#d, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'green'))
#low, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'blue', ls = 1))
#high, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'red', ls = 2))
makeLegend([lfilled, lrank, lfrob[0], lspec[0], lgap[0], lrel[0], (lfrob[1], lspec[1], lgap[1], lrel[1])],
['suitable dimensions', 'process rank',
'$r_{\\varepsilon_{\\rm{F}}}$', '$r_{\\varepsilon_2}$', '$r_{\\rm{gap}}$', '$r_{\\varepsilon_{\\rm{J}}}$',
'without normalization'], title=title)
finishPlot()
savePlot('dimEstimation' + algo)
make_dimensions_plot('SPEC', (0,'lenX',1,0))
make_dimensions_plot('GLS', (0,0,'o_min', 1024))
make_dim_estimation_plot('SPEC', (0, 'lenX', 1, 0))
make_dim_estimation_plot('GLS', (0, 0, 'o_min', 1024))
del res, avres, dimestres
In [ ]:
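# Learning-variant comparison: standard spectral ('SPEC'), row/column weighted
# ('RCW'), efficiency sharpening ('ES'), element-wise weighted ('WLS'), and weighted
# spectral ('GLS') learning, on the synthetic benchmarks and the real-world data.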
def make_variant_comparison_plot(realworld = False):
with open(research_directory + '/results/results' + ('Realworld.p' if realworld else 'Synthetic.p'), 'rb') as f:
avres = pickle.load(f)['res']
if not realworld: avres = combineResults(avres)
plt.figure(figsize=(fig_width, (1.018 if realworld else 4)/3*fig_width), dpi=144)
for oomName in BenchData.realWorldData if realworld else (BenchData.OOMs + BenchData.IOOOMs):
bd = BenchData(oomName)
bd.initPlot()
wS, d, s = (0,0,'o_min',1024), 0, 'eval'
style = myStyle(lw = 0.6, color = 'grey', ls = 0)
spec, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['SPEC'][s], **style)
style = myStyle(lw = 0.8, color = 'red', ls = 2)
rcw, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['RCW'][s], **style)
style = myStyle(lw = 0.8, color = 'green', ls = 3)
es, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['ES'][s], **style)
style = myStyle(lw = 1, color = 'blue', ls = 1)
wls, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['WLS'][s], **style)
style = myStyle(lw = 1.2, color = 'purple', ls = 0)
if oomName == 'BIBLE27':
gls, = plt.plot(bd.trainLengths()[:-2], avres[oomName][wS][d]['GLS']['eval'][:-2], **style)
style = myStyle(lw = 1.2, color = 'purple', ls = 4)
gls2, = plt.plot(bd.trainLengths()[-3:], avres[oomName][wS][d]['GLS']['eval'][-3:], **style)
else:
gls, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['GLS']['eval'], **style)
plt.subplot(1 if realworld else 4, 3, 3 if realworld else 12)
title = '\\underline{Variants of weighted spec-}\n\\underline{tral learning using words}\n\\underline{selected by frequency $\ge O_{\min}$}\n' r'\underline{and dimension $\max\{r_{\rm{F}}, r_2\}$}.'
makeLegend([spec, rcw, es, wls, (gls, gls2) if realworld else gls],
['Standard spectral', 'Row/column weighted', 'Efficiency sharpening', 'Element-wise weighted',
'Weighted spectral, with\n dimension capped at 192' if realworld else 'Weighted spectral'], title=title, row=1 if realworld else 4)
finishPlot()
savePlot('varComparison' + ('RW' if realworld else ''))
make_variant_comparison_plot(False)
make_variant_comparison_plot(True)
In [ ]:
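# Missing-values study: models learned from training data in which roughly 0%, 11%,
# 22% or 40% of the symbols are missing, either randomly or regularly missing, with
# the missing symbols discarded, kept as an extra symbol, or treated as wildcards.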
missingProbs = [0, 1/9.5, 1/4.5, 1/2.5]
regularizations = [(2,3), (2,'3/tl**2')]
wordSettingsAndWildcards = [((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance(0,1)'), False),
((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance((Σₒ)**-1,1)'), False),
((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance((Σₒ)**-1,1)'), True)]
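# The three (word setting, wildcard flag) pairs appear to correspond, in order, to the
# three treatments of missing values in the legend below: discarded, treated as an
# extra symbol, and treated as wildcards (the flag set to True); the line-style index
# j used when plotting follows this order.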
def make_missing_values_plot(algo='Weighted', regular=False, title=None):
with open(research_directory + '/results/resultsSynthetic.p', 'rb') as f:
res0 = pickle.load(f)['res']
avres0 = combineResults(res0)
with open(research_directory + '/results/resultsRealworld.p', 'rb') as f:
avres0.update(pickle.load(f)['res'])
with open(research_directory + '/results/resultsMissing' + ('Regular.p' if regular else 'Random.p'), 'rb') as f:
res = pickle.load(f)['res']
with open(research_directory + '/results/resultsMissing' + ('RegularRW.p' if regular else 'RandomRW.p'), 'rb') as f:
rwres = pickle.load(f)['res']
for i in range(3): res[i].update(rwres)  # merge the real-world results into each of the three result sets
if regular == True:
with open(research_directory + '/results/resultsMissingRegularBIBLE27tweakedDim.p', 'rb') as f:
resregBIBLE27tweakeddim = pickle.load(f)['res']
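# For every run, missing-value rate and word setting, pick per training length the
# better of the two regularization settings (smaller recorded 'cx' value) and store
# the corresponding 'eval' curve directly under the algorithm key.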
for oomName in BenchData.OOMs + BenchData.realWorldData:
bd = BenchData(oomName)
for ID, mP, wSawC in itertools.product(range(3), missingProbs, wordSettingsAndWildcards):
if mP != 0:
r = [res[ID][oomName][mP][wSawC][regularization][algo]['cx'] for regularization in regularizations]
optregs = np.argmin([[r[reg][tli] for tli in range(len(r[0]))] for reg in range(2)], axis=0)
res[ID][oomName][mP][wSawC][algo] = {'eval': [res[ID][oomName][mP][wSawC][regularizations[optregs[tli]]][algo]['eval'][tli] for tli in range(len(optregs))]}
avres = combineResults(res)
fig = plt.figure(figsize=(fig_width, 2/3*fig_width / 0.937), dpi=144)
special = None
for oomName in BenchData.OOMs + BenchData.realWorldData:
bd = BenchData(oomName)
bd.initPlot(rows=2, allOOMs = True)
lines = [3*[None], 3*[None], 3*[None], 3*[None]]
for j, wS in enumerate(wordSettingsAndWildcards[0:3]):
for i, mP in enumerate(reversed(missingProbs)):
if mP == 0:
style = myStyle(color='grey', ls = 0, lw = 0.6)
nomissingres0 = avres0[oomName][(0, 0, 'o_min', 1024)][bd.dim()]['SPEC' if algo == 'SPEC' else 'GLS']['eval']
if algo == 'Weighted' and oomName == 'BIBLE27':
nomissingres0[-3:] = avres0[oomName][(0, 0, 'o_min', 1024)][0]['WLS']['eval'][-3:]
lines[i], = plt.plot(bd.trainLengths(), nomissingres0, **style)
else:
style = myStyle(color=2-i, ls = j, lw = 0.8 if j != 1 else 1.0)
try:
lines[i][j], = plt.plot(bd.trainLengths(), avres[oomName][mP][wS][algo]['eval'], **style)
except Exception:  # skip (mP, wS) combinations missing from the loaded results
pass
if oomName == 'ECOLI' and mP == 1/4.5 and j == 2 and algo == 'Weighted' and regular == True:
resregECOLIbestregularization = avres[oomName][mP][wS][algo]['eval']
resregECOLIbestregularization[-3:] = avres[oomName][mP][wS][(2,'3/tl**2')][algo]['eval'][-3:]
style = myStyle(color=1, ls = 4, lw=0.8)
special, = plt.plot(bd.trainLengths()[-3:], resregECOLIbestregularization[-3:], **style)
lines[1].append(special)
if oomName == 'BIBLE27' and mP == 1/4.5 and j == 2 and algo == 'Weighted' and regular == True:
r = [resregBIBLE27tweakeddim[oomName][mP][wS][regularization][algo]['cx'] for regularization in regularizations]
optregs = np.argmin([[r[reg][tli] for tli in range(len(r[0]))] for reg in range(2)], axis=0)
resregBIBLE27tweakeddim[oomName][mP][wS][algo] = {'eval': [resregBIBLE27tweakeddim[oomName][mP][wS][regularizations[optregs[tli]]][algo]['eval'][tli] for tli in range(len(optregs))]}
style = myStyle(color=1, ls = 4, lw = 0.8)
special, = plt.plot(bd.trainLengths(), resregBIBLE27tweakeddim[oomName][mP][wS][algo]['eval'], **style)
lines[1].append(special)
if oomName == 'ECOLI':
ignored, = plt.plot([0],[0], **myStyle(ls = 0, lw = 0.8, color = 'black'))
symbol, = plt.plot([0],[0], **myStyle(ls = 1, lw = 1.0, color = 'black'))
wildcard, = plt.plot([0],[0], **myStyle(ls = 2, lw = 0.8, color = 'black'))
wildcard_special, = plt.plot([0],[0], **myStyle(ls = 4, lw = 0.8, color = 1))
if special is not None: wildcard = [wildcard, wildcard_special]
drawLegend([ignored, symbol, wildcard], [r'discarded', r'as symbol', r'as wildcard'],
loc=3, title='\\underline{Treatment of}\n\\underline{missing values:}', title_size=9)
if oomName == 'BIBLE27':
legend = drawLegend(lines, [r'40\%', r'22\%', r'11\%', r'0\%'], loc=3, title=r'\underline{$r_{\rm{mis}}$:}', title_size=9)
if special is not None:
drawLegend([special], ['tweaked'], loc=4)
plt.gca().add_artist(legend)
finishPlot()
plt.subplots_adjust(top=0.937)
fig.text(0.6, 0.987, 'DUMMY', fontsize=1)
fig.suptitle(title, fontsize=10, bbox={'facecolor':(0.9,0.9,0.9), 'edgecolor':'none', 'boxstyle':'round'})
savePlot('missing' + algo + ('_reg' if regular else '_rand'))
make_missing_values_plot('SPEC', False, 'Spectral learning with {\\bf{randomly}} missing values')
make_missing_values_plot('Weighted', False, '{\\bf{Weighted}} spectral learning with {\\bf{randomly}} missing values')
make_missing_values_plot('SPEC', True, 'Spectral learning with {\\bf{regularly}} missing values')
make_missing_values_plot('Weighted', True, '{\\bf{Weighted}} spectral learning with {\\bf{regularly}} missing values')