In [1]:
research_directory = %pwd
tools_script = research_directory + '/Tools.ipynb'
%run $tools_script
display_width(90)


0. General result presentation demo


In [ ]:
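# Section 0 overview: plot spectral-learning evaluation curves for all synthetic
# benchmark (IO-)OOMs from precomputed results, and for the real-world datasets
# computed on the fly below.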
def set_presentation_data():
    pdata = {}
    for oomName in BenchData.OOMs + BenchData.IOOOMs + BenchData.realWorldData:
        pdata[oomName] = {}
        pdata[oomName]['xlabels'] = ['', '', '', '$10^4$', '$10^5$', '$10^6$', '']
        if oomName == 'RANDOM4_7':
            pdata[oomName]['xlabels'] = ['', '', '$10^3$', '$10^4$', '$10^5$', '']
        elif oomName == 'RANDOM27_32':
            pdata[oomName]['xlabels'] = ['', '', '$10^5$', '$10^6$', '$10^7$', '']
    for oomName in BenchData.OOMs + BenchData.realWorldData:
        ylim = BenchData.info[oomName]['ylim']
        pdata[oomName]['ylabels'] = list(map(lambda x: '%0.1f' % x, np.arange(0, 5, 0.1)))
    for oomName in BenchData.IOOOMs:
        pdata[oomName]['ylabels'] = ['', '', '$10^{-5}$', '$10^{-4}$', '$10^{-3}$', '$10^{-2}$']
    return pdata
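# Plot the averaged results of spectral learning (words selected by length <= L,
# target dimension = process rank) for all synthetic benchmarks in a 4x3 grid.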
def make_presentation_demo():
    pdata = set_presentation_data()
    with open(research_directory + '/results/resultsWords.p', 'rb') as f:
        res = pickle.load(f)['res']
        avres = combineResults(res)
    plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        ax = bd.initPlot()
        style = myStyle(lw = 0.8, color = 'purple',  ls = 0)
        plt.plot(bd.trainLengths(), avres[oomName][(0, bd.info['lenX'], 1, 0)][bd.dim()]['SPEC'][None], **style)
        ax.grid(**myStyle(lw=0.3,color='brown', ls=1))
        ax.set_xticklabels(pdata[oomName]['xlabels'],y=0.085, color=colors['brown'])
        ax.set_yticklabels(pdata[oomName]['ylabels'],x = (0.17 if oomName in BenchData.IOOOMs else 0.12), color=colors['brown'])

    plt.subplot(4,3,12)
    line, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple', ls = 0))
    makeLegend([line],['Spectral learning with\n indicative and character-\n istic words selected by\n length $\le L$ and with\n process rank as target\n dimension'])
    finishPlot()
    savePlot('presentationDemo')
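# Real-world overview: for each dataset and training length, estimate the target
# dimension, learn a model by spectral learning, and plot the evaluation results.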
def make_presentation_demo_rw(seed_for_spectral_norm_sampling=123456789):
    np.random.seed(seed_for_spectral_norm_sampling)
    pdata = set_presentation_data()
    plt.figure(figsize=(fig_width, 1/3*fig_width), dpi=144)
    res = {}
    for oomName in BenchData.realWorldData:
        res[oomName] = []
        bd = BenchData(oomName)
        train = bd.getSequence()
        data = tom.Data(train.sub(0))
        for tl in bd.trainLengths():
            print('.', end='', flush=True)
            data.sequence = train.sub(tl)
            lenX = 5 if oomName == 'ECOLI' else 2
            data.X = [0, lenX, 1, 0]
            data.Y = data.X
            dim = tom.learn.dimension_estimate(data)
            oom = tom.learn.model_estimate(data, dim)
            res[oomName].append(bd.evaluate(oom))
        print()
        bd = BenchData(oomName)
        ax = bd.initPlot()        
        style = myStyle(lw = 0.8, color = 'purple',  ls = 0)
        plt.plot(bd.trainLengths(), res[oomName], **style)
        ax.grid(**myStyle(lw=0.3,color='brown', ls=1))
        ax.set_xticklabels(pdata[oomName]['xlabels'], y=0.09, color=colors['brown'])
        for i, direction in enumerate(['top', 'left', 'bottom', 'right']):
            ax.spines[direction].set_linewidth(0.5)
        if oomName == 'ECOLI':
            ax.set_yticks(bd.info['ylim'])
            ax.set_yticklabels(['1.93', '2.0'], color=colors['brown'])
        else:
            ax.set_yticklabels(20* [''] + list(map(lambda x: '%0.1f' % x, np.arange(2.0, 3.5, 0.1))), x=0.12, color=colors['brown'])
    plt.subplot(1,3,3)
    line, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple', ls = 0))
    makeLegend([line], ['Spectral learning with\n indicative and charac-\n teristic words selected\n by length $\le L$ and\n with estimated target\n dimensions'], row=1)
    finishPlot()
    savePlot('presentationDemoRW')
make_presentation_demo()
make_presentation_demo_rw() # ~ 1:30 min

1. Selection of indicative and characteristic words


In [ ]:
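# Section 1: compare word-selection strategies (exact length L, length <= L,
# Z_max most frequent, frequency >= O_min) for spectral ('SPEC') and weighted
# ('GLS') learning; a grey curve shows the respective other method for comparison.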
def make_words_plot(algo):
    reg = None if algo == 'SPEC' else (2,3)
    plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        Σₒ, Σᵢ = bd.nO(), max(1, bd.nU())
        Σ = Σₒ * Σᵢ
        ax = bd.initPlot()
        if algo == 'GLS':
            lenX = int(np.log(1200)/np.log(Σ))
            plt.plot(bd.trainLengths(), avres[oomName][(0,lenX,1,0)][bd.dim()]['SPEC'][None], **myStyle(lw = 0.5, color='grey', ls = 0))
        else:
            plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()]['GLS'][(2,3)], **myStyle(lw = 0.5, color='grey', ls = 0))
        wordSettings = avres[oomName].keys()
        style = myStyle(lw = 1.5, color = 'purple', ls = 0)
        plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()][algo][reg], **style)
        for wS in wordSettings:
            if wS[0] == wS[1] > 0:
                style = myStyle(lw = 1.2 if wS[0] == max([s[0] for s in wordSettings]) else 0.5, color = 'green', ls = 2)
            elif wS[1] > 0:
                style = myStyle(lw = 1.2 if wS[1] == max([s[1] for s in wordSettings if s[0] == 0]) else 0.5, color = 'red', ls = 3)
            elif wS[2] == 'o_min':
                style = myStyle(lw = 0, color = 'purple', ls = 0)
            else:
                style = myStyle(lw = 1.2 if wS[3] == 1024 else 0.5, color = 'blue',  ls = 1)
            plt.plot(bd.trainLengths(), avres[oomName][wS][bd.dim()][algo][reg], **style)
        #style = myStyle(lw = 0.2, color = 'green', ls = 0)
        #plt.plot(bd.trainLengths(), avres[oomName][(0,0,'o_min',1024)][bd.dim()][algo][reg], **style)

    plt.subplot(4,3,12)
    extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
    exact, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'green', ls = 2))
    less, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'red', ls = 3))
    most, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'blue', ls = 1))
    omin, = plt.plot([0],[0], **myStyle(lw = 1.5, color = 'purple', ls = 0))
    other, = plt.plot([0],[0], **myStyle(lw = 0.5, color = 'grey', ls = 0))
    
    if algo == 'SPEC':
        makeLegend([extra, exact, less, most, omin, extra, extra, other],
                   ['\\underline{words selected by}:', 'length $L$', 'length $\le L$','$Z_{\max}$ most frequent', 'frequency $\ge O_{\min}$',
                    '','\\underline{for comparison}:','Weighted learning with\n words selected by \n frequency $\ge O_{\min}$'],
                   title='\\underline{Spectral learning with}')
    else:
        makeLegend([extra, exact, less, most, omin, extra, extra, other],
                   ['\\underline{words selected by}:', 'length $L$', 'length $\le L$','$Z_{\max}$ most frequent', 'frequency $\ge O_{\min}$',
                    '','\\underline{for comparison}:','Spectral learning with\n words selected by \n length $\le L$'],
                   title='\\underline{Weighted learning with}')

    finishPlot()
    savePlot('words' + algo)

# Load the precomputed results from file and plot
with open(research_directory + '/results/resultsWords.p', 'rb') as f:
    res = pickle.load(f)['res']
    avres = combineResults(res)
    del res
make_words_plot('SPEC')
make_words_plot('GLS')
del avres

2. Selecting the target dimension


In [ ]:
with open(research_directory + '/results/resultsDim.p', 'rb') as f:
    res = pickle.load(f)['res']
    avres = combineResults(res)
with open(research_directory + '/results/resultsDimEstimation.p', 'rb') as f:
    dimestres = pickle.load(f)['res']
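# "Optimality threshold": the best result over all tried target dimensions, widened
# by 25% of its excess over the reference entropy (synthetic OOMs) or by 50% of its
# value (IO-OOMs).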
def optimalityCutoff(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide = None):
    reg = None if algo == 'SPEC' else (2,3)
    optCutoff = {}
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        wS = list(wordSetting)
        if wS[0] == 'lenX': wS[0] = bd.info['lenX']
        if wS[1] == 'lenX': wS[1] = bd.info['lenX']
        wS = tuple(wS) 
        if bd.dim() < 12:
            dimSettings = list(range(1, bd.dim() + 1)) + [bd.dim() + 1, bd.dim() + 3, bd.dim() + 10]
        else:
            dimSettings =  [1,2,4,8,12,16,20,23,26,28,30,31,32,33,35,42]
        if ide is None:
            optres = np.min([avres[oomName][wS][d][algo][reg] for d in dimSettings], axis=0)
        else:
            optres = np.min([res[ide][oomName][wS][d][algo][reg] for d in dimSettings], axis=0)            
        if oomName in BenchData.OOMs:
            optres += 0.25 * (optres - bd.info['entropies'][4])
        else:
            optres += 0.5 * optres
        optCutoff[oomName] = optres
    return optCutoff
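# For each training length, determine the range ('low' to 'high') of target dimensions
# whose (interpolated) results stay below the optimality threshold.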
def optimalDimensions(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide = None):
    reg = None if algo == 'SPEC' else (2,3)
    optCutoff = optimalityCutoff(algo, wordSetting, ide)
    optDim = {}
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        wS = list(wordSetting)
        if wS[0] == 'lenX': wS[0] = bd.info['lenX']
        if wS[1] == 'lenX': wS[1] = bd.info['lenX']
        wS = tuple(wS)
        if ide is None:
            interpolatedRes = {d : np.array(avres[oomName][wS][d][algo][reg]) for d in avres[oomName][wS].keys()}
        else:
            interpolatedRes = {d : np.array(res[ide][oomName][wS][d][algo][reg]) for d in avres[oomName][wS].keys()}
        if oomName in BenchData.OOMs:
            interpolatedRes[bd.dim()+2] = (interpolatedRes[bd.dim()+1] + interpolatedRes[bd.dim()+3]) / 2
            for i in range(4, 10):
                interpolatedRes[bd.dim()+i] = (10-i)/7 * interpolatedRes[bd.dim()+3] + (i-3)/7 * interpolatedRes[bd.dim()+10]
        else:
            interpolatedRes[bd.dim()+2] = (interpolatedRes[bd.dim()+1] * interpolatedRes[bd.dim()+3])**0.5
            for i in range(4, 10):
                interpolatedRes[bd.dim()+i] = interpolatedRes[bd.dim()+3]**((10-i)/7) * interpolatedRes[bd.dim()+10]**((i-3)/7)            
        optDim[oomName] = {'low': [42] * len(bd.trainLengths()), 'high': [1] * len(bd.trainLengths())}
        for tli, tl in enumerate(bd.trainLengths()):
            for d in interpolatedRes.keys():
                if interpolatedRes[d][tli] < optCutoff[oomName][tli]:
                    optDim[oomName]['low'][tli] = min(optDim[oomName]['low'][tli], d)
                    optDim[oomName]['high'][tli] = max(optDim[oomName]['high'][tli], d)
    return optDim
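# Plot learning curves for target dimensions below (blue), equal to (purple) and above
# (red) the process rank, together with the optimality threshold (green).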
def make_dimensions_plot(algo, wordSetting):
    optimality_cutoff = optimalityCutoff(algo, wordSetting)
    reg = None if algo == 'SPEC' else (2,3)
    plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        bd.initPlot()
        wS = list(wordSetting)
        if wS[0] == 'lenX': wS[0] = bd.info['lenX']
        if wS[1] == 'lenX': wS[1] = bd.info['lenX']
        wS = tuple(wS)
        if bd.dim() < 12:
            dimSettings = list(range(1, bd.dim() + 1)) + [bd.dim() + 1, bd.dim() + 3, bd.dim() + 10]
        else:
            dimSettings =  [1,2,4,8,12,16,20,23,26,28,30,31,32,33,35,42]
        for dim in dimSettings:            
            if dim < bd.dim():
                dim_style = myStyle(lw = 0.8, color = 'blue',  ls = 1)
            elif dim > bd.dim():
                dim_style = myStyle(lw = 0.8, color = 'red', ls = 2)
            else:
                dim_style = myStyle(lw = 1.2, color = 'purple', ls = 0)
            plt.plot(bd.trainLengths(), avres[oomName][wS][dim][algo][reg], **dim_style)
        plt.plot(bd.trainLengths(), avres[oomName][wS][bd.dim()][algo][reg], **myStyle(lw = 1.2, color = 'purple', ls = 0))
        if optimality_cutoff is not None:
            plt.plot(bd.trainLengths(), optimality_cutoff[oomName], **myStyle(lw = 0.6, color = 'green', ls = 0))
    plt.subplot(4,3,12)
    extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
    d, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'purple'))
    low, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'blue', ls = 1))
    high, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'red', ls = 2))
    tolerance, = plt.plot([0],[0], **myStyle(lw = 0.6, color = 'green', ls = 0))
    
    if algo == 'SPEC':
        makeLegend([low, d, high, extra, tolerance],
                   ['$<$ process rank', '$=$ process rank', '$>$ process rank', '', "``optimality threshold''"],
                   title='\\underline{Spectral learning for words}\n\\underline{selected by length $\le L$}\n\\underline{and target dimension}:')
    else:
        makeLegend([low, d, high, extra, tolerance],
                   ['$<$ process rank', '$=$ process rank', '$>$ process rank', '', "``optimality threshold''"],
                   title='\\underline{Weighted learning for words}\n\\underline{selected by frequency $\ge O_{\min}$}\n\\underline{and target dimension}:')
    finishPlot()
    savePlot('dim' + algo)
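# Compare the dimension-estimation criteria (Frobenius-based, spectral, gap/"elbow",
# and relative) against the band of suitable dimensions determined above.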
def make_dim_estimation_plot(algo = 'GLS', wordSetting = (0,0,'o_min', 1024), ide=None, pqrs = ((1,1,(2,0)), (0,0))): 
    plt.figure(figsize=(fig_width, 4/3*fig_width), dpi=144)
    if ide is None:
        optDim = combineResults([optimalDimensions(algo, wordSetting, idee) for idee in range(3)])
    else:
        optDim = optimalDimensions(algo, wordSetting, ide)
    for oomName in BenchData.OOMs + BenchData.IOOOMs:
        bd = BenchData(oomName)
        wS = list(wordSetting)
        wS = tuple(wS)
        
        ax = bd.initPlot()
        ax.set_yscale('linear')
        ax.set_yticks(np.arange(0, 43, 1))
        ax.set_yticklabels([])
        ax.set_ylim((0,bd.dim()+5))
        dc = (bd.dim()+10) / 250
        lfilled = ax.fill_between(bd.trainLengths(), np.array(optDim[oomName]['low'])-2*dc, np.array(optDim[oomName]['high'])+2*dc, color = 1 - 0.333 * (1 - np.array(colors['lightgreen'])))
        lrank = ax.axhline(bd.dim(), **myStyle(lw = 0.5, color = 'brown', ls=4))
        lfrob, lspec, lgap, lrel = [0,1], [0,1], [0,1], [0,1]  # line-handle placeholders; index 1 holds the 'without normalization' variant (cf. legend)
        for pqrls, pqr in enumerate(pqrs):
            style = myStyle(color = 'blue', ls = 3, lw=0.4 if pqrls else 1.2)
            if ide is None:
                frob = np.average([[dimestres[idee][oomName][tl][wS][pqr]['frob'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
            else:
                frob = np.array([dimestres[ide][oomName][tl][wS][pqr]['frob'][0] for tl in bd.trainLengths()])                
            lfrob[pqrls], = ax.plot(bd.trainLengths(), frob, **style)
            
            # "Elbow" estimate: starting from the Frobenius-based dimension f, move d forward
            # (within a bounded search window) to the index of the singular value spectrum s
            # that maximizes the gap ratio s[d-1]^2 * s[d+1]^2 / s[d]^4 encountered so far.
            for tl in bd.trainLengths():
                for idee in range(3):
                    s = dimestres[idee][oomName][tl][wS][pqr]['spectrum']
                    f = dimestres[idee][oomName][tl][wS][pqr]['frob'][0]
                    e = d = f
                    d2 = s[d-1]**2 * s[d+1]**2 / s[d]**4
                    while e < min(len(s)-3, f+100) and e - d < 100:
                        e += 1
                        e2 = s[e-1]**2 * s[e+1]**2 / s[e]**4
                        if e2 > d2:
                            d = e
                            d2 = e2
                    dimestres[idee][oomName][tl][wS][pqr]['frob_elbow'] = d
            style = myStyle(color = 'red', ls = 1, lw=0.5 if pqrls else 1.2)
            if ide is None:
                frob_elbow = np.average([[dimestres[idee][oomName][tl][wS][pqr]['frob_elbow'] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
            else:
                frob_elbow = np.array([dimestres[ide][oomName][tl][wS][pqr]['frob_elbow'] for tl in bd.trainLengths()])                
            lgap[pqrls], = ax.plot(bd.trainLengths(), frob_elbow + 2*dc, **style)

            style = myStyle(color = 'purple', ls = 2, lw=0.4 if pqrls else 1.2)
            if ide is None:
                mid_spec = np.average([[dimestres[idee][oomName][tl][wS][pqr]['mid_spec'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
            else:
                mid_spec = np.array([dimestres[ide][oomName][tl][wS][pqr]['mid_spec'][0] for tl in bd.trainLengths()])                
            lspec[pqrls], = ax.plot(bd.trainLengths(), mid_spec + dc, **style)

            style = myStyle(color = 'green', ls = 0, lw=0.4 if pqrls else 1.2)
            if ide is None:
                relative = np.average([[dimestres[idee][oomName][tl][wS][pqr]['relative'][0] for tl in bd.trainLengths()] for idee in range(3)], axis = 0)
            else:
                relative = np.array([dimestres[ide][oomName][tl][wS][pqr]['relative'][0] for tl in bd.trainLengths()])                
            lrel[pqrls], = ax.plot(bd.trainLengths(), relative-dc, **style)

    plt.subplot(4,3,12)
    title = '\\underline{Dimension estimation from}\n\\underline{$\hat{F}_{Y,X}$ with words selected}\n' + ('\\underline{by frequency $\ge O_{\min}$}\n\\underline{for weighted learning}' if algo == 'GLS' else '\\underline{by length $\le L$}\n\\underline{for spectral learning}')
    extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
    #d, = plt.plot([0],[0], **myStyle(lw = 1.2, color = 'green'))
    #low, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'blue', ls = 1))
    #high, = plt.plot([0],[0], **myStyle(lw = 0.8, color = 'red', ls = 2))
    makeLegend([lfilled, lrank, lfrob[0], lspec[0], lgap[0], lrel[0], (lfrob[1], lspec[1], lgap[1], lrel[1])],
               ['suitable dimensions', 'process rank',
                '$r_{\\varepsilon_{\\rm{F}}}$', '$r_{\\varepsilon_2}$', '$r_{\\rm{gap}}$', '$r_{\\varepsilon_{\\rm{J}}}$',
                'without normalization'], title=title)
    finishPlot()
    savePlot('dimEstimation' + algo)
make_dimensions_plot('SPEC', (0,'lenX',1,0))
make_dimensions_plot('GLS', (0,0,'o_min', 1024))
make_dim_estimation_plot('SPEC', (0, 'lenX', 1, 0))
make_dim_estimation_plot('GLS', (0, 0, 'o_min', 1024))
del res, avres, dimestres

3. Comparison of variants


In [ ]:
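# Section 3: compare learning variants (standard spectral, row/column weighted,
# efficiency sharpening, element-wise weighted, and fully weighted spectral) on the
# synthetic benchmarks and on the real-world datasets.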
def make_variant_comparison_plot(realworld = False):
    with open(research_directory + '/results/results' + ('Realworld.p' if realworld else 'Synthetic.p'), 'rb') as f:
        avres = pickle.load(f)['res']
        if not realworld: avres = combineResults(avres)
    plt.figure(figsize=(fig_width, (1.018 if realworld else 4)/3*fig_width), dpi=144)
    for oomName in BenchData.realWorldData if realworld else (BenchData.OOMs + BenchData.IOOOMs):
        bd = BenchData(oomName)
        bd.initPlot()
        wS, d, s = (0,0,'o_min',1024), 0, 'eval'
        
        style = myStyle(lw = 0.6, color = 'grey', ls = 0)
        spec, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['SPEC'][s], **style)
        
        style = myStyle(lw = 0.8, color = 'red', ls = 2)
        rcw, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['RCW'][s], **style)
        
        style = myStyle(lw = 0.8, color = 'green', ls = 3)
        es, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['ES'][s], **style)

        style = myStyle(lw = 1, color = 'blue', ls = 1)
        wls, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['WLS'][s], **style)

        style = myStyle(lw = 1.2, color = 'purple', ls = 0)
        if oomName == 'BIBLE27':
            gls, = plt.plot(bd.trainLengths()[:-2], avres[oomName][wS][d]['GLS']['eval'][:-2], **style)
            style = myStyle(lw = 1.2, color = 'purple', ls = 4)
            gls2, = plt.plot(bd.trainLengths()[-3:], avres[oomName][wS][d]['GLS']['eval'][-3:], **style)
        else:
            gls, = plt.plot(bd.trainLengths(), avres[oomName][wS][d]['GLS']['eval'], **style)

    plt.subplot(1 if realworld else 4, 3, 3 if realworld else 12)
    title = '\\underline{Variants of weighted spec-}\n\\underline{tral learning using words}\n\\underline{selected by frequency $\ge O_{\min}$}\n' r'\underline{and dimension $\max\{r_{\rm{F}}, r_2\}$}.'
    makeLegend([spec, rcw, es, wls, (gls, gls2) if realworld else gls],
               ['Standard spectral', 'Row/column weighted', 'Efficiency sharpening', 'Element-wise weighted',
                'Weighted spectral, with\n dimension capped at 192' if realworld else 'Weighted spectral'], title=title, row=1 if realworld else 4)
    finishPlot()
    savePlot('varComparison' + ('RW' if realworld else ''))
make_variant_comparison_plot(False)
make_variant_comparison_plot(True)

4. Missing values


In [ ]:
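# Section 4: learning with missing values.  The fraction of missing observations is
# varied, and three treatments of missing values are compared (discarded, as symbol,
# as wildcard; cf. the legend labels below).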
missingProbs = [0, 1/9.5, 1/4.5, 1/2.5]
regularizations = [(2,3), (2,'3/tl**2')]
wordSettingsAndWildcards = [((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance(0,1)'), False),
                            ((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance((Σₒ)**-1,1)'), False),
                            ((0, 0, 'o_min(tl,Σₒ+1,1)', 1024, False, False, 'MissingRelevance((Σₒ)**-1,1)'), True)]
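# Plot learning curves for the three missing-value treatments and the three nonzero
# missing fractions, with the results without missing values (grey) as a baseline.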
def make_missing_values_plot(algo='Weighted', regular=False, title=None):
    with open(research_directory + '/results/resultsSynthetic.p', 'rb') as f:
        res0 = pickle.load(f)['res']
    avres0 = combineResults(res0)
    with open(research_directory + '/results/resultsRealworld.p', 'rb') as f:
        avres0.update(pickle.load(f)['res'])        
    with open(research_directory + '/results/resultsMissing' + ('Regular.p' if regular else 'Random.p'), 'rb') as f:
        res = pickle.load(f)['res']
    with open(research_directory + '/results/resultsMissing' + ('RegularRW.p' if regular else 'RandomRW.p'), 'rb') as f:
        rwres = pickle.load(f)['res']
        for i in range(3):
            res[i].update(rwres)
    if regular:
        with open(research_directory + '/results/resultsMissingRegularBIBLE27tweakedDim.p', 'rb') as f:
            resregBIBLE27tweakeddim = pickle.load(f)['res']

    for oomName in BenchData.OOMs + BenchData.realWorldData:
        bd = BenchData(oomName)
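        # For each run, missing fraction and word setting, pick per training length the
        # regularization with the smaller 'cx' criterion and keep its 'eval' score.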
        for ID, mP, wSawC in itertools.product(range(3), missingProbs, wordSettingsAndWildcards):
            if mP != 0:
                r = [res[ID][oomName][mP][wSawC][regularization][algo]['cx'] for regularization in regularizations]
                optregs = np.argmin([[r[reg][tli] for tli in range(len(r[0]))] for reg in range(2)], axis=0)
                res[ID][oomName][mP][wSawC][algo] = {'eval': [res[ID][oomName][mP][wSawC][regularizations[optregs[tli]]][algo]['eval'][tli] for tli in range(len(optregs))]} 
    avres = combineResults(res)
    fig = plt.figure(figsize=(fig_width, 2/3*fig_width / 0.937), dpi=144)
    special = None
    for oomName in BenchData.OOMs + BenchData.realWorldData:
        bd = BenchData(oomName)
        bd.initPlot(rows=2, allOOMs = True)
        lines = [3*[None], 3*[None], 3*[None], 3*[None]]  # line handles, one sublist per missing fraction (largest first); the 0% entry becomes a single baseline handle
        for j, wS in enumerate(wordSettingsAndWildcards[0:3]):
            for i, mP in enumerate(reversed(missingProbs)):
                if mP == 0:
                    style = myStyle(color='grey', ls = 0, lw = 0.6)
                    nomissingres0 = avres0[oomName][(0, 0, 'o_min', 1024)][bd.dim()]['SPEC' if algo == 'SPEC' else 'GLS']['eval']
                    if algo == 'Weighted' and oomName == 'BIBLE27':
                        nomissingres0[-3:] = avres0[oomName][(0, 0, 'o_min', 1024)][0]['WLS']['eval'][-3:]
                    lines[i], = plt.plot(bd.trainLengths(), nomissingres0, **style)
                else:
                    style = myStyle(color=2-i, ls = j, lw = 0.8 if j != 1 else 1.0)
                    try:
                        lines[i][j], = plt.plot(bd.trainLengths(), avres[oomName][mP][wS][algo]['eval'], **style)
                    except:  # some (mP, word-setting) combinations may be absent from the results; skip them
                        pass
                if oomName == 'ECOLI' and mP == 1/4.5 and j == 2 and algo == 'Weighted' and regular == True:
                    resregECOLIbestregularization = avres[oomName][mP][wS][algo]['eval']
                    resregECOLIbestregularization[-3:] = avres[oomName][mP][wS][(2,'3/tl**2')][algo]['eval'][-3:]
                    style = myStyle(color=1, ls = 4, lw=0.8)
                    special, = plt.plot(bd.trainLengths()[-3:], resregECOLIbestregularization[-3:], **style)
                    lines[1].append(special)                    
                if oomName == 'BIBLE27' and mP == 1/4.5 and j == 2 and algo == 'Weighted' and regular == True:
                    r = [resregBIBLE27tweakeddim[oomName][mP][wS][regularization][algo]['cx'] for regularization in regularizations]
                    optregs = np.argmin([[r[reg][tli] for tli in range(len(r[0]))] for reg in range(2)], axis=0)
                    resregBIBLE27tweakeddim[oomName][mP][wS][algo] = {'eval': [resregBIBLE27tweakeddim[oomName][mP][wS][regularizations[optregs[tli]]][algo]['eval'][tli] for tli in range(len(optregs))]} 
                    style = myStyle(color=1, ls = 4, lw = 0.8)
                    special, = plt.plot(bd.trainLengths(), resregBIBLE27tweakeddim[oomName][mP][wS][algo]['eval'], **style)
                    lines[1].append(special)
        if oomName == 'ECOLI':
            ignored, = plt.plot([0],[0], **myStyle(ls = 0, lw = 0.8, color = 'black'))
            symbol, = plt.plot([0],[0], **myStyle(ls = 1, lw = 1.0, color = 'black'))
            wildcard, = plt.plot([0],[0], **myStyle(ls = 2, lw = 0.8, color = 'black'))
            wildcard_special, = plt.plot([0],[0], **myStyle(ls = 4, lw = 0.8, color = 1))
            if special is not None: wildcard = [wildcard, wildcard_special]
            drawLegend([ignored, symbol, wildcard], [r'discarded', r'as symbol', r'as wildcard'],
                       loc=3, title='\\underline{Treatment of}\n\\underline{missing values:}', title_size=9)
        if oomName == 'BIBLE27':
            legend = drawLegend(lines, [r'40\%', r'22\%', r'11\%', r'0\%'], loc=3, title=r'\underline{$r_{\rm{mis}}$:}', title_size=9)
            if special is not None:
                drawLegend([special], ['tweaked'], loc=4)
                plt.gca().add_artist(legend)
    finishPlot()
    plt.subplots_adjust(top=0.937)
    fig.text(0.6, 0.987, 'DUMMY', fontsize=1)  # tiny invisible text near the top edge; appears to reserve space for the suptitle when saving
    fig.suptitle(title, fontsize=10, bbox={'facecolor':(0.9,0.9,0.9), 'edgecolor':'none', 'boxstyle':'round'})
    savePlot('missing' + algo + ('_reg' if regular else '_rand'))

make_missing_values_plot('SPEC', False, 'Spectral learning with {\\bf{randomly}} missing values')
make_missing_values_plot('Weighted', False, '{\\bf{Weighted}} spectral learning with {\\bf{randomly}} missing values')
make_missing_values_plot('SPEC', True, 'Spectral learning with {\\bf{regularly}} missing values')
make_missing_values_plot('Weighted', True, '{\\bf{Weighted}} spectral learning with {\\bf{regularly}} missing values')

In [ ]: