In [1]:
from IPython import parallel
clients = parallel.Client(profile='parallel')

In [2]:
print clients.ids
print "Total %i cores"%(len(clients.ids))


[0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15]
Total 12 cores

In [3]:
%%px --local

import sys
sys.path.append("\\\\DAP-NAS\\work\\CNOP")
import cPickle as pickle
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import arma_generate_sample
import matplotlib.pyplot as plt
import numpy as np
from CNOP import CNOP
import winsound
def threshold(x,thresholds=[],values=[-1,0,1]):
    for threshold,val in zip(thresholds,values):
        if x < threshold: 
            return val
    return values[-1]
import time
from itertools import repeat
import os
from datetime import datetime
import numpy as np
import numpy.linalg as linalg

In [4]:
%%px --local
x2000 = pd.read_csv("\\\\DAP-NAS\\work\\CNOP\\x2000.csv", delimiter=";")

In [5]:
%%px --local

def recalcSE(path, fun, chunksize=1, **kwargs):
    print "Starting SE on %i cores, \"%s\""\
               %(len(clients.ids),  fun.__name__)
    print "NO BACKUP"
    view = clients.load_balanced_view()

    def doJob(i):
        fun, distortion, res, kwargs=i
        try:
            local_res = fun(distortion=distortion, res=res,**kwargs)
            #pickle.dump(local_res, open( filename, "wb" ) )
            return local_res, ""
        except Exception, e:
            #pickle.dump(e, open( filename, "wb" ) )
            return Exception, e, ""
    

    dump = pickle.load(file(path))
    print "Results loaded"
    try:
        job = [(fun, i[0][1], i[0][0], kwargs) for i in dump if type(i[0])!=type]
        print "Job type 1, %i" % len(job)
    except KeyError:
        job = [(fun, i[1], i[0], kwargs) for i in dump if type(i)==tuple]
        print "Job type 2, %i" % len(job)

    ar = view.map_async(doJob, job, chunksize=chunksize)
    yield ar

    ar.wait_interactive()
    results = ar.get()
    print "DONE!"
    yield results

In [6]:
%%px --local

def full_overlapSE(distortion, res, df, give_distortion=True):
    ########################################################
    ##### This code is for full-overlap case of CNOP########
    ########################################################

    #PreSampling:
    N=distortion.shape[1]
    df = df[:N]
    #df = x2000[:N]
    
    ### DGP generation
    regime = pd.DataFrame() 
    beta,   alpha   = [0.6, 0.4, 0.8], [0.85, 1.55]
    gammam, mum =     [0.4, 0.3, 0.9], [-1.2, 0.07]
    gammap, mup =     [0.2, 0.8, 0.3], [1.28, 2.5]
    ####distortion = np.random.randn(3,N)
    regime["xbeta"]    = df.dot(beta) + distortion[0]
    regime['z-gammam'] = df.dot(gammam)  + distortion[1]
    regime['z+gammap'] = df.dot(gammap)  + distortion[2]
    regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
    regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
    regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
    df['Y'] = 0
    df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
    df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
    ###df is full data matrix

    #Model starts here:
    exog = df[["X1", "X2", "X3"]]
    endog = df[["Y"]]
    l = {0:df}
    pan = pd.Panel(l)
    y      = pan.ix[:,:,['Y']]
    x      = pan.ix[:,:,["X1", "X2", "X3"]]
    zminus = pan.ix[:,:,["X1", "X2", "X3"]]
    zplus  = pan.ix[:,:,["X1", "X2", "X3"]]
    exog = {'x':x,'zplus':zplus,'zminus':zminus}
    CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)

    # Standard Errors as well
    try:
        res["se"] = CNOP4.se(res.x)
        res['status'] = "OK"
    except Exception, e:
        print e
        res['status'] = e

    if give_distortion:
        return res, distortion
    else:
        return res

In [14]:
%%px --local

def partial_overlapSE(distortion, res, df, give_distortion=True):
    ########################################################
    ##### This code is for full-overlap case of CNOP########
    ########################################################

    #PreSampling:
    N=distortion.shape[1]
    df = df[:N]
    #df = x2000[:N]
    
    ### DGP generation
    regime = pd.DataFrame() 
    beta,   alpha   = [0.6, 0.4], [0.9, 1.5]
    gammam, mum =     [0.3, 0.9], [-0.67, 0.36]
    gammap, mup =     [0.2, 0.3], [0.02, 1.28]
    ####distortion = np.random.randn(3,N)
    regime["xbeta"]    = df[["X1", "X2"]].dot(beta)    + distortion[0]
    regime['z-gammam'] = df[["X1", "X3"]].dot(gammam)  + distortion[1]
    regime['z+gammap'] = df[["X2", "X3"]].dot(gammap)  + distortion[2]
    regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
    regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
    regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
    df['Y'] = 0
    df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
    df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
    ###df is full data matrix

    #Model starts here:
    exog = df[["X1", "X2", "X3"]]
    endog = df[["Y"]]
    l = {0:df}
    pan = pd.Panel(l)
    y      = pan.ix[:,:,['Y']]
    x      = pan.ix[:,:,["X1", "X2"]]
    zminus = pan.ix[:,:,["X1", "X3"]]
    zplus  = pan.ix[:,:,["X2", "X3"]]
    exog = {'x':x,'zplus':zplus,'zminus':zminus}
    CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)

    # Standard Errors as well
    try:
        res["se"] = CNOP4.se(res.x)
        res['status'] = "OK"
    except Exception, e:
        print e
        res['status'] = e

    if give_distortion:
        return res, distortion
    else:
        return res

In [59]:
%%px --local

def no_overlapSE(distortion, res, df, give_distortion=True):
    ########################################################
    ##### This code is for full-overlap case of CNOP########
    ########################################################

    #PreSampling:
    N=distortion.shape[1]
    df = df[:N]
    #df = x2000[:N]
    
    ### DGP generation
    regime = pd.DataFrame() 
    beta,   alpha   = [0.6], [0.95, 1.45]
    gammam, mum =     [0.9], [-1.22, 0.03]
    gammap, mup =     [0.8], [-0.03, 1.18]
    #####distortion = np.random.randn(3,N)
    regime["xbeta"]    = df[["X1"]].dot(beta)    + distortion[0]
    regime['z-gammam'] = df[["X2"]].dot(gammam)  + distortion[1]
    regime['z+gammap'] = df[["X3"]].dot(gammap)  + distortion[2]
    regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
    regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
    regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
    df['Y'] = 0
    df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
    df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
    ###df is full data matrix

    #Model starts here:
    exog = df[["X1", "X2", "X3"]]
    endog = df[["Y"]]
    l = {0:df}
    pan = pd.Panel(l)
    y      = pan.ix[:,:,['Y']]
    x      = pan.ix[:,:,["X1"]]
    zminus = pan.ix[:,:,["X2"]]
    zplus  = pan.ix[:,:,["X3"]]
    exog = {'x':x,'zplus':zplus,'zminus':zminus}
    CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)

    # Standard Errors as well
    try:
        res["se"] = CNOP4.se(res.x)
        res['status'] = "OK"
    except Exception, e:
        print e
        res['status'] = e

    if give_distortion:
        return res, distortion
    else:
        return res

Results processing part

This code section provides processing function


In [21]:
def process_dump(obj, res_real, cutpoints = (.2, .8)):
    if type(obj) is str:
        mc_res = pickle.load(file(obj))
    else:
        mc_res = obj
    xs = np.array([item[0][0].x for item in mc_res if len(item)==2 and item[0][0].success
                   and linalg.norm(item[0][0].x, ord=np.inf) < 100 
                   ])
    ses = np.array([item[0][0].se for item in mc_res 
                    if len(item)==2 and item[0][0].success
                    and linalg.norm(item[0][0].x, ord=np.inf) < 100 
                    and "se" in item[0][0].keys() #and not np.isnan(item[0][0].se).any() \
                    #and linalg.norm(item[0][0].se, ord=np.inf) < 100
                    ])
    ses = np.nan_to_num(ses)
    
    xs = pd.DataFrame(xs)
    ses = pd.DataFrame(ses)

    
    rmse =  ((res_real - xs) ** 2).mean().mean()
    bias = (res_real - xs).mean().mean()
    #a_ratio = (ses.mean()/xs.std()).mean()
    #m_ratio = (ses.median()/xs.std()).mean()
    a_ratio = (ses[(ses<ses.quantile(cutpoints[1]))&(ses>ses.quantile(cutpoints[0]))].mean()  \
                  / xs[(xs<xs.quantile(cutpoints[1]))&(xs>xs.quantile(cutpoints[0]))].std()).mean()
    m_ratio = (ses[(ses<ses.quantile(cutpoints[1]))&(ses>ses.quantile(cutpoints[0]))].median()  \
                  / xs[(xs<xs.quantile(cutpoints[1]))&(xs>xs.quantile(cutpoints[0]))].std()).mean()

    if type(obj) is str:
        print "FILE: %s"%(obj.split("\\")[-1])
    print "BIAS: %2.3f"%(bias)
    print "RMSE: %2.3f"%(rmse)
    print "A-ratio: %2.3f"%(a_ratio)
    print "M-ratio: %2.3f"%(m_ratio)
    print
    print "XS len: %s" % len(xs)
    print "SE len: %s" % len(ses)
    #print "SE mean: %s "% ses.mean()
    #print "XS variance: %s "% xs.std()
    
    return xs, ses

In [20]:
beta,   alpha   = [0.6, 0.4, 0.8], [0.85, 1.55]
gammam, mum =     [0.4, 0.3, 0.9], [-1.2, 0.07]
gammap, mup =     [0.2, 0.8, 0.3], [1.28, 2.5]
res_real_full = beta+alpha+gammam+mum+gammap+mup
beta,   alpha   = [0.6, 0.4], [0.9, 1.5]
gammam, mum =     [0.3, 0.9], [-0.67, 0.36]
gammap, mup =     [0.2, 0.3], [0.02, 1.28]
res_real_partial = beta+alpha+gammam+mum+gammap+mup
beta,   alpha   = [0.6], [0.95, 1.45]
gammam, mum =     [0.9], [-1.22, 0.03]
gammap, mup =     [0.8], [-0.03, 1.18]
res_real_no = beta+alpha+gammam+mum+gammap+mup

Workspace

Sandbox

Very old processing code


In [33]:
dump = pickle.load(file("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res250_CHECKED"))

In [14]:
distortion=dump[1234][0][1]
#N=distortion.shape[1]
res = dump[1234][0][0]


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-14-b8dcd35a6406> in <module>()
----> 1 distortion=dump[1234][0][1]
      2 #N=distortion.shape[1]
      3 res = dump[1234][0][0]

KeyError: 1

In [34]:
ii = [(i[1], i[0]) for i in dump if type(i)==tuple]

In [35]:
len(ii)


Out[35]:
9994

In [96]:
for dist, r in zip(distortion, res):
    N=distortion.shape[1]

In [71]:
full_overlapSE(distortion=distortion, res=res, df=x2000)


Out[71]:
(    status: 'OK'
   success: True
 exec_time: {'se': 3.13100004196167, 'fit': 7.759999990463257}
      njev: 24
      nfev: 43
        se: array([ 0.34933433,  0.34058812,  0.53842331,  0.93583973,  1.15047938,
        0.32926633,  0.48263084,  0.49835624,  0.91299937,  1.30484267,
        0.33148323,  0.40386578,  0.48224324,  0.91302005,  1.0074087 ])
       fun: 80.819595455899488
         x: array([-0.22118944,  0.64690735,  1.0784131 , -0.58726372,  0.17302768,
        0.74180109,  0.08043358,  1.29506658, -0.97176219,  0.42412097,
        0.86867512,  1.10969237, -0.67776276,  2.07955817,  3.63352395])
   message: 'Optimization terminated successfully.'
       jac: array([  1.10433772e-03,   1.13770950e-03,   3.61218930e-04,
        -5.14491603e-05,  -4.13226304e-04,  -9.73330699e-04,
         1.78551967e-03,   5.37226506e-04,   6.52690583e-05,
         3.15928873e-04,  -8.39702240e-05,   5.86046959e-04,
         6.18651038e-04,  -6.37626673e-04,   3.73471423e-04,
         0.00000000e+00])
       nit: 24,
 array([[-0.65849066,  0.12758959,  0.30828359, -0.72300929, -0.52984377,
          0.12444477,  1.0364022 , -0.88821299, -0.46250863, -0.23207546,
         -1.55012724, -0.48187254,  1.97521992,  1.35728135,  0.1737257 ,
          0.64012175,  2.23702648,  0.72749142,  0.76550129,  0.35432155,
          0.1523343 ,  1.10229344,  0.11748324, -0.18016624, -0.6125319 ,
          0.92742445, -0.28105381,  0.51810738, -0.24117523,  2.14084354,
         -1.44472144, -0.40563316, -0.91085077,  2.38321777,  1.21480444,
         -0.58983945, -0.17816859, -1.22860213, -1.70197894, -0.73543691,
         -0.13782063, -0.02459721,  1.17328641, -0.28251922, -0.19401287,
         -0.41770357, -0.20369682, -0.18265644,  0.91053538,  0.31068703,
          0.3021014 , -0.55796318, -0.46839616,  0.41852571,  0.08869728,
         -0.17379604,  0.27773785, -0.6341638 ,  0.2193762 , -1.99716192,
          0.72754637,  1.04824111, -0.46469864,  0.31664684,  1.77659081,
          0.501851  ,  0.01010618,  0.11548354,  1.31352822, -1.10330377,
          2.04214216,  0.5670347 ,  0.20750388, -1.39193479,  1.09854305,
          0.20991727,  0.6148039 , -0.23219888, -1.63143816, -0.3113239 ,
          0.06370696,  0.05583874, -0.84154001,  2.28391684, -1.38382763,
          0.69328705,  0.63985189, -0.52325082,  0.03394195,  1.66932115,
          1.02726235, -1.10240467,  0.37947473,  1.94462121,  1.23407402,
          2.05201942,  1.18745554, -1.13622877,  0.50604613,  0.47993305,
         -0.11968046, -1.37657067, -0.22178515, -1.04906806, -0.45548828,
         -0.84850566, -0.86726097,  1.33507232, -2.26798557, -1.14556168,
         -1.05415193, -0.17334611, -2.57434859,  0.29055665, -0.59005641,
          0.93932394,  0.14639254,  1.71896725, -0.41747126, -0.18645907,
          0.37416133,  0.17025394, -0.04944148, -0.90371153,  0.69655254],
        [ 0.15539417, -1.62033092,  1.83273409,  0.70142021,  0.50569542,
         -0.69740753, -0.99207173,  0.05741582, -0.36651329, -0.77563492,
         -1.06056146,  0.59775908,  0.467296  ,  0.80204381, -0.92815817,
         -0.21126419, -0.81188892,  0.05078132,  0.2263266 ,  0.55898447,
         -0.54512208,  0.0718628 ,  1.09140112, -0.23984544, -0.96441731,
         -1.10817258,  1.31458691, -0.95658339,  0.77771163, -0.48086161,
         -0.68610134, -0.99365589, -0.7342906 ,  0.09671603,  0.81012795,
         -0.07336175, -0.27410992,  0.49286272,  0.89857111,  0.74268977,
          0.18021026,  0.85102094,  0.2009101 , -0.49143605, -1.20451873,
          1.12788126, -0.27125076,  0.83760736, -0.29542327,  0.46685236,
          1.34313833, -0.01315194, -0.1059768 , -0.53263517, -1.14402407,
          0.52893496, -1.76233971, -0.13808209,  0.75991946, -1.19188042,
          1.0168731 ,  0.08753221,  0.09104594, -0.59182929, -0.32340927,
         -0.83645602,  1.48839117, -0.7497384 , -1.48285311, -0.663332  ,
         -0.82678953,  0.05471952, -0.77192231, -0.38057591, -0.72849574,
          1.94152203, -1.55423222,  1.03758962, -0.91248709, -0.86052947,
          0.09396988,  0.34470884,  0.27811319, -0.84341698, -1.2629853 ,
          0.8358606 , -1.31135226,  1.81650139,  0.23046971,  1.35228671,
          0.56109525, -1.09368332, -0.52127659,  0.69066964, -0.78511718,
         -2.30768187,  0.78987828, -1.81274608,  1.62915463, -1.11144978,
         -0.97142666,  0.36276284,  3.61944534, -0.32155505, -0.17251979,
          1.48071682, -0.05186607, -0.93439808,  0.23019345,  0.49049959,
         -0.4948306 , -0.93685514,  2.04102131,  0.16924849, -1.95989933,
          0.23971384,  1.92274862, -1.3356832 , -0.1792877 ,  0.07917409,
         -1.0898919 ,  1.52765568, -0.27147008,  0.86472629,  1.88663487],
        [ 1.44715764, -1.29437345, -1.70804354,  0.41575311, -0.57227623,
         -0.94440783,  0.81211733, -0.66171501, -1.47383389,  0.5618468 ,
         -2.991046  ,  0.09855748,  0.11685723,  1.2433949 ,  0.43690795,
          2.19380329, -0.92722301, -1.40110508, -2.55701316, -0.86975502,
         -0.74821661, -0.04272166, -0.88968691, -0.74139261,  0.88182524,
         -0.65294366,  0.07929029, -0.52863992, -0.4366701 ,  0.86834561,
          0.07131008, -0.10182941,  0.60489969,  1.99660192,  1.42904904,
          1.61147338, -0.09305679, -1.60127204, -0.21434332,  0.19171124,
         -0.39263112,  0.08225134, -1.41706237,  0.36145739,  1.38756936,
          1.26083718, -0.61374459, -0.84552621,  1.45755129,  0.86011847,
          0.29554469, -1.39663235,  0.47889041, -1.32128609,  0.78316166,
          0.39299545, -1.27208587, -0.87219889, -0.96406386,  0.17194935,
          0.35417004,  0.04383654,  0.23345796,  0.62921235, -1.14920078,
         -0.78706668,  1.2450387 ,  0.44661505,  0.67591455,  0.1674787 ,
          0.5052915 ,  0.93267424, -0.87904208, -0.14699029,  0.53851577,
          0.16776776,  0.59047815, -0.48349618,  0.3130363 , -0.23517739,
          1.98698923, -0.28746599, -1.81373073,  1.25721544,  1.05454123,
         -0.22879573, -1.06500897,  1.17647207,  0.10258579, -0.2905512 ,
          0.62484526,  0.92270418,  0.10570822, -0.51583754,  0.47856994,
         -0.28476543,  0.72794206,  2.2490371 , -0.01077531,  1.72596475,
         -1.35132172,  1.13856469,  0.83635239,  0.25102303, -0.07373776,
         -0.12782123, -1.91769006, -0.78952118, -0.64331411,  0.0408862 ,
          0.50305059, -1.43139072, -0.66349588, -1.25463735, -0.4856183 ,
         -1.93509352,  0.5549096 , -0.16346897, -1.4192809 , -0.0128375 ,
         -2.20290262,  0.40109792, -0.46375213, -0.72490064,  2.18270735]]))

Full overlap


In [299]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res150_CHECKED", full_overlapSE, df=x2000, chunksize=50)

In [300]:
item = next(genr)


Starting SE on 9 cores, "full_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9816

In [301]:
res = next(genr)


 197/197 tasks finished after   62 s
done
DONE!

In [302]:
len(res)


Out[302]:
9816

In [23]:
process_dump(res, res_real_full)
#OLD before NaN -> 0


BIAS: -0.280
RMSE: 10.400
A-ratio: 35935.1136818
M-ratio: 0.175399313066

XS len: 8514
SE len: 1408
SE mean: [  2.25128781e-01   2.60298419e-01   1.94862640e+00   6.45686806e-01
   7.80111506e-01   3.90081990e-01   3.69275157e-01   8.23210521e+05
   8.86223835e+05   6.04598895e+02   2.60580159e-01   4.14352040e-01
   3.58389310e+04   2.78571299e+05   3.88763383e+04] 
XS variance: [ 1.1554774   1.10966607  2.53651001  2.08770918  2.25000791  1.54851286
  1.61313639  3.41722933  3.93851742  4.94660503  1.40904432  2.392834
  2.85243582  5.24450798  5.32106036] 

In [305]:
process_dump(res, res_real_full, cutpoints=(0.1, 0.9));


BIAS: -0.280
RMSE: 10.400
A-ratio: 0.693
M-ratio: 0.634

XS len: 8514
SE len: 8091
SE mean: 0     1.883167e-01
1     2.377810e-01
2     2.186035e+03
3     5.712088e-01
4     6.095025e-01
5     3.500693e-01
6     3.437068e-01
7     1.562116e+06
8     1.534973e+06
9     6.702975e+04
10    2.132596e-01
11    3.718374e-01
12    7.778845e+04
13    6.091778e+10
14    1.757047e+04
dtype: float64 
XS variance: 0     1.155545
1     1.109731
2     2.536659
3     2.087832
4     2.250140
5     1.548604
6     1.613231
7     3.417430
8     3.938749
9     4.946896
10    1.409127
11    2.392975
12    2.852603
13    5.244816
14    5.321373
dtype: float64 

In [319]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res250_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)


Starting SE on 12 cores, "full_overlapSE"
NO BACKUP
Results loaded
Job type 2, 9994

In [320]:
res250 = next(genr)


 200/200 tasks finished after   17 s
done
DONE!

In [321]:
process_dump(res250, res_real_full,  cutpoints=(0.1,0.9));


BIAS: -0.105
RMSE: 2.185
A-ratio: 1.236
M-ratio: 1.001

XS len: 9914
SE len: 9908
SE mean: 0     2.542456e-01
1     2.513788e-01
2     5.337107e-01
3     9.098797e-01
4     9.965594e-01
5     2.639343e-01
6     2.527580e-01
7     3.909120e+01
8     3.915632e+01
9     3.071424e+17
10    3.378472e-01
11    3.784930e-01
12    1.380877e+04
13    1.778343e+11
14    1.380928e+04
dtype: float64 
XS variance: 0     0.498076
1     0.485034
2     0.687086
3     1.286306
4     1.230183
5     0.423262
6     0.333848
7     1.204895
8     1.180613
9     2.510756
10    0.552642
11    0.750639
12    1.128163
13    2.761042
14    2.274535
dtype: float64 

In [ ]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res250_CHECKED", "w"))

In [311]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res500_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)


Starting SE on 12 cores, "full_overlapSE"
NO BACKUP
Results loaded
Job type 2, 10000

In [312]:
res500 = next(genr)


 200/200 tasks finished after   26 s
done
DONE!

In [280]:
res500 = pickle.load(file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED"))

In [313]:
process_dump(res500, res_real_full, cutpoints=(0.1,0.9));


BIAS: -0.046
RMSE: 0.934
A-ratio: 1.288
M-ratio: 1.029

XS len: 10000
SE len: 10000
SE mean: 0       0.174601
1       0.179646
2       0.258586
3       0.713882
4       0.788527
5       0.179658
6       0.175293
7       0.500888
8       0.537127
9     149.241483
10      0.188565
11      0.189466
12      0.279286
13     15.891899
14      0.683444
dtype: float64 
XS variance: 0     0.278079
1     0.283014
2     0.402717
3     0.925671
4     0.838328
5     0.239001
6     0.201319
7     0.401422
8     0.422587
9     2.039739
10    0.236728
11    0.249174
12    0.325520
13    2.038736
14    0.794329
dtype: float64 

In [322]:
del res500

In [ ]:
pickle.dump(res500, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED", "w"))

In [325]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res1000_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)


Starting SE on 12 cores, "full_overlapSE"
NO BACKUP
Results loaded
Job type 2, 9999

In [326]:
res1000 = next(genr)


 200/200 tasks finished after   67 s
done
DONE!

In [288]:
pickle.dump(res1000, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res1000_CHECKED", "w"))

In [327]:
process_dump(res1000, res_real_full, cutpoints=(0.1,0.9));


BIAS: -0.026
RMSE: 0.676
A-ratio: 1.142
M-ratio: 1.037

XS len: 9999
SE len: 9999
SE mean: 0      0.139030
1      0.148434
2      0.209963
3      0.705560
4      0.589676
5      0.129733
6      0.131604
7      0.199479
8      0.225851
9     18.677218
10     0.136535
11     0.129167
12     0.200962
13    12.936865
14     0.515106
dtype: float64 
XS variance: 0     0.195681
1     0.218619
2     0.306642
3     0.847499
4     0.646946
5     0.160897
6     0.140437
7     0.273343
8     0.301693
9     1.844760
10    0.155619
11    0.170510
12    0.215023
13    1.787101
14    0.577913
dtype: float64 

In [328]:
del res1000

In [329]:
del item1000

Partial overlap


In [56]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res150_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item125 = next(genr)


Starting SE on 12 cores, "partial_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9993

In [57]:
res125 = next(genr)


 200/200 tasks finished after   17 s
done
DONE!

In [58]:
process_dump(res125, res_real_partial, cutpoints=(0.001,0.9));


BIAS: 0.011
RMSE: 0.978
A-ratio: 0.840
M-ratio: 0.784

XS len: 9972
SE len: 9946

In [25]:
pickle.dump(res125, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res150_CHECKED", "w"))
del genr, res125,  item125

In [26]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res250_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)


Starting SE on 12 cores, "partial_overlapSE"
NO BACKUP
Results loaded
Job type 1, 10000

In [27]:
res250 = next(genr)


 200/200 tasks finished after   18 s
done
DONE!

In [45]:
process_dump(res250, res_real_partial, cutpoints=(0.001,0.9));


BIAS: 0.049
RMSE: 0.535
A-ratio: 1.235
M-ratio: 0.918

XS len: 10000
SE len: 10000

In [46]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res250_CHECKED", "w"))
del genr, res250,  item250

In [47]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res500_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)


Starting SE on 12 cores, "partial_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9996

In [48]:
res500 = next(genr)


 200/200 tasks finished after   20 s
done
DONE!

In [50]:
process_dump(res500, res_real_partial, cutpoints=(0.001,0.9));


BIAS: 0.008
RMSE: 0.154
A-ratio: 1.001
M-ratio: 0.989

XS len: 9996
SE len: 9996

In [51]:
pickle.dump(res500, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res500_CHECKED", "w"))
del genr, res500,  item500

In [52]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res1000_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)


Starting SE on 12 cores, "partial_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9996

In [53]:
res1000 = next(genr)


 200/200 tasks finished after   26 s
done
DONE!

In [54]:
process_dump(res1000, res_real_partial, cutpoints=(0.001,0.9));


BIAS: -0.003
RMSE: 0.037
A-ratio: 1.091
M-ratio: 1.084

XS len: 9996
SE len: 9996

In [55]:
pickle.dump(res1000, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res1000_CHECKED", "w"))
del genr, res1000,  item1000

No Overlap


In [60]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res150_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item125 = next(genr)


Starting SE on 12 cores, "no_overlapSE"
NO BACKUP
Results loaded
Job type 1, 10000

In [61]:
res125 = next(genr)


 200/200 tasks finished after   16 s
done
DONE!

In [71]:
process_dump(res125, res_real_no, cutpoints=(0.01,0.99));


BIAS: -0.041
RMSE: 0.304
A-ratio: 0.994
M-ratio: 0.873

XS len: 9999
SE len: 9997

In [65]:
pickle.dump(res125, file("W:\\CNOP\\dumps\\SE 06.04\\1No\\res150_CHECKED", "w"))
#del genr, res125,  item125

In [66]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res250_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)


Starting SE on 12 cores, "no_overlapSE"
NO BACKUP
Results loaded
Job type 1, 10000

In [67]:
res250 = next(genr)


 200/200 tasks finished after   21 s
done
DONE!

In [72]:
process_dump(res250, res_real_no, cutpoints=(0.01,0.99));


BIAS: -0.029
RMSE: 0.090
A-ratio: 1.072
M-ratio: 1.027

XS len: 10000
SE len: 10000

In [73]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\1No\\res250_CHECKED", "w"))
#del genr, res125,  item125

In [74]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res500_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)


Starting SE on 12 cores, "no_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9998

In [75]:
res500 = next(genr)


 200/200 tasks finished after   20 s
done
DONE!

In [76]:
process_dump(res500, res_real_no, cutpoints=(0.01,0.99));


BIAS: -0.016
RMSE: 0.042
A-ratio: 1.050
M-ratio: 1.027

XS len: 9996
SE len: 9996

In [78]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res1000_OK", no_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)


Starting SE on 12 cores, "no_overlapSE"
NO BACKUP
Results loaded
Job type 1, 9810

In [79]:
res1000 = next(genr)


 197/197 tasks finished after   26 s
done
DONE!

In [82]:
process_dump(res1000, res_real_no, cutpoints=(0.01,0.99));


BIAS: -0.008
RMSE: 0.020
A-ratio: 1.057
M-ratio: 1.045

XS len: 9810
SE len: 9810

In [ ]:

Картинки с анализом причин ошибок SE


In [14]:
res500 = pickle.load(file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED"))

In [16]:
process_dump(res500, res_real_full)


BIAS: -0.046
RMSE: 0.934
A-ratio: 9.16270764603
M-ratio: 0.57874313989

XS len: 10000
SE len: 4206
SE mean: [  1.53555917e-01   1.60922561e-01   2.28463284e-01   6.31994080e-01
   6.53792057e-01   1.79356521e-01   1.74371071e-01   6.03300991e-01
   6.38446552e-01   2.37879996e+02   1.68329086e-01   1.84051068e-01
   2.58926706e-01   2.04603058e+01   5.94976933e-01] 
XS variance: [ 0.27806553  0.28299973  0.40269704  0.9256248   0.83828652  0.23898903
  0.20130879  0.40140243  0.42256586  2.03963691  0.23671607  0.24916129
  0.32550367  2.03863356  0.7942897 ] 

In [92]:
len(res500)


Out[92]:
10000

In [99]:
ses = np.array([item[0][0].se for item in res500 
                if len(item)==2 and linalg.norm(item[0][0].x, ord=np.inf) < 100 \
                and "se" in item[0][0].keys() and not np.isnan(item[0][0].se).any() \
                #and linalg.norm(item[0][0].se, ord=np.inf) < 100 ])    
                ])

In [100]:
ses.shape[0]


Out[100]:
10000L

In [52]:
import pandas as pd
%matplotlib inline

In [50]:
df = pd.DataFrame(ses)

In [88]:
(df.median()/ses.std(axis=0))


Out[88]:
0     4.525358e-01
1     5.632764e-01
2     1.398709e-06
3     3.564833e-01
4     4.688591e-01
5     5.274583e-01
6     8.799986e-01
7     9.736958e-09
8     1.096920e-08
9     1.545197e-07
10    4.106955e-01
11    5.001354e-01
12    1.327804e-07
13    7.574424e-14
14    4.455820e-07
dtype: float64

In [89]:
ses.std(axis=0)


Out[89]:
array([  3.34408050e-01,   3.42291517e-01,   1.96369992e+05,
         1.06378004e+00,   9.46158211e-01,   5.33496519e-01,
         3.40365686e-01,   4.70614140e+07,   4.66976587e+07,
         3.57328712e+06,   3.91573390e-01,   5.75672831e-01,
         2.81340750e+06,   5.47920815e+12,   9.88736786e+05])

In [90]:
df.mean()


Out[90]:
0     1.883167e-01
1     2.377810e-01
2     2.186035e+03
3     5.712088e-01
4     6.095025e-01
5     3.500693e-01
6     3.437068e-01
7     1.562116e+06
8     1.534973e+06
9     6.702975e+04
10    2.132596e-01
11    3.718374e-01
12    7.778845e+04
13    6.091778e+10
14    1.757047e+04
dtype: float64

In [77]:
np.std?

In [53]:
#df.ix[:, 6:8].plot(kind = 'density', figsize=(9,9));
df.plot(kind = 'density', figsize=(9,9), logx=True);



In [69]:
df.ix[:,:4].boxplot();
df.ix[:,0:8].boxplot();



In [70]:
dfClean = df[abs(df)<200]

In [74]:
dfClean.dropna(inplace=True)

In [76]:
len(dfClean)


Out[76]:
3715

In [91]:
len(df)


Out[91]:
4206

In [90]:
#dfClean.plot(kind = 'density', figsize=(9,9), logx=True);
#dfClean[[0,9,14]].plot(kind = 'density', figsize=(9,9), logx=False);
df.hist(figsize=(9,9));



In [80]:
dfClean.mean()


Out[80]:
0      0.161814
1      0.169732
2      0.241862
3      0.649407
4      0.665445
5      0.181347
6      0.174770
7      0.282221
8      0.316585
9     13.966356
10     0.169297
11     0.185394
12     0.259970
13     9.727243
14     0.599305
dtype: float64

In [ ]:


In [38]:
np.isnan(np.inf)


Out[38]:
False

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: