In [1]:
from IPython import parallel
clients = parallel.Client(profile='parallel')

In [2]:
print clients.ids
print "Total %i cores"%(len(clients.ids))


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
Total 17 cores

In [3]:
%%px --local

import sys
sys.path.append("\\\\DAP-NAS\\work\\CNOP")
import cPickle as pickle
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import arma_generate_sample
import matplotlib.pyplot as plt
import numpy as np
from CNOP import CNOP
import winsound
def threshold(x,thresholds=[],values=[-1,0,1]):
    for threshold,val in zip(thresholds,values):
        if x < threshold: 
            return val
    return values[-1]
import time
from itertools import repeat
import os
from datetime import datetime

In [4]:
%%px --local
x2000 = pd.read_csv("\\\\DAP-NAS\\work\\CNOP\\x2000.csv", delimiter=";")

In [5]:
%%px --local

N=100

def partial_overlap(N, df, give_distortion=True):
    ###########################################################
    ##### This code is for partial-overlap case of CNOP########
    ###########################################################

    #PreSampling:
    df = df[:N]
    #df = x2000[:N]
    
    ### DGP generation
    regime = pd.DataFrame() 
    beta,   alpha   = [0.6, 0.4], [0.9, 1.5]
    gammam, mum =     [0.3, 0.9], [-0.67, 0.36]
    gammap, mup =     [0.2, 0.3], [0.02, 1.28]
    distortion = np.random.randn(3,N)
    regime["xbeta"]    = df[["X1", "X2"]].dot(beta)    + distortion[0]
    regime['z-gammam'] = df[["X1", "X3"]].dot(gammam)  + distortion[1]
    regime['z+gammap'] = df[["X2", "X3"]].dot(gammap)  + distortion[2]
    regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
    regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
    regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
    df['Y'] = 0
    df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
    df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
    ###df is full data matrix

    #Model starts here:
    exog = df[["X1", "X2", "X3"]]
    endog = df[["Y"]]
    l = {0:df}
    pan = pd.Panel(l)
    y      = pan.ix[:,:,['Y']]
    x      = pan.ix[:,:,["X1", "X2"]]
    zminus = pan.ix[:,:,["X1", "X3"]]
    zplus  = pan.ix[:,:,["X2", "X3"]]
    exog = {'x':x,'zplus':zplus,'zminus':zminus}
    CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)

    #Counting execution time for fit ...
    exec_time = {"fit":0,"se":0}
    start_time = time.time()
    res = CNOP4.fit( maxiter=250, disp=0)
    exec_time["fit"] = time.time() - start_time

    # And for Standard Errors as well
    try:
        start_time = time.time()
        res["se"] = CNOP4.se(res.x)
        exec_time["se"] = time.time() - start_time
        res['status'] = "OK"
    except Exception, e:
        print e
        res['status'] = e

    res["exec_time"]=exec_time

    if give_distortion:
        return res, distortion
    else:
        return res

In [1]:
N=100
res, distortion = partial_overlap(N, df=x2000)
print "Done for N=%i" % (N)
print "Res.status:", res.success

beta,   alpha   = [0.6, 0.4], [0.9, 1.5]
gammam, mum =     [0.3, 0.9], [-0.67, 0.36]
gammap, mup =     [0.2, 0.3], [0.02, 1.28]
res_real = beta+alpha+gammam+mum+gammap+mup
%matplotlib inline
_ = plt.bar(np.arange(len(res.x-res_real)),
                  list(res.x-res_real))

winsound.PlaySound(u'C:\Windows\Media\Windows Print complete.wav', winsound.SND_FILENAME)
print "X & SE:", np.dstack((res["x"],res["se"]))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-3e155f24f1ef> in <module>()
      1 N=100
----> 2 res, distortion = partial_overlap(N, df=x2000)
      3 print "Done for N=%i" % (N)
      4 print "Res.status:", res.success
      5 

NameError: name 'partial_overlap' is not defined

In [6]:
%%px --local

def runMC(n_obs, n_repl, fun, path="\\\\DAP-NAS\\work\\CNOP\\dumps\\", prefix = "CNOPres", ext = ".p", **kwargs):
    print "Starting MC on %i cores, %i replications of \"%s\" with %i observations"\
               %(len(clients.ids), n_repl, fun.__name__, n_obs)
    print "If you again run next(), you will get wait_interactive() form and results would be backed up"
    print "Untill then you are free to use AsyncResult object that was apready yielded, but data will not be backed up!"
    view = clients.load_balanced_view()

    def doJob(i):
        n_obs,path,fun, kwargs=i
        dumb_str=time.strftime("%Y%m%d%H%M%S")+str(np.random.rand())

        filename = path + dumb_str

        try:
            local_res = fun(n_obs, **kwargs)
            pickle.dump(local_res, open( filename, "wb" ) )
            return local_res, filename
        except Exception, e:
            pickle.dump(e, open( filename, "wb" ) )
            return Exception, e, filename
    
    cur_time_str=time.strftime('%Y%m%d_%H%M%S')
    temp_folder_str = path+'temp/'+cur_time_str+"/"
    if not os.path.exists(temp_folder_str):
        os.makedirs(temp_folder_str)

    print temp_folder_str
    readme_f = file(temp_folder_str+"!README.txt", "w")
    readme_f.write("Doing MC on %i cores, %i replications of \"%s\" with %i observations"\
                   %(len(clients.ids), n_repl, fun.__name__, n_obs))
    readme_f.close()

    ar = view.map_async(doJob, [[n_obs, temp_folder_str, fun, kwargs]]*n_repl)
    yield ar

    ar.wait_interactive()
    results = ar.get()
    cur_time_str=time.strftime('%Y%m%d_%H%M%S')
    filename = path + prefix + cur_time_str + ext
    print "DONE! DB in %s"%(temp_folder_str)
    yield results

In [13]:
MCgenr100 = runMC(500, 20, partial_overlap, df=x2000, path="\\\\DAP-NAS\\work\\CNOP\\dumps\\")
MCAsyncMapResult100 = next(MCgenr100)


Starting MC on 8 cores, 20 replications of "partial_overlap" with 500 observations
If you again run next(), you will get wait_interactive() form and results would be backed up
Untill then you are free to use AsyncResult object that was apready yielded, but data will not be backed up!
\\DAP-NAS\work\CNOP\dumps\temp/20150329_163212/

In [14]:
results100=next(MCgenr100)


  20/20 tasks finished after  138 s
done
DONE! DB in \\DAP-NAS\work\CNOP\dumps\temp/20150329_163212/

In [15]:
res_100obs = [x[0][0] for x in results100 if len(x[0])>0 and x[0][0].success]

In [16]:
xs100 =np.mean([i.x for i in res_100obs], axis = 0)
se100_true = np.std([i.x for i in res_100obs], axis=0)
ses100 =np.nanmean([i.se for i in res_100obs if "se" in i], axis = 0)

In [17]:
_ = plt.bar(np.arange(len(xs100-res_real)),
                  list(xs100-res_real), label="params")
print "SE (true):", se100_true
print "Bias:", np.mean(xs100-res_real)
print "RMSE:", np.sqrt(np.mean((xs100-res_real)**2))


SE (true): [ 0.09546612  0.12776311  0.36486905  0.23048422  0.13358977  0.15915175
  0.16057161  0.2271023   0.15993511  0.18941529  1.15444552  0.24456778]
Bias: -0.0447156672673
RMSE: 0.16225886409

In [7]:
MCgenr500 = runMC(500, 10000, partial_overlap, df=x2000, path="\\\\DAP-NAS\\work\\CNOP\\dumps\\")
MCAsyncMapResult500 = next(MCgenr500)


Starting MC on 17 cores, 10000 replications of "partial_overlap" with 500 observations
If you again run next(), you will get wait_interactive() form and results would be backed up
Untill then you are free to use AsyncResult object that was apready yielded, but data will not be backed up!
\\DAP-NAS\work\CNOP\dumps\temp/20150329_164457/

In [8]:
results500=next(MCgenr500)
pickle.dump(results500, file("\\\\DAP-NAS\\work\\CNOP\\dumps\\res500PartialOverlap", "w"))


10000/10000 tasks finished after 34787 s
done
DONE! DB in \\DAP-NAS\work\CNOP\dumps\temp/20150329_164457/

In [9]:
del results500

In [10]:
MCgenr1000 = runMC(1000, 10000, partial_overlap, df=x2000, path="\\\\DAP-NAS\\work\\CNOP\\dumps\\")
MCAsyncMapResult1000 = next(MCgenr1000)


Starting MC on 17 cores, 10000 replications of "partial_overlap" with 1000 observations
If you again run next(), you will get wait_interactive() form and results would be backed up
Untill then you are free to use AsyncResult object that was apready yielded, but data will not be backed up!
\\DAP-NAS\work\CNOP\dumps\temp/20150330_022738/

In [11]:
results1000=next(MCgenr1000)
pickle.dump(results1000, file("\\\\DAP-NAS\\work\\CNOP\\dumps\\res1000PartialOverlap", "w"))


10000/10000 tasks finished after 68485 s
done
DONE! DB in \\DAP-NAS\work\CNOP\dumps\temp/20150330_022738/

150 obs


In [7]:
MCgenr150 = runMC(150, 10000, partial_overlap, df=x2000, path="\\\\DAP-NAS\\work\\CNOP\\dumps\\")
MCAsyncMapResult150 = next(MCgenr150)


Starting MC on 17 cores, 10000 replications of "partial_overlap" with 150 observations
If you again run next(), you will get wait_interactive() form and results would be backed up
Untill then you are free to use AsyncResult object that was apready yielded, but data will not be backed up!
\\DAP-NAS\work\CNOP\dumps\temp/20150405_205301/

In [8]:
results150=next(MCgenr150)


10000/10000 tasks finished after 8543 s
done
DONE! DB in \\DAP-NAS\work\CNOP\dumps\temp/20150405_205301/

In [10]:
pickle.dump(results150, file("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res150_CHECKED", "w"))