In [1]:
from pymisca.util import *
from pymisca.vis_util import *
import IPython.display as ipd

import KBs
import random

# Output directory; the batch loop later in this notebook joins DIR with each
# result file name before calling np.save.
DIR = 'data_pdist'
# IPython shell escape: create the directory ({DIR} is interpolated from the
# Python variable); -p makes this a no-op when the directory already exists.
!mkdir -p {DIR}


is in ipython: 1
[WARN] pymisca.vis_util cannot find network

Throughput is approximately 10 rules per second; with a caching interval of 1 minute, that gives 600 rules per cache (10 rules/s × 60 s = 600).


In [ ]:
%%time
import os
import workers
import time

ts = []
t0 = time.time()
oit = list(range(471,60*24))
for seed in oit:
    bsize= 600
    worker = workers.worker0323
    kb = KBs.kb_2dntca()
    it = kb.bulk_rstr(seed=seed,bsize=bsize)
    out = mp_map(worker,it,n_cpu=6)

    fname = os.path.join(DIR,'seed%d_size%d.npy'%(seed,bsize))
    np.save(fname,out)
    
    print "finished %d of %d"%(seed,len(oit))
    t = time.time()
    ts.append(t-t0)
    t0 = t
#     break
print 'finished'
print 'avgtime:%.5fs'%(np.mean(ts))


finished 471 of 969
finished 472 of 969
finished 473 of 969
finished 474 of 969
finished 475 of 969
finished 476 of 969
finished 477 of 969
finished 478 of 969
finished 479 of 969
finished 480 of 969
finished 481 of 969
finished 482 of 969
finished 483 of 969
finished 484 of 969
finished 485 of 969
finished 486 of 969
finished 487 of 969
finished 488 of 969
finished 489 of 969
finished 490 of 969
finished 491 of 969
finished 492 of 969
finished 493 of 969
finished 494 of 969
finished 495 of 969
finished 496 of 969
finished 497 of 969
finished 498 of 969
finished 499 of 969
finished 500 of 969

In [ ]:
# Mean of the per-seed timings accumulated in `ts` by the batch loop cell.
print('avgtime:%.5fs' % (np.mean(ts),))

In [10]:
# tst_dct = [{'family':'2dntca','rulestr':x[1]} for x in KBs.tst_data]
# tst_out = mp_map(worker,tst_dct,n_cpu=12)
# print 'finished'

# plt.figure(figsize=(12,7))
# dt  = map(lambda x:x.get('data').get('med'),tst_out)
# xs,ys = zip(*dt)
# plt.scatter(*zip(*dt),color='red')
# for i,(x,y) in enumerate(dt):
#     x = x+(np.random.random()-0.5)/50.
#     y = y+(np.random.random()-0.5)/50.
#     plt.text(x,y,i )
# plt.plot([0,1],[0,1],'r--')

# dt  = map(lambda x:x.get('data').get('med'),out)
# xs,ys = zip(*dt)
# plt.scatter(*zip(*dt),color='blue')
# for i,(x,y) in enumerate(dt):
#     x = x+(np.random.random()-0.5)/50.
#     y = y+(np.random.random()-0.5)/50.
#     plt.text(x,y,i)
# plt.plot([0,1],[0,1],'r--')

# plt.grid()
# plt.show()


<matplotlib.figure.Figure at 0x7fc748a11290>

In [24]:
# Build one request dict per entry of KBs.tst_data (element [1] of each entry
# is used as the rule string) and evaluate them all in parallel.
# NOTE(review): `worker` is not defined in this cell; it is the global left
# behind by the batch loop cell (workers.worker0323), so that cell must run first.
tst_dct = []
for entry in KBs.tst_data:
    tst_dct.append({'family': '2dntca', 'rulestr': entry[1]})
tst_out = mp_map(worker, tst_dct, n_cpu=8)
print('finished')


finished

In [ ]:
# Four newline-delimited groups of integer labels, one label per line.
# Every string starts with a newline (hence the leading '' element) and has
# no trailing newline.
# NOTE(review): the labels look like indices into the test-rule list (index 16
# is looked up later in this notebook and appears in c1) — confirm what each
# group represents.
c1 = '\n'.join(['', '0', '5', '10', '11', '13', '15', '16', '19', '21', '26'])
c2 = '\n'.join(['', '6', '1', '27', '31'])
c3 = '\n'.join(['', '3', '8', '25'])
c4 = '\n'.join(['', '17', '18', '20', '28'])

In [ ]:
# Scratch cell of bare integer expressions; the values repeat the entries of c2.
# NOTE(review): presumably a hand-written group of test-rule indices — confirm.
6
1
27
31

In [ ]:
# Scratch cell of bare integer expressions. None of these values occurs in
# c1..c4; together with those four groups they cover 0..31 exactly once.
# NOTE(review): presumably the remaining, ungrouped test-rule indices — confirm.
2
4
7
9
29
30
12
14
22
23
24

In [ ]:
# Scratch cell of bare integer expressions; the values repeat the entries of c3.
# NOTE(review): presumably a hand-written group of test-rule indices — confirm.
3
8
25

In [ ]:
# Scratch cell of bare integer expressions; the values repeat the entries of c4.
# NOTE(review): presumably a hand-written group of test-rule indices — confirm.
17
18
20
28

In [91]:
# Run KBs.guess on test-rule dict #16 and hand the result to KBs.lview; in the
# recorded run this printed a viewer URL and returned the same string.
# NOTE(review): relies on tst_dct built in the earlier test-batch cell.
guess_16 = KBs.guess(dct=tst_dct[16])
KBs.lview(guess_16)


http://newflaw.com/view.php?rule_alias=b3aiy2cin5ijn4cinqtwy6aci8s03jkqry2cek5ceinry4akrtyz7e6aci8
Out[91]:
'http://newflaw.com/view.php?rule_alias=b3aiy2cin5ijn4cinqtwy6aci8s03jkqry2cek5ceinry4akrtyz7e6aci8'