In [1]:
from sys import path
path.append('/home/bingnan/ecworkspace/HFT1')
In [4]:
%matplotlib inline
In [5]:
from init import *
In [115]:
import itertools, sys
In [6]:
# Standardize all three splits with the in-sample (xin) column mean and std,
# so xout and xtest are scaled with statistics taken from xin only.
_xin_mean, _xin_std = xin.mean(axis=0), xin.std(axis=0)
xin_stdzd, xout_stdzd, xtest_stdzd = (
    (split - _xin_mean) / _xin_std for split in (xin, xout, xtest)
)
In [ ]:
distance.cdist
In [148]:
def my_kernel_eu(x, y, gamma=.1, scale=80.):
    """Exponential kernel on sklearn's squared Euclidean distances.

    Returns ``exp(-gamma * d2 / scale)`` where ``d2`` is the matrix of squared
    Euclidean distances between the rows of ``x`` and the rows of ``y``.
    Every step is timed on the global ``timer1``; the input shapes and the
    size of the result in MiB are printed.

    Parameters
    ----------
    x, y : array-like accepted by ``pairwise.euclidean_distances``
    gamma : float
        Kernel coefficient.
    scale : float
        Divisor applied to the squared distances before ``gamma``. The
        default keeps the originally hard-coded 80.
        NOTE(review): presumably the number of features -- confirm.

    Returns
    -------
    dis : the kernel matrix (the distance matrix, overwritten in place)
    """
    # Single-argument print call: same output under Python 2 and Python 3.
    print('{0} {1}'.format(x.shape, y.shape))
    dis = pairwise.euclidean_distances(x, y, squared=True)
    timer1.refresh('calculate dis')
    dis *= 1. / scale
    timer1.refresh('/= 80')
    dis *= -gamma
    timer1.refresh(' *= -gamma ')
    # Exponentiate in place to avoid a second full-size matrix.
    np.exp(dis, out=dis)
    timer1.refresh(' exp ')
    print(sys.getsizeof(dis) / 1024. / 1024.)
    return dis
In [107]:
def my_kernel_scipy(x, y, gamma=.1, squared=True, scale=80.):
    """Exponential kernel on SciPy ``cdist`` distances.

    Returns ``exp(-gamma * d / scale)`` where ``d`` is the pairwise distance
    matrix between the rows of ``x`` and the rows of ``y``. Every step is
    timed on the global ``timer1`` and the input shapes are printed.

    Parameters
    ----------
    x, y : 2-D arrays with the same number of columns
    gamma : float
        Kernel coefficient.
    squared : bool
        True (default) uses the ``'sqeuclidean'`` metric, which matches
        ``my_kernel_eu`` (it requests ``squared=True``). The previous code
        used plain ``'euclidean'``, so the two kernels being compared were
        not the same function; pass ``squared=False`` for that old behaviour.
    scale : float
        Divisor applied to the distances before ``gamma`` (was a fixed 80).

    Returns
    -------
    dis : ndarray, the kernel matrix
    """
    # Single-argument print call: same output under Python 2 and Python 3.
    print('{0} {1}'.format(x.shape, y.shape))
    metric = 'sqeuclidean' if squared else 'euclidean'
    dis = distance.cdist(x, y, metric=metric)
    timer1.refresh('calculate dis')
    dis *= 1. / scale
    timer1.refresh('/= 80')
    dis *= -gamma
    timer1.refresh(' *= -gamma ')
    # Exponentiate in place to avoid a second full-size matrix.
    np.exp(dis, out=dis)
    timer1.refresh(' exp ')
    return dis
In [21]:
def MyTime(startpoint, s):
    """Print how many seconds have passed since ``startpoint``.

    startpoint : start time, on the same clock as ``tm.time()``
    s : name of the step that has just finished

    NOTE(review): ``tm`` is not imported in the visible cells; presumably it
    is the ``time`` module brought in by ``from init import *`` -- confirm.
    """
    time_length = tm.time() - startpoint
    # One formatted string passed to a single-argument print call: identical
    # output under Python 2 and 3, and ``s`` no longer has to be a str.
    print('{0:.3f}s consumed. {1} is done.'.format(time_length, s))
In [33]:
def MyTime(startpoint, s, time_list=None, event_list=None):
    """
    Record, or print, the time elapsed since ``startpoint``.

    startpoint is the start time
    s is event name(finished)
    time_list and event_list are python lists; the elapsed seconds are
    appended to time_list and the event name to event_list.

    Both lists are optional. This definition replaces an earlier two-argument
    ``MyTime(startpoint, s)`` that printed the message, so when neither list
    is given the message is printed instead of raising TypeError.
    """
    time_length = tm.time() - startpoint
    if time_list is None and event_list is None:
        print('{0:.3f}s consumed. {1} is done.'.format(time_length, s))
        return
    if time_list is not None:
        time_list.append(time_length)
    if event_list is not None:
        event_list.append(s)
In [2]:
class MyTimeCLS(object):
    """Stopwatch that records named checkpoints and reports the time spent
    between consecutive checkpoints."""

    def __init__(self):
        self.start_time = tm.time()
        # Cumulative seconds since start_time. The leading 0. keeps
        # len(self._time_list) one longer than len(self.event_list).
        self._time_list = [0.]
        self.interval_list = []
        self.event_list = []

    def refresh(self, event):
        """Record a checkpoint called ``event`` at the current elapsed time."""
        current_time = tm.time() - self.start_time
        self._time_list.append(current_time)
        self.event_list.append(event)

    def show(self):
        """Print the duration of every recorded event, then the total."""
        # np.diff works on a copy, so self._time_list stays a Python list.
        # Rebinding it to an ndarray (as before) made the next refresh()
        # fail with AttributeError because ndarray has no append().
        self.interval_list = np.diff(self._time_list)
        # zip (not itertools.izip) runs on both Python 2 and Python 3.
        for t, e in zip(self.interval_list, self.event_list):
            print("{0:.4f} s for: {1}.".format(t, e))
        print("{0:.4f} s in total".format(self._time_list[-1]))
In [35]:
timer1 = MyTimeCLS()
In [44]:
import timeit
In [43]:
timer1.refresh('r3')
In [ ]:
In [140]:
sys.getsizeof(np.random.rand(10000, 10000))/1024./1024.
Out[140]:
In [122]:
# Size of x0 in MiB. Float divisors keep the fractional part: under Python 2
# the int/int form `/1024/1024` floors the result to whole MiB.
sys.getsizeof(x0)/1024./1024.
Out[122]:
In [109]:
def MyRgrs(xi, xo, yi, yo, model=None, align=False):
    '''Fit ``model`` on the in-sample data and score it in and out of sample.

    Use yi and yo 's index to align. x is of full length.
    Each stage is timed on the global ``timer1``.

    Parameters
    ----------
    xi, xo : in-sample / out-of-sample regressors
        1-D inputs are turned into a single column.
    yi, yo : in-sample / out-of-sample targets
    model : sklearn clf
        Needs ``fit(x, y)`` returning the fitted estimator, and ``score``.
    align : bool
        When True, rows of xi / xo are selected by label with
        ``yi.index`` / ``yo.index`` (pandas objects expected).

    Returns
    -------
    res : regression result
    rsq_in, rsq_out : values of ``res.score`` in and out of sample

    Raises
    ------
    ValueError
        If ``model`` is None.
    '''
    if model is None:
        raise ValueError('MyRgrs needs a model with fit() and score(); got None')
    if align:
        # .loc is the label-based replacement for the removed .ix indexer.
        xi = xi.loc[yi.index]
        xo = xo.loc[yo.index]
    # Check each input on its own rank; np.asarray avoids calling reshape on
    # a pandas Series, which newer pandas versions do not support.
    if xi.ndim < 2:
        xi = np.asarray(xi).reshape(-1, 1)
    if xo.ndim < 2:
        xo = np.asarray(xo).reshape(-1, 1)
    timer1.refresh('regression preparation')
    res = model.fit(xi, yi)
    timer1.refresh('fit')
    rsq_in = res.score(xi, yi)
    timer1.refresh('rsq_in calc')
    rsq_out = res.score(xo, yo)
    timer1.refresh('rsq_out calc')
    timer1.refresh('All in regression func')
    return res, rsq_in, rsq_out
In [145]:
# Thin the targets: every 50th in-sample row and every 15th out-of-sample row.
# .iloc replaces the removed .ix indexer; a bare step slice is positional.
yin2 = yin.iloc[::50]
print(len(yin2))
yout2 = yout.iloc[::15]
print(len(yout2))
In [149]:
timer1 = MyTimeCLS()
# One SVR fit per (epsilon, C, gamma) combination using the sklearn-distance
# kernel. Each arange below currently yields a single value, so the body runs
# exactly once. A column subset was once selected on the two x inputs via
# `.ix[:, temp1]`; the full standardized frames are passed now.
for myepsilon, myc, mygamma in itertools.product(
        np.arange(.2, .3, .3),
        np.arange(10e-2, 100e-2, 100e-2),
        np.arange(.4341/5, 300e-2, 300e-2)):
    mmy_kernel = functools.partial(my_kernel_eu, gamma=mygamma)
    mod = svm.SVR(kernel=mmy_kernel, epsilon=myepsilon, C=myc)
    res_eu, rsqin, rsq_out = MyRgrs(
        xin_stdzd, xout_stdzd, yin2, yout2, mod, align=True)
In [150]:
timer1.show()
In [146]:
timer1 = MyTimeCLS()
# Same single-point (epsilon, C, gamma) grid as the sklearn-kernel run, but
# with the SciPy cdist kernel. The results are stored under the same names
# (res_eu, rsqin, rsq_out), so this cell overwrites those of the other run.
for myepsilon, myc, mygamma in itertools.product(
        np.arange(.2, .3, .3),
        np.arange(10e-2, 100e-2, 100e-2),
        np.arange(.4341/5, 300e-2, 300e-2)):
    mmy_kernel = functools.partial(my_kernel_scipy, gamma=mygamma)
    mod = svm.SVR(kernel=mmy_kernel, epsilon=myepsilon, C=myc)
    res_eu, rsqin, rsq_out = MyRgrs(
        xin_stdzd, xout_stdzd, yin2, yout2, mod, align=True)
In [111]:
# Elapsed seconds at the last checkpoint, then the per-event breakdown.
# The single-argument print call is valid under Python 2 and Python 3.
print(timer1._time_list[-1])
timer1.show()
In [110]:
# Elapsed seconds at the last checkpoint, then the per-event breakdown.
# The single-argument print call is valid under Python 2 and Python 3.
print(timer2._time_list[-1])
timer2.show()
In [ ]:
In [ ]:
In [372]:
# Random benchmark inputs: two row sets with n_feature columns each, plus one
# weight per feature. The draw order (a, b, myweight) is unchanged.
n_feature = 101
a = np.random.rand(1283, n_feature)
b = np.random.rand(1687, n_feature)
myweight = np.random.rand(n_feature)
In [373]:
In [116]:
%timeit (pairwise.euclidean_distances(temp, temp))
In [119]:
%timeit (pairwise.manhattan_distances(temp, temp))
In [120]:
%timeit (distance.cdist(temp, temp, metric='euclidean'))
In [131]:
myweight = np.random.rand(80)
In [134]:
%timeit temp1 = distance.cdist(temp, temp, metric='minkowski', p=1.5)
In [123]:
%timeit temp1 = distance.cdist(temp, temp, metric='wminkowski', p=1.5, w=myweight)
In [146]:
# Time one unweighted Minkowski (p=1.5) pairwise-distance call on `temp`
# against itself. The 'start' checkpoint keeps timer construction out of the
# 'dist' interval.
timer1 = MyTimeCLS()
timer1.refresh('start')
temp1 = distance.cdist(temp, temp, metric='minkowski', p=1.5)
timer1.refresh('dist')
timer1.show()
In [149]:
# Time the weighted variant of the same call: metric 'wminkowski' with
# per-feature weights `myweight`, p=1.5, on `temp` against itself.
timer1 = MyTimeCLS()
timer1.refresh('start')
temp2 = distance.cdist(temp, temp, metric='wminkowski', p=1.5, w=myweight)
timer1.refresh('dist')
timer1.show()
Another way: pre-multiply the data by the weights, then use the unweighted Minkowski metric.
In [156]:
# Time the "scale first, then unweighted Minkowski" alternative.
timer1 = MyTimeCLS()
timer1.refresh('start')
# Scale into a new array. The former in-place `temp *= myweight` rescaled
# `temp` again on every re-run of this cell and silently changed the input of
# every other cell that reads `temp`.
temp_weighted = temp * myweight
temp3 = distance.cdist(temp_weighted, temp_weighted, metric='minkowski', p=1.5)
timer1.refresh('dist')
timer1.show()
In [159]:
temp2
Out[159]:
In [162]:
np.abs(temp2 - temp3).sum()
Out[162]:
In [ ]:
In [164]:
import cProfile, pstats, StringIO
In [402]:
# Profile one weighted-Minkowski (p=3) cdist call and print the statistics
# sorted by cumulative time.
pr = cProfile.Profile()
pr.enable()
try:
    #---
    temp2 = distance.cdist(a, b, metric='wminkowski', p=3, w=myweight)
    #---
finally:
    # Stop profiling even if cdist raises, so the profiler is not left
    # enabled in the kernel.
    pr.disable()
s = StringIO.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
# Single-argument print call: valid under Python 2 and Python 3.
print(s.getvalue())
In [174]:
# Profile the "scale first, then unweighted Minkowski" alternative and print
# the statistics sorted by cumulative time.
# NOTE(review): this cell uses p=1.5 while the weighted-Minkowski profiling
# cell uses p=3 -- confirm which order the comparison is meant to use.
pr = cProfile.Profile()
pr.enable()
try:
    #---
    # New arrays instead of `a *= myweight` / `b *= myweight`: the in-place
    # form rescaled a and b again on every re-run and changed the inputs of
    # every other cell that reads them.
    a_weighted = a * myweight
    b_weighted = b * myweight
    temp3 = distance.cdist(a_weighted, b_weighted, metric='minkowski', p=1.5)
    #---
finally:
    # Stop profiling even if the block above raises.
    pr.disable()
s = StringIO.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
# Single-argument print call: valid under Python 2 and Python 3.
print(s.getvalue())
1-D scenario: pairwise differences by broadcasting a column against a row
In [175]:
# Two small integer vectors for the broadcasting demo: 0..3 and 1..4.
a = np.arange(4)
b = np.arange(1, 5)
In [176]:
# One formatted string in a single-argument print call: same text as the
# Python 2 `print x, y` form, and valid under Python 3 too.
print('{0} {1}'.format(a.shape, b.shape))
In [178]:
# Column view of a and row view of b, so that `c - d` broadcasts to the full
# table of pairwise differences.
c = a.reshape(-1, 1)
d = b.reshape(1, -1)
# Single-argument print call: same text under Python 2 and Python 3.
print('{0} {1}'.format(c.shape, d.shape))
In [179]:
c - d
Out[179]:
2-D scenario: the same broadcasting trick, with the feature axis moved to the front
In [360]:
# Random inputs for the 2-D broadcasting check: 330 and 440 rows with
# n_feature columns each. The weights are shaped (n_feature, 1, 1) so they
# broadcast against arrays whose first axis is the feature axis.
n_feature = 77
a, b = np.random.rand(330, n_feature), np.random.rand(440, n_feature)
myweight = np.random.rand(n_feature, 1, 1)
# Single-argument print call: same text under Python 2 and Python 3.
print('{0} {1} {2}'.format(a.shape, b.shape, myweight.shape))
In [361]:
# Put the feature axis first, then give a's rows axis 1 and b's rows axis 2,
# so that `c - d` broadcasts to one difference table per feature.
c = a.T[:, :, np.newaxis]
d = b.T[:, np.newaxis, :]
# Single-argument print call: same text under Python 2 and Python 3.
print('{0} {1}'.format(c.shape, d.shape))
In [362]:
# Broadcast subtraction of the two views built in the previous cell.
e = c - d
# Single-argument print call: valid under Python 2 and Python 3.
print(e.shape)
In [363]:
# Multiply the differences by the weights (broadcast multiplication).
# NOTE(review): assumes myweight still has the (n_feature, 1, 1) shape from
# the setup cell of this section -- confirm, since cells ran out of order.
g = e * myweight
#print g
In [364]:
# Collapse axis 0 of the weighted differences into a distance matrix:
# square root of the sum of squares (the p=2 case). The commented line is the
# sum-of-absolute-values (p=1) alternative.
#h = np.sum(np.abs(g), axis=0)
h = np.sqrt(np.sum(g**2, axis=0))
In [365]:
# Reference result from SciPy's 'wminkowski' metric with p=2, to compare
# against the broadcasting result `h`.
# NOTE(review): in this section myweight is created with shape
# (n_feature, 1, 1) -- confirm that cdist accepts that shape for `w`.
mdzz = distance.cdist(a, b, metric='wminkowski',
p=2, w=myweight
)
In [367]:
np.abs(h-mdzz).sum()
Out[367]:
In [226]:
mdzz.shape
Out[226]:
In [ ]:
In [241]:
c[0, 4, :] - d[0, :, 1]
Out[241]:
In [242]:
# NOTE(review): `f` is not assigned in any visible cell (it is only a local
# inside my_kernel_eu) -- this cell depends on hidden kernel state; confirm
# where `f` came from or remove the cell.
f[0,:,:]
Out[242]:
Test it: time the broadcasting implementation against SciPy and scikit-learn.
In [95]:
def mycdist(a, b, p, w):
    """Weighted Minkowski distance between every row of ``a`` and of ``b``.

    Computes ``(sum_k |w[k] * (a[i, k] - b[j, k])| ** p) ** (1 / p)`` by
    broadcasting. Each step is timed on the global ``timer2``.

    Parameters
    ----------
    a : array-like, shape (n_a, n_feature)
    b : array-like, shape (n_b, n_feature)
    p : number
        Order of the distance.
    w : array-like, shape (n_feature,)
        Per-feature weights, applied to the differences before the power.

    Returns
    -------
    h : ndarray, shape (n_a, n_b)
    """
    # Work in float so the in-place multiply below also accepts integer input.
    w = np.asarray(w, dtype=float)[np.newaxis, np.newaxis, :]
    timer2.refresh('w newaxis')
    c = np.asarray(a, dtype=float)[:, np.newaxis, :]
    d = np.asarray(b, dtype=float)[np.newaxis, :, :]
    timer2.refresh('a,b newaxis')
    e = c - d
    timer2.refresh('broad cast')
    e *= w
    timer2.refresh('mul weight')
    # In place: avoids a second (n_a, n_b, n_feature) temporary.
    np.abs(e, out=e)
    timer2.refresh('ABS')
    if p == 1:
        h = np.sum(e, axis=2)
        timer2.refresh('just sum')
    elif p == 2:
        h = np.sqrt(np.sum(e**p, axis=2))
        timer2.refresh('shortcut')
    else:
        # The root has to be the p-th root. The previous np.sqrt here was
        # only correct for p == 2 and gave wrong distances for any other p.
        h = np.power(np.sum(np.power(e, p), axis=2), 1. / p)
        timer2.refresh('power func')
    return h
In [471]:
4096*2
Out[471]:
In [151]:
# Random benchmark inputs: two 512-row sets with n_feature columns each, plus
# one weight per feature. The draw order (a, b, myweight) is unchanged.
n_feature = 128
a = np.random.rand(512, n_feature)
b = np.random.rand(512, n_feature)
myweight = np.random.rand(n_feature)
In [99]:
# Time the "scale by the weights, then sklearn Manhattan distance" route.
# The 'dist' interval covers both the scaling and the distance call.
# Note: this rebinds the names c and d, which earlier cells use for the
# broadcasting views.
timer1 = MyTimeCLS()
timer1.refresh('start')
c, d = a * myweight, b * myweight
temp1 = pairwise.manhattan_distances(c, d)
timer1.refresh('dist')
timer1.show()
In [157]:
# Time SciPy's weighted Minkowski distance for each order p = 0.2, 0.4, ...
# up to (not including) 10. temp3 is overwritten on every pass, so after the
# loop it holds the distances for the last p only.
for i in np.arange(.2, 10, .2):
    # Single-argument print call: same text under Python 2 and Python 3.
    print('\n========{0}=========='.format(i))
    timer3 = MyTimeCLS()
    timer3.refresh('start')
    temp3 = distance.cdist(a, b, metric='wminkowski', p=i, w=myweight)
    timer3.refresh('dist')
    timer3.show()
In [101]:
# Time the broadcasting implementation for p=3. mycdist records its own
# sub-steps on the same global timer2, so show() lists them before the
# closing 'dist' checkpoint.
timer2 = MyTimeCLS()
timer2.refresh('start')
temp2 = mycdist(a, b, p=3, w=myweight)
timer2.refresh('dist')
timer2.show()
In [91]:
np.abs(temp2 -temp3).mean()
Out[91]:
In [66]:
(temp3 -temp1) > 0
Out[66]:
In [ ]:
In [395]:
temp = np.random.rand(3,4)
In [459]:
from scipy import linalg
In [464]:
mat = np.random.rand(1234, 1234, 80)
In [465]:
%timeit mat**2.579
In [469]:
%timeit np.power(mat, 2.579)