In [1]:
from sys import path

# Directory that holds the project's `init` module imported in the next cell.
# NOTE: this is a machine-specific absolute path; adjust it when running elsewhere.
HFT1_DIR = '/home/bingnan/ecworkspace/HFT1'

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if HFT1_DIR not in path:
    path.append(HFT1_DIR)
In [2]:
from init import *
In [3]:
# Select seaborn's 'talk' plotting context for the figures in this notebook.
sns.set_context('talk')
In [4]:
# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline
In [4]:
In [83]:
# Display rgs2.half_life / 5. The benchmark kernel below uses the same
# half_life / 5 scaling for its gamma, but taken from rgs_bcmk_eusq.
# NOTE(review): rgs2 is only created in a later cell, so this cell relies on
# out-of-order execution.
rgs2.half_life/5
Out[83]:
In [84]:
# Benchmark regressor: standardised features against the subsampled raw
# targets (yin2 / yout2 / ytest2); align=True is forwarded to myRgr.dataGo.
rgs_bcmk_eusq = myRgr()
rgs_bcmk_eusq.dataGo(xin_stdzd, yin2, xout_stdzd, yout2, xtest_stdzd, ytest2, align=True)
# Prepare distances via my_distance with metric='eu', no weight matrix and
# squared=True (presumably squared Euclidean distance -- confirm in my_distance).
rgs_bcmk_eusq.disPrep(my_distance, metric='eu', w=None, squared=True)
In [85]:
# Configure the benchmark: my_kernel_exp with gamma = half_life / 5 ...
rgs_bcmk_eusq.kernelGo(my_kernel_exp, gamma=rgs_bcmk_eusq.half_life/5)
# ... and an 'svr' regressor with epsilon=0.2, C=0.1.
rgs_bcmk_eusq.regressorGo('svr', epsilon=0.2, C=.1)
rgs_bcmk_eusq.fit()
# presumably rsqGo() reports in/out-of-sample R^2 and test() the test-set
# result -- confirm in myRgr.
rgs_bcmk_eusq.rsqGo()
rgs_bcmk_eusq.test()
In [49]:
# Inspect the fitted benchmark model: shape of its support_ array and the
# distribution of its dual coefficients.
print rgs_bcmk_eusq.result.support_.shape
plt.figure(figsize=(15, 5))
sns.distplot(rgs_bcmk_eusq.result.dual_coef_)
Out[49]:
In [26]:
# Compare in-sample predictions of the benchmark and of rgs1 via Corr2D.
# NOTE(review): `rgs_bcmk_eu` is not defined in the visible cells (only
# `rgs_bcmk_eusq` is) -- confirm which object is meant.
Corr2D(rgs_bcmk_eu._yin_predict, rgs1._yin_predict)
Out[26]:
The cells above produce the SVR benchmark result.
The cells below produce the boosting result.
In [73]:
def tst_bst(rgs_linear, rgs_svr_list, x, y, w):
    """Score a linear model plus residual model(s) on the data (x, y).

    The final prediction is ``rgs_linear.predict(x)`` plus the prediction of
    the residual stage evaluated on ``np.dot(x, w.T)``.

    Parameters
    ----------
    rgs_linear : object with ``predict``
        First-stage model; it is given ``x`` unprojected.
    rgs_svr_list : iterable of models, or a single model
        Either an iterable of fitted models whose predictions are averaged
        (each must also expose ``yout`` and ``_yout_predict``, which are used
        for the per-model printout), or a single object with a ``predict``
        method (e.g. a bagging ensemble), which is used directly.
    x : array-like
        Feature matrix.
    y : array-like
        Targets matching the rows of ``x``.
    w : array
        Projection matrix; the residual models see ``np.dot(x, w.T)``.

    Returns
    -------
    The combined ``rsquare`` value, which is also printed.

    Raises
    ------
    ValueError
        If ``rgs_svr_list`` is an empty iterable.
    """
    ypre_linear = rgs_linear.predict(x)
    x_proj = np.dot(x, w.T)

    # Dispatch explicitly on the argument's shape instead of wrapping the
    # whole list branch in a bare ``except``: that hid genuine failures
    # (bad shapes, missing attributes) behind the single-model fallback.
    if hasattr(rgs_svr_list, 'predict'):
        ypre_svr = rgs_svr_list.predict(x_proj)
    else:
        ypre_list = []
        for i, rgs_svr in enumerate(rgs_svr_list):
            ypre = rgs_svr.predict(x_proj)
            ypre_list.append(ypre)
            print('\t\t\t\tNo.{0} Residual_rsq = {1}, Combined_rsq = {2}'.format(
                i,
                rsquare(rgs_svr.yout, rgs_svr._yout_predict),
                rsquare(y, yhat=(ypre_linear + ypre))))
        if not ypre_list:
            raise ValueError('rgs_svr_list must contain at least one fitted model')
        # Plain average of the individual residual predictions.
        ypre_svr = sum(ypre_list) / float(len(ypre_list))

    rsq = rsquare(y, yhat=(ypre_linear + ypre_svr))
    print('\t\t\t\tAll in all rsq = {0}'.format(rsq))
    return rsq
In [82]:
# Evaluate rgs1 plus the three residual models (rgs2, rgs3, rgs4) on the test
# rows indexed by ytest2; features are projected with the first 7 rows of pca_mat.
tst_bst(rgs1, [rgs2, rgs3, rgs4],
xtest_stdzd.ix[ytest2.index],
ytest2,
pca_mat[:7, :])
In [47]:
# Same evaluation with the bagging ensemble `bager` as the residual stage.
# NOTE(review): `bager` is created in a later cell, so this cell relies on
# out-of-order execution.
tst_bst(rgs1, bager,
xtest_stdzd.ix[ytest2.index],
ytest2,
pca_mat[:7, :])
In [200]:
# Print Corr2D of the out-of-sample predictions of rgs2 and rgs4.
print Corr2D(rgs2._yout_predict, rgs4._yout_predict)
# Disabled checks: out-of-sample rsquare of rgs2, of rgs3, and of the average
# of their predictions.
# print rsquare(rgs2.yout, rgs2._yout_predict)
# print rsquare(rgs3.yout, rgs3._yout_predict)
# print rsquare(rgs2.yout, (rgs2._yout_predict + rgs3._yout_predict)/2)
In [56]:
# Subsample the targets: every 50th in-sample entry, every 15th out-of-sample
# and test entry. These *2 series are the targets used by the models in this
# notebook, including cells that appear earlier in the file.
yin2 = yin.ix[::50]
yout2 = yout.ix[::15]
ytest2 = ytest.ix[::15]
print yin2.shape, yout2.shape
In [86]:
# First-stage linear model: Ridge with alpha=6000 on the standardised
# features. Its rzdu_in / rzdu_out attributes are used as targets by the
# second-stage models rgs2, rgs3, rgs4 (presumably the ridge residuals --
# confirm in myRgr).
rgs1 = myRgr()
rgs1.dataGo(xin_stdzd, yin2, xout_stdzd, yout2, xtest_stdzd, ytest2, align=True)
rgs1.regressorGo('Ridge', alpha=6000)
rgs1.fit()
rgs1.rsqGo()
rgs1.test()
#---------------------
In [197]:
Out[197]:
In [44]:
# Stacking experiment: Ridge (alpha=0.5) on a two-column frame holding the
# rgs2 and rgs3 predictions, with rgs1.rzdu_in / rgs1.rzdu_out as in-sample /
# out-of-sample targets.
# NOTE(review): the test features passed here are the raw xtest_stdzd rather
# than stacked predictions (the stacked variant is commented out inline), and
# test() is disabled below.
rgs_stack = myRgr()
rgs_stack.dataGo(pd.DataFrame([rgs2._yin_predict, rgs3._yin_predict]).T, rgs1.rzdu_in,
pd.DataFrame([rgs2._yout_predict, rgs3._yout_predict]).T, rgs1.rzdu_out,
xtest_stdzd,#pd.DataFrame([rgs2.predict(rgs2.xtest), rgs3.predict(rgs3.xtest)]).T,
ytest2, align=False)
rgs_stack.regressorGo('Ridge', alpha=.5)
rgs_stack.fit()
rgs_stack.rsqGo()
#rgs_stack.test()
#---------------------
# Coefficients the ridge assigns to the two stacked prediction columns.
print rgs_stack.result.coef_
In [20]:
# Load two precomputed arrays from the working directory. Judging by the file
# names they are PCA components and explained-variance ratios computed from
# xin_stdzd; rows of pca_mat are used below as projection matrices.
pca_mat = np.load('pca_components_calc_from_xinstdzd.npy')
explain_var = np.load('pca_explain_ratio_calc_from_xinstdzd.npy')
In [21]:
# Plot the loaded explain_var array, one diamond marker per entry.
plt.figure(figsize=(15,10))
plt.plot(explain_var, marker='d')
plt.show()
In [61]:
from sklearn.ensemble import BaggingRegressor
In [62]:
# Base estimator for the bagging ensemble built in the next cell.
# NOTE(review): per the scikit-learn SVR documentation, `gamma` is only used
# by the 'rbf', 'poly' and 'sigmoid' kernels, so it should have no effect
# with kernel='linear' -- yet it makes this cell depend on rgs2, which is
# created further down.
temp = svm.SVR(kernel='linear',
gamma=rgs2.half_life*.2,
epsilon=.17, C=.0004)
In [63]:
# Bagging ensemble of 100 copies of `temp`, max_samples=0.5, run with 7 jobs.
# NOTE(review): no random_state is set, so the fitted ensemble differs
# between runs.
bager = BaggingRegressor(temp, n_estimators=100, max_samples=0.5, n_jobs=7)
In [64]:
# Fit the ensemble on the in-sample data held by rgs4 (rgs4 is created in a
# later cell).
bager.fit(rgs4.xin, rgs4.yin)
Out[64]:
In [66]:
# Print the ensemble's score() on rgs4's in-sample and out-of-sample data.
print bager.score(rgs4.xin, rgs4.yin)
print bager.score(rgs4.xout, rgs4.yout)
In [75]:
# Second-stage model rgs2: targets are rgs1.rzdu_in / rgs1.rzdu_out, and the
# first 7 rows of pca_mat are passed as `w` (presumably a feature projection
# -- confirm in myRgr.dataGo).
rgs2 = myRgr()
rgs2.dataGo(xin_stdzd, rgs1.rzdu_in, xout_stdzd, rgs1.rzdu_out, xtest_stdzd, ytest2, align=True, w=pca_mat[:7, :])
rgs2.disPrep(my_distance, metric='eu', w=None, squared=True)
In [76]:
# Grid search skeleton for rgs2 (SVR with my_kernel_exp).
# Every np.arange below has a step larger than its interval, so each grid
# currently holds a single value: epsilon=0.17, C=0.015, gamma factor=0.2.
for myepsilon in np.arange(.17, .2, .2):
    for myc in np.arange(.015, .09, 9.01):
        for mygamma in np.arange(.2, 2.04, 2.001):
            print '==================== C: {0} epsilon: {1} gamma: {2:.4f}'.format(myc, myepsilon, mygamma)
            # The kernel gamma is expressed as a multiple of rgs2.half_life.
            rgs2.kernelGo(my_kernel_exp, gamma=rgs2.half_life * mygamma)
            rgs2.regressorGo('svr', epsilon=myepsilon, C=myc)
            rgs2.fit()
            rgs2.rsqGo()
In [68]:
def my_kernel_spherical(dis, gamma=.1, radius=20.):
    """Spherical-model kernel evaluated on a distance array.

    With ``x = min(dis / radius, 1)`` the result is
    ``1 - 1.5 * x + 0.5 * x**3``: 1 at zero distance, 0 at and beyond
    ``radius``.

    Parameters
    ----------
    dis : array-like supporting ``/`` and boolean-mask assignment
        Distances (e.g. a numpy array). It is NOT modified.
    gamma : float, optional
        Unused; kept so the signature stays compatible with existing callers.
    radius : float, optional
        Distance at which the kernel reaches zero (default 20, the value that
        used to be hard-coded).

    Returns
    -------
    Array of kernel values with the same shape as ``dis``.
    """
    # Work on a new array: the caller's distance matrix must stay intact, as
    # it may be reused for further kernel evaluations. Dividing by a float
    # also lets integer-typed distances through.
    scaled = dis / float(radius)
    scaled[scaled > 1.] = 1.
    return 1 - scaled * 3 / 2. + scaled ** 3 / 2.
In [69]:
# Second-stage model rgs3: same data, targets and distance preparation as
# rgs2; it is fitted with the spherical kernel in the next cell.
rgs3 = myRgr()
rgs3.dataGo(xin_stdzd, rgs1.rzdu_in, xout_stdzd, rgs1.rzdu_out, xtest_stdzd, ytest2, align=True, w=pca_mat[:7, :])
rgs3.disPrep(my_distance, metric='eu', w=None, squared=True)
# Disabled alternative: use cosine similarity as the kernel.
#rgs3.partial_kernel = pairwise.cosine_similarity
In [71]:
# Grid search skeleton for rgs3 (SVR with my_kernel_spherical).
# Each np.arange step exceeds its interval, so every grid holds one value:
# epsilon=0.17, C=1e-4, gamma=0.2.
# NOTE(review): mygamma is only printed; kernelGo is called without gamma.
for myepsilon in np.arange(.17, .19, 1.002):
    for myc in np.arange(10e-5, 100e-5, 200e-5):
        for mygamma in np.arange(.2, 6., 9.3):
            print '==================== C: {0} epsilon: {1} gamma: {2:.4f}'.format(myc, myepsilon, mygamma)
            rgs3.kernelGo(my_kernel_spherical)
            rgs3.regressorGo('svr', epsilon=myepsilon, C=myc)
            rgs3.fit()
            rgs3.rsqGo()
In [77]:
# Second-stage model rgs4: same data and targets as rgs2 / rgs3, but without
# a disPrep call (it is fitted with a 'linear' kernel below).
rgs4 = myRgr()
rgs4.dataGo(xin_stdzd, rgs1.rzdu_in, xout_stdzd, rgs1.rzdu_out, xtest_stdzd, ytest2, align=True, w=pca_mat[:7, :])
# Disabled alternative: distance preparation with metric='mh' on 6 pca_mat rows.
#rgs4.disPrep(my_distance, metric='mh', w=pca_mat[:6, :], squared=False)
In [97]:
# Distribution of the pairwise inner products between every 1000th row of
# xin_stdzd, scaled by 1e-2.
sns.distplot(np.dot(xin_stdzd.ix[::1000], xin_stdzd.ix[::1000].T).ravel() * 1e-2)
Out[97]:
In [138]:
# Distribution of sigmoid-kernel values (gamma=5e-2, coef0=0) between every
# 300th row of xin_stdzd.
sns.distplot(pairwise.sigmoid_kernel(xin_stdzd.ix[::300], xin_stdzd.ix[::300], gamma=5e-2, coef0=0.).ravel())
Out[138]:
In [78]:
# Grid search skeleton for rgs4 (SVR with a 'linear' kernel).
# Each np.arange step exceeds its interval, so every grid holds one value:
# epsilon=0.17, C=0.02, gamma=0.1.
# NOTE(review): gamma and coef0 are forwarded to regressorGo although the
# kernel is 'linear'; per the scikit-learn SVR docs a linear kernel uses
# neither -- confirm how myRgr passes them on.
for myepsilon in np.arange(.17,.18, 1.003):
    for myc in np.arange(2e-2, 20e-2, 20e-2):
        for mygamma in np.arange(10e-2, 16., 19.3):
            print '==================== C: {0} epsilon: {1} gamma: {2:.4f}'.format(myc, myepsilon, mygamma)
            rgs4.kernelGo('linear')
            rgs4.regressorGo('svr', epsilon=myepsilon, C=myc,
                             gamma=mygamma, coef0=1.
                             )
            rgs4.fit()
            rgs4.rsqGo()
In [67]:
In [81]:
# Corr2D of the out-of-sample predictions of rgs2 and rgs4.
Corr2D(rgs2._yout_predict, rgs4._yout_predict)
Out[81]:
In [79]:
# Inspect the fitted rgs3 model: shape of support_, the intercept, and the
# distribution of the dual coefficients.
print rgs3.result.support_.shape
print rgs3.result.intercept_
sns.distplot(rgs3.result.dual_coef_)
Out[79]:
In [75]:
# Median of rgs3's kernel evaluated on its in-sample features against themselves.
np.median(rgs3.partial_kernel(rgs3.xin, rgs3.xin))
Out[75]:
In [121]:
# IPython magic: inline matplotlib output (repeats the magic issued near the
# top of the notebook).
%matplotlib inline
In [120]:
# Overlay normalised 300-bin histograms of rgs3's and rgs1's in-sample targets.
sns.distplot(rgs3.yin, bins=300, kde=False, norm_hist=True)
sns.distplot(rgs1.yin.values, bins=300, kde=False, norm_hist=True)
Out[120]:
In [52]:
# Flatten rgs3's in-sample kernel matrix for the plot below.
# NOTE(review): `temp` is reused elsewhere for an SVR estimator and for kNN
# predictions, so its meaning depends on which cell ran last.
temp = rgs3.partial_kernel(rgs3.xin, rgs3.xin).ravel()
In [54]:
Out[54]:
In [53]:
# Distribution of every 10th flattened kernel value, and the median of all of them.
sns.distplot(temp[::10])
print np.median(temp)
In [47]:
# Shape of support_ for the fitted rgs3 model.
rgs3.result.support_.shape
Out[47]:
In [169]:
# Dual coefficients of the fitted rgs2 model.
rgs2.result.dual_coef_
Out[169]:
In [84]:
# Variant of the rgs2 grid search that rebuilds the model each iteration via
# modelGo and restricts the features to the `feature` column mask.
# Each np.arange step exceeds its interval, so every grid holds one value:
# epsilon=0.17, C=0.8, gamma factor=0.2.
# NOTE(review): this uses a bare `half_life` (other cells use rgs2.half_life)
# and `feature`, which is assigned in a later cell -- confirm both exist
# before running; the modelGo API is not used anywhere else in this notebook.
for myepsilon in np.arange(.17, .9, .9):
    for myc in np.arange(.8, 2., 2.2):
        for mygamma in np.arange(.2, 2., 2.2):
            print '==================== C: {0} epsilon: {1} gamma: {2}'.format(myc, myepsilon, mygamma)
            rgs2 = myRgr()
            rgs2.modelGo('svr', regressor_kws={'epsilon': myepsilon, 'C': myc},
                         kernel=my_kernel_exp,
                         kernel_kws={'gamma': half_life*mygamma, 'metric': 'eu', 'squared': True, 'w': None, 'full_output': False})
            rgs2.dataGo(xin_stdzd.ix[:, feature], rgs1.rzdu_in, xout_stdzd.ix[:, feature], rgs1.rzdu_out, xtest_stdzd, ytest2, align=True)
            rgs2.fit()
            rgs2.rsqGo()
In [28]:
# rsquare of rgs2's predictions on the out-of-sample rows against the raw
# targets yout2 (rgs2 itself is fitted on rgs1.rzdu_in).
rsquare(yout2, rgs2.predict(xout_stdzd.ix[yout2.index]))
Out[28]:
In [33]:
# Switch seaborn to the 'poster' plotting context for the figures that follow.
sns.set_context('poster')
In [37]:
# 100-bin histogram (no KDE) of rgs1.rzdu_in.
sns.distplot(rgs1.rzdu_in, kde=False, bins=100)
Out[37]:
In [339]:
# 2x2 grid: targets (top row) and rzdu_in / rzdu_out (bottom row) of
# rgs_bcmk, in-sample on the left and out-of-sample on the right.
# NOTE(review): `rgs_bcmk` is not defined in the visible cells -- confirm
# where it comes from.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 5))
sns.distplot(rgs_bcmk.yin, norm_hist=True, ax=ax1)
sns.distplot(rgs_bcmk.yout, norm_hist=True, ax=ax2)
sns.distplot(rgs_bcmk.rzdu_in, norm_hist=True, ax=ax3)
sns.distplot(rgs_bcmk.rzdu_out, norm_hist=True, ax=ax4)
Out[339]:
In [380]:
# rgs1.rzdu_in and rgs1.rzdu_out on the top row of a 2x2 grid; the bottom
# row (ax3, ax4) stays empty because the rgs_bcmk plots are commented out.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 5))
sns.distplot(rgs1.rzdu_in, norm_hist=True, ax=ax1)
sns.distplot(rgs1.rzdu_out, norm_hist=True, ax=ax2)
# sns.distplot(rgs_bcmk.rzdu_in, norm_hist=True, ax=ax3)
# sns.distplot(rgs_bcmk.rzdu_out, norm_hist=True, ax=ax4)
Out[380]:
In [250]:
# Side-by-side distributions of rgs1.rzdu_in (left) and rgs1.rzdu_out (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 3))
sns.distplot(rgs1.rzdu_in, ax=ax1)
sns.distplot(rgs1.rzdu_out, ax=ax2)
Out[250]:
In [ ]:
In [242]:
# Lasso (alpha=5e-3) on the raw targets; its non-zero coefficients are turned
# into the `feature` mask in the next cell.
rgs_lasso = myRgr()
rgs_lasso.dataGo(xin_stdzd, yin2, xout_stdzd, yout2, xtest_stdzd, ytest2, align=True)
rgs_lasso.regressorGo('Lasso', alpha=5e-3)
rgs_lasso.fit()
rgs_lasso.rsqGo()
rgs_lasso.test()
#---------------------
In [244]:
# Boolean mask of the features whose Lasso coefficient is non-zero.
feature = rgs_lasso.result.coef_ != 0
In [245]:
# Display the feature mask.
feature
Out[245]:
In [17]:
from sklearn.neighbors import KNeighborsRegressor
In [35]:
def sigmaEst(y, yhat):
    """Plot the residuals ``y - yhat`` and return their standard deviation.

    Parameters
    ----------
    y, yhat : array-like
        Observed and predicted values; ``y - yhat`` must support ``.std()``.

    Returns
    -------
    ``(y - yhat).std()`` -- the degrees-of-freedom convention is whatever the
    residual object's own ``std`` method applies.

    Side effects
    ------------
    Draws the residual distribution with ``sns.distplot``.
    """
    err = y - yhat
    sns.distplot(err)
    return err.std()
In [36]:
def noiseEst(y, yhat, nnn):
    """Estimate the noise variance from the residuals of a fit.

    Computes ``sum((y - yhat)**2) / (N - ddof)`` with ``N = len(y)`` and
    ``ddof = N / (N**(1/5) * nnn)``; the ``ddof`` value is printed.

    Parameters
    ----------
    y, yhat : array-like
        Observed and fitted values of equal length.
    nnn : number
        Divisor in the ``ddof`` term; callers in this notebook pass the
        number of neighbours of the k-NN fit that produced ``yhat``.

    Returns
    -------
    The variance estimate.
    """
    n_obs = len(y)
    sq_err_total = ((y - yhat) ** 2).sum()
    ddof = n_obs / (n_obs ** (1. / 5) * nnn)
    print('ddof = {0}'.format(ddof))
    return sq_err_total / (n_obs - ddof)
In [37]:
def epsilonEst(x, y, nnn, w=None):
    """Fit a k-NN regressor on (x, y) and estimate the noise of its residuals.

    Parameters
    ----------
    x : array-like
        Feature matrix; projected to ``np.dot(x, w.T)`` when ``w`` is given.
    y : array-like
        Targets.
    nnn : int
        Number of neighbours for ``KNeighborsRegressor``; also forwarded to
        ``noiseEst``.
    w : array, optional
        Projection matrix; ``None`` leaves ``x`` untouched.

    Returns
    -------
    tuple
        ``(noiseEst(y, fitted, nnn), sigmaEst(y, fitted))`` where ``fitted``
        are the k-NN predictions on the training inputs themselves.

    Side effects
    ------------
    Prints the in-sample ``rsquare`` of the k-NN fit; ``noiseEst`` and
    ``sigmaEst`` print / plot as well.
    """
    features = x if w is None else np.dot(x, w.T)
    knn = KNeighborsRegressor(n_neighbors=nnn)
    knn.fit(features, y)
    fitted = knn.predict(features)
    print('rsq: {0}'.format(rsquare(y, fitted)))
    # The derived value 3 * sqrt(sigma2 * log(n) / n) is deliberately not
    # computed here; only the two noise estimates are returned.
    return noiseEst(y, fitted, nnn), sigmaEst(y, fitted)
In [38]:
# Noise estimates for rgs1.rzdu_in using a 3-neighbour k-NN fit on the
# features projected with the first 5 rows of pca_mat.
epsilonEst(xin_stdzd.ix[rgs1.rzdu_in.ix[::].index], rgs1.rzdu_in.ix[::], 3, w=pca_mat[:5, :])
Out[38]:
In [26]:
# Larger of |mean - 3*std| and |mean + 3*std| of the in-sample targets
# (presumably a data-driven candidate for the SVR C parameter -- confirm).
max(abs(yin2.mean() - 3*(yin2.std())), abs(yin2.mean() + 3*(yin2.std())))
Out[26]:
In [148]:
# 6-neighbour k-NN regressor used for the manual noise estimate below.
neigh = KNeighborsRegressor(n_neighbors=6)
In [149]:
# Fit the k-NN on every 10th entry of yin2 and the matching feature rows.
temp_resample = 10
neigh.fit(xin_stdzd.ix[yin2.ix[::temp_resample].index], yin2.ix[::temp_resample])
Out[149]:
In [150]:
# In-sample k-NN predictions on the same rows the model was fitted on.
temp = neigh.predict(xin_stdzd.ix[yin2.ix[::temp_resample].index])
In [153]:
# Noise-variance estimate from the k-NN residuals. noiseEst requires the
# neighbour count as its third argument; take it from the fitted estimator so
# it always matches the model that produced `temp`.
sigma2 = noiseEst(yin2.ix[::temp_resample], temp, neigh.n_neighbors)
print(sigma2)
In [156]:
# Value 3 * sqrt(sigma2 * ln(n) / n), with n the number of subsampled targets
# and sigma2 the noise-variance estimate (presumably a candidate for the SVR
# epsilon -- confirm).
n = len(yin2.ix[::temp_resample])
3 * np.sqrt(sigma2 * np.log(n) / n)
Out[156]:
In [146]:
# In-sample rsquare of the k-NN predictions.
rsquare(yin2.ix[::temp_resample], temp)
Out[146]:
In [147]:
# Actual vs. k-NN-predicted targets with hollow markers. 'none' is the
# documented way to request unfilled markers; an empty string is rejected as
# a colour by current matplotlib.
plt.scatter(yin2.ix[::temp_resample], temp, facecolor='none')
Out[147]:
In [ ]: