In [1]:
from sys import path
path.append('/home/bingnan/ecworkspace/HFT1')

In [2]:
%matplotlib inline

In [3]:
from init import *

In [4]:
# Z-score each feature set along axis 0 using the mean/std of `xin` only,
# so `xout` and `xtest` are scaled with exactly the same statistics as `xin`.
# (Presumably xin/xout/xtest are the in-sample, out-of-sample and test
# feature frames exported by `init` -- confirm there.)
_xin_mean, _xin_std = xin.mean(axis=0), xin.std(axis=0)
xin_stdzd, xout_stdzd, xtest_stdzd = (
    (features - _xin_mean) / _xin_std for features in (xin, xout, xtest)
)

In [5]:
# Thin the targets: keep every 50th row of `yin` and every 15th row of
# `yout`, then report how many rows survive.
# `.iloc` replaces the `.ix` indexer, which was deprecated in pandas 0.20 and
# removed in 1.0; a bare step slice selects the same rows either way.
# print(...) with a single argument behaves identically on Python 2 and 3.
yin2 = yin.iloc[::50]
print(len(yin2))
yout2 = yout.iloc[::15]
print(len(yout2))


10448
11609

In [60]:
def Lag_and_Corr(x, y, max_lag=50):
    """Correlate `y` with shifted copies of `x` over a symmetric lag window.

    For every integer lag in [-max_lag, +max_lag] (both ends included), `x`
    is shifted by `lag` rows with pandas' `shift`, joined column-wise with
    `y`, rows containing NaN are dropped, and the Pearson correlation between
    the first two columns of the joined frame is recorded.

    A positive lag moves the values of `x` to later rows, so the coefficient
    at lag k pairs x[t - k] with y[t]; a negative lag pairs future values of
    `x` with the current `y`.

    Parameters
    ----------
    x : pandas.Series
        Series to be shifted.
    y : pandas.Series or pandas.DataFrame
        Series/frame aligned with `x` on the index. If it has several
        columns, only its first column enters the reported coefficient.
    max_lag : int, optional
        Largest absolute shift to evaluate (default 50).

    Returns
    -------
    (lag_arr, corr_arr) : tuple
        `lag_arr` is the list of lags, `corr_arr` a float ndarray of the
        same length holding the correlation coefficient for each lag.
    """
    # range() excludes its stop value, so `+ 1` is required for the window to
    # actually reach +max_lag and stay symmetric around zero.
    lag_arr = list(range(-max_lag, max_lag + 1))
    corr_arr = np.zeros(len(lag_arr), dtype=float)
    for i, lag in enumerate(lag_arr):
        x_lag = x.shift(periods=lag, axis=0)
        # Shifting introduces NaNs at one end; keep only complete rows.
        paired = pd.concat([x_lag, y], axis=1).dropna()
        # corrcoef treats rows as variables, hence the transpose.
        corr_arr[i] = np.corrcoef(paired.values.T)[0, 1]
    return (lag_arr, corr_arr)

In [77]:
%matplotlib inline

In [79]:
# For each of the first 80 feature columns, compute the lag/correlation curve
# against `yin` for lags up to 10 and save it as one image per feature under
# ./lag_and_corr/ (the directory must already exist).
for i in range(80):
    # `.iloc` picks the i-th column by position; it replaces the `.ix`
    # indexer, which was deprecated in pandas 0.20 and removed in 1.0.
    lag_arr, corr_arr = Lag_and_Corr(xin_stdzd.iloc[:, i], yin, 10)
    fig, ax = plt.subplots()
    ax.plot(lag_arr, corr_arr, marker='D')
    # Fixed y-range so the saved plots are directly comparable across features.
    ax.set_ylim([-.3, .3])
    ax.set_xlabel('periods (parameter in pd.shift func)')
    ax.set_ylabel('Coef. of Corr')
    ax.set_title('X%d' % i)
    fig.savefig('./lag_and_corr/lag_and_corr_X%d' % i)
    # Close explicitly so 80 figures do not accumulate in memory.
    plt.close(fig)
    print('%d done' % i)


0 done
1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done
10 done
11 done
12 done
13 done
14 done
15 done
16 done
17 done
18 done
19 done
20 done
21 done
22 done
23 done
24 done
25 done
26 done
27 done
28 done
29 done
30 done
31 done
32 done
33 done
34 done
35 done
36 done
37 done
38 done
39 done
40 done
41 done
42 done
43 done
44 done
45 done
46 done
47 done
48 done
49 done
50 done
51 done
52 done
53 done
54 done
55 done
56 done
57 done
58 done
59 done
60 done
61 done
62 done
63 done
64 done
65 done
66 done
67 done
68 done
69 done
70 done
71 done
72 done
73 done
74 done
75 done
76 done
77 done
78 done
79 done

In [65]:
# Quick look at a single lag/correlation curve: plots whatever `lag_arr` and
# `corr_arr` currently hold in the kernel.
# NOTE(review): this cell depends on kernel state from a Lag_and_Corr call
# made in another cell -- confirm which call when re-running top to bottom.
plt.plot(lag_arr, corr_arr)


Out[65]:
[<matplotlib.lines.Line2D at 0x7f63971c3810>]

In [74]:
# Sanity check of pandas' shift direction: show the 4th feature column
# (position 3) for index labels 0..30 next to a copy shifted by one period.
# The positional column pick and the label-based, end-inclusive row slice are
# written out explicitly instead of relying on `.ix`, which mixed both
# behaviours and was removed in pandas 1.0.
temp = xin_stdzd.iloc[:, 3].loc[:30].copy()
pd.concat([temp, temp.shift(periods=1, axis=0)], axis=1)


Out[74]:
x3 x3
0 0.657299 NaN
1 0.657299 0.657299
2 0.568547 0.657299
3 0.367458 0.568547
4 0.367458 0.367458
5 0.392092 0.367458
6 0.381502 0.392092
7 0.381502 0.381502
8 0.379948 0.381502
9 0.379948 0.379948
10 0.324574 0.379948
11 0.313408 0.324574
12 0.313408 0.313408
13 0.303281 0.313408
14 0.315435 0.303281
15 0.315435 0.315435
16 0.265364 0.315435
17 0.267029 0.265364
18 0.267029 0.267029
19 0.204979 0.267029
20 0.140692 0.204979
21 0.140692 0.140692
22 0.184308 0.140692
23 0.184308 0.184308
24 0.348742 0.184308
25 0.354239 0.348742
26 0.354239 0.354239
27 0.553845 0.354239
28 0.698666 0.553845
29 0.665205 0.698666
30 1.152093 0.665205

In [28]:
# Drop every row of `temp1` that contains a NaN, modifying `temp1` in place.
# NOTE(review): `temp1` is not defined in any visible cell -- presumably it
# was created in a cell that has since been deleted; confirm its origin
# before relying on this on a fresh kernel.
temp1.dropna(inplace=True)

In [31]:
# Correlation matrix of the columns of `temp1`: np.corrcoef treats each row
# of its input as one variable, so the values are transposed first.
np.corrcoef(temp1.values.T)


Out[31]:
array([[ 1.        , -0.00128591],
       [-0.00128591,  1.        ]])

In [ ]: