In [1]:

    
from __future__ import print_function

import os
import sys
# Insert at position 0 so that only the GitHub checkout is used; this avoids mixing in a pip-installed abupy and the version mismatch that would cause
sys.path.insert(0, os.path.abspath('../'))
import abupy
# Use the sandbox data so that the data environment matches the book
abupy.env.enable_example_env_ipython()

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Set the figure size used by seaborn/matplotlib through seaborn's rc override
sns.set_context(rc={'figure.figsize': (14, 7) } )
# NOTE(review): `figzize_me` looks like a typo of `figsize_me`; it is not used in the visible cells,
# but it is left unchanged in case code elsewhere refers to it - confirm before renaming
figzize_me = figsize =(14, 7)
# import warnings; warnings.simplefilter('ignore')









    



only use pandas calc nd, please install talib!
/Users/Bailey/anaconda3/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The finance module has been deprecated in mpl 2.0 and will be removed in mpl 2.2. Please use the module mpl_finance instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
enable example env will only read RomDataBu/df_kl.h5



In [2]:

    
# Print the version string of the Python interpreter running this notebook
print(sys.version)









    



3.6.0 |Anaconda 4.3.1 (x86_64)| (default, Dec 23 2016, 13:19:00) 
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)]

第3章 量化工具-Numpy

abu量化系统github地址 (您的star是我的动力！)

abu量化文档教程ipython notebook

3.1 并行化思想与基础操作

3.1.1 并行化思想



In [2]:

    
# 如下方式引用numpy是numpy.org推荐的方式                     
import numpy as np



In [3]:

    
# Baseline: square 10000 integers one by one in a Python list comprehension and time it
normal_list = range(10000)
%timeit [i**2 for i in normal_list]









    



100 loops, best of 3: 3.19 ms per loop



In [4]:

    
# Same computation on an ndarray: the square is written once and applied to the whole array
np_list = np.arange(10000)
%timeit np_list**2









    



The slowest run took 16.42 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.49 µs per loop



In [5]:

    
# Note: on an ndarray, `* 3` is applied to every element
np_list = np.ones(5) * 3
print(np_list)
# On a plain Python list, `* 3` acts on the list as a whole: the list is repeated three times
normal_list = [1, 1, 1, 1, 1] * 3
normal_list, len(normal_list)









    



[ 3.  3.  3.  3.  3.]






    Out[5]:





([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 15)

3.1.2 初始化操作



In [6]:

    
# 100 zeros
np.zeros(100)
# shape: 3 rows, 2 columns, all zeros
np.zeros((3, 2))

# shape: 3 rows, 2 columns, all ones
np.ones((3, 2))
# shape: x=2, y=3, z=3; the values are arbitrary because np.empty does not initialise the memory
np.empty((2, 3, 3))

# New array with the same shape as np_list, filled with ones
np.ones_like(np_list)
# New array with the same shape as np_list, filled with zeros
np.zeros_like(np_list)
# eye returns the identity matrix (ones on the diagonal); only this last expression is displayed by the cell
np.eye(3)









    Out[6]:





array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])



In [7]:

    
# Build a 2-row, 4-column ndarray from a nested Python list
data = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr_np = np.array(data)
arr_np









    Out[7]:





array([[1, 2, 3, 4],
       [5, 6, 7, 8]])



In [8]:

    
# 10 evenly spaced values from 0 to 1, both end points included
np.linspace(0, 1, 10)









    Out[8]:





array([ 0.        ,  0.11111111,  0.22222222,  0.33333333,  0.44444444,
        0.55555556,  0.66666667,  0.77777778,  0.88888889,  1.        ])



In [9]:

    
# 200 stocks
stock_cnt = 200
# 504 trading days
view_days = 504
# Generate values drawn from the standard normal distribution (mean 0, standard deviation 1)
stock_day_change = np.random.standard_normal((stock_cnt, view_days))
# Use the sandbox data so that the data environment matches the book; this line does not need to be commented out.
# Note: the load below replaces the random array generated on the previous line.
stock_day_change = np.load('../gen/stock_day_change.npy')
# Print the shape (200, 504): 200 rows, 504 columns
print(stock_day_change.shape)
# Print the changes of the first stock over the first five trading days
print(stock_day_change[0:1, :5])









    



(200, 504)
[[ 0.38035486  0.12259674 -0.2851901  -0.00889681  0.45731945]]

3.1.3 索引选取和切片选择



In [10]:

    
# 0:2 selects the first and second stock, 0:5 the changes of the first five trading days
stock_day_change[0:2, 0:5]









    Out[10]:





array([[ 0.38035486,  0.12259674, -0.2851901 , -0.00889681,  0.45731945],
       [ 0.13380956, -0.49312626,  1.44701057, -1.03491806,  0.42295542]])



In [11]:

    
# -2: selects the last two stocks, -5: the changes of the last five trading days
stock_day_change[-2:, -5:]









    Out[11]:





array([[ 0.21652192, -0.03053515, -0.77747062, -1.19236603, -0.04788549],
       [-0.96380496,  2.03488293,  0.99338065, -0.92392477,  0.96930104]])



In [12]:

    
# Swap the top-left 2x5 block with the bottom-right 2x5 block of stock_day_change.
# This mutates stock_day_change in place, so running the cell a second time swaps the blocks back.
# tmp = a (copy is required: a plain slice would change when the source is overwritten below)
tmp = stock_day_change[0:2, 0:5].copy() 
# a = b
stock_day_change[0:2, 0:5] = stock_day_change[-2:, -5:]
# b = tmp
stock_day_change[-2:, -5:] = tmp
# view result
stock_day_change[0:2, 0:5], stock_day_change[-2:, -5:]









    Out[12]:





(array([[ 0.21652192, -0.03053515, -0.77747062, -1.19236603, -0.04788549],
        [-0.96380496,  2.03488293,  0.99338065, -0.92392477,  0.96930104]]),
 array([[ 0.38035486,  0.12259674, -0.2851901 , -0.00889681,  0.45731945],
        [ 0.13380956, -0.49312626,  1.44701057, -1.03491806,  0.42295542]]))

3.1.4 数据转换与规整



In [13]:

    
# Show the float values, then the same slice converted to int (the fractional part is dropped, not rounded)
print(stock_day_change[0:2, 0:5])
stock_day_change[0:2, 0:5].astype(int)









    



[[ 0.21652192 -0.03053515 -0.77747062 -1.19236603 -0.04788549]
 [-0.96380496  2.03488293  0.99338065 -0.92392477  0.96930104]]






    Out[13]:





array([[ 0,  0,  0, -1,  0],
       [ 0,  2,  0,  0,  0]])



In [14]:

    
# The 2 means: keep two decimal places
np.around(stock_day_change[0:2, 0:5], 2)









    Out[14]:





array([[ 0.22, -0.03, -0.78, -1.19, -0.05],
       [-0.96,  2.03,  0.99, -0.92,  0.97]])



In [15]:

    
# copy is used so that the original array is not modified
tmp_test = stock_day_change[0:2, 0:5].copy()
# Set the first element to nan
tmp_test[0][0] = np.nan
tmp_test









    Out[15]:





array([[        nan, -0.03053515, -0.77747062, -1.19236603, -0.04788549],
       [-0.96380496,  2.03488293,  0.99338065, -0.92392477,  0.96930104]])



In [16]:

    
# nan_to_num replaces the nan inserted in the previous cell with 0
tmp_test = np.nan_to_num(tmp_test)
tmp_test









    Out[16]:





array([[ 0.        , -0.03053515, -0.77747062, -1.19236603, -0.04788549],
       [-0.96380496,  2.03488293,  0.99338065, -0.92392477,  0.96930104]])

3.1.5 逻辑条件进行数据筛选



In [17]:

    
# Find the entries of the slice whose change exceeds 0.5; as the output shows, the result is a boolean array
mask = stock_day_change[0:2, 0:5] > 0.5
print(mask)









    



[[False False False False False]
 [False  True  True False  True]]



In [18]:

    
tmp_test = stock_day_change[0:2, 0:5].copy()
# Use the mask built above to select the matching values, i.e. the elements whose mask entry is True
tmp_test[mask]









    Out[18]:





array([ 2.03488293,  0.99338065,  0.96930104])



In [19]:

    
# Boolean-mask assignment: every element greater than 0.5 is set to 1
tmp_test[tmp_test > 0.5] = 1
tmp_test









    Out[19]:





array([[ 0.21652192, -0.03053515, -0.77747062, -1.19236603, -0.04788549],
       [-0.96380496,  1.        ,  1.        , -0.92392477,  1.        ]])



In [20]:

    
# Select the values greater than 1 or smaller than -1; `|` combines the two boolean masks element-wise
tmp_test = stock_day_change[-2:, -5:]
print(tmp_test)
tmp_test[(tmp_test > 1) | (tmp_test < -1)]









    



[[ 0.38035486  0.12259674 -0.2851901  -0.00889681  0.45731945]
 [ 0.13380956 -0.49312626  1.44701057 -1.03491806  0.42295542]]






    Out[20]:





array([ 1.44701057, -1.03491806])

3.1.6 通用序列函数



In [21]:

    
# np.all checks whether every element is true, i.e. it ANDs the boolean array together
# Here it tests whether every entry of stock_day_change[0:2, 0:5] is a rise (> 0)
np.all(stock_day_change[0:2, 0:5] > 0)









    Out[21]:





False



In [22]:

    
# np.any checks whether any element is true, i.e. it ORs the boolean array together
# Here it tests whether at least one entry of stock_day_change[0:2, 0:5] is a rise (> 0)
np.any(stock_day_change[0:2, 0:5] > 0)









    Out[22]:





True



In [23]:

    
# Compare the two arrays element by element: maximum keeps the larger value of each pair (minimum would keep the smaller)
np.maximum(stock_day_change[0:2, 0:5], stock_day_change[-2:, -5:])









    Out[23]:





array([[ 0.38035486,  0.12259674, -0.2851901 , -0.00889681,  0.45731945],
       [ 0.13380956,  2.03488293,  1.44701057, -0.92392477,  0.96930104]])



In [24]:

    
change_int = stock_day_change[0:2, 0:5].astype(int)
print(change_int)
# Build a new array holding each distinct value of the input exactly once
np.unique(change_int)









    



[[ 0  0  0 -1  0]
 [ 0  2  0  0  0]]






    Out[24]:





array([-1,  0,  2])



In [25]:

    
# Default axis (the last one, i.e. axis=1 for this 2-D slice): difference between consecutive columns within each row
np.diff(stock_day_change[0:2, 0:5])









    Out[25]:





array([[-0.24705707, -0.74693547, -0.41489541,  1.14448054],
       [ 2.9986879 , -1.04150228, -1.91730542,  1.89322581]])



In [26]:

    
# The only change is axis=0: difference between consecutive rows within each column
np.diff(stock_day_change[0:2, 0:5], axis=0)









    Out[26]:





array([[-1.18032689,  2.06541808,  1.77085127,  0.26844126,  1.01718653]])



In [27]:

    
tmp_test = stock_day_change[-2:, -5:]
# np.where(condition, a, b): 1 where the value is greater than 0.5, otherwise 0
print(np.where(tmp_test > 0.5, 1, 0))









    



[[0 0 0 0 0]
 [0 0 1 0 0]]



In [28]:

    
# Values greater than 0.5 become 1; all other values keep their original value
print(np.where(tmp_test > 0.5, 1, tmp_test))









    



[[ 0.38035486  0.12259674 -0.2851901  -0.00889681  0.45731945]
 [ 0.13380956 -0.49312626  1.         -1.03491806  0.42295542]]



In [29]:

    
# Values greater than 0.5 and smaller than 1 are mapped to 1, everything else to 0
np.where(np.logical_and(tmp_test > 0.5, tmp_test < 1), 1, 0)









    Out[29]:





array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])



In [30]:

    
# Values greater than 0.5 or smaller than -0.5 are mapped to 1, everything else to 0
np.where(np.logical_or(tmp_test > 0.5, tmp_test < -0.5), 1, 0)









    Out[30]:





array([[0, 0, 0, 0, 0],
       [0, 0, 1, 1, 0]])

3.1.7 数据本地序列化操作



In [31]:

    
# Reload the array from disk; this also discards the in-place edits made to stock_day_change in earlier cells
stock_day_change = np.load('../gen/stock_day_change.npy')

# Write the array back; np.save appends the .npy extension when the file name does not have one
np.save('../gen/stock_day_change', stock_day_change)

stock_day_change.shape









    Out[31]:





(200, 504)

3.2 统计概念与函数使用

3.2.1 统计基础函数使用



In [32]:

    
# First four rows and first four columns (rows are stocks, columns are trading days in this data set)
stock_day_change_four = stock_day_change[:4, :4]
stock_day_change_four









    Out[32]:





array([[ 0.38035486,  0.12259674, -0.2851901 , -0.00889681],
       [ 0.13380956, -0.49312626,  1.44701057, -1.03491806],
       [ 1.49695798,  1.17420526,  0.26125628,  0.70377972],
       [-1.57012465,  0.25266829,  1.14584289,  0.29308672]])



In [33]:

    
# axis=1 reduces across the columns: one maximum per row, i.e. per stock
print('最大涨幅 {}'.format(np.max(stock_day_change_four, axis=1)))









    



最大涨幅 [ 0.38035486  1.44701057  1.49695798  1.14584289]



In [34]:

    
# Per-row (per-stock) minimum, standard deviation and mean
print('最大跌幅 {}'.format(np.min(stock_day_change_four, axis=1)))
print('振幅幅度 {}'.format(np.std(stock_day_change_four, axis=1)))
print('平均涨跌 {}'.format(np.mean(stock_day_change_four, axis=1)))









    



最大跌幅 [-0.2851901  -1.03491806  0.26125628 -1.57012465]
振幅幅度 [ 0.23989905  0.92537522  0.46843241  0.99049224]
平均涨跌 [ 0.05221617  0.01319395  0.90904981  0.03036831]



In [35]:

    
# axis=0 reduces across the rows: one maximum per column, i.e. per trading day
print('最大涨幅 {}'.format(np.max(stock_day_change_four, axis=0)))









    



最大涨幅 [ 1.49695798  1.17420526  1.44701057  0.70377972]



In [36]:

    
# argmax returns the row index (stock) holding the maximum of each column
print('最大涨幅股票{}'.format(np.argmax(stock_day_change_four, axis=0)))









    



最大涨幅股票[2 2 1 2]



In [37]:

    
# argmin returns the row index (stock) holding the minimum of each column
print('最大跌幅股票{}'.format(np.argmin(stock_day_change_four, axis=0)))









    



最大跌幅股票[3 1 0 1]



In [38]:

    
# Per-column (per-trading-day) minimum, standard deviation and mean
print('最大跌幅 {}'.format(np.min(stock_day_change_four, axis=0)))
print('振幅幅度 {}'.format(np.std(stock_day_change_four, axis=0)))
print('平均涨跌 {}'.format(np.mean(stock_day_change_four, axis=0)))









    



最大跌幅 [-1.57012465 -0.49312626 -0.2851901  -1.03491806]
振幅幅度 [ 1.09773969  0.59620404  0.69038879  0.64260976]
平均涨跌 [ 0.11024944  0.26408601  0.64222991 -0.01173711]

3.2.2 统计基础概念



In [39]:

    
# Two simulated traders: 100 normally distributed results each, both with mean 100;
# trader a has standard deviation 50, trader b 20.
# No random seed is set in the visible cells, so the values differ from run to run.
a_investor = np.random.normal(loc=100, scale=50, size=(100, 1))
b_investor = np.random.normal(loc=100, scale=20, size=(100, 1))



In [40]:

    
# Trader a: sample mean, standard deviation and variance
print('a交易者期望{0:.2f}元, 标准差{1:.2f}, 方差{2:.2f}'.format(
    a_investor.mean(), a_investor.std(), a_investor.var()))

# Trader b: sample mean, standard deviation and variance
print('b交易者期望{0:.2f}元, 标准差{1:.2f}, 方差{2:.2f}'.format(
    b_investor.mean(), b_investor.std(), b_investor.var()))









    



a交易者期望102.68元, 标准差45.20, 方差2042.83
b交易者期望100.13元, 标准差18.62, 方差346.74



In [41]:

    
# Mean of trader a
a_mean = a_investor.mean()
# Standard deviation of trader a
a_std = a_investor.std()
# Plot the results as a curve
plt.plot(a_investor)
# Horizontal line: upper band (mean + one standard deviation)
plt.axhline(a_mean + a_std, color='r')
# Horizontal line: mean
plt.axhline(a_mean, color='y')
# Horizontal line: lower band (mean - one standard deviation)
plt.axhline(a_mean - a_std, color='g')









    Out[41]:





<matplotlib.lines.Line2D at 0x11a1b17f0>



In [42]:

    
# Mean and standard deviation of trader b
b_mean = b_investor.mean()
b_std = b_investor.std()
# Plot trader b's results as a curve
plt.plot(b_investor)
# Horizontal line: upper band (mean + one standard deviation)
plt.axhline(b_mean + b_std, color='r')
# Horizontal line: mean
plt.axhline(b_mean, color='y')
# Horizontal line: lower band (mean - one standard deviation)
plt.axhline(b_mean - b_std, color='g')









    Out[42]:





<matplotlib.lines.Line2D at 0x11a5b5b70>

3.3 正态分布

3.3.1 正态分布基础概念



In [43]:

    
import scipy.stats as scs

# Mean of stock 0
stock_mean = stock_day_change[0].mean()
# Standard deviation of stock 0
stock_std = stock_day_change[0].std()
print('股票0 mean均值期望:{:.3f}'.format(stock_mean))
print('股票0 std振幅标准差:{:.3f}'.format(stock_std))

# Histogram of stock 0, normalised so that it is on the same scale as the probability density plotted below.
# `density=True` replaces the old `normed=True` keyword, which was deprecated and later removed from matplotlib.
plt.hist(stock_day_change[0], bins=50, density=True)

# linspace from the minimum to the maximum of stock 0 (np.linspace produces 50 points by default)
fit_linspace = np.linspace(stock_day_change[0].min(),
                           stock_day_change[0].max())

# Probability density function (PDF)
# The curve is described by the mean and the standard deviation; scipy.stats.norm.pdf evaluates the fitted curve
pdf = scs.norm(stock_mean, stock_std).pdf(fit_linspace)
# plot x, y
plt.plot(fit_linspace, pdf, lw=2, c='r')









    



股票0 mean均值期望:-0.020
股票0 std振幅标准差:1.007






    Out[43]:





[<matplotlib.lines.Line2D at 0x11a4222e8>]



In [44]:

    
# Display the density values computed in the previous cell
pdf









    Out[44]:





array([ 0.0054162 ,  0.00765058,  0.01065132,  0.01461578,  0.0197674 ,
        0.02635034,  0.03462041,  0.04483193,  0.05722053,  0.07198226,
        0.08925001,  0.10906873,  0.1313716 ,  0.15595953,  0.18248684,
        0.21045555,  0.23922051,  0.26800671,  0.29593894,  0.32208298,
        0.3454957 ,  0.36528069,  0.38064488,  0.39095113,  0.39576206,
        0.39487083,  0.38831588,  0.3763782 ,  0.35956132,  0.33855613,
        0.3141938 ,  0.28739138,  0.25909502,  0.2302256 ,  0.20163103,
        0.17404851,  0.14807866,  0.12417204,  0.10262765,  0.08360152,
        0.06712328,  0.05311796,  0.04143036,  0.0318497 ,  0.02413243,
        0.01802213,  0.0132654 ,  0.00962374,  0.00688139,  0.00484974])

3.3.2 实例1：正态分布买入策略



In [45]:

    
# Hold back the last 50 days of the random data to validate the strategy
keep_days = 50
# Statistics use the first 454 days only: slice days 0-454 (view_days = 504)
stock_day_change_test = stock_day_change[:stock_cnt, :view_days - keep_days]
# Print the three largest total drops over those 454 days: sum per stock, then sort ascending
print(np.sort(stock_day_change_test.sum(axis=1))[:3])
# argsort orders the same per-stock totals but returns the indices,
# i.e. the indices of the stocks that meet the buy condition
stock_lower_array = stock_day_change_test.sum(axis=1).argsort()[:3]
# Show the indices of the stocks that meet the buy condition
stock_lower_array









    



[-63.3678566  -58.85378699 -45.36941461]






    Out[45]:





array([109, 132,  53])



In [46]:

    
def show_buy_lower(stock_ind):
    """
    Plot one stock's cumulative change before and after the hold-out split and
    return its cumulative change over the hold-out window.

    Reads the notebook-level names view_days, keep_days, stock_day_change_test
    and stock_day_change.

    :param stock_ind: stock index, i.e. the row position in stock_day_change
    :return: last value of the cumulative sum of the stock's changes from day
             (view_days - keep_days) up to view_days
    """
    # view_days 504 - keep_days 50 = 454: first day of the hold-out window
    split_day = view_days - keep_days

    # One figure with two side-by-side panels
    _, (ax_before, ax_hold) = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))

    # Left panel: cumulative change over the first 454 days
    ax_before.plot(np.arange(0, split_day),
                   stock_day_change_test[stock_ind].cumsum())

    # Right panel: cumulative change from day 454 to day 504
    hold_curve = stock_day_change[stock_ind][split_day:view_days].cumsum()
    ax_hold.plot(np.arange(split_day, view_days), hold_curve)

    # The final point of the hold-out curve is the profit/loss of holding to the end
    return hold_curve[-1]



In [47]:

    
# Accumulated profit/loss that is printed at the end
profit = 0
# Iterate over the indices of the three stocks with the largest drop
for stock_ind in stock_lower_array:
    # profit adds up, for the three stocks, the profit/loss from buying on day 454 and holding to the last day
    profit += show_buy_lower(stock_ind)

# str.format supports {:.2f} to keep two decimal places
print('买入第 {} 支股票，从第454个交易日开始持有盈亏:{:.2f}%'.format(
    stock_lower_array, profit))









    



买入第 [109 132  53] 支股票，从第454个交易日开始持有盈亏:16.43%

3.4 伯努利分布

3.4.1 伯努利分布概念

3.4.2 实例2：如何在交易中获取优势



In [48]:

    
# Number of gamblers in each simulation
gamblers = 100

def casino(win_rate, win_once=1, loss_once=1, commission=0.01,
           init_money=1000000, play_cnt=10000000):
    """
    Simulate one gambler playing repeated Bernoulli bets and return the final bankroll.

    The gambler starts with init_money and intends to play play_cnt rounds,
    but stops as soon as the bankroll is used up (no credit is given).

    :param win_rate:   probability of winning a single round
    :param win_once:   amount won on a winning round
    :param loss_once:  amount lost on a losing round
    :param commission: fee charged on every round, win or lose (default 0.01)
    :param init_money: starting bankroll (default 1000000, the value that used to be hard-coded)
    :param play_cnt:   maximum number of rounds (default 10000000, the value that used to be hard-coded)
    :return: bankroll after the last round played; it is <= 0 when the gambler went broke
    """
    my_money = init_money
    # range avoids materialising a play_cnt-element array just to count rounds
    for _ in range(play_cnt):
        # One Bernoulli trial (binomial with n=1) decides the round according to win_rate
        w = np.random.binomial(1, win_rate)
        if w:
            # won: +win_once
            my_money += win_once
        else:
            # lost: -loss_once
            my_money -= loss_once
        # the fee is charged on every round
        my_money -= commission
        if my_money <= 0:
            # out of money: stop playing, no credit
            break
    return my_money



In [49]:

    
# Use numba to JIT-compile casino when numba is installed: the speed-up is dramatic,
# and without it the simulation is extremely slow
try:
    import numba as nb
    casino = nb.jit(casino)
except ImportError:
    # numba is optional, as the note above says: keep the pure-Python casino instead of failing the cell
    print('numba is not installed, casino runs in pure Python and will be very slow')



In [50]:

    
# 100 gamblers enter the "heaven" casino: win rate 0.5, odds 1, and no commission
heaven_moneys = [casino(0.5, commission=0) for _ in range(gamblers)]



In [51]:

    
# 100 gamblers play with win rate 0.4, odds 1, no commission
cheat_moneys = [casino(0.4, commission=0) for _ in range(gamblers)]



In [52]:

    
# 100 gamblers play with win rate 0.5, odds 1, commission 0.01
commission_moneys = [casino(0.5, commission=0.01) for _ in range(gamblers)]



In [53]:

    
# Distribution of final bankrolls in the no-commission, win-rate-0.5 case; assigning to _ hides the returned value
_ = plt.hist(heaven_moneys, bins=30)



In [54]:

    
# Distribution of final bankrolls in the win-rate-0.4 case; assigning to _ hides the returned value
_ = plt.hist(cheat_moneys, bins=30)



In [55]:

    
# Distribution of final bankrolls in the commission-0.01 case; assigning to _ hides the returned value
_ = plt.hist(commission_moneys, bins=30)



In [56]:

    
# 100 gamblers play with win rate 0.5, odds 1.04 (win 1.02 / lose 0.98), commission 0.01
moneys = [
    casino(0.5, commission=0.01, win_once=1.02, loss_once=0.98)
    for _ in range(gamblers)
]

_ = plt.hist(moneys, bins=30)



In [57]:

    
# 100 gamblers play with win rate 0.45, odds 1.04 (win 1.02 / lose 0.98), commission 0.01
moneys = [
    casino(0.45, commission=0.01, win_once=1.02, loss_once=0.98)
    for _ in range(gamblers)
]

_ = plt.hist(moneys, bins=30)



In [ ]:

第3章 量化工具-Numpy

3.1 并行化思想与基础操作

3.1.1 并行化思想

3.1.2 初始化操作

3.1.3 索引选取和切片选择

3.1.4 数据转换与规整

3.1.5 逻辑条件进行数据筛选

3.1.6 通用序列函数

3.1.7 数据本地序列化操作

3.2 统计概念与函数使用

3.2.1 统计基础函数使用

3.2.2 统计基础概念

3.3 正态分布

3.3.1 正态分布基础概念

3.3.2 实例1：正态分布买入策略

3.4 伯努利分布

3.4.1 伯努利分布概念

3.4.2 实例2：如何在交易中获取优势

第3章 量化工具-Numpy