• 比较不同组合优化器在不同规模问题上的性能;

  • 下面的结果主要比较 alphamind 与 python 中其他优化器的性能差别,我们将尽可能使用 cvxopt 中的优化器,其次选择 scipy;

  • 由于 scipy 在 ashare_ex 上面性能太差,所以一般忽略 scipy 在这个股票池上的表现;

  • 时间单位都是毫秒。

  • 请在环境变量中设置DB_URI指向数据库


In [1]:
import os
import timeit
import numpy as np
import pandas as pd
import cvxpy
from alphamind.api import *
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder

# Render floats in displayed DataFrames with a thousands separator and two decimals.
pd.options.display.float_format = '{:,.2f}'.format

0. 数据准备



In [2]:
# Benchmark configuration. ``u_names`` and ``b_codes`` are parallel lists: the
# i-th benchmark code is passed to the data fetch for the i-th universe.
ref_date = '2018-02-08'
u_names = ['sh50', 'hs300', 'zz500', 'zz800', 'zz1000', 'ashare_ex']
b_codes = [16, 300, 905, 906, 852, None]
risk_model = 'short'
factor = 'EPS'
lb, ub = 0.0, 0.1

# The connection string comes from the DB_URI environment variable
# (a missing variable raises KeyError here).
data_source = os.environ['DB_URI']
engine = SqlEngine(data_source)

# One Universe per name, then the codes of each universe on ``ref_date``,
# then one data sample per (codes, benchmark) pair.
universes = list(map(Universe, u_names))
codes_set = [engine.fetch_codes(ref_date, universe=univ) for univ in universes]
data_set = [
    engine.fetch_data(ref_date,
                      factor,
                      sec_codes,
                      benchmark=bench,
                      risk_model=risk_model)
    for sec_codes, bench in zip(codes_set, b_codes)
]

1. 线性优化(带线性限制条件)



In [3]:
# Benchmark 1: linear objective with box bounds and one linear constraint.
# ``df`` has one row per solver and one column per universe; every entry is
# the time of one call in milliseconds.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # executions per timing measurement

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # A single constraint column of ones with lower == upper == 1.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Solve once to keep the solution for the cross-check below, then time a
    # second, identical call. A callable is timed (rather than a code string
    # evaluated in globals()) so the timed call cannot drift from this one.
    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target)
    elapsed_time1 = timeit.timeit(
        lambda: linear_builder(er, lbound, ubound, risk_constraints, risk_target),
        number=number) / number * 1000

    # Same problem expressed in cvxpy; ``@`` is used for every matrix product,
    # consistently with the other benchmarks in this notebook.
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w

    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]

    objective = cvxpy.Minimize(-w.T @ er)
    prob = cvxpy.Problem(objective, constraints)

    prob.solve(solver='ECOS')
    elapsed_time2 = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                  number=number) / number * 1000

    # Both solvers must reach the same objective value (4 decimals).
    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:31,620 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:31,648 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:31,682 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:31,738 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:31,789 - ALPHA_MIND - INFO - zz1000 is finished
D:\ProgramData\anaconda3\lib\site-packages\cvxpy-1.0.10-py3.6-win-amd64.egg\cvxpy\problems\problem.py:614: RuntimeWarning: overflow encountered in long_scalars
  if self.max_big_small_squared < big*small**2:
D:\ProgramData\anaconda3\lib\site-packages\cvxpy-1.0.10-py3.6-win-amd64.egg\cvxpy\problems\problem.py:615: RuntimeWarning: overflow encountered in long_scalars
  self.max_big_small_squared = big*small**2
2019-02-10 00:39:31,962 - ALPHA_MIND - INFO - ashare_ex is finished

In [4]:
df


Out[4]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 4.58 6.41 9.76 14.33 19.88 61.95
alphamind 1.35 3.50 4.88 9.03 4.97 27.62

In [5]:
prob.value


Out[5]:
-5.262849995706494

2. 线性优化(带L1限制条件)



In [6]:
from cvxpy import pnorm

# Benchmark 2: linear objective with box bounds, one linear constraint and an
# L1 (turnover) constraint against the current position. Times are in
# milliseconds per call.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind (clp simplex)', 'alphamind (clp interior)', 'alphamind (ecos)'])
turn_over_target = 0.5
number = 1

# (linear_builder ``method`` argument, row label in ``df``)
builder_methods = (('interior', 'alphamind (clp interior)'),
                   ('simplex', 'alphamind (clp simplex)'),
                   ('ecos', 'alphamind (ecos)'))

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # Use the 'weight' column as the current position when the sample has
    # one, otherwise start from equal weights.
    if 'weight' in factor_data:
        current_position = factor_data.weight.values
    else:
        current_position = np.ones_like(er) / len(er)

    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Reference solution and timing from cvxpy.
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w

    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   pnorm(w - current_position, 1) <= turn_over_target]

    objective = cvxpy.Minimize(-w.T @ er)
    prob = cvxpy.Problem(objective, constraints)

    prob.solve(solver='ECOS')
    df.loc['cvxpy', u_name] = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                            number=number) / number * 1000
    reference_value = np.array(w.value).flatten() @ er

    # The three linear_builder back-ends take the same arguments apart from
    # ``method``, so they share one code path: solve once for the cross-check,
    # time a second identical call, and compare objective values (4 decimals).
    for method, row_label in builder_methods:
        def run_builder(method=method):
            return linear_builder(er,
                                  lbound,
                                  ubound,
                                  risk_constraints,
                                  risk_target,
                                  turn_over_target=turn_over_target,
                                  current_position=current_position,
                                  method=method)

        status, y, x = run_builder()
        elapsed_time = timeit.timeit(run_builder, number=number) / number * 1000

        np.testing.assert_almost_equal(x @ er, reference_value, 4)
        df.loc[row_label, u_name] = elapsed_time

    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:32,075 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:32,209 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:32,469 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:32,998 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:33,755 - ALPHA_MIND - INFO - zz1000 is finished
2019-02-10 00:39:40,815 - ALPHA_MIND - INFO - ashare_ex is finished

In [7]:
df


Out[7]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 8.49 13.78 21.18 33.32 43.27 176.12
alphamind (clp simplex) 0.63 8.76 26.53 65.43 112.34 1,585.36
alphamind (clp interior) 19.95 28.48 46.78 114.66 170.22 1,462.77
alphamind (ecos) 10.03 14.87 33.66 49.57 52.49 340.38

3. Mean - Variance 优化 (无约束)



In [8]:
# Import only the atoms this notebook uses by bare name. A wildcard import
# would pull every public cvxpy name into the namespace, including atoms
# named like built-ins (e.g. sum / max / min).
from cvxpy import multiply, quad_form, sum_squares

# Benchmark 3: mean-variance objective without constraints. Times are in
# milliseconds per call.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1

# Risk-factor columns; the same for every universe, so built once.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Dense security covariance (factor part + diagonal specific part); only
    # used below to evaluate the objective of both solutions.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    lbound = -np.ones(n) * np.inf
    ubound = np.ones(n) * np.inf
    risk_model = dict(cov=None, factor_cov=risk_cov/10000., factor_loading=risk_exposure, idsync=(special_risk**2)/10000.)

    # Solve once to keep the solution, then time a second identical call.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          None,
                                          None,
                                          lam=1)
    elapsed_time1 = timeit.timeit(
        lambda: mean_variance_builder(er,
                                      risk_model,
                                      bm,
                                      lbound,
                                      ubound,
                                      None,
                                      None,
                                      lam=1),
        number=number) / number * 1000

    # Same objective in cvxpy: specific risk as a sum of squares plus the
    # factor risk as a quadratic form of the portfolio's factor exposure.
    w = cvxpy.Variable(n)
    risk = sum_squares(multiply(special_risk / 100., w)) + quad_form(risk_exposure.T @ w, risk_cov / 10000.)
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    prob = cvxpy.Problem(objective)
    prob.solve(solver='ECOS')
    elapsed_time2 = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                  number=number) / number * 1000

    # Evaluate the objective for both solutions and require agreement to
    # 4 decimals.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:40,951 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:41,217 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:41,592 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:42,175 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:42,796 - ALPHA_MIND - INFO - zz1000 is finished
2019-02-10 00:39:45,458 - ALPHA_MIND - INFO - ashare_ex is finished

In [9]:
df


Out[9]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 19.52 54.76 83.50 104.39 143.43 550.83
alphamind 31.92 70.01 95.37 156.45 154.81 623.11

4. Mean - Variance 优化 (Box约束)



In [10]:
# Benchmark 4: mean-variance objective with box bounds on the weights.
# Times are in milliseconds per call.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1

# Risk-factor columns; the same for every universe, so built once.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Dense security covariance (factor part + diagonal specific part); only
    # used below to evaluate the objective of both solutions.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    risk_model = dict(cov=None, factor_cov=risk_cov/10000., factor_loading=risk_exposure, idsync=(special_risk**2)/10000.)

    # Solve once to keep the solution, then time a second identical call.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          None,
                                          None)
    elapsed_time1 = timeit.timeit(
        lambda: mean_variance_builder(er,
                                      risk_model,
                                      bm,
                                      lbound,
                                      ubound,
                                      None,
                                      None),
        number=number) / number * 1000

    # cvxpy atoms are referenced through the ``cvxpy`` module so this cell
    # does not depend on names imported by another cell.
    w = cvxpy.Variable(n)
    risk = (cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
            + cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.))
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    constraints = [w >= lbound,
                   w <= ubound]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='ECOS')
    elapsed_time2 = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                  number=number) / number * 1000

    # Evaluate the objective for both solutions and require agreement to
    # 4 decimals.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:45,561 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:45,704 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:45,943 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:46,729 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:47,099 - ALPHA_MIND - INFO - zz1000 is finished
2019-02-10 00:39:49,245 - ALPHA_MIND - INFO - ashare_ex is finished

In [11]:
df


Out[11]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 19.41 49.60 75.66 337.10 115.25 392.13
alphamind 7.35 18.63 34.55 52.96 46.70 160.48

5. Mean - Variance 优化 (Box约束以及线性约束)



In [12]:
# Benchmark 5: mean-variance objective with box bounds and one linear
# constraint. Times are in milliseconds per call.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1

# Risk-factor columns; the same for every universe, so built once.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # Dense security covariance (factor part + diagonal specific part); only
    # used below to evaluate the objective of both solutions.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
    er = factor_data[factor].values
    n = len(er)

    bm = np.zeros(n)
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    # A single constraint column of ones with lower == upper == 1.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    risk_model = dict(cov=None, factor_cov=risk_cov/10000., factor_loading=risk_exposure, idsync=(special_risk**2)/10000.)

    # Solve once to keep the solution, then time a second identical call.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          risk_constraints,
                                          risk_target)
    elapsed_time1 = timeit.timeit(
        lambda: mean_variance_builder(er,
                                      risk_model,
                                      bm,
                                      lbound,
                                      ubound,
                                      risk_constraints,
                                      risk_target),
        number=number) / number * 1000

    # cvxpy atoms are referenced through the ``cvxpy`` module so this cell
    # does not depend on names imported by another cell.
    w = cvxpy.Variable(n)
    risk = (cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
            + cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.))
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='ECOS')
    elapsed_time2 = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                  number=number) / number * 1000

    # Evaluate the objective for both solutions and require agreement to
    # 4 decimals.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:49,364 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:49,480 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:49,680 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:50,021 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:50,458 - ALPHA_MIND - INFO - zz1000 is finished
2019-02-10 00:39:52,248 - ALPHA_MIND - INFO - ashare_ex is finished

In [13]:
df


Out[13]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 19.20 39.56 60.21 104.13 153.07 628.02
alphamind 10.89 16.91 43.05 47.43 45.44 146.20

6. 线性优化(带二次限制条件)



In [14]:
# Benchmark 6: linear objective with box bounds, one linear constraint and a
# quadratic (volatility) constraint. Times are in milliseconds per call.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])
number = 1
target_vol = 0.5

# Risk-factor columns; the same for every universe, so built once.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values
    # The dense security covariance is not built here: the cross-check below
    # compares the linear objective only.
    er = factor_data[factor].values
    n = len(er)

    # Benchmark weights: the 'weight' column when the sample has one,
    # otherwise equal weights.
    if 'weight' in factor_data:
        bm = factor_data.weight.values
    else:
        bm = np.ones_like(er) / n
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    # Total weight is pinned to the total benchmark weight.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))
    risk_model = dict(cov=None, factor_cov=risk_cov/10000., factor_loading=risk_exposure, idsync=(special_risk**2)/10000.)

    # Solve once to keep the solution, then time a second identical call.
    status, y, x1 = target_vol_builder(er,
                                       risk_model,
                                       bm,
                                       lbound,
                                       ubound,
                                       risk_constraints,
                                       risk_target,
                                       vol_target=target_vol)
    elapsed_time1 = timeit.timeit(
        lambda: target_vol_builder(er,
                                   risk_model,
                                   bm,
                                   lbound,
                                   ubound,
                                   risk_constraints,
                                   risk_target,
                                   vol_target=target_vol),
        number=number) / number * 1000

    # cvxpy atoms are referenced through the ``cvxpy`` module so this cell
    # does not depend on names imported by another cell.
    # NOTE(review): the risk below is computed on the full weights ``w`` while
    # the builder also receives ``bm`` -- confirm both constrain the same
    # quantity whenever the volatility constraint is binding.
    w = cvxpy.Variable(n)
    risk = (cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
            + cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.))
    objective = cvxpy.Minimize(-w.T @ er)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   risk <= target_vol * target_vol]
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='ECOS')
    elapsed_time2 = timeit.timeit(lambda: prob.solve(solver='ECOS'),
                                  number=number) / number * 1000

    # Both solvers must reach the same objective value (4 decimals).
    u1 = -x1 @ er
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er

    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_time1
    df.loc['cvxpy', u_name] = elapsed_time2
    alpha_logger.info(f"{u_name} is finished")


2019-02-10 00:39:52,370 - ALPHA_MIND - INFO - sh50 is finished
2019-02-10 00:39:52,532 - ALPHA_MIND - INFO - hs300 is finished
2019-02-10 00:39:52,798 - ALPHA_MIND - INFO - zz500 is finished
2019-02-10 00:39:53,099 - ALPHA_MIND - INFO - zz800 is finished
2019-02-10 00:39:53,589 - ALPHA_MIND - INFO - zz1000 is finished
2019-02-10 00:39:55,894 - ALPHA_MIND - INFO - ashare_ex is finished

In [15]:
df


Out[15]:
sh50 hs300 zz500 zz800 zz1000 ashare_ex
cvxpy 19.62 45.64 58.58 92.44 152.57 831.04
alphamind 13.23 26.67 58.33 51.23 87.07 223.06

In [ ]: