In [ ]:
import os
import h5py
import numpy as np
import pandas as pd
from sklearn import preprocessing
from scipy.stats import mstats
from talib2feature import get_factors
from high2low import high2low


def get_factors_dataset(inputs):
    """Run ``get_factors`` on one instrument's bar table.

    Parameters
    ----------
    inputs : table-like
        Must expose ``index`` plus ``open``, ``close``, ``high``, ``low`` and
        ``volume`` attributes, each with a ``.values`` array (a pandas
        DataFrame with those columns satisfies this).

    Returns
    -------
    Whatever ``get_factors`` returns for a 188-bar rolling window with
    ``drop=True`` and ``normalization=False``.
    """
    # Positional order expected by get_factors: index, open, close, high, low, volume.
    field_order = ("open", "close", "high", "low", "volume")
    series = [getattr(inputs, field).values for field in field_order]
    return get_factors(
        inputs.index, *series,
        rolling=188, drop=True, normalization=False)

In [ ]:
# Build one factor table per file found under Access/, keyed by stock code.
universe_list = os.listdir("Access/")
dataset = {}
for i in universe_list:
    path = "Access/" + i
    # Characters 6..16 of the file name hold the code, with "_" in place of ".".
    stk_code = i[6:17].replace("_", ".")
    # Resample the raw quotes to 5-minute bars before computing factors.
    min_quotes = high2low(pd.read_hdf(path), "5min")
    fac = get_factors_dataset(min_quotes)
    dataset[stk_code] = fac

# NOTE(review): pd.Panel was removed in pandas 0.25, so this cell needs an
# older pandas. The transpose reverses the three Panel axes so that indexing
# `dataset[...]` selects along what was previously the last axis.
dataset = pd.Panel(dataset).transpose([2, 1, 0])

# Stock universe table and trading calendar used by the following cells.
dataset_universe = pd.read_hdf("universe_SH50.h5")
tradeDays = pd.read_hdf("tradeDays.h5")

In [ ]:
# Write every factor into one HDF5 tensor laid out as
# (sample, stock, intraday bar, factor, day).
FIRST_STEP, LAST_STEP = 23, 491    # half-open range of tradeDays positions that get a sample
LOOKBACK_DAYS = 16                 # trading days in each sample window
N_SAMPLES = LAST_STEP - FIRST_STEP  # 468
N_STOCKS = 50
N_BARS = 50                        # rows per trading day implied by the reshape below -- TODO confirm
N_FACTORS = 58

# Open with an explicit mode: h5py >= 3.0 defaults to read-only, which makes
# create_dataset fail; "a" keeps the read/write-or-create behaviour of older
# releases. The context manager closes the file even if the loop raises.
# NOTE(review): create_dataset raises if "talib_factors" already exists, so
# re-running this cell needs the old file (or dataset) removed first.
with h5py.File("dataset_factors.h5", "a") as DataSet:
    dset = DataSet.create_dataset(
        "talib_factors",
        [N_SAMPLES, N_STOCKS, N_BARS, N_FACTORS, LOOKBACK_DAYS],
        chunks=True)
    for fth, f in enumerate(dataset.items):
        print(f)
        fac = dataset[f]

        # Standardise the factor matrix.
        buffer_scale = []
        for i in range(FIRST_STEP, LAST_STEP):
            # Time range: the LOOKBACK_DAYS calendar entries ending just before `date`.
            date = tradeDays[i]
            start_date = tradeDays[i - LOOKBACK_DAYS]
            end_date = tradeDays[i - 1]
            # Universe: stock codes listed for `date`.
            stk = dataset_universe.loc[date].tolist()
            # Factor scaling: clip 5% from each tail of every row, then
            # z-score every row (axis=1 runs across the selected stocks).
            tmp = fac.loc[start_date:end_date, stk]
            tmp = mstats.winsorize(tmp, limits=0.05, axis=1)
            tmp = preprocessing.scale(tmp, axis=1)
            buffer_scale.append(tmp)
        buffer_scale = np.stack(buffer_scale, axis=0)
        # Split the stacked rows into (day, bar), then reorder the axes to
        # (sample, stock, bar, day) to match the on-disk layout.
        buffer_scale = buffer_scale.reshape(
            [N_SAMPLES, LOOKBACK_DAYS, N_BARS, N_STOCKS])
        buffer_scale = buffer_scale.transpose([0, 3, 2, 1])
        dset[:, :, :, fth, :] = buffer_scale

行情终端


In [1]:
import os 
import h5py
import numpy as np
import pandas as pd

In [2]:
# Dataset of TA-Lib factor "images".
# Axis order: Time, Universe, Minutes, Factors, Days
# The file is opened read-only with an explicit mode, because the implicit
# default differs between h5py releases (read/write-or-create before 3.0,
# read-only from 3.0). The file handle is intentionally left open: `dataset`
# is an on-disk h5py dataset that is read on each indexing operation.
dataset = h5py.File("dataset_factors.h5", "r")
dataset = dataset["talib_factors"]

# Trading calendar. The first 23 entries are dropped and the index is
# renumbered from 0 -- presumably so that position k lines up with sample k
# of the factor dataset; TODO confirm against the generation step.
tradeDays = pd.read_hdf("tradeDays.h5").iloc[23:].reset_index(drop=True)

# Stock universe
dataset_universe = pd.read_hdf("universe_SH50.h5")

In [5]:
class Terminal(object):
    """Step-indexed reader over the factor tensor and the daily stock universe.

    Parameters
    ----------
    factors : indexable, optional
        Object whose ``factors[step]`` yields the observation for a step.
        Defaults to the module-level ``dataset``.
    trade_days : indexable, optional
        Maps a step to the key used to look up the universe. Defaults to the
        module-level ``tradeDays``.
    universe : table with ``.loc``, optional
        ``universe.loc[day]`` must yield a row convertible with ``.tolist()``.
        Defaults to the module-level ``dataset_universe``.
    """

    def __init__(self, factors=None, trade_days=None, universe=None):
        # Fall back to the notebook globals so that ``Terminal()`` keeps working.
        self.factors = dataset if factors is None else factors
        self.tradeDays = tradeDays if trade_days is None else trade_days
        self.universe = dataset_universe if universe is None else universe

    def step(self, step):
        """Return ``(factors[step], stock list for that step's trading day)``."""
        day = self.tradeDays[step]
        return self.factors[step], self.universe.loc[day].tolist()

    def reset(self):
        """Return the observation for step 0.

        Delegates to ``step(0)`` so the stock list is looked up by the first
        trading day rather than taken from the first row of the universe
        table, which need not belong to that day.
        """
        return self.step(0)

In [ ]: