In [ ]:
import os
import h5py
import numpy as np
import pandas as pd
from sklearn import preprocessing
from scipy.stats import mstats
from talib2feature import get_factors
from high2low import high2low
def get_factors_dataset(inputs):
    """Compute the factor table for one instrument's bar data.

    Args:
        inputs: Object exposing ``index`` plus ``open``, ``close``, ``high``,
            ``low`` and ``volume`` attributes, each offering ``.values``
            (presumably a pandas DataFrame of OHLCV bars -- confirm against
            the callers).

    Returns:
        Whatever ``get_factors`` returns when given the index followed by the
        open, close, high, low and volume arrays, with ``rolling=188``,
        ``drop=True`` and ``normalization=False``.
    """
    # Collect the raw arrays in the positional order get_factors expects.
    price_volume = [
        getattr(inputs, field).values
        for field in ("open", "close", "high", "low", "volume")
    ]
    return get_factors(
        inputs.index, *price_volume,
        rolling=188, drop=True, normalization=False)
In [ ]:
# Build one factor table per quote file found in Access/ and gather them
# into a single 3-D container.
dataset = {}
universe_list = os.listdir("Access/")
for file_name in universe_list:
    # Characters 6..16 of the file name are used as the stock code, with
    # "_" swapped for "." (e.g. presumably "600000_XSHG" -> "600000.XSHG").
    stk_code = file_name[6:17].replace("_", ".")
    # high2low is called with "5min"; presumably it resamples the stored
    # quotes to 5-minute bars -- confirm in high2low.
    quotes = high2low(pd.read_hdf("Access/{}".format(file_name)), "5min")
    dataset[stk_code] = get_factors_dataset(quotes)

# Reverse the Panel axes so that the items axis becomes what used to be the
# minor axis of the per-stock tables, and the stock codes move to the minor
# axis.
dataset = pd.Panel(dataset).transpose([2, 1, 0])

# Trading calendar and per-day stock universe used by the later cells.
tradeDays = pd.read_hdf("tradeDays.h5")
dataset_universe = pd.read_hdf("universe_SH50.h5")
In [ ]:
# Write the standardized factor windows to dataset_factors.h5.
#
# The file is opened explicitly in append mode: h5py 3.x defaults to
# read-only, under which create_dataset would fail.  The context manager
# also guarantees the file is closed if any factor raises part-way through.
with h5py.File("dataset_factors.h5", "a") as DataSet:
    # Axis 3 is the factor index (see the assignment at the bottom of the
    # loop); the other four axes are filled from the reshaped window buffer.
    dset = DataSet.create_dataset(
        "talib_factors", [468, 50, 50, 58, 16], chunks=True)
    for fth, f in enumerate(dataset.items):
        print(f)
        fac = dataset[f]
        # Standardize the factor matrix
        buffer_scale = []
        for i in range(23, 491):
            # time range: the 16 calendar entries immediately before `date`
            date = tradeDays[i]
            start_date = tradeDays[i-16]
            end_date = tradeDays[i-1]
            # universe: the stock list recorded for `date`
            stk = dataset_universe.loc[date].tolist()
            # factor scale: clip 5% from each tail, then standardize,
            # both along axis 1 (across the selected stocks, per row)
            tmp = fac.loc[start_date:end_date, stk]
            tmp = mstats.winsorize(tmp, limits=0.05, axis=1)
            tmp = preprocessing.scale(tmp, axis=1)
            buffer_scale.append(tmp)
        buffer_scale = np.stack(buffer_scale, axis=0)
        # Assumes every window holds 16 * 50 rows and 50 stocks -- the
        # reshape raises otherwise.  TODO confirm 50 rows per day.
        buffer_scale = buffer_scale.reshape([468, 16, 50, 50])
        # Reverse the last three axes: (window, 16, 50, stocks) becomes
        # (window, stocks, 50, 16).
        buffer_scale = buffer_scale.transpose([0, 3, 2, 1])
        dset[:, :, :, fth, :] = buffer_scale
In [1]:
import os
import h5py
import numpy as np
import pandas as pd
In [2]:
# DataSet for talib factors image
# Axes: Time, Universe, Minutes, Factors, Days
# Opened read-only on purpose: this cell only consumes the file, and an
# explicit mode avoids depending on h5py's version-specific default (which
# could create an empty file under h5py 2.x when the path is missing).
dataset = h5py.File("dataset_factors.h5", "r")["talib_factors"]
# Trading calendar: drop the first 23 entries and renumber from 0, so that
# position 0 matches the first day (index 23) handled by the cell that
# wrote the factors.
tradeDays = pd.read_hdf("tradeDays.h5").iloc[23:]
tradeDays.reset_index(drop=True, inplace=True)
# Stock universe
dataset_universe = pd.read_hdf("universe_SH50.h5")
In [5]:
class Terminal(object):
    """Serve stored factor data and the matching stock universe step by step.

    The constructor reads the module-level ``dataset``, ``tradeDays`` and
    ``dataset_universe`` objects, so those must exist before instantiation.
    """

    def __init__(self):
        self.factors = dataset
        self.tradeDays = tradeDays
        self.universe = dataset_universe

    def step(self, step):
        """Return the observation for position ``step``.

        Args:
            step: Integer position used both to index ``self.factors`` and
                to look up the trading day in ``self.tradeDays``.

        Returns:
            A ``(factors, universe)`` tuple: ``self.factors[step]`` and the
            list of values in the ``self.universe`` row labelled with that
            trading day.
        """
        day = self.tradeDays[step]
        return self.factors[step], self.universe.loc[day].tolist()

    def reset(self):
        """Return the first observation; always identical to ``step(0)``.

        The universe row is looked up by the first trading day's label
        rather than by taking the first row of the universe table, so the
        result stays aligned with ``step`` even when the universe table
        begins earlier than ``self.tradeDays``.
        """
        return self.step(0)
In [ ]: