In [1]:
# Enable the interactive (inline) plotting environment.
# Note: %pylab also star-imports numpy / matplotlib.pyplot names into the namespace.
%pylab inline
In [2]:
# 引入相依套件
import collections
import math
import numpy as np
import pandas as pd
from numpy import random
import matplotlib.pyplot as plt
# Load the input file.
# Data source: 3008 Largan Precision, 2012-08-01 ~ 2014-12-09
# http://www.twse.com.tw/ch/trading/exchange/STOCK_DAY/genpage/Report201412/201412_F3_1_8_3008.php?STK_NO=3008&myear=2014&mmon=12
# NOTE(review): the source note above names stock 3008, but the path below reads
# 2498.txt -- confirm which dataset is intended.
datapath = '/Users/wy/Desktop/2498.txt'
data = pd.read_csv(datapath)
In [3]:
# Preview the first rows of the loaded frame.
data.head()
Out[3]:
In [4]:
# Summary statistics of `data`: count, mean, standard deviation, ...
data.describe()
Out[4]:
In [5]:
# Line chart of the closing-price column on a single axes.
fig, ax = plt.subplots()
ax.plot(data['Close'])
ax.set_title('Close')
Out[5]:
In [6]:
# data[data['Close'] < 3]
In [7]:
# Reference for the technical-analysis indicator definitions used below
# http://hymar.myweb.hinet.net/study/stock/theory/
In [8]:
# Rise Ratio label
def RRlabel(data):
    """Add an 'RRlabel' column holding the direction of the previous row's change.

    For every row i >= 1 the label is 1 when
    (Close[i-1] - Close[i]) / Close[i] * 100 > 0 and -1 otherwise (a flat or
    NaN change is labelled -1). The original comments state that rows are
    ordered newest first, in which case the label of a row is the direction
    of the following trading day. Row 0 has no such day and receives NaN.

    The missing value is a real NaN (the column is numeric) rather than the
    string 'NaN', so pd.isnull() recognises it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column. Modified in place.
    """
    close = data['Close']
    older_close = close.shift(-1)
    # change[k] is the percentage change of row k relative to row k + 1.
    change = ((close - older_close) / older_close * 100).tolist()
    # Shift the labels down by one row; the last change (always NaN) is unused.
    labels = [np.nan] + [1 if pct > 0 else -1 for pct in change[:-1]]
    # Trim so an empty frame receives an empty column.
    data['RRlabel'] = labels[:len(change)]
In [9]:
# Rise Ratio (percentage change of the close)
def RR(data):
    """Add an 'RR' column: percentage change of each close versus the next row.

    RR[i] = (Close[i] - Close[i+1]) / Close[i+1] * 100. The original comments
    state that rows are ordered newest first, so row i+1 is the previous
    trading day. The last row has no following row and is NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column. Modified in place.
    """
    close = data['Close']
    older_close = close.shift(-1)
    data['RR'] = (close - older_close) / older_close * 100
In [10]:
# Williams %R (WMS%R)
def WMS(data,day):
    """Add a 'WMS<day>' column with the Williams %R over a `day`-row window.

    For row i the window is rows i .. i+day-1 (the original comments state
    rows are ordered newest first, so this is the current day plus the
    day-1 preceding ones):

        (max(High) - Close[i]) / (max(High) - min(Low)) * 100

    The last day-1 rows have no full window and are NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'High', 'Low' and 'Close' columns. Modified in place.
    day : int
        Window length in rows.
    """
    high, low, close = data['High'], data['Low'], data['Close']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day + 1):
        stop = start + day
        highest = high.iloc[start:stop].max()
        lowest = low.iloc[start:stop].min()
        values.append((highest - close.iloc[start]) / (highest - lowest) * 100)
    # Pad the rows that lack a full window.
    data['WMS' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [11]:
# BR (buying/selling willingness indicator); suggested day = 26
def BR(data,day):
    """Add a 'BR<day>' column.

    For row i, with "today" rows i .. i+day-1 and "previous close" rows
    i+1 .. i+day (the original comments state rows are ordered newest first):

        (sum(High_today) - sum(Close_prev)) / (sum(Close_prev) - sum(Low_today))

    The last `day` rows have no full window and are NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'High', 'Low' and 'Close' columns. Modified in place.
    day : int
        Window length in rows.
    """
    high, low, close = data['High'], data['Low'], data['Close']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day):
        stop = start + day
        high_sum = high.iloc[start:stop].sum()
        low_sum = low.iloc[start:stop].sum()
        # Closes are taken one row further down, i.e. the preceding day's close.
        prev_close_sum = close.iloc[start + 1:stop + 1].sum()
        values.append((high_sum - prev_close_sum) / (prev_close_sum - low_sum))
    # Pad the rows that lack a full window.
    data['BR' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [12]:
# AR (buying/selling momentum indicator); suggested day = 26
def AR(data,day):
    """Add an 'AR<day>' column.

    For row i over the window rows i .. i+day-1:

        (sum(High) - sum(Open)) / (sum(Open) - sum(Low))

    The last day-1 rows have no full window and are NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'High', 'Open' and 'Low' columns. Modified in place.
    day : int
        Window length in rows.
    """
    high, low, opening = data['High'], data['Low'], data['Open']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day + 1):
        stop = start + day
        open_sum = opening.iloc[start:stop].sum()
        upward = high.iloc[start:stop].sum() - open_sum
        downward = open_sum - low.iloc[start:stop].sum()
        values.append(upward / downward)
    # Pad the rows that lack a full window.
    data['AR' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [13]:
# Mean volume; suggested day = 5, 10, 20
def MV(data,day):
    """Add an 'MV<day>' column: mean of 'Volume' over rows i .. i+day-1.

    The last day-1 rows have no full window and are NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Volume' column. Modified in place.
    day : int
        Window length in rows.
    """
    volume = data['Volume']
    n_rows = len(data)
    values = [volume.iloc[start:start + day].mean()
              for start in range(n_rows - day + 1)]
    # Pad the rows that lack a full window.
    data['MV' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [14]:
# Moving average (MA); suggested day = 5, 10, 20
def MA(data,day):
    """Add an 'MA<day>' column: mean of 'Close' over rows i .. i+day-1.

    The last day-1 rows have no full window and are NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column. Modified in place.
    day : int
        Window length in rows.
    """
    close = data['Close']
    n_rows = len(data)
    values = [close.iloc[start:start + day].mean()
              for start in range(n_rows - day + 1)]
    # Pad the rows that lack a full window.
    data['MA' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [15]:
# Psychological line (PSY); suggested day = 13
def PSY(data,day):
    """Add a 'PSY<day>' column: percentage of rising days in the window.

    For row i the window is 'RR' rows i .. i+day-1 and the value is
    (number of rows with RR > 0) / day * 100. The divisor is `day`; it was
    previously hard-coded to 13, which was only right for day == 13.

    The last `day` rows are NaN, so the final row (whose RR is undefined)
    never enters a window.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an 'RR' column (see RR()). Modified in place.
    day : int
        Window length in rows.
    """
    rise_ratio = data['RR']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day):
        window = rise_ratio.iloc[start:start + day]
        rising_days = int((window > 0).sum())
        values.append(float(rising_days) / float(day) * 100)
    # Pad the rows that lack a full window.
    data['PSY' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [16]:
# On-balance volume (OBV); suggested day = 12
def OBV(data,day):
    """Add an 'OBV<day>' column: rising-day volume minus falling-day volume.

    For row i over rows i .. i+day-1:

        sum(Volume where RR > 0) - sum(Volume where RR < 0)

    The last `day` rows are NaN. (Dividing the result by `day` would give the
    moving-average variant mentioned in the original notes.)

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'RR' (see RR()) and 'Volume' columns. Modified in place.
    day : int
        Window length in rows.
    """
    rise_ratio, volume = data['RR'], data['Volume']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day):
        rr_window = rise_ratio.iloc[start:start + day]
        vol_window = volume.iloc[start:start + day]
        rising_volume = vol_window[rr_window > 0].sum()
        falling_volume = vol_window[rr_window < 0].sum()
        values.append(rising_volume - falling_volume)
    # Pad the rows that lack a full window.
    data['OBV' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [17]:
# Volume ratio (VR); suggested day = 12
def VR(data,day):
    """Add a 'VR<day>' column.

    For row i over rows i .. i+day-1, with volumes split by the sign of RR:

        (rising + 0.5 * flat) / (falling + 0.5 * flat) * 100

    The last `day` rows are NaN. A window with no falling and no flat volume
    divides by zero (numpy yields inf/nan rather than raising).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'RR' (see RR()) and 'Volume' columns. Modified in place.
    day : int
        Window length in rows.
    """
    rise_ratio, volume = data['RR'], data['Volume']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day):
        rr_window = rise_ratio.iloc[start:start + day]
        vol_window = volume.iloc[start:start + day]
        rising_volume = vol_window[rr_window > 0].sum()
        flat_volume = vol_window[rr_window == 0].sum()
        falling_volume = vol_window[rr_window < 0].sum()
        values.append((rising_volume + 0.5 * flat_volume)
                      / (falling_volume + 0.5 * flat_volume) * 100)
    # Pad the rows that lack a full window.
    data['VR' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [18]:
# Relative strength index (RSI); suggested day = 6, 12, 28
def RSI(data,day):
    """Add an 'RSI<day>' column.

    For row i over 'RR' rows i .. i+day-1:

        100 * mean_gain / (mean_gain - mean_loss)

    where mean_gain is the mean of the positive RR values (0 if there are
    none) and mean_loss the mean of the negative RR values (0 if there are
    none, otherwise negative). A window with neither gains nor losses gives 0.
    The last `day` rows are NaN.

    NOTE(review): the averages are taken over the rising / falling days only,
    not over all `day` rows -- confirm this is the intended RSI variant.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an 'RR' column (see RR()). Modified in place.
    day : int
        Window length in rows.
    """
    rise_ratio = data['RR']
    n_rows = len(data)
    values = []
    for start in range(n_rows - day):
        window = rise_ratio.iloc[start:start + day]
        gains = window[window > 0]
        losses = window[window < 0]
        # An empty selection would average to NaN, so treat it as zero.
        mean_gain = gains.mean() if len(gains) else 0
        mean_loss = losses.mean() if len(losses) else 0
        if mean_gain == 0 and mean_loss == 0:
            values.append(0)
        else:
            values.append(100 * mean_gain / (mean_gain - mean_loss))
    # Pad the rows that lack a full window.
    data['RSI' + str(day)] = values + [np.nan] * (n_rows - len(values))
In [19]:
# Bias ratio (BIAS); day = 10, 20
def BIAS(data,day):
    """Add a 'BIAS<day>' column: percentage deviation of Close from its MA.

    BIAS[i] = (Close[i] - MA<day>[i]) / MA<day>[i] * 100. Rows whose moving
    average is NaN stay NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'Close' and 'MA<day>' columns; MA(data, day) must have been
        called first. Modified in place.
    day : int
        Window length of the moving average to compare against.

    Raises
    ------
    KeyError
        If the 'MA<day>' column does not exist.
    """
    moving_average = data['MA' + str(day)]
    data['BIAS' + str(day)] = (data['Close'] - moving_average) / moving_average * 100
In [20]:
# RR (rise ratio) must be computed first; later indicators read the 'RR' column.
RR(data)
RRlabel(data)
WMS(data,9)
BR(data,26)
AR(data,26)
MV(data,5)
MV(data,10)
MV(data,20)
MA(data,5)
MA(data,10)
MA(data,20)
# BIAS needs the matching MA column to exist first.
BIAS(data,5)
BIAS(data,10)
BIAS(data,20)
PSY(data,13)
OBV(data,12)
VR(data,12)
VR(data,26)
RSI(data,6)
RSI(data,12)
RSI(data,28)
In [21]:
# Keep all but the last `slicenum` rows of `data`.
# presumably 28 matches the longest look-back used above (RSI 28), whose
# trailing rows have no value -- TODO confirm.
slicenum = 28
slicedata = data.head(data['Date'].size-slicenum)
In [22]:
# NOTE: head() is not the last expression of the cell, so it displays nothing here.
slicedata.head()
# Drop the first row.
# NOTE(review): this cell is not idempotent -- each re-run drops one more row.
slicedata = slicedata[1:]
In [23]:
# Write the summary statistics of the sliced frame to a CSV file.
tt = slicedata.describe()
datapath = '/Users/wy/Desktop/describe.csv'
tt.to_csv(datapath, encoding='utf-8')
In [24]:
# s19 = slicedata['RR']
# s20 = slicedata['RRlabel']
# s1 = slicedata['MV5']
# s2 = slicedata['MV10']
# s3 = slicedata['MV20']
# s4 = slicedata['MA5']
# s5 = slicedata['MA10']
# s6 = slicedata['MA20']
# s7 = slicedata['WMS9']
# s8 = slicedata['BR26']
# s9 = slicedata['AR26']
# s10 = slicedata['PSY13']
# s11 = slicedata['VR12']
# s12 = slicedata['BIAS5']
# s13 = slicedata['BIAS10']
# s14 = slicedata['BIAS20']
# s15 = slicedata['OBV12']
# s16 = slicedata['RSI6']
# s17 = slicedata['RSI12']
# s18 = slicedata['RSI28']
# tt = dict(MV5=s1,MV10=s2,MV20=s3,MA5=s4,MA10=s5,MA20=s6,WMS9=s7,BR26=s8,AR26=s9,PSY13=s10,VR12=s11,BIAS5=s12,BIAS10=s13,BIAS20=s14,OBV12=s15,RSI6=s16,RSI12=s17,RSI28=s18,RR=s19,RRlabel=s20)
# tt = pd.DataFrame(tt)
# alltmpList=[]
# for index, row in tt.iterrows():
# tmpList=[]
# row[1:] = map(float,row[1:])
# stmin = row[1:].min()
# stmax = float(row[1:].max())
# for item in row[1:]:
# tmp = (float(item) - stmin)/(stmax - stmin)
# tmpList.append(tmp)
# alltmpList.append(tmpList)
# pd.DataFrame(alltmpList)
In [25]:
# Min-max normalization
def minmax_normalization(slicedata,row):
    """Add a 'minmaxN<row>' column: (x - min) / (max - min) of column `row`.

    The result keeps the frame's own index, so it also works on frames whose
    index does not start at 0 (for example after `frame[1:]`). The previous
    element-by-element lookup by integer label failed on such frames.

    Parameters
    ----------
    slicedata : pandas.DataFrame
        Frame to normalise. Modified in place.
    row : str
        Name of the column to normalise (despite the parameter name).
    """
    column = slicedata[row]
    lowest = column.min()
    spread = column.max() - lowest
    # A constant column has spread 0 and yields NaN.
    slicedata['minmaxN' + row] = (column - lowest) / spread
In [26]:
# z-score normalization
def zscore_normalization(slicedata,row):
    """Add a 'zscoreN<row>' column: (x - mean) / std of column `row`.

    `std` is pandas' default sample standard deviation. The result keeps the
    frame's own index, so it also works on frames whose index does not start
    at 0 (for example after `frame[1:]`). The previous element-by-element
    lookup by integer label failed on such frames.

    Parameters
    ----------
    slicedata : pandas.DataFrame
        Frame to normalise. Modified in place.
    row : str
        Name of the column to normalise (despite the parameter name).
    """
    column = slicedata[row]
    slicedata['zscoreN' + row] = (column - column.mean()) / column.std()
In [27]:
# log normalization
# Only usable when every value is greater than 0.
def log_normalization(slicedata,row,base):
    """Add a 'logN<row>' column: log(x + 1, base) / log(max, base).

    Values are written positionally, so this also works on frames whose index
    does not start at 0 (for example after `frame[1:]`). The previous
    element-by-element lookup by integer label failed on such frames.

    NOTE(review): the numerator adds 1 (to avoid log(0)) but the denominator
    does not, so the largest value maps to slightly more than 1 -- confirm
    whether log(max + 1) was intended.

    Parameters
    ----------
    slicedata : pandas.DataFrame
        Frame to normalise. Modified in place.
    row : str
        Name of the column to normalise (despite the parameter name).
    base : number
        Logarithm base.

    Raises
    ------
    ValueError
        If a value makes the logarithm undefined (non-positive argument).
    ZeroDivisionError
        If the column maximum is 1, whose logarithm is 0.
    """
    column = slicedata[row]
    # The denominator is loop-invariant; compute it once.
    denominator = math.log(column.max(), base)
    slicedata['logN' + row] = [math.log(value + 1, base) / denominator
                               for value in column]
In [28]:
# Indicators exported with all three normalizations (min-max, z-score, log).
tl=['MV5','MV10','MV20','MA5','MA10','MA20','WMS9','BR26','AR26','PSY13','VR12','VR26']
# Indicators meant for export without the log normalization; in this notebook
# only the commented-out loop further down uses this list.
nologtl=['BIAS5','BIAS10','BIAS20','OBV12','RSI6','RSI12','RSI28','RR']
# NOTE(review): indentation was lost in this export; the statements down to the
# to_csv call presumably form the body of this loop -- confirm against the notebook.
for a in tl:
print(a)
# Add the three normalized columns for indicator `a` to slicedata.
log_normalization(slicedata,a,10)
zscore_normalization(slicedata,a)
minmax_normalization(slicedata,a)
minmaxN = 'minmaxN'+a
zscoreN = 'zscoreN'+a
logN = 'logN'+a
s1 = slicedata['Date']
s2 = slicedata['RRlabel']
s3 = slicedata[a]
s4 = slicedata[minmaxN]
s5 = slicedata[zscoreN]
s6 = slicedata[logN]
# Output column names; the a_ .. f_ prefixes fix the column order once sorted below.
c = 'c_'+a
d_minmaxN = 'd_minmaxN'+a
e_zscoreN = 'e_zscoreN'+a
f_logN = 'f_logN'+a
tt={}
tt['a_Date']=s1
# NOTE(review): the 'b_RR' key holds the RRlabel column (s2), not RR.
tt['b_RR']=s2
tt[c]=s3
tt[d_minmaxN]=s4
tt[e_zscoreN]=s5
tt[f_logN]=s6
od = collections.OrderedDict(sorted(tt.items()))
tt = pd.DataFrame(od)
# Drop the first row before writing.
tt=tt[1:]
datapath = '/Users/wy/Desktop/2498_islig_stock2/'+a+'.csv'
tt.to_csv(datapath, encoding='utf-8')
# for a in nologtl:
# zscore_normalization(slicedata,a)
# minmax_normalization(slicedata,a)
# minmaxN = 'minmaxN'+a
# zscoreN = 'zscoreN'+a
# s1 = slicedata['Date']
# s2 = slicedata['RRlabel']
# s3 = slicedata[a]
# s4 = slicedata[minmaxN]
# s5 = slicedata[zscoreN]
# c = 'c_'+a
# d_minmaxN = 'd_minmaxN'+a
# e_zscoreN = 'e_zscoreN'+a
# tt={}
# tt['a_Date']=s1
# tt['b_RRlabel']=s2
# tt[c]=s3
# tt[d_minmaxN]=s4
# tt[e_zscoreN]=s5
# od = collections.OrderedDict(sorted(tt.items()))
# tt = pd.DataFrame(od)
# tt=tt[1:]
# datapath = '/Users/wy/Desktop/3008stock2/'+a+'.csv'
# tt.to_csv(datapath, encoding='utf-8')
In [21]:
# Display the sliced frame.
slicedata
In [27]:
# Add the base-10 log-normalized MV5 column ('logNMV5').
log_normalization(slicedata,'MV5',10)
In [28]:
# Add z-score columns for BIAS5 and MV5.
zscore_normalization(slicedata,'BIAS5')
zscore_normalization(slicedata,'MV5')
In [29]:
# Add min-max columns for BIAS5 and MV5.
minmax_normalization(slicedata,'BIAS5')
minmax_normalization(slicedata,'MV5')
In [30]:
# Export Date, RR, BIAS5 and its min-max / z-score columns to BIAS5.csv.
s1 = slicedata['Date']
s2 = slicedata['RR']
s3 = slicedata['BIAS5']
s4 = slicedata['minmaxNBIAS5']
s5 = slicedata['zscoreNBIAS5']
# Redundant: collections is already imported in the import cell at the top.
import collections
# The a_ .. e_ key prefixes fix the column order once the items are sorted.
tt = dict( a_Date = s1, b_RR = s2 , c_BIAS5 = s3 , d_minmaxNBIAS5 = s4 , e_zscoreNBIAS5 = s5)
od = collections.OrderedDict(sorted(tt.items()))
tt = pd.DataFrame(od)
datapath = '/Users/wy/Desktop/BIAS5.csv'
tt.to_csv(datapath, encoding='utf-8')
In [31]:
# Preview the frame that was just exported.
tt.head()
Out[31]:
In [32]:
# Export Date, RR, MV5 and its min-max / z-score / log columns to MV5.csv.
s1 = slicedata['Date']
s2 = slicedata['RR']
s3 = slicedata['MV5']
s4 = slicedata['minmaxNMV5']
s5 = slicedata['zscoreNMV5']
s6 = slicedata['logNMV5']
# Redundant: collections is already imported in the import cell at the top.
import collections
# The a_ .. f_ key prefixes fix the column order once the items are sorted.
tt = dict( a_Date = s1, b_RR = s2 , c_MV5 = s3 , d_minmaxNMV5 = s4 , e_zscoreNMV5 = s5 , f_logNMV5 = s6)
od = collections.OrderedDict(sorted(tt.items()))
tt = pd.DataFrame(od)
datapath = '/Users/wy/Desktop/MV5.csv'
tt.to_csv(datapath, encoding='utf-8')
In [33]:
# Count the rows whose WMS9 value is exactly 0.
# Uses the `np` alias imported at the top instead of the bare `numpy` name,
# which only exists because %pylab injects it into the namespace.
b = slicedata['WMS9']==0
np.sum(b)
Out[33]:
In [449]:
# Clear the interactive namespace.
# NOTE(review): every cell after this one needs the earlier cells re-run first.
%reset
In [353]:
# Rows whose RSI28 is exactly 0.
slicedata[slicedata['RSI28']==0]
Out[353]:
In [417]:
# Display the min-max normalized RSI6 column.
slicedata['minmaxNRSI6']
Out[417]:
In [294]:
# Check for NaN: rows whose RSI28 is null.
slicedata[pd.isnull(slicedata['RSI28'])]
Out[294]:
In [232]:
# Inspect the rows in the slice 550:565.
slicedata[550:565]
Out[232]:
In [236]:
# Manual re-computation of the RSI() formula for a single window (item=561,
# day=6), without the empty-selection guards that RSI() applies.
item=561
day=6
bolRise = data['RR'][item-day+1-1:item+1-1]>0
meanRise = data['RR'][item-day+1-1:item+1-1][bolRise].mean()
bolDesc = data['RR'][item-day+1-1:item+1-1]<0
meanDesc = data['RR'][item-day+1-1:item+1-1][bolDesc].mean()
tmp = 100*meanRise/(meanRise-meanDesc)
In [237]:
# Display the value computed in the previous cell.
tmp
Out[237]:
In [477]:
# Add the min-max normalized RSI6 column ('minmaxNRSI6').
minmax_normalization(slicedata,'RSI6')
In [478]:
# minmaxNMV5
# slicedata['minmaxNRSI6']
# Smallest RSI6 value in the sliced frame.
slicedata['RSI6'].min()
Out[478]:
In [476]:
# RSI6 values below -40.
slicedata[slicedata['RSI6']<-40]['RSI6']
Out[476]:
In [428]:
# Inspect the RR values in the slice 420:425.
slicedata['RR'][420:425]
Out[428]:
In [479]:
# Display the full RR column of `data`.
data['RR']
Out[479]:
In [ ]: