In [126]:
import tushare as ts
import pandas as pd
import numpy as np
from xpinyin import Pinyin
In [127]:
# Load the basic listing table for all stocks from tushare.
df = ts.get_stock_basics()
att = df.columns.values.tolist()  # column names, kept for interactive inspection

# Add an 'UP' column holding the pinyin initials of each stock name
# (values such as 'HTGD'), so a stock can be looked up by its initials.
# Series.map replaces the former iterrows() loop, which wrote each cell with
# df.at[index, ['UP']]: .at is a scalar accessor, so a list-valued column key
# is not a valid label for it.
pin = Pinyin()
df['UP'] = df['name'].map(lambda name_str: pin.get_initials(name_str, u''))

# Expose the index as an ordinary 'code' column as well.
df['code'] = df.index
code,代码 name,名称 industry,所属行业 area,地区 pe,市盈率 outstanding,流通股本(亿) totals,总股本(亿) totalAssets,总资产(万) liquidAssets,流动资产 fixedAssets,固定资产 reserved,公积金 reservedPerShare,每股公积金 esp,每股收益 bvps,每股净资 pb,市净率 timeToMarket,上市日期 undp,未分利润 perundp, 每股未分配 rev,收入同比(%) profit,利润同比(%) gpr,毛利率(%) npr,净利润率(%) holders,股东人数 ['name', 'pe', 'outstanding', 'totals', 'totalAssets', 'liquidAssets', 'fixedAssets', 'esp', 'bvps', 'pb', 'perundp', 'rev', 'profit', 'gpr', 'npr', 'holders']
In [128]:
# Columns of the realtime quote table displayed at the end of this cell.
col_show = ['name', 'open', 'pre_close', 'price', 'high', 'low', 'volume', 'amount', 'time', 'code']
# Pinyin initials (values of df['UP']) of the stocks to track.
initial_letter = ['HTGD','OFKJ','CDKJ','ZJXC','GXKJ','FHTX','DZJG']

# Resolve each set of initials to a stock code, keeping the order given above.
code = []
for letter in initial_letter:
    matched_codes = df.loc[df['UP'] == letter, 'code']
    if matched_codes.empty:
        # No stock carries these initials: report and skip instead of
        # failing with an IndexError on the empty selection.
        print('no stock found for initials: ' + letter)
        continue
    # .iloc[0] is explicitly positional (first match wins); the previous
    # `.code[0]` depended on how [] resolves 0 against the index type.
    code.append(matched_codes.iloc[0])

# `code` is a list, so test its truthiness; the previous `code != ''`
# compared a list with a string and was therefore always true.
if code:
    df_price = ts.get_realtime_quotes(code)
else:
    # Nothing resolved: keep df_price defined (empty, with the displayed
    # columns) so the cells that use it do not hit a NameError.
    df_price = pd.DataFrame(columns=col_show)

df_price[col_show]
Out[128]:
In [129]:
# Quote table as a nested dict keyed by column, then by row label,
# e.g. vec_data['name'][0].
vec_data = df_price.to_dict(orient='dict')
In [130]:
# Two ways of obtaining the quote table as a NumPy array
# (inspect a row with e.g. np_data0[0] / np_data1[0]).
np_data1 = np.array(df_price)   # through the NumPy constructor
np_data0 = df_price.values      # through the DataFrame attribute
In [131]:
# History for stock 600487 restricted to a fixed date window.
# Alternatives tried earlier: ts.get_k_data(), and ts.get_hist_data('600848')
# without dates, which fetches the full daily K-line history in one call.
df_selected = ts.get_hist_data(
    '600487',
    start='2018-01-01',
    end='2019-03-09',
)
In [132]:
# Preview the first five rows of the selected history.
df_selected.head(n=5)
Out[132]:
In [133]:
# Fetch the index table from tushare; it is filtered by its "code" column in the next cell.
df_index = ts.get_index()
In [134]:
# Codes of the indices to keep out of the full index table.
index_to_track = ['000001', '000300', '000016', '399001', '399106', '399006']
# Display only the rows whose "code" is one of the tracked codes.
df_index.loc[df_index["code"].isin(index_to_track)]
Out[134]:
In [135]:
import tushare as ts
import pandas as pd #测试dataFrame
# DataFrame round-trip experiment: fetch several K-line windows, combine
# them, de-duplicate by date, save to CSV and read the file back.
shareCode = '600487'
csv_path = shareCode + '.csv'

# dfLoad and dfUpda1 overlap (both start on 2018-01-05); the frames are
# concatenated out of date order, so the result needs sorting.
dfLoad = ts.get_k_data(shareCode, start='2018-01-05', end='2018-01-09')
dfUpda1 = ts.get_k_data(shareCode, start='2018-01-05', end='2018-01-12')
dfUpda2 = ts.get_k_data(shareCode, start='2018-01-15', end='2018-01-20')
dfConc = pd.concat([dfLoad, dfUpda2, dfUpda1])

# sort_values does not modify dfConc; it returns a sorted copy that must be
# bound to a name.
dfSort = dfConc.sort_values(by='date', ascending=False)
# De-duplicate: `subset` names the column that defines a duplicate and
# keep='first' retains the first row of each group.
dfDrop = dfSort.drop_duplicates(subset=['date'], keep='first')

# index=False leaves the row index out of the file; writing it is optional.
dfDrop.to_csv(csv_path, index=False)

# read_csv builds a fresh default index unless index_col selects a key column.
# Stock codes are identifiers, not numbers: without an explicit dtype the
# parser would infer an integer column (and drop any leading zeros).
# NOTE(review): assumes the k-data frames carry a 'code' column - confirm.
dfRead = pd.read_csv(csv_path, dtype={'code': str})

# To append more data later, use pd.concat([dfRead, dfSort]) rather than
# merge: merge splits same-named columns into left/right variants.

# Show every column when a frame is displayed. The full option name is used
# because abbreviated names only work through pattern matching.
pd.set_option('display.max_columns', None)
In [137]:
# Join the basics table with the realtime quotes. No `on=` is given, so
# pandas joins on every column name the two frames have in common and keeps
# only the rows present in both (inner join).
df_all = df.merge(df_price)
# Show every column when a frame is displayed. The full option name is used
# because abbreviated names only work through pattern matching.
pd.set_option('display.max_columns', None)
In [164]:
# Large-order trades for 600487 on 2019-03-08; vol=1000 overrides the
# default threshold of 400 lots.
# Other example: ts.get_sina_dd('600848', date='2015-12-24', vol=500)
# requests trades of at least 500 lots.
df_dd = ts.get_sina_dd('600487', date='2019-03-08', vol=1000)

# Keep buy-side rows (type == '买盘') whose volume exceeds 100*2000.
# Both conditions are combined into one mask over df_dd. The previous chained
# form df_dd[...][...] applied a mask built on the unfiltered frame to the
# already-filtered one, which made pandas reindex the mask and emit a warning.
is_buy = df_dd['type'] == '买盘'
is_big = df_dd['volume'] > 100*2000
df_dd_big = df_dd[is_buy & is_big]
df_dd_big_sorted = df_dd_big.sort_values(['time'], ascending=True)
In [162]:
# Step 1: keep only the buy-side rows (type == '买盘').
df_dd_big = df_dd.loc[df_dd['type'] == '买盘']
# Step 2: of those, show the rows with volume above 100*2000, ordered by time.
df_dd_big.loc[df_dd_big['volume'] > 100 * 2000].sort_values(by='time')
Out[162]: