In [126]:
import tushare as ts
import pandas as pd
import numpy as np

from xpinyin import Pinyin

In [127]:
# Load the fundamentals table for all listed stocks from tushare.
df = ts.get_stock_basics()
# Column names of the fundamentals table, kept for interactive inspection.
att = df.columns.values.tolist()
# Candidate columns to display:
# column_show = ['name', 'pe', 'outstanding', 'totals', 'totalAssets', 'liquidAssets', 'fixedAssets',
#                'esp', 'bvps', 'pb', 'perundp', 'rev', 'profit', 'gpr', 'npr', 'holders']

# Derive a pinyin-initials key (e.g. 'HTGD') from each stock name so stocks can
# be looked up by initials later. Mapping over the column replaces the previous
# row-by-row loop, which wrote through `df.at` with a list as the column label
# and failed when an index label was not unique.
pin = Pinyin()
df['UP'] = df['name'].map(lambda name_str: pin.get_initials(name_str, u''))

# Expose the index as a regular `code` column so it can be selected and merged on.
# NOTE(review): assumes the index of `df` holds the stock codes -- confirm against tushare.
df['code'] = df.index

code,代码 name,名称 industry,所属行业 area,地区 pe,市盈率 outstanding,流通股本(亿) totals,总股本(亿) totalAssets,总资产(万) liquidAssets,流动资产 fixedAssets,固定资产 reserved,公积金 reservedPerShare,每股公积金 esp,每股收益 bvps,每股净资 pb,市净率 timeToMarket,上市日期 undp,未分利润 perundp, 每股未分配 rev,收入同比(%) profit,利润同比(%) gpr,毛利率(%) npr,净利润率(%) holders,股东人数 ['name', 'pe', 'outstanding', 'totals', 'totalAssets', 'liquidAssets', 'fixedAssets', 'esp', 'bvps', 'pb', 'perundp', 'rev', 'profit', 'gpr', 'npr', 'holders']


In [128]:
# Columns of the realtime quote table to display.
col_show = ['name', 'open', 'pre_close', 'price', 'high', 'low', 'volume', 'amount',  'time', 'code']
# Pinyin initials of the stocks to track.
initial_letter = ['HTGD','OFKJ','CDKJ','ZJXC','GXKJ','FHTX','DZJG']

# Resolve each set of initials to a stock code. Initials are not guaranteed to
# be unique; when several stocks share them, the first match in `df` is used.
code = []
missing = []
for letter in initial_letter:
    matched_codes = df.loc[df['UP'] == letter, 'code']
    if matched_codes.empty:
        missing.append(letter)
    else:
        # .iloc[0] is explicitly positional; `[0]` on this label-indexed Series
        # relied on pandas' deprecated integer-position fallback.
        code.append(matched_codes.iloc[0])

# Fail with a clear message instead of an opaque IndexError/NameError further down.
if missing:
    raise KeyError('No stock matches initials: ' + ', '.join(missing))
# The original `code != ''` compared a list with a string and was always true.
if not code:
    raise ValueError('initial_letter is empty; no codes to request quotes for')

df_price = ts.get_realtime_quotes(code)
df_price[col_show]


Out[128]:
name open pre_close price high low volume amount time code
0 亨通光电 21.500 22.000 21.960 23.110 21.200 98727052 2198642229.000 15:00:00 600487
1 欧菲科技 14.000 14.740 14.110 14.940 13.800 170908883 2454933765.380 15:00:03 002456
2 长电科技 14.300 14.700 15.900 16.170 14.020 165795445 2632525261.000 15:00:00 600584
3 中际旭创 56.990 59.370 58.400 64.390 56.000 9156938 549740196.910 15:00:03 300308
4 光迅科技 31.000 31.620 32.580 34.500 30.680 47640638 1578763475.520 15:00:03 002281
5 烽火通信 31.700 32.370 32.290 33.460 31.250 50910497 1661789943.000 15:00:00 600498
6 大族激光 40.600 42.440 42.550 45.100 39.800 56482328 2428853657.020 15:00:03 002008

In [129]:
# Nested-dict view of the quotes: {column -> {row label -> value}}, which is
# pandas' default orientation; e.g. vec_data['name'][0] reads one cell.
vec_data = df_price.to_dict()

In [130]:
# Two ways to obtain the quotes as a NumPy array:
# the np.array constructor applied to the frame ...
np_data1 = np.array(df_price)
# ... and the frame's own .values attribute.
np_data0 = df_price.values

In [131]:
#df_k = ts.get_k_data()
#df_hist=ts.get_hist_data('600848') # fetch all daily k-line data in one call
# Fetch historical data for stock 600487, limited to 2018-01-01 .. 2019-03-09.
df_selected=ts.get_hist_data('600487',start='2018-01-01',end='2019-03-09')

In [132]:
# Preview the first five rows of the history (head() defaults to n=5).
df_selected.head()


Out[132]:
open high close low volume price_change p_change ma5 ma10 ma20 v_ma5 v_ma10 v_ma20
date
2019-03-08 21.50 23.11 21.96 21.20 987270.50 -0.04 -0.18 22.196 22.007 20.917 779852.06 740909.02 704698.25
2019-03-07 22.30 22.30 22.00 21.71 748656.00 -0.48 -2.13 22.164 21.961 20.705 654157.73 731786.67 667341.65
2019-03-06 22.84 23.22 22.48 22.13 683858.94 -0.08 -0.35 22.076 21.801 20.470 572493.08 751318.73 642678.70
2019-03-05 22.06 22.56 22.56 21.81 671705.62 0.58 2.64 21.926 21.562 20.199 557541.50 724878.92 628772.43
2019-03-04 22.43 22.65 21.98 21.87 807769.25 0.18 0.83 21.770 21.325 19.964 604499.67 718289.61 609536.72

In [133]:
# Fetch the index quote table from tushare; its `code` column is used for filtering below.
df_index = ts.get_index()

In [134]:
# Codes of the indices to follow; only rows whose `code` is in this list are shown.
index_to_track = [
    '000001',
    '000300',
    '000016',
    '399001',
    '399106',
    '399006',
]
df_index.loc[df_index["code"].isin(index_to_track)]


Out[134]:
code name change open preclose close high low volume amount
0 000001 上证指数 -4.40 3038.3362 3106.4179 2969.8614 3075.0461 2969.5815 577855473 5255.9825
8 000016 上证50 -3.73 2731.5301 2789.7001 2685.5920 2753.5526 2684.1969 72033928 963.8838
10 000300 沪深300 -3.97 3721.4016 3808.8497 3657.5790 3774.5431 3656.1917 317990077 3492.9669
12 399001 深证成指 -3.25 9447.5980 9678.1140 9363.7240 9726.3050 9346.7280 70948978499 6585.0709
17 399006 创业板指 -2.24 1647.8480 1692.4160 1654.4850 1718.3840 1635.4500 17086233045 2028.9394
21 399106 深证综指 -3.79 1630.0930 1668.5330 1605.2790 1673.4040 1605.2790 70948978499 6585.0709

In [135]:
import tushare as ts
import pandas as pd  # DataFrame round-trip test

# Experiment: merge overlapping k-line downloads, de-duplicate them by date,
# write the result to CSV and read it back.
shareCode = '600487'
dfLoad = ts.get_k_data(shareCode, start='2018-01-05', end='2018-01-09')
dfUpda1 = ts.get_k_data(shareCode, start='2018-01-05', end='2018-01-12')
dfUpda2 = ts.get_k_data(shareCode, start='2018-01-15', end='2018-01-20')
dfConc = pd.concat([dfLoad, dfUpda2, dfUpda1])

# Sort newest first. sort_values does not modify the frame in place; it returns
# a sorted copy, so the result has to be assigned to a variable.
dfSort = dfConc.sort_values(by='date', ascending=False)
# De-duplicate: `subset` names the column(s) compared, keep='first' retains the
# first row of each group of duplicates.
dfDrop = dfSort.drop_duplicates(subset=['date'], keep='first')
#print(dfSort)

#print(dfDrop)
# Whether to write the index is a per-case choice; it is left out here.
dfDrop.to_csv(shareCode + '.csv', index=False)
# When reading, `index_col` can select the column to use as the index; without
# it (and with the index written to the file) an extra column would appear.
# Force `code` to str: read_csv would otherwise infer an integer column, which
# changes the dtype and drops leading zeros from codes such as '002456'.
# NOTE(review): assumes the k-line frame carries a `code` column -- confirm.
dfRead = pd.read_csv(shareCode + '.csv', dtype={'code': str})
# Plain concatenation is the right tool here; merge would split same-named
# columns into left/right variants.
#dfRead = pd.concat([dfRead,dfSort])
# Show all columns. Use the full option name: the abbreviated
# 'display.max_column' only worked through pandas' pattern matching of option
# names, which is not guaranteed to stay unambiguous.
pd.set_option('display.max_columns', None)
#print('dfRead:')
#print(dfRead)

In [137]:
# Join fundamentals with the realtime quotes. With no `on=` given, pandas joins
# on every column name the two frames share.
# `df` exposes `code` as a column copied from its index; if the index is also
# named `code`, newer pandas raises "'code' is both an index level and a column
# label" when it is used as a merge key. Dropping the index first avoids that
# and does not change the result, since a column-based merge returns a fresh
# integer index anyway.
df_all = pd.merge(df.reset_index(drop=True), df_price)
#df_all['code']
# Show all columns (full option name rather than the abbreviated 'display.max_column').
pd.set_option('display.max_columns', None)
#df_all.columns.values.tolist()

In [164]:
# Large-order ticks for 600487 on 2019-03-08. `vol` is the minimum order size
# in lots (the API default is 400 lots); here only orders of >= 1000 lots are requested.
df_dd = ts.get_sina_dd('600487', date='2019-03-08', vol=1000)

# Keep buy orders ('买盘') whose volume exceeds 100*2000. Both conditions are
# combined into one mask over `df_dd`; the previous chained form
# df_dd[cond_a][cond_b] indexed the already-filtered frame with a mask built on
# the unfiltered one, which made pandas reindex the mask and emit a warning.
# NOTE(review): 100*2000 presumably means 2000 lots of 100 shares -- confirm the unit of `volume`.
df_dd_big = df_dd[(df_dd['type'] == '买盘') & (df_dd['volume'] > 100*2000)]
# Chronological order.
df_dd_big_sorted = df_dd_big.sort_values(['time'], ascending=True)

In [162]:
# Step 1: keep only the buy-side rows ('买盘').
df_dd_big = df_dd.loc[df_dd['type'] == '买盘']
# Step 2: of those, show the rows with volume above 100*2000, ordered by time.
df_dd_big.loc[df_dd_big['volume'] > 100*2000].sort_values(by='time')


Out[162]:
code name time price volume preprice type
137 600487 亨通光电 09:25:01 21.50 574500 0.00 买盘
134 600487 亨通光电 09:30:07 21.40 311920 21.40 买盘
113 600487 亨通光电 10:20:43 21.99 201700 21.97 买盘
102 600487 亨通光电 10:26:49 22.31 298400 22.26 买盘
95 600487 亨通光电 10:28:25 22.40 353800 22.40 买盘
93 600487 亨通光电 10:29:46 22.45 268000 22.44 买盘
82 600487 亨通光电 10:31:43 22.59 205200 22.59 买盘
64 600487 亨通光电 10:38:10 22.65 281880 22.65 买盘
63 600487 亨通光电 10:38:22 22.70 342860 22.67 买盘
62 600487 亨通光电 10:38:34 22.76 292500 22.75 买盘
45 600487 亨通光电 10:43:34 22.97 234700 22.93 买盘
43 600487 亨通光电 10:44:04 23.05 293140 22.99 买盘
0 600487 亨通光电 15:00:00 21.96 847200 21.96 买盘

TO-DO

Add a mapping from pinyin initials to stock code

Build up a dataframe with fundamentals and indicators

For Leadings, need to cache more data for the beginning of the data