In [1]:
import pandas as pd
import numpy as np
from pandas_datareader import data, wb # requires installation: pip install pandas_datareader
import datetime
import matplotlib.pyplot as plt
import matplotlib
# Apply the 'ggplot' style sheet to figures created after this point.
matplotlib.style.use('ggplot')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [4]:
# Read the file '000001.SS.csv' (path relative to the working directory) into a DataFrame.
# NOTE(review): sh_table is not referenced by any other cell visible here — confirm whether it is still needed.
sh_table = pd.read_csv('000001.SS.csv')
In [28]:
# Date window used when requesting the data: 2010-01-01 through 2016-05-20.
start = datetime.datetime(year=2010, month=1, day=1)
end = datetime.datetime(year=2016, month=5, day=20)
In [29]:
# Fetch data for the symbol 000001.SS from the 'yahoo' data source between start and end.
# NOTE(review): the 'yahoo' reader is reported to be unreliable in recent
# pandas_datareader releases — confirm it still works in the target environment.
sh = data.DataReader(
    name="000001.SS",
    data_source='yahoo',
    start=start,
    end=end,
)
In [30]:
sh.head(3) # data fetched successfully
Out[30]:
In [31]:
sh.describe() # overall summary of the data
Out[31]:
In [32]:
# Plot the Close column; the trailing semicolon suppresses the repr of the returned object.
sh['Close'].plot();
In [33]:
# Rebind sh to a copy without the Volume column.
# Re-running this cell raises KeyError because the column is already gone.
sh = sh.drop(labels='Volume', axis='columns')
In [34]:
sh.head(2) # the Volume column is gone
Out[34]:
In [35]:
# Preview the boolean null-mask of the frame (True where a value is missing).
sh.isnull().head()  # equivalent to pd.isnull(sh).head()
Out[35]:
In [36]:
# True if at least one cell anywhere in the frame is null.
sh.isnull().values.any()
Out[36]:
In [37]:
# Total number of null cells across the whole frame.
sh.isnull().values.sum()
Out[37]:
In [38]:
# Row-to-row difference of the Close column. Missing differences (at least the
# first row, which has no predecessor) are replaced with the mean of the
# non-missing differences before the result is stored as the Change column.
change = sh['Close'].diff()
change = change.fillna(change.mean())
sh['Change'] = change
In [39]:
# Row-to-row percentage change of the closing price, as a fraction (0.01 == 1%).
# It is computed from Close rather than from the Change column: Change already
# holds differences, so its pct_change would be the relative change of a
# difference and becomes +/-inf whenever the previous row's change is 0.
# The first row has no predecessor and is therefore NaN.
sh['pct_change'] = sh['Close'].pct_change()
# Show a few rows to eyeball the new column.
sh.iloc[5:9]
Out[39]:
In [40]:
# Tag every row with an integer year-month key built from its index value,
# e.g. an index entry in January 2010 maps to 201001.
sh['group_index'] = sh.index.map(lambda ts: ts.year * 100 + ts.month)
In [18]:
# Preview the first rows of the frame.
sh.head()
Out[18]:
In [41]:
# Inspect the row index of the frame.
sh.index
Out[41]:
In [42]:
# Plot the Open column; the double brackets select a one-column DataFrame rather than a Series.
sh[['Open']].plot();
# Explicitly render the current figure.
plt.show()
In [ ]: