In [1]:
#-*- coding: utf-8 -*-
import pandas as pd
import pandas_datareader.data as web
import datetime

def get_file_path(code):
    """Return the relative path of the cached data file for ``code``.

    The result is the ``../data/`` prefix with ``code`` appended unchanged,
    so ``code`` must be a string.
    """
    data_prefix = "../data/"
    return data_prefix + code

In [15]:
def download(code, year1, month1, day1, year2, month2, day2):
    """Download price data for ``code`` and cache it as a pickle file.

    The ticker requested from the "yahoo" data source is ``code`` with the
    ``.KS`` suffix appended. The result is pickled to the path given by
    ``get_file_path(code)``; the directory part of that path is created
    first if it does not exist, so a missing cache directory no longer
    discards an already completed download.

    Parameters
    ----------
    code : str
        Stock code without the ``.KS`` suffix, e.g. ``"005930"``.
    year1, month1, day1 : int
        Start date of the requested range.
    year2, month2, day2 : int
        End date of the requested range.

    Returns
    -------
    The object returned by ``web.DataReader`` for the requested range.

    Raises
    ------
    ValueError
        If either date triple is not a valid calendar date
        (raised by ``datetime.datetime``).
    """
    import os  # local import: only needed for the cache-directory check

    start = datetime.datetime(year1, month1, day1)
    end = datetime.datetime(year2, month2, day2)
    df = web.DataReader("%s.KS" % code, "yahoo", start, end)

    cache_path = get_file_path(code)
    cache_dir = os.path.dirname(cache_path)
    # to_pickle does not create parent directories; make sure the target
    # directory exists so the freshly downloaded frame can be saved.
    if cache_dir and not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    df.to_pickle(cache_path)

    return df

In [16]:
def load(code):
    """Read back the pickled data stored at ``get_file_path(code)``.

    This is the same path ``download`` writes to, so it returns whatever
    was last cached for ``code``.
    """
    # NOTE: unpickling can execute arbitrary code; only read cache files
    # that this notebook (or another trusted source) produced.
    return pd.read_pickle(get_file_path(code))

삼성전자(종목 코드 005930)의 주가 데이터를 다운로드한다.


In [17]:
# Fetch quotes for stock code 005930 from 2016-01-01 through 2016-11-01.
df = download(
    "005930",
    year1=2016, month1=1, day1=1,
    year2=2016, month2=11, day2=1,
)

In [18]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()


Out[18]:
Open High Low Close Volume Adj Close
count 2.180000e+02 2.180000e+02 2.180000e+02 2.180000e+02 2.180000e+02 2.180000e+02
mean 1.381495e+06 1.395876e+06 1.369876e+06 1.383977e+06 2.321674e+05 1.383450e+06
std 1.683836e+05 1.725185e+05 1.673190e+05 1.712811e+05 1.356109e+05 1.716604e+05
min 1.088000e+06 1.133000e+06 1.088000e+06 1.126000e+06 0.000000e+00 1.125195e+06
25% 1.257000e+06 1.264250e+06 1.246250e+06 1.253000e+06 1.669250e+05 1.252104e+06
50% 1.330000e+06 1.356500e+06 1.324000e+06 1.349000e+06 2.106000e+05 1.348036e+06
75% 1.547250e+06 1.568750e+06 1.536750e+06 1.547250e+06 2.728250e+05 1.547250e+06
max 1.700000e+06 1.716000e+06 1.690000e+06 1.706000e+06 1.250500e+06 1.706000e+06

In [19]:
#df.quantile([.25, .5, .75])

In [ ]:


In [20]:
import matplotlib.pyplot as plt


Bin : 1088000, Frequency = 11
Bin : 1149200, Frequency = 33
Bin : 1210400, Frequency = 26
Bin : 1271600, Frequency = 39
Bin : 1332800, Frequency = 7
Bin : 1394000, Frequency = 18
Bin : 1455200, Frequency = 14
Bin : 1516400, Frequency = 34
Bin : 1577600, Frequency = 27
Bin : 1638800, Frequency = 9

In [25]:
# scatter_matrix lives in pandas.plotting since pandas 0.20; the old
# pandas.tools.plotting module was deprecated then and later removed.
# Fall back to the legacy location only for older pandas installs.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# Pairwise scatter plots of the four price columns, with a kernel density
# estimate of each column on the diagonal.
scatter_matrix(df[['Open', 'High', 'Low', 'Close']], alpha=0.2, figsize=(6, 6), diagonal='kde')
plt.show()



In [28]:
# Box plot of the four price columns drawn on a shared axis.
ohlc_columns = ['Open', 'High', 'Low', 'Close']
df[ohlc_columns].plot(kind='box')
plt.show()



In [ ]: