In [2]:

    
import matplotlib.pylab as plt
import numpy as np
%matplotlib inline

바 차트



In [5]:

    
# Sample data for the bar-chart cells below: three bar heights, their
# integer positions 0..len(y)-1, and one text label per bar.
y = [2, 3, 1]
x = np.arange(len(y))
xlabel = ["A","B","C"]



In [6]:

    
plt.bar(x, y)









    Out[6]:





<Container object of 3 artists>



In [9]:

    
plt.bar(x, y, align="center")
plt.xticks(x, xlabel)









    Out[9]:





([<matplotlib.axis.XTick at 0x1851d250780>,
  <matplotlib.axis.XTick at 0x1851d250438>,
  <matplotlib.axis.XTick at 0x1851d248390>],
 <a list of 3 Text xticklabel objects>)



In [11]:

    
# Demo data for the horizontal bar charts: five names and their positions 0..4.
people = ("Tom", "Dick", "Harry", "Slim", "Jim")
y_pos = np.arange(len(people))
# np.random.rand draws from [0, 1), so each value lies in [3, 13).
# No seed is set before this point in the notebook, so values differ per run.
performance = 3 + 10 * np.random.rand(len(people))
# One random value in [0, 1) per person; passed as xerr= in the cells below.
error = np.random.rand(len(people))



In [12]:

    
plt.barh(y_pos, performance, xerr=error, align="center", alpha=0.4)









    Out[12]:





<Container object of 5 artists>



In [13]:

    
plt.barh(y_pos, performance, xerr=error, align="center", alpha=0.4)
plt.yticks(y_pos, people)









    Out[13]:





([<matplotlib.axis.YTick at 0x1851d3460b8>,
  <matplotlib.axis.YTick at 0x1851d20bf98>,
  <matplotlib.axis.YTick at 0x1851d33f208>,
  <matplotlib.axis.YTick at 0x1851d38aba8>,
  <matplotlib.axis.YTick at 0x1851d38e5f8>],
 <a list of 5 Text yticklabel objects>)



In [15]:

    
# Horizontal bars with error bars (xerr), the names as y tick labels and an
# x-axis label.  The trailing ';' suppresses the cell's text output.
plt.barh(y_pos, performance, xerr=error, align="center", alpha=0.4)
plt.yticks(y_pos, people)
plt.xlabel("Performance");



In [16]:

    
# Inputs for the grouped bar chart in the next cell: five groups, with one
# mean and one std value per group for each of the two series.
n_groups = 5
means_men = (20, 35, 30, 35, 27)
std_men = (2, 3, 4, 1, 2)
means_women = (25, 32, 34, 20, 25)
std_women = (3, 5, 2, 3, 3)



In [26]:

    
# Grouped bar chart: one (men, women) pair of bars per group, with error bars.
fig, ax = plt.subplots()

index = np.arange(n_groups)
bar_width = 0.35

opacity = 0.4
error_config = {"ecolor": "0.3"}  # grey level used for the error-bar lines

# align="edge" is spelled out on purpose.  With edge alignment each bar spans
# [x, x + bar_width], so a pair covers [index, index + 2 * bar_width] and its
# centre is index + bar_width -- exactly where the ticks are placed below.
# The library default for `align` differs between matplotlib versions, so
# relying on it would shift the tick labels off the pair centres.
retcs1 = plt.bar(index, means_men, bar_width,
                 align="edge",
                 alpha=opacity,
                 color="b",
                 yerr=std_men,
                 error_kw=error_config,
                 label="Men")
retcs2 = plt.bar(index + bar_width, means_women, bar_width,
                 align="edge",
                 alpha=opacity,
                 color="r",
                 yerr=std_women,
                 error_kw=error_config,
                 label="Women")

plt.xlabel("Group")
plt.ylabel("Scores")
plt.title("Scores by group and gender")
# Tick labels sit at the centre of each men/women pair (see note above).
plt.xticks(index + bar_width, ("A","B","C","D","E"))
plt.legend()

plt.tight_layout()



In [28]:

    
# Stacked bar chart: women's scores drawn on top of men's scores per group.
N = 5
menMeans = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)

menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)

ind = np.arange(N)
width = 0.35

# align="edge" is explicit so that every bar spans [ind, ind + width]; the
# tick position ind + width/2 used below is then the bar centre regardless of
# which default alignment the installed matplotlib uses.
p1 = plt.bar(ind, menMeans, width, align="edge", color="r", yerr=menStd)
# bottom=menMeans starts each women's bar where the men's bar ends.
p2 = plt.bar(ind, womenMeans, width, align="edge", color="y",
             bottom=menMeans, yerr=womenStd)

plt.ylabel("Scores")
plt.title("Scores by group and gender")
plt.xticks(ind + width/2, ("G1","G2","G3","G4","G5"))
plt.yticks(np.arange(0, 81, 10))
# Use the first rectangle of each container as the legend handle.
plt.legend((p1[0], p2[0]), ("Men", "Women"))









    Out[28]:





<matplotlib.legend.Legend at 0x1851e5d76a0>

스템 플롯



In [31]:

    
# A stem plot resembles a bar chart but its stems have no width.
# It is used to depict discrete probability functions or autocorrelation.

x = np.linspace(0.1, 2*np.pi, 10)
markerline, stemlines, baseline = plt.stem(x, np.cos(x), '-.')
# Restyle the returned artists: blue marker faces, thick red baseline.
plt.setp(markerline, markerfacecolor='b')
plt.setp(baseline, color='r', linewidth=2);

파이 차트



In [32]:

    
#  카테고리별 값의 상대적인 비교를 할 때 pie 명령을 사용



In [36]:

    
# Pie chart of four categories; the second wedge is pulled out of the pie.
labels = ('Frogs', 'Hogs', 'Dogs', 'Logs')
sizes = [15, 30, 45, 10]
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']
explode = (0, 0.1, 0, 0)  # only the second entry is non-zero
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',  # percentage label with one decimal place
    startangle=90,
    shadow=True,
)
# Equal axis scaling so the pie is drawn as a circle rather than an ellipse.
plt.axis('equal');

히스토그램



In [38]:

    
# Histogram of 5000 standard-normal samples in 50 bins.
x = np.random.randn(5000)
# density=True normalises the bar heights so the histogram integrates to 1.
# It replaces the older `normed=True` spelling, which current matplotlib
# releases no longer accept.
# plt.hist returns (bin heights, bin edges, drawn patches).
arrays, bins, patches = plt.hist(x, bins=50, density=True)



In [39]:

    
arrays









    Out[39]:





array([ 0.00252461,  0.        ,  0.        ,  0.00252461,  0.        ,
        0.0012623 ,  0.00252461,  0.00631151,  0.01262303,  0.02777066,
        0.01514763,  0.03408218,  0.0429183 ,  0.06690205,  0.08204969,
        0.13506641,  0.13759101,  0.21332918,  0.25119827,  0.23983754,
        0.33577256,  0.33072335,  0.3534448 ,  0.35849401,  0.38500237,
        0.38626467,  0.37111704,  0.3976254 ,  0.34713328,  0.33955947,
        0.30547729,  0.24236215,  0.20954227,  0.18429622,  0.14642713,
        0.10098423,  0.06059054,  0.06311514,  0.02650836,  0.02650836,
        0.03408218,  0.01009842,  0.00883612,  0.00252461,  0.00504921,
        0.00252461,  0.00252461,  0.        ,  0.        ,  0.0012623 ])



In [40]:

    
bins









    Out[40]:





array([-3.99402697, -3.83558638, -3.6771458 , -3.51870522, -3.36026463,
       -3.20182405, -3.04338347, -2.88494288, -2.7265023 , -2.56806172,
       -2.40962114, -2.25118055, -2.09273997, -1.93429939, -1.7758588 ,
       -1.61741822, -1.45897764, -1.30053705, -1.14209647, -0.98365589,
       -0.82521531, -0.66677472, -0.50833414, -0.34989356, -0.19145297,
       -0.03301239,  0.12542819,  0.28386877,  0.44230936,  0.60074994,
        0.75919052,  0.91763111,  1.07607169,  1.23451227,  1.39295286,
        1.55139344,  1.70983402,  1.8682746 ,  2.02671519,  2.18515577,
        2.34359635,  2.50203694,  2.66047752,  2.8189181 ,  2.97735869,
        3.13579927,  3.29423985,  3.45268043,  3.61112102,  3.7695616 ,
        3.92800218])

스캐터 플롯



In [41]:

    
# 두 개의 데이터 집합! 상관관계



In [42]:

    
X = np.random.normal(0, 1, 1024)
Y = np.random.normal(0, 1, 1024)

plt.scatter(X, Y);



In [44]:

    
# Bubble chart: N random points in the unit square, each with a random
# colour value and a random marker size.
N = 50
x = np.random.rand(N)
y = np.random.rand(N)
colors = np.random.rand(N)
# pi * r**2 with r drawn from [0, 15); passed to scatter as the size s=.
area = np.pi * (15 * np.random.rand(N))**2
plt.scatter(x, y, s=area, c=colors, alpha=0.5);

Imshow



In [ ]:

    
#  2차원 데이터 시각화



In [45]:

    
from sklearn.datasets import load_digits



In [46]:

    
digits = load_digits()
X = digits.images[0]
X









    Out[46]:





array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])



In [47]:

    
plt.imshow(X, interpolation='nearest');
plt.grid(False)



In [48]:

    
# Show the image X under every interpolation method, one method per panel.
methods = [None, 'none', 'nearest', 'bilinear', 'bicubic', 'spline16',
           'spline36', 'hanning', 'hamming', 'hermite', 'kaiser', 'quadric',
           'catrom', 'gaussian', 'bessel', 'mitchell', 'sinc', 'lanczos']
# 3 x 6 grid = 18 panels, one per entry in `methods`; ticks are removed.
fig, axes = plt.subplots(3, 6, figsize=(12, 6), subplot_kw={'xticks': [], 'yticks': []})
fig.subplots_adjust(hspace=0.3, wspace=0.05)
for ax, interp_method in zip(axes.flat, methods):
    ax.imshow(X, interpolation=interp_method)
    # str() gives the first entry (the Python value None, distinct from the
    # string 'none') a visible "None" title; passing None itself is rendered
    # differently depending on the matplotlib version.
    ax.set_title(str(interp_method))

컨투어 플롯



In [49]:

    
# 2차원 자료를 시각화하는 또다른 방법. 등고선을 사용
# contour 혹은 contourf 명령을 사용함



In [50]:

    
def f(x, y):
    """Evaluate the demo surface used by the contour plots.

    Computes ``(1 - x/2 + x**5 + y**3) * exp(-x**2 - y**2)``.  Only arithmetic
    operators and ``np.exp`` are applied, so scalars and NumPy arrays are
    handled alike.
    """
    polynomial = 1 - x / 2 + x ** 5 + y ** 3
    envelope = np.exp(-x ** 2 - y ** 2)
    return polynomial * envelope

# Evaluate f on an n x n grid covering [-3, 3] x [-3, 3].
n = 256
x = np.linspace(-3, 3, n)
y = np.linspace(-3, 3, n)
# meshgrid expands the two 1-D axes into 2-D coordinate arrays.
XX, YY = np.meshgrid(x, y)
ZZ = f(XX, YY)



In [51]:

    
# Filled contours in colour, then black contour lines drawn on top of them.
plt.contourf(XX, YY, ZZ, alpha=.75, cmap='jet');
# The contour keyword is the plural `linewidths`; the singular `linewidth`
# is not a contour parameter, so the requested 0.5 width was never applied.
plt.contour(XX, YY, ZZ, colors='black', linewidths=.5);

3D 서피스 플롯



In [52]:

    
from mpl_toolkits.mplot3d import Axes3D
# Grid from -4 up to (but excluding) 4 in steps of 0.25 on both axes.
X = np.arange(-4, 4, 0.25)
Y = np.arange(-4, 4, 0.25)
XX, YY = np.meshgrid(X, Y)
# RR is the distance of each grid point from the origin; the surface height
# is the sine of that distance.
RR = np.sqrt(XX**2 + YY**2)
ZZ = np.sin(RR)



In [53]:

    
# Draw ZZ as a 3D surface over the (XX, YY) grid.
fig = plt.figure()
# Create the 3D axes through the figure so they are always attached to it.
# Constructing Axes3D(fig) directly does not add the axes to the figure in
# recent matplotlib releases, which leaves the figure empty.  The "3d"
# projection is made available by the mpl_toolkits.mplot3d import in the
# previous cell.
ax = fig.add_subplot(111, projection="3d")
# rstride/cstride of 1 use every grid row and column for the surface.
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, cmap='hot');

seaborn

matplotlib 기반으로 다양한 색상 테마와 통계용 차트 기능을 추가한 패키지
matplotlib, statsmodels에 의존

seaborn에서 제공하는 플롯의 종류는 다음과 같다.

분포 플롯 (distribution plot)
회귀 분석 플롯 (regression plot)
카테고리 플롯 (categorical plot)
행렬 플롯 (matrix plot)
시계열 플롯 (time series plot)



In [54]:

    
#  seaborn을 임포트하면 색상이 바뀜



In [55]:

    
import seaborn as sns
# Apply seaborn's default theme and its single-letter colour codes.
sns.set()
sns.set_color_codes()

x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
# The figure is bound to `fig`, not `f`: the name `f` already belongs to the
# surface function defined in the contour section, and overwriting it would
# make that section fail if it were re-run after this cell.
fig, axarr = plt.subplots(2, sharex=True)
axarr[0].plot(x, y)
axarr[0].set_title('Sharing X axis')
axarr[1].scatter(x, y);

분포 플롯

단순 히스토그램에 커널 밀도와 러그 기능까지 제공



In [56]:

    
# Fix the seed so the 100 standard-normal samples are the same on every run;
# the distribution-plot cells below reuse this x.
np.random.seed(0)
x = np.random.randn(100)

# Rug plot of the samples.
sns.rugplot(x);



In [59]:

    
sns.kdeplot(x);



In [60]:

    
sns.distplot(x, kde=True, rug=True);



In [61]:

    
tips = sns.load_dataset("tips")
sns.jointplot(x="total_bill", y="tip", data=tips);



In [63]:

    
# Joint KDE plot of two iris measurements.
iris = sns.load_dataset("iris")
# x and y are passed by keyword (as the tips jointplot cell above already
# does); current seaborn releases no longer accept them positionally.
sns.jointplot(x="sepal_width", y="petal_length", data=iris, kind="kde", space=0, color="g")









    Out[63]:





<seaborn.axisgrid.JointGrid at 0x18523ddee48>

회귀 분석 플롯

regplot
residplot
lmplot



In [64]:

    
#  regplot은 내부적으로 회귀분석을 실시하고 그 결과를 시각화
# 데이터 자체는 스캐터 플롯으로, 회귀 분석 결과는 라인 플롯으로
# 신뢰 구간은 fill 플롯



In [65]:

    
sns.regplot(x="total_bill", y="tip", data=tips);



In [66]:

    
sns.residplot(x="total_bill", y="tip", data=tips); # 잔차항



In [67]:

    
sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips);



In [68]:

    
sns.lmplot(x="total_bill", y="tip", col="smoker", data=tips);

카테고리 플롯



In [69]:

    
sns.barplot(x="day", y="total_bill", hue="sex", data=tips);



In [70]:

    
titanic = sns.load_dataset("titanic")
sns.countplot(x="class", hue="who", data=titanic);



In [71]:

    
sns.boxplot(x="day", y="total_bill", hue="smoker", data=tips);



In [72]:

    
sns.pointplot(x="time", y="total_bill", hue="smoker", data=tips, dodge=True);

boxplot과 pointplot이 중앙값, 표준 편차 등 분포의 간략한 특성만 보여주는 데 반해 violinplot, stripplot, swarmplot 등은 카테고리값에 따른 각 분포의 전체 형상을 보여준다는 장점이 있다. stripplot과 swarmplot은 보통 boxplot이나 violinplot과 같이 사용된다.



In [73]:

    
sns.violinplot(x="day", y="total_bill", hue="smoker", data=tips, palette="muted");



In [74]:

    
sns.violinplot(x="day", y="total_bill", hue="sex",
               data=tips, palette="Set2", split=True,
               scale="count", inner="quartile");



In [75]:

    
# Strip plot of total_bill per day, with the smoker levels drawn side by side.
# `dodge=True` is the current name of the option this cell originally passed
# as `split=True`; seaborn renamed the stripplot parameter.
sns.stripplot(x="day", y="total_bill", hue="smoker",
              data=tips, jitter=True,
              palette="Set2", dodge=True);



In [76]:

    
# Horizontal box plot of tip per day.
# NOTE(review): whis=np.inf presumably stretches the whiskers to the data
# extremes so no points are flagged as outliers -- confirm against the
# installed seaborn version.
sns.boxplot(x="tip", y="day", data=tips, whis=np.inf)
# Second call draws the individual observations, jittered, in dark grey.
sns.stripplot(x="tip", y="day", data=tips, jitter=True, color=".3");



In [77]:

    
sns.swarmplot(x="day", y="total_bill", hue="sex", data=tips);



In [78]:

    
# Violin plot with inner=None (no inner annotation requested), followed by a
# swarm plot of the same x/y columns in white with grey edges.
sns.violinplot(x="day", y="total_bill", data=tips, inner=None)
sns.swarmplot(x="day", y="total_bill", data=tips, color="white", edgecolor="gray");

행렬 플롯



In [79]:

    
# 2차원 카테고리 값 자료의 분포를 위한 것
# heatmap, clustermap



In [80]:

    
# Load the long-format flights table and reshape it to a month x year matrix
# of passenger counts in a single expression, so re-running this cell always
# starts from the freshly loaded data.
# pivot() arguments are passed by keyword: pandas 2.x no longer accepts them
# positionally.
flights = sns.load_dataset("flights").pivot(
    index="month", columns="year", values="passengers"
)
# annot=True writes each cell's value; fmt="d" formats it as an integer.
sns.heatmap(flights, annot=True, fmt="d");



In [81]:

    
sns.clustermap(flights);

시계열 플롯

tsplot



In [82]:

    
# Seeded synthetic data: 10 series of 31 points.  Each series is sin(x) plus
# uniform noise per point plus one normal offset per series (the (10, 1)
# array broadcasts across the 31 columns).
np.random.seed(22)
x = np.linspace(0, 15, 31)
data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1)
# NOTE(review): tsplot appears to be absent from recent seaborn releases --
# confirm the installed version still provides it.
sns.tsplot(data=data);



In [83]:

    
sns.tsplot(data=data, err_style="boot_traces", n_boot=500);



In [84]:

    
sns.tsplot(data=data, ci=[68, 95], color="m");

bokeh



In [86]:

    
import urllib
import pandas as pd

# Read a CSV quote table for the symbol in the query string (s=MSFT) and
# parse its Date column as datetimes.  This rebinds `data`, which held the
# NumPy array used by the tsplot cells above.
# NOTE(review): the a/b/c query parameters presumably encode the start date
# (2009); the endpoint is external and plain HTTP -- confirm it is still
# being served.
url = 'http://ichart.yahoo.com/table.csv?s=MSFT&a=0&b=1&c=2009'
data = pd.read_csv(url, parse_dates=['Date'])



In [87]:

    
import bokeh.plotting as bp



In [88]:

    
# 주피터 노트북에서 실행하여 출력하는 경우
bp.output_notebook()









    





    
        
        Loading BokehJS ...



In [89]:

    
# Create the Bokeh figure that the following cells draw on.
p = bp.figure(title='Historical Stock Quotes', # plot title
              x_axis_type ='datetime',  # x axis holds date information
              tools = '')  # empty tool list



In [90]:

    
# Add the closing-price line to the figure.
p.line(
    data['Date'],  # x coordinates
    data['Close'], # y coordinates
    color ='#0066cc',  # line color
    legend ='MSFT',  # legend entry name
)









    Out[90]:





<bokeh.models.renderers.GlyphRenderer at 0x18519887cf8>



In [91]:

    
bp.show(p)









    






    







    Out[91]:




<Bokeh Notebook handle for In[91]>



In [92]:

    
# Same line chart as in the cells above, this time with an interactive toolbar.
p = bp.figure(title='Historical Stock Quotes', # plot title
              x_axis_type ='datetime',  # x axis holds date information
              # 'save' is the tool name to use; the older alias 'previewsave'
              # is not recognised by current Bokeh releases.
              tools = 'pan, wheel_zoom, box_zoom, reset, save')
p.line(
    data['Date'],  # x coordinates
    data['Close'], # y coordinates
    color ='#0066cc',  # line color
    # NOTE(review): newer Bokeh releases spell this keyword `legend_label` --
    # confirm against the installed version before changing it.
    legend ='MSFT',  # legend entry name
)
bp.show(p)









    






    







    Out[92]:




<Bokeh Notebook handle for In[92]>



In [ ]: