넘파이(NumPy), 판다스(pandas), 맷플롯립(Matplotlib)을 활용한 데이터 분석 실습


In [1]:
# Magic command so that plots are shown right away, inline in the notebook
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib

In [2]:
# Sample array for the exercises: 50 uniform random floats from [0, 1)
data = np.random.random_sample(size=50)

In [3]:
# A bare expression on the last line of a cell is echoed as Out[n]
data


Out[3]:
array([ 0.69441075,  0.67115462,  0.97371209,  0.07478419,  0.38666964,
        0.86864553,  0.39693644,  0.43976044,  0.72006754,  0.5172352 ,
        0.67770745,  0.11730094,  0.33252131,  0.64829307,  0.22863674,
        0.94609219,  0.55635679,  0.04403149,  0.41776461,  0.32737617,
        0.19244272,  0.7930951 ,  0.79184269,  0.87282353,  0.2491409 ,
        0.26819145,  0.21537239,  0.2085542 ,  0.99498292,  0.22947829,
        0.40941102,  0.41394595,  0.75747041,  0.23900721,  0.54543583,
        0.43432801,  0.85986635,  0.70919305,  0.28934201,  0.12310873,
        0.34661269,  0.94458661,  0.76200474,  0.22947503,  0.08508857,
        0.53882323,  0.02440462,  0.82298375,  0.26419021,  0.17495114])

In [4]:
# print() writes the values as plain text (no "array(...)" wrapper, no commas)
# and does not produce an Out[n] entry, unlike echoing the bare name
print(data)


[ 0.69441075  0.67115462  0.97371209  0.07478419  0.38666964  0.86864553
  0.39693644  0.43976044  0.72006754  0.5172352   0.67770745  0.11730094
  0.33252131  0.64829307  0.22863674  0.94609219  0.55635679  0.04403149
  0.41776461  0.32737617  0.19244272  0.7930951   0.79184269  0.87282353
  0.2491409   0.26819145  0.21537239  0.2085542   0.99498292  0.22947829
  0.40941102  0.41394595  0.75747041  0.23900721  0.54543583  0.43432801
  0.85986635  0.70919305  0.28934201  0.12310873  0.34661269  0.94458661
  0.76200474  0.22947503  0.08508857  0.53882323  0.02440462  0.82298375
  0.26419021  0.17495114]

In [5]:
# Echo the array once more for comparison with the print() output
data


Out[5]:
array([ 0.69441075,  0.67115462,  0.97371209,  0.07478419,  0.38666964,
        0.86864553,  0.39693644,  0.43976044,  0.72006754,  0.5172352 ,
        0.67770745,  0.11730094,  0.33252131,  0.64829307,  0.22863674,
        0.94609219,  0.55635679,  0.04403149,  0.41776461,  0.32737617,
        0.19244272,  0.7930951 ,  0.79184269,  0.87282353,  0.2491409 ,
        0.26819145,  0.21537239,  0.2085542 ,  0.99498292,  0.22947829,
        0.40941102,  0.41394595,  0.75747041,  0.23900721,  0.54543583,
        0.43432801,  0.85986635,  0.70919305,  0.28934201,  0.12310873,
        0.34661269,  0.94458661,  0.76200474,  0.22947503,  0.08508857,
        0.53882323,  0.02440462,  0.82298375,  0.26419021,  0.17495114])

In [6]:
# Wrap the random array in a pandas Series so it gains an index and a .plot() method
seri = pd.Series(data=data)

In [7]:
# Echo the Series: each value is listed next to its integer index, followed by the dtype
seri


Out[7]:
0     0.694411
1     0.671155
2     0.973712
3     0.074784
4     0.386670
5     0.868646
6     0.396936
7     0.439760
8     0.720068
9     0.517235
10    0.677707
11    0.117301
12    0.332521
13    0.648293
14    0.228637
15    0.946092
16    0.556357
17    0.044031
18    0.417765
19    0.327376
20    0.192443
21    0.793095
22    0.791843
23    0.872824
24    0.249141
25    0.268191
26    0.215372
27    0.208554
28    0.994983
29    0.229478
30    0.409411
31    0.413946
32    0.757470
33    0.239007
34    0.545436
35    0.434328
36    0.859866
37    0.709193
38    0.289342
39    0.123109
40    0.346613
41    0.944587
42    0.762005
43    0.229475
44    0.085089
45    0.538823
46    0.024405
47    0.822984
48    0.264190
49    0.174951
dtype: float64

In [8]:
# Draw the Series with pandas' default plot settings; the call returns the
# matplotlib axes object, which is what gets echoed as Out[n]
seri.plot()


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x65f9df0>

In [9]:
# A plain Python list literal, echoed back unchanged
[1,2,3,4,5]


Out[9]:
[1, 2, 3, 4, 5]

In [10]:
[1,2,3,4,5].plot()  # intentional failure: a plain list has no .plot(), so this raises AttributeError


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-560c1ce3ddc6> in <module>()
----> 1 [1,2,3,4,5].plot()

AttributeError: 'list' object has no attribute 'plot'

In [11]:
# Wrapping the same list in a pandas Series provides the .plot() method
# that the plain list lacks
test = pd.Series([1,2,3,4,5])
test.plot()


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x70d59f0>

In [12]:
# Switch matplotlib to its 'ggplot' style sheet, then plot the same Series again
matplotlib.style.use('ggplot')
seri.plot()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x70e7530>

In [13]:
# Ten rows by three columns of uniform random floats, echoed for inspection
data_set = np.random.random_sample(size=(10, 3))
data_set


Out[13]:
array([[ 0.45048953,  0.2543953 ,  0.26103822],
       [ 0.38501321,  0.22375197,  0.79336317],
       [ 0.21419154,  0.56913687,  0.19629819],
       [ 0.02953066,  0.39383257,  0.643879  ],
       [ 0.24609666,  0.88385841,  0.55234135],
       [ 0.16365343,  0.67599191,  0.51914196],
       [ 0.16463227,  0.21324924,  0.99610886],
       [ 0.41283281,  0.22482757,  0.12229986],
       [ 0.54530067,  0.6910843 ,  0.47454587],
       [ 0.29693342,  0.3546303 ,  0.71297849]])

In [14]:
# Build a DataFrame from the 10x3 array, labelling the columns A, B and C,
# then echo the resulting table
df = pd.DataFrame(data=data_set, columns=list('ABC'))
df


Out[14]:
A B C
0 0.450490 0.254395 0.261038
1 0.385013 0.223752 0.793363
2 0.214192 0.569137 0.196298
3 0.029531 0.393833 0.643879
4 0.246097 0.883858 0.552341
5 0.163653 0.675992 0.519142
6 0.164632 0.213249 0.996109
7 0.412833 0.224828 0.122300
8 0.545301 0.691084 0.474546
9 0.296933 0.354630 0.712978

In [15]:
# Plot the DataFrame with pandas' default plot settings (no `kind` given)
df.plot()


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x63fe3f0>

In [16]:
# Same DataFrame drawn as a bar chart by passing kind='bar'
df.plot(kind='bar')


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x4b068d0>

In [17]:
# kind='barh' draws the bar chart horizontally
df.plot(kind='barh')


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x6c55270>

In [18]:
# Area chart of the same DataFrame, leaving every other option at its default
df.plot(kind='area')


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x70fee10>

In [19]:
# Area chart again, this time with stacked=False so the columns are not
# stacked on top of one another
df.plot(kind='area',stacked=False)


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x718aef0>

In [ ]: