Pandas Visualization


In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [7]:
df1 = pd.read_csv('df1.csv', index_col = 0)
df1.head()


Out[7]:
A B C D
2000-01-01 1.339091 -0.163643 -0.646443 1.041233
2000-01-02 -0.774984 0.137034 -0.882716 -2.253382
2000-01-03 -0.921037 -0.482943 -0.417100 0.478638
2000-01-04 -1.738808 -0.072973 0.056517 0.015085
2000-01-05 -0.905980 1.778576 0.381918 0.291436

In [8]:
df2 = pd.read_csv('df2.csv')
df2.head()


Out[8]:
a b c d
0 0.039762 0.218517 0.103423 0.957904
1 0.937288 0.041567 0.899125 0.977680
2 0.780504 0.008948 0.557808 0.797510
3 0.672717 0.247870 0.264071 0.444358
4 0.053829 0.520124 0.552264 0.190008

Historgram


In [12]:
df1['A'].hist(bins= 30)


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b716605c0>

In [13]:
df1['A'].plot(kind ='hist', bins = 30)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b70909828>

In [15]:
df1['A'].plot.hist(bins = 30)


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b7192de10>

In [20]:
df2


Out[20]:
a b c d
0 0.039762 0.218517 0.103423 0.957904
1 0.937288 0.041567 0.899125 0.977680
2 0.780504 0.008948 0.557808 0.797510
3 0.672717 0.247870 0.264071 0.444358
4 0.053829 0.520124 0.552264 0.190008
5 0.286043 0.593465 0.907307 0.637898
6 0.430436 0.166230 0.469383 0.497701
7 0.312296 0.502823 0.806609 0.850519
8 0.187765 0.997075 0.895955 0.530390
9 0.908162 0.232726 0.414138 0.432007

Area Plot


In [45]:
df2.plot.area(alpha = 0.4)


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b7504a630>

Bar Plot


In [21]:
# index will be the x-axis
df2.plot.bar()


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b72d91860>

In [22]:
df2.plot.bar(stacked = True)


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b72ef5c18>

In [23]:
df1['A'].plot.hist(bins = 50)


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b730f47b8>

Line Plot


In [27]:
df1.plot.line(x = df1.index, y = 'B', figsize=(12,3), lw = 0.5)


Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b7339b9b0>

Scatter Plot


In [30]:
df1.plot.scatter(x='A', y='B', c = 'C', cmap= 'coolwarm')


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74626b38>

In [34]:
df1.plot.scatter(x='A', y='B', s = df1['C'] * 10) # s = size


E:\Anaconda3\lib\site-packages\matplotlib\collections.py:877: RuntimeWarning: invalid value encountered in sqrt
  scale = np.sqrt(self._sizes) * dpi / 72.0 * self._factor
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74859cf8>

Box Plot


In [35]:
df2.plot.box()


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74934da0>

In [38]:
df = pd.DataFrame(np.random.randn(1000,2), columns=['a', 'b'])
df.head(10)


Out[38]:
a b
0 0.081154 0.337768
1 1.214901 0.345866
2 0.108947 -0.696851
3 0.904440 -0.578257
4 2.100105 -0.808473
5 0.010445 0.925250
6 0.212086 0.214145
7 1.861743 -0.177592
8 -0.802073 1.093624
9 1.386362 0.325175

Hexbin Plot


In [39]:
df.plot.hexbin(x = 'a', y = 'b')


Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74b1add8>

In [41]:
df.plot.hexbin(x = 'a', y = 'b', gridsize = 25, cmap = 'coolwarm')


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74d806d8>

Kernel Density Estimation Plot


In [42]:
df2['a'].plot.kde()


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74eb1320>

In [43]:
df2['a'].plot.density()


Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b74b1fef0>

In [44]:
df2.plot.kde()


Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b75018f60>

Pandas Visualization continued.... 2


In [46]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [47]:
mcdon = pd.read_csv('mcdonalds.csv', index_col = 'Date', parse_dates = True)
mcdon.head()


Out[47]:
Adj. Close Adj. Volume
Date
1970-01-02 0.209761 2825604.0
1970-01-05 0.213316 2210449.5
1970-01-06 0.214501 1951168.5
1970-01-07 0.213316 2728768.5
1970-01-08 0.213316 2242404.0

In [49]:
# Note the Adj. Volume is way scaled-up than Adj. Close, hence the plot is not so nice
mcdon.plot()


Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b75209cf8>

In [50]:
mcdon['Adj. Close'].plot()


Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b762fa438>

In [52]:
mcdon['Adj. Volume'].plot(figsize=(12,4))


Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b76401048>

In [55]:
mcdon['Adj. Close'].plot(xlim = ['2007-01-01', '2009-01-01']) #xlimit


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b76787518>

In [59]:
#xlimit and ylimit , [list] or (tuple) both accepted
mcdon['Adj. Close'].plot(xlim = ['2007-01-01', '2009-01-01'], ylim = (20,50))


Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b77f67470>

In [60]:
mcdon['Adj. Close'].plot(xlim = ['2007-01-01', '2009-01-01'], ylim = (20,50), ls = '--', c='red')


Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b77f6d6a0>

In [61]:
import matplotlib.dates as pltdates

In [62]:
mcdon['Adj. Close'].plot(xlim = ['2007-01-01', '2009-01-01'], ylim = (20,50))


Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b780c98d0>

In [63]:
idx = mcdon.index
idx


Out[63]:
DatetimeIndex(['1970-01-02', '1970-01-05', '1970-01-06', '1970-01-07',
               '1970-01-08', '1970-01-09', '1970-01-12', '1970-01-13',
               '1970-01-14', '1970-01-15',
               ...
               '2017-06-30', '2017-07-03', '2017-07-05', '2017-07-06',
               '2017-07-07', '2017-07-10', '2017-07-11', '2017-07-12',
               '2017-07-13', '2017-07-14'],
              dtype='datetime64[ns]', name='Date', length=11993, freq=None)

In [64]:
idx = mcdon.loc['2007-01-01':'2007-05-01'].index
idx


Out[64]:
DatetimeIndex(['2007-01-03', '2007-01-04', '2007-01-05', '2007-01-08',
               '2007-01-09', '2007-01-10', '2007-01-11', '2007-01-12',
               '2007-01-16', '2007-01-17', '2007-01-18', '2007-01-19',
               '2007-01-22', '2007-01-23', '2007-01-24', '2007-01-25',
               '2007-01-26', '2007-01-29', '2007-01-30', '2007-01-31',
               '2007-02-01', '2007-02-02', '2007-02-05', '2007-02-06',
               '2007-02-07', '2007-02-08', '2007-02-09', '2007-02-12',
               '2007-02-13', '2007-02-14', '2007-02-15', '2007-02-16',
               '2007-02-20', '2007-02-21', '2007-02-22', '2007-02-23',
               '2007-02-26', '2007-02-27', '2007-02-28', '2007-03-01',
               '2007-03-02', '2007-03-05', '2007-03-06', '2007-03-07',
               '2007-03-08', '2007-03-09', '2007-03-12', '2007-03-13',
               '2007-03-14', '2007-03-15', '2007-03-16', '2007-03-19',
               '2007-03-20', '2007-03-21', '2007-03-22', '2007-03-23',
               '2007-03-26', '2007-03-27', '2007-03-28', '2007-03-29',
               '2007-03-30', '2007-04-02', '2007-04-03', '2007-04-04',
               '2007-04-05', '2007-04-09', '2007-04-10', '2007-04-11',
               '2007-04-12', '2007-04-13', '2007-04-16', '2007-04-17',
               '2007-04-18', '2007-04-19', '2007-04-20', '2007-04-23',
               '2007-04-24', '2007-04-25', '2007-04-26', '2007-04-27',
               '2007-04-30', '2007-05-01'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [65]:
stock = mcdon.loc['2007-01-01':'2007-05-01']['Adj. Close']
stock


Out[65]:
Date
2007-01-03    31.662754
2007-01-04    31.424580
2007-01-05    31.424580
2007-01-08    31.547276
2007-01-09    31.605015
2007-01-10    31.944233
2007-01-11    32.124668
2007-01-12    31.915364
2007-01-16    32.167973
2007-01-17    32.377277
2007-01-18    32.182407
2007-01-19    32.341190
2007-01-22    32.009190
2007-01-23    32.370060
2007-01-24    31.872059
2007-01-25    31.034840
2007-01-26    30.984318
2007-01-29    31.200840
2007-01-30    31.590580
2007-01-31    32.009190
2007-02-01    32.103016
2007-02-02    32.146320
2007-02-05    32.139103
2007-02-06    32.312321
2007-02-07    32.276234
2007-02-08    32.009190
2007-02-09    32.160755
2007-02-12    32.283451
2007-02-13    32.406147
2007-02-14    32.492756
                ...    
2007-03-20    31.980320
2007-03-21    32.196842
2007-03-22    32.110233
2007-03-23    32.514408
2007-03-26    32.564930
2007-03-27    32.507191
2007-03-28    32.355625
2007-03-29    32.384495
2007-03-30    32.514408
2007-04-02    32.348408
2007-04-03    32.665974
2007-04-04    32.644321
2007-04-05    33.041279
2007-04-09    33.553714
2007-04-10    33.423801
2007-04-11    33.517627
2007-04-12    33.654758
2007-04-13    34.383716
2007-04-16    34.715717
2007-04-17    35.372500
2007-04-18    35.278674
2007-04-19    35.206500
2007-04-20    34.903369
2007-04-23    35.105456
2007-04-24    34.982761
2007-04-25    35.098239
2007-04-26    35.531283
2007-04-27    35.329196
2007-04-30    34.845630
2007-05-01    35.466327
Name: Adj. Close, Length: 82, dtype: float64

In [72]:
fig, ax = plt.subplots()
ax.plot_date(idx, stock,'-')
ax.yaxis.grid(True)
ax.xaxis.grid(True)
fig.autofmt_xdate()  # automatically format the x-date axis
plt.tight_layout()


Date formating using major_locator, major_formatter, minor_locator, minor_locator


In [81]:
fig, ax = plt.subplots()
ax.plot_date(idx, stock,'-')
ax.xaxis.grid(True)
ax.yaxis.grid(True)

# http://strftime.org/
ax.xaxis.set_major_locator(pltdates.MonthLocator())
ax.xaxis.set_major_formatter(pltdates.DateFormatter('\n\n%b-%Y'))

ax.xaxis.set_minor_locator(pltdates.WeekdayLocator(byweekday=0))
ax.xaxis.set_minor_formatter(pltdates.DateFormatter('%d'))  # try %a

fig.autofmt_xdate()  # automatically format the x-date axis
plt.tight_layout()



In [ ]: