Pandas Visualization


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
# see the pre-defined styles provided.
plt.style.available


Out[2]:
['seaborn-muted',
 'seaborn-dark-palette',
 'seaborn-notebook',
 'seaborn-dark',
 'seaborn-paper',
 'seaborn-deep',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-talk',
 'seaborn-white',
 'seaborn',
 'classic',
 'dark_background',
 'seaborn-pastel',
 'seaborn-whitegrid',
 '_classic_test',
 'ggplot',
 'grayscale',
 'seaborn-darkgrid',
 'seaborn-poster',
 'bmh',
 'fivethirtyeight',
 'seaborn-ticks']

In [3]:
# use the 'seaborn-colorblind' style
plt.style.use('seaborn-colorblind')

DataFrame.plot


In [4]:
np.random.seed(123)

df = pd.DataFrame({'A': np.random.randn(365).cumsum(0), 
                   'B': np.random.randn(365).cumsum(0) + 20,
                   'C': np.random.randn(365).cumsum(0) - 20}, 
                  index=pd.date_range('1/1/2017', periods=365))
df.head()


Out[4]:
A B C
2017-01-01 -1.085631 20.059291 -20.230904
2017-01-02 -0.088285 21.803332 -16.659325
2017-01-03 0.194693 20.835588 -17.055481
2017-01-04 -1.311601 21.255156 -17.093802
2017-01-05 -1.890202 21.462083 -19.518638

In [5]:
df.plot(); # add a semi-colon to the end of the plotting call to suppress unwanted output


We can select which plot we want to use by passing it into the 'kind' parameter.


In [6]:
df.plot('A','B', kind = 'scatter');


You can also choose the plot kind by using the DataFrame.plot.kind methods instead of providing the kind keyword argument.

kind :

  • 'line' : line plot (default)
  • 'bar' : vertical bar plot
  • 'barh' : horizontal bar plot
  • 'hist' : histogram
  • 'box' : boxplot
  • 'kde' : Kernel Density Estimation plot
  • 'density' : same as 'kde'
  • 'area' : area plot
  • 'pie' : pie plot
  • 'scatter' : scatter plot
  • 'hexbin' : hexbin plot

In [7]:
# create a scatter plot of columns 'A' and 'C', with changing color (c) and size (s) based on column 'B'
df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f59b5101f60>

In [8]:
ax = df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')
ax.set_aspect('equal')



In [9]:
df.plot.box();



In [10]:
df.plot.hist(alpha=0.7);


Kernel density estimation plots are useful for deriving a smooth continuous function from a given sample.


In [11]:
df.plot.kde();


pandas.tools.plotting


In [12]:
iris = pd.read_csv('iris.csv')
iris.head()


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-12-6a3c5393696a> in <module>()
----> 1 iris = pd.read_csv('iris.csv')
      2 iris.head()

/usr/local/lib/python3.5/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654 
--> 655         return _read(filepath_or_buffer, kwds)
    656 
    657     parser_f.__name__ = name

/usr/local/lib/python3.5/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    403 
    404     # Create the parser.
--> 405     parser = TextFileReader(filepath_or_buffer, **kwds)
    406 
    407     if chunksize or iterator:

/usr/local/lib/python3.5/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    760             self.options['has_index_names'] = kwds['has_index_names']
    761 
--> 762         self._make_engine(self.engine)
    763 
    764     def close(self):

/usr/local/lib/python3.5/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
    964     def _make_engine(self, engine='c'):
    965         if engine == 'c':
--> 966             self._engine = CParserWrapper(self.f, **self.options)
    967         else:
    968             if engine == 'python':

/usr/local/lib/python3.5/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1580         kwds['allow_leading_cols'] = self.index_col is not False
   1581 
-> 1582         self._reader = parsers.TextReader(src, **kwds)
   1583 
   1584         # XXX

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__ (pandas/_libs/parsers.c:4209)()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source (pandas/_libs/parsers.c:8873)()

FileNotFoundError: File b'iris.csv' does not exist

In [ ]:
pd.tools.plotting.scatter_matrix(iris);

In [ ]:
plt.figure()
pd.tools.plotting.parallel_coordinates(iris, 'Name');

Seaborn


In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib notebook

In [ ]:
np.random.seed(1234)

v1 = pd.Series(np.random.normal(0,10,1000), name='v1')
v2 = pd.Series(2*v1 + np.random.normal(60,15,1000), name='v2')

In [ ]:
plt.figure()
plt.hist(v1, alpha=0.7, bins=np.arange(-50,150,5), label='v1');
plt.hist(v2, alpha=0.7, bins=np.arange(-50,150,5), label='v2');
plt.legend();

In [ ]:
# plot a kernel density estimation over a stacked barchart
plt.figure()
plt.hist([v1, v2], histtype='barstacked', normed=True);
v3 = np.concatenate((v1,v2))
sns.kdeplot(v3);

In [ ]:
plt.figure()
# we can pass keyword arguments for each individual component of the plot
sns.distplot(v3, hist_kws={'color': 'Teal'}, kde_kws={'color': 'Navy'});

In [ ]:
sns.jointplot(v1, v2, alpha=0.4);

In [ ]:
grid = sns.jointplot(v1, v2, alpha=0.4);
grid.ax_joint.set_aspect('equal')

In [ ]:
sns.jointplot(v1, v2, kind='hex');

In [ ]:
# set the seaborn style for all the following plots
sns.set_style('white')

sns.jointplot(v1, v2, kind='kde', space=0);

In [ ]:
iris = pd.read_csv('iris.csv')
iris.head()

In [ ]:
sns.pairplot(iris, hue='Name', diag_kind='kde', size=2);

In [ ]:
plt.figure(figsize=(8,6))
plt.subplot(121)
sns.swarmplot('Name', 'PetalLength', data=iris);
plt.subplot(122)
sns.violinplot('Name', 'PetalLength', data=iris);