Data Viz - Intro



In [ ]:
# Name a more iconic trio... I'll wait…
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [ ]:
import warnings
warnings.filterwarnings("ignore")

In [ ]:
# sample array from normal distribution
# Seed NumPy's global generator first so that "Restart Kernel -> Run All"
# reproduces the same samples (and therefore the same figures) on every run.
SEED = 42
np.random.seed(SEED)
x = np.random.normal(size=100)

In [ ]:
# plot histogram with Matplotlib
# The trailing semicolon hides the (counts, bins, patches) repr without
# assigning to `_`, which IPython reserves for "last cell output".
plt.hist(x);

Seaborn


In [ ]:
# Import Seaborn and apply its "darkgrid" style.
# Notice that this also changes the look of plain Matplotlib figures drawn afterwards.
import seaborn as sns
sns.set(style="darkgrid")
# select the "talk" plotting context, with the font scale left at 1
sns.set_context("talk", font_scale=1)

Notebook Modes

Use the `%matplotlib` magic to switch between `notebook` (interactive) and `inline` (static) mode. Use `%matplotlib tk` to open plots in new external windows.


In [ ]:
# Switch to the interactive backend: figures get pan/zoom controls.
# NOTE(review): the `notebook` backend may not be available in every Jupyter
# front end - confirm it works in the one used for this course.
%matplotlib notebook

In [ ]:
# draw 100 fresh normal samples, plot their distribution, then render the figure
sample = np.random.normal(size=100)
sns.distplot(sample)
plt.show()

In [ ]:
# switch back to the inline backend: figures are embedded as static images
%matplotlib inline

In [ ]:
# plot histogram with seaborn (kernel density estimate switched off)
sns.distplot(np.random.normal(size=100), kde=False) # other flags to toggle: kde, rug

In [ ]:
# violin plot of 100 normal samples (sns.boxplot is the box-plot counterpart)
# Pass the sample by keyword: seaborn 0.11 only warns about a bare positional
# argument (a warning the global "ignore" filter would hide) and from 0.12 on
# treats it as `data` instead of `x`, so the explicit form keeps the plot the
# same across versions.
sns.violinplot(x=np.random.normal(size=100))

Dataset Distribution


In [ ]:
# load the iris example dataset through Seaborn and preview its first rows
# NOTE(review): sns.load_dataset may need network access on first use - confirm
iris_df = sns.load_dataset('iris')
iris_df.head()

In [ ]:
# example pairplot for the iris dataset
# notice we choose which column drives the hue (color) variation
sns.pairplot(iris_df, hue='species')

Categorical Data


In [ ]:
# load the titanic example dataset through Seaborn and preview its first rows
# NOTE(review): sns.load_dataset may need network access on first use - confirm
titanic_df = sns.load_dataset('titanic')
titanic_df.head()

In [ ]:
# example factorplot/catplot for the titanic dataset:
# counts of each 'alive' value, one panel column per deck and one panel row per sex
sns.catplot(data=titanic_df, x='alive', col='deck', row='sex', 
            kind='count')

In [ ]:
# example factorplot/catplot for the titanic dataset: age per deck as a bar plot
# (other kinds to try: bar, violin, swarm, box)
sns.catplot(data=titanic_df, y='age', col='deck', kind='bar')

Time-Series Data


In [ ]:
# build a dummy time series: 500 consecutive dates paired with a random walk
df = pd.DataFrame({
    "time": pd.date_range("2017-1-1", periods=500),
    "value": np.cumsum(np.random.randn(500)),
})
# draw it as a seaborn line plot, then let Matplotlib auto-format the date
# tick labels on the x axis
g = sns.relplot(data=df, x="time", y="value", kind="line")
g.fig.autofmt_xdate()

Pandas Plotting


In [ ]:
# plot straight from the Pandas DataFrame, using DataFrame.plot() with its defaults
iris_df.plot()

In [ ]:
# compute and plot average attribute value for each species
# Use the built-in groupby mean rather than .agg(np.mean): recent pandas
# versions deprecate passing NumPy reducers to agg, and the global "ignore"
# warnings filter in this notebook would hide that notice.
iris_df.groupby('species').mean().plot(kind='bar')

Plotly


In [ ]:
# install plotly and cufflinks in current virtual-env

In [ ]:
pip install plotly
pip install cufflinks

In [ ]:
# Import libraries and set offline mode
from plotly.offline import init_notebook_mode, plot, iplot
import cufflinks as cf
# initialise plotly's notebook rendering, then switch cufflinks to offline mode too
init_notebook_mode(connected=True)
cf.go_offline(connected=True)
#enable_mpl_offline()

In [ ]:
# Plot data via plotly: histogram of the sample array `x` created near the top
# of the notebook
import plotly.graph_objs as go

# the figure is passed to iplot as a plain list holding one Histogram trace
data = [go.Histogram(x=x)]
iplot(data)

In [ ]:
# plot Pandas data via plotly (the DataFrame.iplot accessor comes from cufflinks)
iris_df.iplot()

In [ ]:
# Export cufflinks plot: ask iplot for the figure object instead of displaying it...
fig = iris_df.iplot(asFigure=True)
# ...then hand it to plotly's offline `plot`, targeting the file "iris.html"
plot(fig, filename="iris.html")

Transfer Matplotlib Figure to Plotly


In [ ]:
import plotly.tools as tls

# create a Matplotlib figure with a single axes and draw a histogram of
# 100 normal samples on it
mpl_fig = plt.figure()
ax = mpl_fig.add_subplot(111)
ax.hist(np.random.normal(size=100))

# convert the Matplotlib figure to a plotly figure and display it
# NOTE(review): mpl_to_plotly may not support every Matplotlib version - confirm
plotly_fig = tls.mpl_to_plotly(ax.figure)
iplot(plotly_fig)

Animation


In [ ]:
from matplotlib import animation

# the animation is shown in the interactive notebook backend
%matplotlib notebook

# 5x4 figure at 100 dpi with fixed axis limits, so the view does not change
# while the line data is replaced frame by frame
fig, ax = plt.subplots(dpi=100, figsize=(5, 4))
ax.set_xlim(0, 2)
ax.set_ylim(-2, 2)

# start with an empty line artist; its data is filled in by the frame callback
line, = ax.plot([], [], lw=2)
#epoch_text = ax.text(0, 0, "Epoch 0")

def animate(i, line):
    """Draw frame ``i`` of a travelling sine wave onto ``line``.

    Parameters
    ----------
    i : int
        Frame number; each frame shifts the wave by 0.01 along the x axis.
    line : object
        Artist exposing ``set_data(x, y)``; it receives the new curve.

    Returns
    -------
    tuple
        ``(line,)`` - the modified artists as an iterable, which is the
        return shape ``FuncAnimation`` expects from its frame callback
        (and requires when blitting is enabled).
    """
    xs = np.linspace(0, 2, 1000)
    ys = np.sin(2 * np.pi * (xs - 0.01 * i))
    #epoch_text.set_text("Epoch {}".format(i, cost))
    line.set_data(xs, ys)
    return (line,)

# Run `animate` for 100 frames with interval=10, repeating when finished;
# `line` is forwarded to the callback as its extra argument via fargs.
# The result is bound to `ani` so the animation object stays referenced.
ani = animation.FuncAnimation(fig, animate, 100, interval=10, repeat=True,
                             fargs=[line])

Jupyter Magic


In [ ]:
# `!` hands the rest of the line to the system shell; here it lists the current directory
!ls

In [ ]:
# time the expression `x+1` (x is the sample array from the top of the notebook);
# -r 1 limits the measurement to a single repeat
%timeit -r 1 x+1

In [ ]:
%%html
<h1>Table</h1>
<table style="width:100%">
  <tr>
    <th>Firstname</th>
    <th>Lastname</th> 
    <th>Age</th>
  </tr>
  <tr>
    <td>Jill</td>
    <td>Smith</td> 
    <td>50</td>
  </tr>
  <tr>
    <td>Eve</td>
    <td>Jackson</td> 
    <td>94</td>
  </tr>
</table>

In [ ]:
# Debug: pause inside a loop with pdb
import pdb

counter = 0
for i in range(10):
    # sample local to inspect from the debugger prompt
    foo = 2
    # Opens the interactive debugger on every iteration, so this cell waits
    # for input at each of the 10 stops and will stall a "Run All".
    pdb.set_trace()
    counter += i

In [ ]:
# Debug in Python 3.7+: same loop as the pdb example, using the built-in breakpoint()
b = 0
for i in range(10):
    # sample local to inspect from the debugger prompt
    a = 2
    # pauses on every iteration and waits for input, so it will stall a "Run All"
    breakpoint()
    b += i

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
# tongue-in-cheek pie chart: two wedges (70 / 30) with explicit labels and colors
fig, ax = plt.subplots(figsize=(6, 3))
ax.pie(
    [70, 30],
    labels=['No', 'Also no, but in red'],
    colors=['Blue', 'Red'],
)
ax.set_title("Should I use piecharts?")
plt.show()

In [ ]: