In [ ]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Apply seaborn's "darkgrid" style to the plots that follow.
sns.set_style("darkgrid")

Edit-run-repeat: Stopping the cycle of pain

1. No more docs-guessing


In [ ]:
# NOTE(review): `index` is not a keyword accepted by pd.read_csv, so this call
# raises a TypeError. Presumably this is the deliberate "guess the argument
# name" example for this section -- confirm before changing it.
df = pd.read_csv("../data/water-pumps.csv", index=0)
df.head(1)

In [ ]:
# A trailing `?` asks IPython to display the help/docstring for the object.
pd.read_csv?

In [ ]:
# Load the water-pump records, using the first CSV column as the index.
# `parse_dates` must be a bool, a list, or a dict; a bare column-name string
# is rejected by pandas, so the column to parse is given as a one-item list.
df = pd.read_csv("../data/water-pumps.csv",
                 index_col=0,
                 parse_dates=["date_recorded"])
df.head(1)

2. No more copy pasta

Don't repeat yourself.


In [ ]:
# Four near-identical plotting stanzas: select a column, filter it, draw a
# kernel density estimate, show the figure. Only the column name and the
# filter differ between them.

# Density of construction_year, leaving out rows where the value is 0.
plot_data = df['construction_year']
plot_data = plot_data[plot_data != 0]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# Density of longitude, leaving out rows where the value is 0.
plot_data = df['longitude']
plot_data = plot_data[plot_data != 0]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# Density of amount_tsh, keeping only values above 20000.
plot_data = df['amount_tsh']
plot_data = plot_data[plot_data > 20000]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# NOTE(review): this filter is the same `> 20000` threshold used for
# amount_tsh above. If latitude is stored in degrees, no value can exceed
# 20000 and the selection is empty -- presumably a copy-paste slip kept to
# illustrate the section's point; confirm before relying on this plot.
plot_data = df['latitude']
plot_data = plot_data[plot_data > 20000]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

In [ ]:
def kde_plot(dataframe, variable, upper=0.0, lower=0.0, bw=0.1):
    """Draw a kernel density estimate of one column, limited to a range.

    Only values strictly greater than ``lower`` and strictly less than
    ``upper`` are passed to ``sns.kdeplot``; the figure is then shown with
    ``plt.show()``.

    Parameters
    ----------
    dataframe : object indexable by ``variable``
        ``dataframe[variable]`` supplies the values to plot.
    variable : key
        Column (key) to select from ``dataframe``.
    upper : exclusive upper bound, default 0.0
    lower : exclusive lower bound, default 0.0
    bw : value forwarded to ``sns.kdeplot`` as its ``bw`` argument.

    Notes
    -----
    Both bounds default to 0.0, so a call that leaves both unset selects
    nothing (no value is simultaneously > 0.0 and < 0.0).
    """
    values = dataframe[variable]

    # Build the two halves of the open interval separately for readability.
    above_lower = values > lower
    below_upper = values < upper

    sns.kdeplot(values[above_lower & below_upper], bw=bw)
    plt.show()

In [ ]:
# Same plot for two columns; only the column name and upper bound change.
kde_plot(dataframe=df, variable='construction_year', upper=2016)
kde_plot(dataframe=df, variable='longitude', upper=42)

In [ ]:
# Restrict amount_tsh to the open interval (20000, 400000) before plotting.
kde_plot(dataframe=df, variable='amount_tsh', lower=20000, upper=400000)

3. No more guess-and-check

Use pdb, the Python debugger, to debug inside a notebook. Key commands are:

  • p: Evaluate and print Python code
  • w: Where in the stack trace am I?
  • u: Go up a frame in the stack trace.
  • d: Go down a frame in the stack trace.
  • c: Continue execution
  • q: Stop execution

There are two ways to activate the debugger:

  • %pdb: toggles whether or not the debugger will be called automatically on an exception
  • %debug: run on its own after an error, opens the debugger post-mortem at the point where the most recent exception was raised

In [ ]:
# NOTE(review): called with the default numeric bounds on the date column;
# presumably this is meant to raise so there is an error to debug -- confirm.
kde_plot(df, 'date_recorded')

In [ ]:
# "1" turns pdb on, "0" turns pdb off
# While it is on, IPython opens the debugger automatically when a cell raises.
%pdb 1

# NOTE(review): presumably expected to raise here so the debugger opens -- confirm.
kde_plot(df, 'date_recorded')

In [ ]:
# Turn automatic debugging back off so later exceptions do not drop into pdb.
%pdb 0

4. No more "Restart & Run All"

assert is the poor man's unit test: it stops execution if the condition is False and continues silently if it is True.


In [ ]:
def gimme_the_mean(series):
    """Return the arithmetic mean of ``series`` as computed by ``np.mean``."""
    average = np.mean(series)
    return average


# Smoke test: the mean of ten zeros is exactly zero.
assert gimme_the_mean([0.0] * 10) == 0.0

In [ ]:
# One million draws from a normal distribution with mean 0.0 and std 1.0.
data = np.random.normal(0.0, 1.0, 1000000)
# NOTE(review): exact float equality on a sample mean will almost never hold,
# so this assert is expected to fail -- presumably that is the point of the
# demo; confirm. No random seed is set, so `data` differs between runs.
assert gimme_the_mean(data) == 0.0

In [ ]:
# Compare with a tolerance (1 decimal place) rather than exact equality.
np.testing.assert_almost_equal(actual=gimme_the_mean(data), desired=0.0, decimal=1)


In [ ]: