In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("darkgrid")
In [ ]:
# NOTE(review): `index` is not a `pd.read_csv` keyword argument (the parameter
# is `index_col`), so this call is expected to raise. Presumably it is left in
# on purpose: the following cells look up the signature with `pd.read_csv?`
# and retry with `index_col=0`.
df = pd.read_csv("../data/water-pumps.csv", index=0)
df.head(1)
In [ ]:
pd.read_csv?
In [ ]:
# Load the water-pump data, using the first CSV column as the row index.
# `parse_dates` must be a bool, a list, or a dict; a bare column name is
# rejected by pandas, so the column to parse is passed as a one-element list.
df = pd.read_csv("../data/water-pumps.csv",
                 index_col=0,
                 parse_dates=["date_recorded"])
# Show the first row as a quick check that the file loaded as expected.
df.head(1)
In [ ]:
# Kernel density estimate of construction_year, skipping rows equal to 0
# (presumably a placeholder for "unknown" -- confirm against the data).
plot_data = df['construction_year']
plot_data = plot_data[plot_data != 0]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# Same plot for longitude, again skipping rows equal to 0.
plot_data = df['longitude']
plot_data = plot_data[plot_data != 0]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# amount_tsh: only the upper tail (values above 20000) is plotted.
plot_data = df['amount_tsh']
plot_data = plot_data[plot_data > 20000]
sns.kdeplot(plot_data, bw=0.1)
plt.show()

# Latitude: a `> 20000` threshold (as used for amount_tsh) can never match a
# latitude, so it would leave nothing to plot. Filter out zeros instead,
# mirroring the longitude stanza.
# NOTE(review): confirm that 0 is the right "missing" marker for latitude.
plot_data = df['latitude']
plot_data = plot_data[plot_data != 0]
sns.kdeplot(plot_data, bw=0.1)
plt.show()
In [ ]:
def kde_plot(dataframe, variable, upper=None, lower=0.0, bw=0.1):
    """Plot a kernel density estimate of one column, limited to an open interval.

    Parameters
    ----------
    dataframe : object indexable by column label (e.g. a pandas DataFrame)
        Source of the data; ``dataframe[variable]`` is the series plotted.
    variable : column label
        Which column to plot.
    upper : number or None, optional
        Exclusive upper bound. ``None`` (the default) applies no upper bound.
        The default used to be ``0.0``, which combined with ``lower=0.0``
        selected nothing unless the caller overrode it.
    lower : number, optional
        Exclusive lower bound; defaults to ``0.0``.
    bw : optional
        Forwarded unchanged to ``sns.kdeplot`` as its ``bw`` argument.
        NOTE(review): newer seaborn releases prefer ``bw_method`` /
        ``bw_adjust`` -- confirm against the installed version.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    plot_data = dataframe[variable]

    # Build the row mask step by step so the upper bound can be optional.
    keep = plot_data > lower
    if upper is not None:
        keep = keep & (plot_data < upper)

    sns.kdeplot(plot_data[keep], bw=bw)
    plt.show()
In [ ]:
# Construction years above the default lower bound and below 2016.
kde_plot(dataframe=df, variable='construction_year', upper=2016)
# Longitudes above the default lower bound and below 42.
kde_plot(dataframe=df, variable='longitude', upper=42)
In [ ]:
# Upper tail of amount_tsh only: values strictly between 20000 and 400000.
kde_plot(dataframe=df, variable='amount_tsh', upper=400000, lower=20000)
Use pdb, the Python debugger, to debug inside a notebook. Key commands are:

- `p`: Evaluate and print Python code
- `w`: Where in the stack trace am I?
- `u`: Go up a frame in the stack trace.
- `d`: Go down a frame in the stack trace.
- `c`: Continue execution
- `q`: Stop execution

There are two ways to activate the debugger:

- `%pdb`: toggles whether or not the debugger will be called on an exception
- `%debug`: enters the debugger at the line where this magic is
In [ ]:
# NOTE(review): this call is expected to raise -- `date_recorded` is not a
# numeric column, yet kde_plot compares it against its numeric default `lower`
# bound. Presumably intentional: the next cell repeats the call with the
# debugger switched on.
kde_plot(df, 'date_recorded')
In [ ]:
# "1" turns pdb on, "0" turns pdb off
# With automatic pdb enabled, the debugger is called if the call below raises.
%pdb 1
kde_plot(df, 'date_recorded')
In [ ]:
# Turn automatic pdb back off so that later exceptions no longer call the
# debugger.
%pdb 0
In [ ]:
def gimme_the_mean(series):
    """Return the mean of ``series`` as computed by ``np.mean``."""
    average = np.mean(series)
    return average


# Sanity check: ten zeros average to exactly zero.
assert gimme_the_mean(10 * [0.0]) == 0.0
In [ ]:
# One million draws from a normal distribution with mean 0.0 and standard
# deviation 1.0.
data = np.random.normal(0.0, 1.0, 1000000)
# NOTE(review): exact equality with 0.0 is almost certain to fail, because the
# mean of a random sample is only approximately 0. Presumably intentional: the
# next cell repeats the check with a tolerance.
assert gimme_the_mean(data) == 0.0
In [ ]:
# Compare with a tolerance instead of exact equality: decimal=1 only requires
# the sample mean to agree with 0.0 to about one decimal place.
np.testing.assert_almost_equal(actual=gimme_the_mean(data), desired=0.0, decimal=1)
In [ ]: