In [14]:
from ggplot import ggplot
import ggplot as gg
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import pandas as pd
import qgrid
import seaborn as sns
%matplotlib inline
# qgrid setup call made once, before any grid is shown
# (presumably installs its notebook assets -- confirm against the qgrid version in use)
qgrid.nbinstall()

# Read the SNOTEL CSV: column names are taken from row index 7, 'Date' is
# parsed into datetimes, and every row containing a missing value is discarded
df = (
    pd.read_csv('mthood_snotel.csv', header=7, parse_dates=['Date'])
    .dropna()
)
qgrid.show_grid(df, remote_js=True)
In [15]:
# Begin with basic histograms of the key dimensions; first up is accumulated
# precipitation, drawn on a 15x7 figure
sns.set_context(rc={"figure.figsize": (15, 7)})
sns.distplot(
    df['Precipitation Accumulation (in)'],
    bins=50,
)
Out[15]:
In [16]:
# Same kind of histogram for snow water equivalent, with twice as many bins
sns.distplot(
    df['Snow Water Equivalent (in)'],
    bins=100,
)
Out[16]:
In [17]:
# Overlay shaded kernel density estimates of the maximum, minimum and average
# air temperature (drawn in that order)
for temperature_column in (
    'Air Temperature Maximum (degF)',
    'Air Temperature Minimum (degF)',
    'Air Temperature Average (degF)',
):
    sns.kdeplot(df[temperature_column], shade=True)
In [18]:
# Seaborn combined with IPython's interact widgets allows quick pairwise
# comparison of dimensions. The Date column is left out of the candidates.
subset = df.drop('Date', axis=1)
dims = list(subset.columns)


@interact
def linear_comp(x=dims, y=dims):
    """Draw a joint plot of column ``x`` against column ``y`` of ``subset``.

    Both arguments default to the list of column names, which ``interact``
    turns into a selection widget per argument.
    """
    sns.jointplot(x=x, y=y, data=subset, size=9)
In [19]:
# How closely do the minimum and maximum temperatures follow one another?
# (x is the daily-file minimum column, y the maximum column)
sns.lmplot(
    x="Air Temperature Minimum (degF)",
    y="Air Temperature Maximum (degF)",
    data=df,
    size=10,
)
Out[19]:
In [20]:
# Use pandas' time-series support to look at monthly trends.
# The Date column has to become the index before rows can be bucketed by time.
indexed = df.set_index('Date')

# Average each month-start ('MS') bucket with an explicit mean. A bare
# `.resample('MS')` only produced a frame of means on old pandas, where the
# aggregation was implicit; later releases return a Resampler object that has
# no `.dropna()`. Grouping on a month-start Grouper states the aggregation
# outright and does not depend on which resample API is installed.
# Buckets that end up with missing values are dropped afterwards.
resampled = indexed.groupby(pd.Grouper(freq='MS')).mean().dropna()
qgrid.show_grid(resampled, remote_js=True)
In [21]:
# Exploratory pass: pandas' built-in plotting gives a quick look at every
# column of the monthly frame. The figure size is set through Seaborn's
# plotting context beforehand.
sns.set_context(
    rc={"figure.figsize": (18, 9)},
)
resampled.plot()
Out[21]:
In [22]:
# ggplot handles time series well, so use it to look at the long-term trend.
# The index is copied into a 'Date' column so the aesthetic mapping below can
# refer to it by name.
resampled['Date'] = resampled.index
swe_aes = gg.aes(x='Date', y='Snow Water Equivalent (in)')
ggplot(swe_aes, data=resampled) + gg.geom_line() + gg.stat_smooth()
Out[22]:
In [23]:
# Same line-plus-smoother view, this time for the average air temperature
temperature_aes = gg.aes(x='Date', y='Air Temperature Average (degF)')
ggplot(temperature_aes, data=resampled) + gg.geom_line() + gg.stat_smooth()
Out[23]:
In [24]:
# Monthly statistics need a column that holds only the month of each row
resampled['Month'] = resampled.index.month
monthly_grouped = resampled.groupby(by='Month').mean()

# Matplotlib provides context managers for applying a style temporarily;
# try the 'bmh' style for this figure.
# NOTE(review): the flattened export lost indentation -- both statements below
# are assumed to belong to the `with` body.
with plt.style.context('bmh'):
    sns.set_context(
        rc={"figure.figsize": (18, 9)},
    )
    monthly_grouped.plot()
In [25]:
# Offer every column of the monthly frame except 'Date' as a plot dimension.
# The daily comparison widget drops the datetime column before building its
# choices; do the same here so it cannot be picked as a joint-plot axis.
# The comprehension is a no-op when no 'Date' column is present.
res_dims = [column for column in resampled.columns if column != 'Date']


@interact
def res_comp(x=res_dims, y=res_dims):
    """Draw a joint plot of column ``x`` against column ``y`` of ``resampled``.

    Both arguments default to the list of selectable column names, which
    ``interact`` turns into a selection widget per argument.
    """
    sns.jointplot(x, y, data=resampled, size=9)
In [26]:
# Show the per-month means in a grid. remote_js=True is passed to match the
# other show_grid calls in this notebook
# (NOTE(review): presumably required for the grid to render on nbviewer -- confirm).
qgrid.show_grid(monthly_grouped, remote_js=True)
In [27]:
# Back to ggplot: copy the month index into a 'Month' column so it can be
# mapped to the x axis, then draw snow water equivalent as a line
monthly_grouped['Month'] = monthly_grouped.index
monthly_aes = gg.aes(x='Month', y='Snow Water Equivalent (in)')
ggplot(monthly_aes, data=monthly_grouped) + gg.geom_line()
Out[27]:
In [28]:
# Facet by month to compare monthly distributions: one density panel of the
# average air temperature per value of 'Month'
temperature_density = (
    ggplot(gg.aes(x='Air Temperature Average (degF)'), data=resampled)
    + gg.geom_density(alpha=0.25)
)
(
    temperature_density
    + gg.facet_wrap('Month')
    + gg.labs("Air Temperature Average (degF)", "Freq")
)
Out[28]:
In [29]:
# The same faceted density view, for snow water equivalent
swe_density = (
    ggplot(gg.aes(x='Snow Water Equivalent (in)'), data=resampled)
    + gg.geom_density(alpha=0.25)
)
(
    swe_density
    + gg.facet_wrap('Month')
    + gg.labs("Snow Water Equivalent (in)", "Freq")
)
Out[29]:
In [30]:
# Seaborn has powerful faceting of its own. Revisit the monthly average
# temperatures, this time as a FacetGrid with one row (and one hue) per month.
months = resampled['Month'].unique()
months.sort()  # sorts in place so rows and hues are laid out in ascending month order

# (A bare `months` expression used to sit here; mid-cell expressions are never
# displayed, so it had no effect and has been dropped.)
g = sns.FacetGrid(
    resampled,
    row="Month",
    hue="Month",
    palette="deep",
    size=1.8,
    aspect=4,
    hue_order=months,
    row_order=months,
)
g.map(sns.distplot, 'Air Temperature Average (degF)');
In [31]:
# Pairwise scatter plots of the main monthly measures, coloured by month.
# Only the four listed columns are kept and the index is replaced with a
# plain integer range. (A discarded mid-cell `pair_cols.head()` call was
# removed: its result was never displayed.)
pair_cols = resampled[[
    'Snow Water Equivalent (in)',
    'Precipitation Accumulation (in)',
    'Air Temperature Average (degF)',
    'Month',
]].reset_index(drop=True)

pair = sns.PairGrid(pair_cols, hue="Month", palette="GnBu_d")
pair.map(plt.scatter)
pair.add_legend()
In [37]:
from IPython.core.display import HTML

# Load the notebook's custom stylesheet and hand it to IPython's HTML display.
# When running locally, read "styles/custom.css" instead;
# the path below is the one used for nbviewer.
# The context manager closes the file handle, which the previous
# `open(...).read()` one-liner left open.
with open("custom.css", "r") as css_file:
    styles = css_file.read()
HTML(styles)
Out[37]: