Mt. Hood Snotel Data

Portland Data Science Meetup, November 2014


In [14]:
from ggplot import ggplot
import ggplot as gg
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import pandas as pd
import qgrid
import seaborn as sns

%matplotlib inline
qgrid.nbinstall()

# Pull in the CSV, drop NAs
df = pd.read_csv('mthood_snotel.csv', header=7, parse_dates=['Date']).dropna()
qgrid.show_grid(df, remote_js=True)



In [15]:
# Let's start with some basic histograms of our key dimensions
sns.set_context(rc={"figure.figsize": (15, 7)})
sns.distplot(df['Precipitation Accumulation (in)'], bins=50)


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x111854650>

In [16]:
sns.distplot(df['Snow Water Equivalent (in)'], bins=100)


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x111972710>

In [17]:
sns.kdeplot(df['Air Temperature Maximum (degF)'], shade=True);
sns.kdeplot(df['Air Temperature Minimum (degF)'], shade=True);
sns.kdeplot(df['Air Temperature Average (degF)'], shade=True);



In [18]:
# We can use Seaborn + IPython interact widgets to do quick comparison of dimensions
subset = df.drop(['Date'], axis=1)
dims = subset.columns.tolist()
@interact 
def linear_comp(x=dims, y=dims):
    sns.jointplot(x, y, data=subset, size=9)



In [19]:
# How closely to average and Maximum temps follow one another?
sns.lmplot("Air Temperature Minimum (degF)", "Air Temperature Maximum (degF)", df, size=10)


Out[19]:
<seaborn.axisgrid.FacetGrid at 0x11287b650>

In [20]:
# Now to use some Pandas timeseries magic to look at monthly trends

# First we need to set the Date column as the Index
indexed = df.set_index('Date')
resampled = indexed.resample('MS').dropna()
qgrid.show_grid(resampled, remote_js=True)



In [21]:
# Exploratory: Pandas plotting should let us take a nice quick look at the data
# Going to use Seaborn to set our plot context
sns.set_context(rc={"figure.figsize": (18, 9)})
resampled.plot()


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x1156aee50>

In [22]:
# ggplot is quite good at handling timeseries. Let's use it to look at long-term trends
resampled['Date'] = resampled.index
(ggplot(gg.aes(x='Date', y='Snow Water Equivalent (in)'), data=resampled) 
 + gg.geom_line()
 + gg.stat_smooth())


Out[22]:
<ggplot: (290809233)>

In [23]:
# What about temperatures?
(ggplot(gg.aes(x='Date', y='Air Temperature Average (degF)'), data=resampled) 
 + gg.geom_line()
 + gg.stat_smooth())


Out[23]:
<ggplot: (286913285)>

In [24]:
# I want to look at monthly statistics, so need to create a column that's just months
resampled['Month'] = resampled.index.month
monthly_grouped = resampled.groupby('Month').mean()
# Matplotlib now has context managers to set styles. Let's try the bmh style
with plt.style.context('bmh'):
    sns.set_context(rc={"figure.figsize": (18, 9)})
    monthly_grouped.plot()



In [25]:
res_dims = resampled.columns.tolist()
@interact 
def res_comp(x=res_dims, y=res_dims):
    sns.jointplot(x, y, data=resampled, size=9)



In [26]:
qgrid.show_grid(monthly_grouped)



In [27]:
# Back to ggplot
monthly_grouped['Month'] = monthly_grouped.index
ggplot(gg.aes(x='Month', y='Snow Water Equivalent (in)'),
       data=monthly_grouped) + gg.geom_line()


Out[27]:
<repr(<ggplot.ggplot.ggplot at 0x115348450>) failed: KeyError: 0>

In [28]:
# Let's do some faceting to look at some monthly statistics
(ggplot(gg.aes(x='Air Temperature Average (degF)'), data=resampled)
 + gg.geom_density(alpha=0.25)
 + gg.facet_wrap('Month')
 + gg.labs("Air Temperature Average (degF)", "Freq"))


Out[28]:
<ggplot: (288846193)>

In [29]:
(ggplot(gg.aes(x='Snow Water Equivalent (in)'), data=resampled)
 + gg.geom_density(alpha=0.25)
 + gg.facet_wrap('Month')
 + gg.labs("Snow Water Equivalent (in)", "Freq"))


Out[29]:
<ggplot: (292700801)>

In [30]:
# Seaborn also has very powerful faceting mechanisms. Let's look at the monthly average temperatures
# again, but in a FacetGrid
months = resampled['Month'].unique()
months.sort()
months
g = sns.FacetGrid(resampled, row="Month", hue="Month", palette="deep",
                  size=1.8, aspect=4, hue_order=months, row_order=months)
g.map(sns.distplot, 'Air Temperature Average (degF)');



In [31]:
pair_cols = resampled[['Snow Water Equivalent (in)', 'Precipitation Accumulation (in)', 
                       'Air Temperature Average (degF)', 'Month']].reset_index(drop=True)
pair_cols.head()
pair = sns.PairGrid(pair_cols, hue="Month", palette="GnBu_d")
pair.map(plt.scatter)
pair.add_legend()



In [37]:
from IPython.core.display import HTML

# Use the following if running locally:
# styles = open("styles/custom.css", "r").read()

# This is for nbviewer:
styles = open("custom.css", "r").read()

HTML(styles)


Out[37]: