In [30]:
# Imports from __future__ in case we're running Python 2
from __future__ import division, print_function
from __future__ import absolute_import, unicode_literals
#pandas
import pandas as pd
# Our numerical workhorses
import numpy as np
import scipy.integrate
# Import pyplot for plotting
import matplotlib.pyplot as plt
# Seaborn, useful for graphics
import seaborn as sns
#pretty tables with plotly
import plotly
plotly.offline.init_notebook_mode() # run at the start of every notebook
# Import Bokeh modules for interactive plotting
import bokeh.io
import bokeh.mpl
import bokeh.plotting
# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline
# This enables SVG graphics inline. There is a bug, so uncomment if it works.
# %config InlineBackend.figure_formats = {'svg',}
# This enables high resolution PNGs. SVG is preferred, but has problems
# rendering vertical and horizontal lines
%config InlineBackend.figure_formats = {'png', 'retina'}
import beeswarm as bs
# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2,
'axes.labelsize': 18,
'axes.titlesize': 18,
'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)
# Set up Bokeh for inline viewing
bokeh.io.output_notebook()
# Suppress future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
In [31]:
def pretty_activity_plot(ax, selector, selection, col, df, xlabel= 'time (hr)', ylabel= \
'activity (sec/min)', lw= 0.25, color= None):
"""
"""
#Make sure selection input is iterable
if type(selection) in [str, int, float]:
selection= [selection]
#plot time traces of column col for each fish
for sel in selection:
df_plot= df[df[selector]==sel] #pull out record of interest
#generate plot
if color is None:
ax.plot(df_plot.zeit, df_plot[col], '-', lw= lw)
else:
ax.plot(df_plot.zeit, df_plot[col], '-', lw= lw, color= color)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_xlim((df_plot.zeit.min(), df_plot.zeit.max()))
ax.set_ylim((0, ax.get_ylim()[1]))
# Overlay night shading
ax.fill_between(df_plot.zeit, 0.0, ax.get_ylim()[1],
where=~df_plot.light, color='gray', alpha=0.3, zorder=0)
return ax
In [32]:
# Load the DataFrame from Tutorial 2a.
df = pd.read_csv('../input/130315_10_minute_intervals.csv')
df_dense = pd.read_csv('../input/130315_1_minute_intervals.csv')
In [33]:
# To plot all the wild type fish, we pass all columns except zeit as y var.
fishes = df.fish[df.genotype=='wt'].unique()
fig, ax = plt.subplots()
ax = pretty_activity_plot(ax, 'fish', fishes, 'activity', df, lw=0.5,
ylabel='activity (sec/10 min)')
In [34]:
fig, ax= plt.subplots(nrows= 3, ncols= 1, sharex= True, sharey=True, figsize= (8,8))
gtypes= ['wt', 'het', 'mut']
for i, gtype in enumerate(gtypes):
fishes= df.fish[df.genotype==gtype].unique()
x_label= y_label= ''
#place the ylabel on the y-axis of the second plot
if i == 1:
ylabel= 'activity (sec/10min)'
#place the xlabel on the x-axis of the third plot (bottom)
elif i== 2:
x_label= 'time (hr)'
ax[i]= pretty_activity_plot(ax[i], 'fish', fishes, 'activity', df, lw= 0.5,\
ylabel= y_label, xlabel= x_label)
ax[i].text(0.175, 0.815, gtype, fontsize= 12, transform= ax[i].transAxes)
In [35]:
#compute mean activity over fish of a given genotype at each timepoint
mean_activity= df.groupby(('zeit', 'genotype', 'light'))['activity'].mean()
mean_activity.head()
Out[35]:
In [36]:
#the above is a multi index df. reset the index to make tidy
mean_activity= mean_activity.reset_index()
mean_activity.head()
Out[36]:
In [40]:
#plot the result using muted color paletter to show up better
with sns.color_palette('muted', 3):
fig, ax= plt.subplots()
for gtype in gtypes:
ax= pretty_activity_plot(ax, 'genotype', gtype, 'activity', \
mean_activity, lw= 1)
plt.legend(('wt', 'het', 'mut'), loc= 'upper left')
In [41]:
#get a view into the third night
df_n2= df[(df['day']==2) & (df['light']==False)]
mean_night_act= df_n2.groupby(('fish', 'genotype'))['activity'].mean().reset_index()
list_of_acts= [mean_night_act.activity[mean_night_act.genotype== 'wt'],
mean_night_act.activity[mean_night_act.genotype=='het'],
mean_night_act.activity[mean_night_act.genotype=='mut']]
#generate beeswarm plot
_ = bs.beeswarm(list_of_acts, labels=['wt', 'het', 'mut'])
In [42]:
import scipy.special
def student_t(mu, x):
"""
Returns the student t distribution for values of mu with data x
"""
#Number of data
n= len(x)
#data mean
x_mean= x.mean()
#r^2
r2= ((x-x_mean)**2).sum()/n
t= (1.0 + (mu-x_mean)**2/r2)**(-n/2.0)
return -scipy.special.beta(-0.5, n/2.0)/2.0/np.pi/np.sqrt(r2)*t
In [43]:
mu= np.linspace(0.0, 60.0, 300)
#compute posterior for each sample
post_wt= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='wt'])
post_het= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='het'])
post_mt= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='mut'])
plt.plot(mu, post_wt)
plt.plot(mu, post_het)
plt.plot(mu, post_mt)
plt.xlabel(r'$\mu$(sec/10min)')
plt.ylabel(r'$P(\mu|D,I)$')
lg= plt.legend(('wt', 'het', 'mut'), loc= 'upper right')
In [46]:
import numpy as np
def bout_lengths(s, wake_threshold= 1e-5, rest= True):
"""
Given an activity series, returns length of rest bouts/ length
of active_bouts if rest is True/False
First return value is array of rest bout lengths and second return value
is array of active bout lenghts
The first/last bouts are not included bc we don't know where they
begin/end. The exception is if the fish is always asleep or awake
"""
#get boolean for activity
active= (s > wake_threshold)
#convert to np array
if type(active) is pd.core.series.Series:
active= active.values
#if there isn't even one switch, return an array in the right format
if np.all(active):
if rest:
return np.array([])
else:
return np.array([len(s)])
elif np.all(~active):
if rest:
return np.array([len(s)])
else:
return np.array([])
#if there's at least one switch, figure out where the switches
#happen
#find indices where state switches
#switches[i] is the index of first timepoint in it switched to
switches= np.where(np.diff(active))[0] +1
#compute bout lengths from switches again using np.diff
bouts= np.diff(switches)
if active[0]: #if first entry was active
if rest:
return bouts[::2] #take every second entry of bouts
else:
return bouts[1::2] #take every second entry of bouts starting from index 1 entry
else:
if rest:
return bouts[1::2]
else:
return bouts[::2]
In [86]:
#get indixes for third night
inds= (~df_dense.light) & (df_dense.day==2)
#group the df
df_gb= df_dense[inds].groupby(('fish', 'genotype'))['activity']
#compute rest bout lengths
df_rest_bout= df_gb.apply(bout_lengths, rest= True).reset_index()
In [87]:
df_rest_bout.head()
Out[87]:
In [88]:
#Rename activity column
df_rest_bout= df_rest_bout.rename(columns={'activity': 'rest_bout_lengths'})
In [89]:
#Compute the mean of mean bout lengths
df_rest_bout['mean_rest_bout_length']= \
df_rest_bout['rest_bout_lengths'].map(lambda x: x.mean())
#take a look
df_rest_bout.head()
Out[89]:
In [90]:
#replace nans with zeros to prevent the error seen above
df_rest_bout['mean_rest_bout_length']\
= df_rest_bout['mean_rest_bout_length'].fillna(0)
In [91]:
rest_bouts= []
for gtype in gtypes:
inds= df_rest_bout.genotype == gtype
rest_bouts.append(df_rest_bout[inds]['mean_rest_bout_length'].values)
#beeswarm plots
_= bs.beeswarm(rest_bouts, labels= ['wt','het','mut'])
plt.grid(axis= 'x')
plt.ylabel('Mean rest bout length (min)')
Out[91]:
In [92]:
#set up values of mu to consider
mu= np.linspace(0.0, 4.0, 300)
#compute posterior for each sample
post_wt= student_t(mu, rest_bouts[0])
post_het= student_t(mu, rest_bouts[1])
post_mut= student_t(mu, rest_bouts[2])
plt.plot(mu, post_wt)
plt.plot(mu, post_het)
plt.plot(mu, post_mut)
plt.margins(y= 0.02)
plt.xlabel(r'mean of mean rest bout length, $\mu_\mathrm{mm}$ (min)')
plt.ylabel(r'$P(\mu_\mathrm{mm}|D,I)$')
plt.legend(('wt', 'het', 'mut'), loc='upper right')
Out[92]:
In [94]:
#group the rest bout df by genotype
df_rest_bout_gb= df_rest_bout.groupby('genotype')['rest_bout_lengths']
#concatenate arrays of each genotype
wt_bouts= np.concatenate(df_rest_bout_gb.get_group('wt').values)
het_bouts= np.concatenate(df_rest_bout_gb.get_group('het').values)
mut_bouts= np.concatenate(df_rest_bout_gb.get_group('mut').values)
In [97]:
#set up bins
bins= np.arange(0.5, wt_bouts.max()+1.5)
#Plot histograms
_= plt.hist(wt_bouts, bins= bins, align= 'mid', normed= True)
# Labels
plt.xlabel('rest bout length')
plt.ylabel('normed count')
Out[97]:
In [105]:
def posterior_exp(lam, tau):
"""
Posterior probability distribution for exponentially
distributed waiting times tau
"""
n= len(tau)
a= tau.sum()
log_dist= n*np.log(a) - (n+1)*np.log(lam) - a/lam -\
scipy.special.gammaln(n)
return np.exp(log_dist)
In [107]:
#set up values of mu
lam= np.linspace(1, 3, 600)
#compute posterior
post_wt= posterior_exp(lam, wt_bouts)
post_het= posterior_exp(lam, het_bouts)
post_mut= posterior_exp(lam, mut_bouts)
# Plot the result
plt.plot(lam, post_wt)
plt.plot(lam, post_het)
plt.plot(lam, post_mut)
plt.margins(y=0.02)
plt.xlabel(r'mean rest bout length, $\mu_\mathrm{bout}$ (min)')
plt.ylabel(r'$P(\mu_\mathrm{bout}|D,I)$')
plt.legend(('wt', 'het', 'mut'), loc='upper right')
Out[107]: