notebook.community

Edit and run



In [30]:

    
# Imports from __future__ in case we're running Python 2
from __future__ import division, print_function
from __future__ import absolute_import, unicode_literals

#pandas
import pandas as pd

# Our numerical workhorses
import numpy as np
import scipy.integrate

# Import pyplot for plotting
import matplotlib.pyplot as plt

# Seaborn, useful for graphics
import seaborn as sns

#pretty tables with plotly
import plotly
plotly.offline.init_notebook_mode() # run at the start of every notebook


# Import Bokeh modules for interactive plotting
import bokeh.io
import bokeh.mpl
import bokeh.plotting

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables SVG graphics inline.  There is a bug, so uncomment if it works.
# %config InlineBackend.figure_formats = {'svg',}

# This enables high resolution PNGs. SVG is preferred, but has problems
# rendering vertical and horizontal lines
%config InlineBackend.figure_formats = {'png', 'retina'}


import beeswarm as bs

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

# Suppress future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)









    











    




    


    

    
        
        BokehJS successfully loaded.



In [31]:

    
def pretty_activity_plot(ax, selector, selection, col, df, xlabel= 'time (hr)', ylabel= \
                        'activity (sec/min)', lw= 0.25, color= None):
    """
    """
    
    #Make sure selection input is iterable
    if type(selection) in [str, int, float]:
        selection= [selection]
        
    #plot time traces of column col for each fish
    for sel in selection:
        df_plot= df[df[selector]==sel] #pull out record of interest
        
        #generate plot
        if color is None:
            ax.plot(df_plot.zeit, df_plot[col], '-', lw= lw)
        else:
            ax.plot(df_plot.zeit, df_plot[col], '-', lw= lw, color= color)
        
        
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    
    ax.set_xlim((df_plot.zeit.min(), df_plot.zeit.max()))
    ax.set_ylim((0, ax.get_ylim()[1]))
    
    # Overlay night shading
    ax.fill_between(df_plot.zeit, 0.0, ax.get_ylim()[1], 
                    where=~df_plot.light, color='gray', alpha=0.3, zorder=0)    
    
    
    return ax



In [32]:

    
# Load the DataFrame from Tutorial 2a.
df = pd.read_csv('../input/130315_10_minute_intervals.csv')
df_dense = pd.read_csv('../input/130315_1_minute_intervals.csv')



In [33]:

    
# To plot all the wild type fish, we pass all columns except zeit as y var.
fishes = df.fish[df.genotype=='wt'].unique()
fig, ax = plt.subplots()
ax = pretty_activity_plot(ax, 'fish', fishes, 'activity', df, lw=0.5, 
                          ylabel='activity (sec/10 min)')



In [34]:

    
fig, ax= plt.subplots(nrows= 3, ncols= 1, sharex= True, sharey=True, figsize= (8,8))

gtypes= ['wt', 'het', 'mut']

for i, gtype in enumerate(gtypes):
    fishes= df.fish[df.genotype==gtype].unique()
    x_label= y_label= ''
    
    #place the ylabel on the y-axis of the second plot
    if i == 1:
        ylabel= 'activity (sec/10min)'
    #place the xlabel on the x-axis of the third plot (bottom)
    elif i== 2:
        x_label= 'time (hr)'
    
    ax[i]= pretty_activity_plot(ax[i], 'fish', fishes, 'activity', df, lw= 0.5,\
                                ylabel= y_label, xlabel= x_label)
    ax[i].text(0.175, 0.815, gtype, fontsize= 12, transform= ax[i].transAxes)



In [35]:

    
#compute mean activity over fish of a given genotype at each timepoint
mean_activity= df.groupby(('zeit', 'genotype', 'light'))['activity'].mean()

mean_activity.head()









    Out[35]:





zeit   genotype  light
9.819  het       True      7.900000
       mut       True      9.359091
       wt        True     16.170588
9.986  het       True      4.891176
       mut       True      3.618182
Name: activity, dtype: float64



In [36]:

    
#the above is a multi index df. reset the index to make tidy
mean_activity= mean_activity.reset_index()

mean_activity.head()



In [40]:

    
#plot the result using muted color paletter to show up better
with sns.color_palette('muted', 3):
    fig, ax= plt.subplots()
    for gtype in gtypes:
        ax= pretty_activity_plot(ax, 'genotype', gtype, 'activity', \
                                mean_activity, lw= 1)
    plt.legend(('wt', 'het', 'mut'), loc= 'upper left')









    



/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/matplotlib/__init__.py:892: UserWarning:

axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.

/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning:

axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.



In [41]:

    
#get a view into the third night
df_n2= df[(df['day']==2) & (df['light']==False)]

mean_night_act= df_n2.groupby(('fish', 'genotype'))['activity'].mean().reset_index()
   
list_of_acts= [mean_night_act.activity[mean_night_act.genotype== 'wt'],
              mean_night_act.activity[mean_night_act.genotype=='het'],
              mean_night_act.activity[mean_night_act.genotype=='mut']]
               
#generate beeswarm plot
_ = bs.beeswarm(list_of_acts, labels=['wt', 'het', 'mut'])



In [42]:

    
import scipy.special

def student_t(mu, x):
    """
    Returns the student t distribution for values of mu with data x
    """
    
    #Number of data
    n= len(x)
    
    #data mean
    x_mean= x.mean()
    
    #r^2
    r2= ((x-x_mean)**2).sum()/n
    
    t= (1.0 + (mu-x_mean)**2/r2)**(-n/2.0)
    
    return -scipy.special.beta(-0.5, n/2.0)/2.0/np.pi/np.sqrt(r2)*t



In [43]:

    
mu= np.linspace(0.0, 60.0, 300)

#compute posterior for each sample
post_wt= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='wt'])
post_het= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='het'])
post_mt= student_t(mu, mean_night_act.activity[mean_night_act.genotype=='mut'])

plt.plot(mu, post_wt)
plt.plot(mu, post_het)
plt.plot(mu, post_mt)

plt.xlabel(r'$\mu$(sec/10min)')
plt.ylabel(r'$P(\mu|D,I)$')
lg= plt.legend(('wt', 'het', 'mut'), loc= 'upper right')

Bouts



In [46]:

    
import numpy as np
def bout_lengths(s, wake_threshold= 1e-5, rest= True):
    """
    Given an activity series, returns length of rest bouts/ length
    of active_bouts if rest is True/False
    
    First return value is array of rest bout lengths and second return value 
    is array of active bout lenghts
    
    The first/last bouts are not included bc we don't know where they 
    begin/end. The exception is if the fish is always asleep or awake
    """
    
    #get boolean for activity
    active= (s > wake_threshold)
    
    #convert to np array
    if type(active) is pd.core.series.Series:
        active= active.values
    
    #if there isn't even one switch, return an array in the right format
    if np.all(active):
        if rest: 
            return np.array([])
        else:
            return np.array([len(s)])
        
    elif np.all(~active):
        if rest:
            return np.array([len(s)])
        else:
            return np.array([])
        
    #if there's at least one switch, figure out where the switches
    #happen
    
    #find indices where state switches
    #switches[i] is the index of first timepoint in it switched to
    switches= np.where(np.diff(active))[0] +1
    
    #compute bout lengths from switches again using np.diff
    bouts= np.diff(switches)
    
    if active[0]: #if first entry was active
        if rest:
            return bouts[::2] #take every second entry of bouts
        else: 
            return bouts[1::2] #take every second entry of bouts starting from index 1 entry 
    else:
        if rest:
            return bouts[1::2]
        else:
            return bouts[::2]



In [86]:

    
#get indixes for third night
inds= (~df_dense.light) & (df_dense.day==2)

#group the df
df_gb= df_dense[inds].groupby(('fish', 'genotype'))['activity']

#compute rest bout lengths
df_rest_bout= df_gb.apply(bout_lengths, rest= True).reset_index()



In [87]:

    
df_rest_bout.head()









    Out[87]:






  
    
      
      fish
      genotype
      activity
    
  
  
    
      0
      FISH1
      het
      [1, 3, 3, 1, 5, 1, 2, 3, 4, 2, 3, 1, 4, 1, 1, ...
    
    
      1
      FISH10
      het
      [3, 4, 11, 1, 2, 2, 1, 1, 4, 3, 1, 1, 5, 2, 1,...
    
    
      2
      FISH11
      mut
      [1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 7, ...
    
    
      3
      FISH12
      mut
      [1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 5, 4, 1, 1, 1, ...
    
    
      4
      FISH13
      mut
      [15, 4, 1, 3, 7, 3, 1, 1, 2, 5, 3, 2, 2, 1, 2,...



In [88]:

    
#Rename activity column
df_rest_bout= df_rest_bout.rename(columns={'activity': 'rest_bout_lengths'})



In [89]:

    
#Compute the mean of mean bout lengths
df_rest_bout['mean_rest_bout_length']= \
        df_rest_bout['rest_bout_lengths'].map(lambda x: x.mean())
    
#take a look
df_rest_bout.head()









    



/Users/davidangeles/anaconda/envs/py35/lib/python3.5/site-packages/numpy/core/_methods.py:59: RuntimeWarning:

Mean of empty slice.







    Out[89]:






  
    
      
      fish
      genotype
      rest_bout_lengths
      mean_rest_bout_length
    
  
  
    
      0
      FISH1
      het
      [1, 3, 3, 1, 5, 1, 2, 3, 4, 2, 3, 1, 4, 1, 1, ...
      2.486486
    
    
      1
      FISH10
      het
      [3, 4, 11, 1, 2, 2, 1, 1, 4, 3, 1, 1, 5, 2, 1,...
      1.700855
    
    
      2
      FISH11
      mut
      [1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 7, ...
      2.255556
    
    
      3
      FISH12
      mut
      [1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 5, 4, 1, 1, 1, ...
      1.529412
    
    
      4
      FISH13
      mut
      [15, 4, 1, 3, 7, 3, 1, 1, 2, 5, 3, 2, 2, 1, 2,...
      2.373626



In [90]:

    
#replace nans with zeros to prevent the error seen above
df_rest_bout['mean_rest_bout_length']\
    = df_rest_bout['mean_rest_bout_length'].fillna(0)



In [91]:

    
rest_bouts= []
for gtype in gtypes:
    inds= df_rest_bout.genotype == gtype
    rest_bouts.append(df_rest_bout[inds]['mean_rest_bout_length'].values)

#beeswarm plots
_= bs.beeswarm(rest_bouts, labels= ['wt','het','mut'])
plt.grid(axis= 'x')
plt.ylabel('Mean rest bout length (min)')









    Out[91]:





<matplotlib.text.Text at 0x10f9c5668>



In [92]:

    
#set up values of mu to consider
mu= np.linspace(0.0, 4.0, 300)

#compute posterior for each sample
post_wt= student_t(mu, rest_bouts[0])
post_het= student_t(mu, rest_bouts[1])
post_mut= student_t(mu, rest_bouts[2])



plt.plot(mu, post_wt)
plt.plot(mu, post_het)
plt.plot(mu, post_mut)
plt.margins(y= 0.02)
plt.xlabel(r'mean of mean rest bout length, $\mu_\mathrm{mm}$ (min)')
plt.ylabel(r'$P(\mu_\mathrm{mm}|D,I)$')
plt.legend(('wt', 'het', 'mut'), loc='upper right')









    Out[92]:





<matplotlib.legend.Legend at 0x10ea6e550>

Mean of pooled rest bout length

The above was a mean of means -- this should be normally distributed but pooling the rest bout lengths of all fish should yield an exp. decaying distribution



In [94]:

    
#group the rest bout df by genotype
df_rest_bout_gb= df_rest_bout.groupby('genotype')['rest_bout_lengths']

#concatenate arrays of each genotype
wt_bouts= np.concatenate(df_rest_bout_gb.get_group('wt').values)
het_bouts= np.concatenate(df_rest_bout_gb.get_group('het').values)
mut_bouts= np.concatenate(df_rest_bout_gb.get_group('mut').values)



In [97]:

    
#set up bins
bins= np.arange(0.5, wt_bouts.max()+1.5)

#Plot histograms
_= plt.hist(wt_bouts, bins= bins, align= 'mid', normed= True)

# Labels
plt.xlabel('rest bout length')
plt.ylabel('normed count')









    Out[97]:





<matplotlib.text.Text at 0x10fa478d0>

The above histogram looks exp. distributed. Such an exponential distribution can be fit by using

$$P(\lambda| D, I)= \frac{a^n}{(n-1)!\lambda^{n+1}} e^{-\frac{a}{\lambda}}$$

$$a= \sum\limits_{{i\in D}}{\tau_i}$$



In [105]:

    
def posterior_exp(lam, tau):
    """
    Posterior probability distribution for exponentially
    distributed waiting times tau
    """
    n= len(tau)
    a= tau.sum()
    log_dist= n*np.log(a) - (n+1)*np.log(lam) - a/lam -\
                scipy.special.gammaln(n)
    
    return np.exp(log_dist)



In [107]:

    
#set up values of mu
lam= np.linspace(1, 3, 600)

#compute posterior
post_wt= posterior_exp(lam, wt_bouts)
post_het= posterior_exp(lam, het_bouts)
post_mut= posterior_exp(lam, mut_bouts)


# Plot the result
plt.plot(lam, post_wt)
plt.plot(lam, post_het)
plt.plot(lam, post_mut)
plt.margins(y=0.02)
plt.xlabel(r'mean rest bout length, $\mu_\mathrm{bout}$ (min)')
plt.ylabel(r'$P(\mu_\mathrm{bout}|D,I)$')
plt.legend(('wt', 'het', 'mut'), loc='upper right')









    Out[107]:





<matplotlib.legend.Legend at 0x1103ecba8>

	zeit	genotype	light	activity
0	9.819	het	True	7.900000
1	9.819	mut	True	9.359091
2	9.819	wt	True	16.170588
3	9.986	het	True	4.891176
4	9.986	mut	True	3.618182

	fish	genotype	activity
0	FISH1	het	[1, 3, 3, 1, 5, 1, 2, 3, 4, 2, 3, 1, 4, 1, 1, ...
1	FISH10	het	[3, 4, 11, 1, 2, 2, 1, 1, 4, 3, 1, 1, 5, 2, 1,...
2	FISH11	mut	[1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 7, ...
3	FISH12	mut	[1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 5, 4, 1, 1, 1, ...
4	FISH13	mut	[15, 4, 1, 3, 7, 3, 1, 1, 2, 5, 3, 2, 2, 1, 2,...

	fish	genotype	rest_bout_lengths	mean_rest_bout_length
0	FISH1	het	[1, 3, 3, 1, 5, 1, 2, 3, 4, 2, 3, 1, 4, 1, 1, ...	2.486486
1	FISH10	het	[3, 4, 11, 1, 2, 2, 1, 1, 4, 3, 1, 1, 5, 2, 1,...	1.700855
2	FISH11	mut	[1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 7, ...	2.255556
3	FISH12	mut	[1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 5, 4, 1, 1, 1, ...	1.529412
4	FISH13	mut	[15, 4, 1, 3, 7, 3, 1, 1, 2, 5, 3, 2, 2, 1, 2,...	2.373626