In [28]:
# Imports from __future__ in case we're running Python 2
from __future__ import division, print_function
from __future__ import absolute_import, unicode_literals
#pandas
import pandas as pd
# Our numerical workhorses
import numpy as np
import scipy.integrate
# Import pyplot for plotting
import matplotlib.pyplot as plt
# Seaborn, useful for graphics
import seaborn as sns
#pretty tables with plotly
import plotly
plotly.offline.init_notebook_mode() # run at the start of every notebook
# Import Bokeh modules for interactive plotting
import bokeh.io
import bokeh.mpl
import bokeh.plotting
# Notebook display configuration: inline matplotlib, figure format, Seaborn
# styling and Bokeh notebook output.
# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline
# This enables SVG graphics inline. There is a bug, so uncomment if it works.
# %config InlineBackend.figure_formats = {'svg',}
# This enables high resolution PNGs. SVG is preferred, but has problems
# rendering vertical and horizontal lines
%config InlineBackend.figure_formats = {'png', 'retina'}
# JB's favorite Seaborn settings for notebooks.
# The same rc overrides are handed to both set_context and set_style below.
rc = {'lines.linewidth': 2,
'axes.labelsize': 18,
'axes.titlesize': 18,
# NOTE(review): hex color is written without a leading '#' -- confirm the
# installed matplotlib accepts this form.
'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)
# Set up Bokeh for inline viewing
bokeh.io.output_notebook()
In [9]:
import beeswarm as bs
In [11]:
# Location of the frog tongue adhesion data set, relative to this notebook
fname = '../input/frog_tongue_adhesion.csv'
# Parse the CSV into a DataFrame, treating '#' as the comment character
df = pd.read_csv(filepath_or_buffer=fname, comment='#')
In [13]:
# Store the impact force column as 64-bit floats
force_col = 'impact force (mN)'
df[force_col] = df[force_col].astype(np.float64)
In [15]:
# Boolean mask flagging rows whose impact force is greater than 1000
inds = df['impact force (mN)'].gt(1000)
# Keep only the flagged rows, then display them as the cell output
df_big_force = df.loc[inds]
df_big_force
Out[15]:
In [17]:
# Short replacements for the two most verbose column headers
rename_dict = {
    'trial number': 'trial',
    'contact area with mucus / contact area without mucus': 'ca_ratio',
}
# Apply those renames, then shorten the impact force column to `impf`
df = (
    df.rename(columns=rename_dict)
      .rename(columns={'impact force (mN)': 'impf'})
)
In [19]:
# Plot every impact force as a marker against its position in the DataFrame
plt.plot(df.impf, 'o')
plt.margins(0.02)
plt.xlabel('order in DataFrame')
# Axis-label typo fixed ('impace' -> 'impact')
plt.ylabel('impact force (mN)')
Out[19]:
In [20]:
# Histogram of impact forces; the bins kwarg gives the number of bars.
# The `normed` keyword is not passed: newer matplotlib releases no longer
# accept it, and un-normalized counts (what normed=False asked for) are the
# default behavior anyway.
_ = plt.hist(df.impf, bins=20)
# Axis-label typo fixed ('impace' -> 'impact')
plt.xlabel('impact force (mN)')
plt.ylabel('freq')
Out[20]:
In [33]:
# See how many data points we have per frog.
# The names `FF` and `py` are not bound by any import shown in this notebook,
# so the table factory is imported here and the figure is rendered through
# plotly's offline mode (already initialised with init_notebook_mode()).
try:
    import plotly.figure_factory as FF  # newer plotly releases
except ImportError:
    from plotly.tools import FigureFactory as FF  # older plotly releases

frog_ids = ['I', 'II', 'III', 'IV']
# Header row followed by one [frog ID, number of rows] entry per frog
data_matrix = [['FrogID', 'Sample No.']] + [
    [frog, df.ID[df.ID == frog].count()] for frog in frog_ids
]
table = FF.create_table(data_matrix)
plotly.offline.iplot(table, filename='my_first_table')
Out[33]:
In [36]:
# Mean impact force per frog; groupby returns the frog IDs in sorted order
mean_impact = df.groupby('ID').impf.mean()
# Use the per-frog standard deviation as the error bar
std_impacts = df.groupby('ID').impf.std()
# Bar locations: one per frog present in the data (was hard-coded to 4)
x = np.arange(len(mean_impact))
# Bar width
bar_width = 0.5
# Bar labels are taken from the grouped index so every label lines up with its
# bar. df.ID.unique() lists IDs in order of appearance, which is not guaranteed
# to match the sorted order groupby uses for the bar heights.
bar_labels = mean_impact.index
# Plot
plt.bar(x, mean_impact, yerr=std_impacts, width=bar_width,
        align='center', error_kw={'ecolor': 'black'})
plt.grid(axis='x')
plt.xticks(x, bar_labels)
plt.ylabel('impact force (mN)')
Out[36]:
In [37]:
# Group the impact forces by frog once and reuse the grouping
impf_by_frog = df.groupby('ID').impf
# Standard deviation per frog serves as the error bar
yerr = impf_by_frog.std()
# Bar chart of the per-frog means, drawn through the pandas plotting API
ax = impf_by_frog.mean().plot(kind='bar', yerr=yerr)
ax.grid(axis='x')
ax.set_ylabel('Impact Force (mN)')
Out[37]:
In [39]:
# Same comparison with seaborn: one box of impact forces per frog
box_kwargs = dict(x='ID', y='impf', width=0.5)
ax = sns.boxplot(data=df, **box_kwargs)
# Replace the column names on the axes with readable labels
ax.set_xlabel('Frog')
ax.set_ylabel('Impact Force (mN)')
Out[39]:
In [41]:
# Jitter plot: every measurement drawn as a semi-transparent point per frog
strip_kwargs = dict(x='ID', y='impf', jitter=True, alpha=0.6)
ax = sns.stripplot(data=df, **strip_kwargs)
# Replace the column names on the axes with readable labels
ax.set_xlabel('Frog')
ax.set_ylabel('Impact Force (mN)')
Out[41]:
In [44]:
# Columns shared by both layers of the figure
frog_axes = dict(x='ID', y='impf', data=df)
# Layer 1: box plot per frog
ax = sns.boxplot(width=0.5, **frog_axes)
# Layer 2: jittered points for the individual measurements
ax = sns.stripplot(jitter=True, marker='o', alpha=0.8, edgecolor='white',
                   **frog_axes)
# Replace the column names on the axes with readable labels
ax.set_xlabel('Frog')
ax.set_ylabel('Impact Force (mN)')
Out[44]:
In [47]:
# Frog IDs in the order they should appear along the x axis
frog_ids = ['I', 'II', 'III', 'IV']
# One Series of impact forces per frog, in that same order
list_of_impfs = [df.impf[df.ID == frog] for frog in frog_ids]
# Generate the beeswarm plot; the returned objects are not needed here
_ = bs.beeswarm(list_of_impfs, labels=frog_ids)
plt.grid(axis='x')
plt.ylabel('impact force (mN)')
Out[47]:
In [52]:
# Get the distinct measurement dates
dates = df.date.unique()
n_dates = len(dates)
# Give each date a color by sampling the Set1 colormap between 0 and 1
date_colors = [plt.cm.Set1(float(i) / float(n_dates)) for i in range(n_dates)]
# Dictionary mapping each date to its color
color_dict = dict(zip(dates, date_colors))
# Sort by ID so the per-point colors follow the same frog order as
# list_of_impfs. The sort must be stable (mergesort): list_of_impfs keeps the
# original row order within each frog, and the default sort does not guarantee
# that, which could attach the wrong date color to points of the same frog.
df_sorted = df.sort_values('ID', kind='mergesort')
# One color per data point, looked up from the date it was measured on
colors = [color_dict[date] for date in df_sorted.date]
# Make beeswarm with the points colored individually
bs_plot, ax = bs.beeswarm(list_of_impfs, labels=['I', 'II', 'III', 'IV'],
                          col=colors)
In [55]:
# Give the adhesive force column a short name. The result is reassigned rather
# than mutating df in place, consistent with the earlier column renames in
# this notebook.
df = df.rename(columns={'adhesive force (mN)': 'adhf'})
# Plot adhesive force vs impact force
plt.plot(df.impf, df.adhf, 'o', alpha=0.6)
plt.xlabel('impact force (mN)')
plt.ylabel('adhesive force (mN)')
Out[55]: