This pre-release version of the pipeline assumes that the input data are in a very specific format. Please contact alex@gavruskin.com if you would like to try it.
git clone https://github.com/gavruskin/microinteractions.git
cd microinteractions
total_CFUs
, DailyFecundity
, Development
, Survival
treat
python data_preprocess_total_CFUs.py
python data_preprocess_DailyFecundity.py
python data_preprocess_Development.py
python data_preprocess_Survival.py
jupyter notebook
Load dependencies:
In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib
import matplotlib.pyplot as plt
import sys
from IPython.display import set_matplotlib_formats
# Request PNG, PDF and SVG as the output formats for inline figures.
# NOTE(review): IPython.display.set_matplotlib_formats is reportedly deprecated
# in newer IPython releases in favour of matplotlib_inline.backend_inline —
# confirm against the IPython version in use.
set_matplotlib_formats("png", "pdf", "svg")
# Apply matplotlib's "ggplot" style sheet to every plot in this notebook.
matplotlib.style.use('ggplot')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
# Read the processed total_CFUs table from CSV.
data_total_CFUs = pd.read_csv("flygut_cfus_expts1345_totals_processed.csv")

# Regress total_CFUs on the 32 predictor columns named below (a, a1..a5,
# the b**, c***, d**** columns and e12345), all taken from data_total_CFUs.
# NOTE(review): the columns presumably encode single-species and higher-order
# combination indicators — confirm against the preprocessing script.
formula_total_CFUs = (
    "total_CFUs ~ a + a1 + a2 + a3 + a4 + a5 +"
    "b12 + b13 + b14 + b15 + b23 + b24 + b25 + b34 + b35 + b45 +"
    "c123 + c124 + c125 + c134 + c135 + c145 + c234 + c235 + c245 + c345 +"
    "d1234 + d1235 + d1245 + d1345 + d2345 + e12345"
)
# Ordinary least squares fit; the fitted results object is kept for the
# summary and the coefficient plot in the following cells.
lm_total_CFUs = smf.ols(
    formula=formula_total_CFUs,
    data=data_total_CFUs,
).fit()
In [3]:
# Display the statsmodels summary of the total_CFUs OLS fit as the cell output.
lm_total_CFUs.summary()
Out[3]:
In [4]:
# Build one table per model term (row index = term name) holding the bounds
# returned by conf_int() and the fitted coefficient, then plot it as grouped
# bars.
conf_int_total_CFUs = pd.DataFrame(lm_total_CFUs.conf_int())
conf_int_total_CFUs.columns = ["95% conf. int. bottom", "95% conf. int. top"]
# Assigning the params Series aligns on the term names, so every row gets its
# own coefficient. (The previous hand-written list repeated `a` and never
# listed `Intercept`, so the Intercept row silently held the coefficient of a.)
conf_int_total_CFUs["coef"] = lm_total_CFUs.params
# Set Intercept and a to 0, as otherwise the rest of the plot vanishes.
# A single .loc assignment writes into the frame itself; chained assignment
# such as df["coef"].Intercept = 0 only modifies a temporary Series when
# pandas Copy-on-Write is active. Comment this one line out to plot them.
conf_int_total_CFUs.loc[["Intercept", "a"], :] = 0
conf_int_total_CFUs.plot.bar(figsize=(20,10))
Out[4]:
In [5]:
# Read the processed DailyFecundity table from CSV.
data_DailyFecundity = pd.read_csv("DailyFecundityData_processed.csv")

# Regress DailyFecundity on the 32 predictor columns named below (a, a1..a5,
# the b**, c***, d**** columns and e12345), all taken from data_DailyFecundity.
# NOTE(review): the columns presumably encode single-species and higher-order
# combination indicators — confirm against the preprocessing script.
formula_DailyFecundity = (
    "DailyFecundity ~ a + a1 + a2 + a3 + a4 + a5 +"
    "b12 + b13 + b14 + b15 + b23 + b24 + b25 + b34 + b35 + b45 +"
    "c123 + c124 + c125 + c134 + c135 + c145 + c234 + c235 + c245 + c345 +"
    "d1234 + d1235 + d1245 + d1345 + d2345 + e12345"
)
# Ordinary least squares fit; the fitted results object is kept for the
# summary and the coefficient plot in the following cells.
lm_DailyFecundity = smf.ols(
    formula=formula_DailyFecundity,
    data=data_DailyFecundity,
).fit()
In [6]:
# Display the statsmodels summary of the DailyFecundity OLS fit as the cell output.
lm_DailyFecundity.summary()
Out[6]:
In [7]:
# Build one table per model term (row index = term name) holding the bounds
# returned by conf_int() and the fitted coefficient, then plot it as grouped
# bars.
conf_int_DailyFecundity = pd.DataFrame(lm_DailyFecundity.conf_int())
conf_int_DailyFecundity.columns = ["95% conf. int. bottom", "95% conf. int. top"]
# Assigning the params Series aligns on the term names, so every row gets its
# own coefficient. (The previous hand-written list repeated `a` and never
# listed `Intercept`, so the Intercept row silently held the coefficient of a.)
conf_int_DailyFecundity["coef"] = lm_DailyFecundity.params
# Set Intercept and a to 0, as otherwise the rest of the plot vanishes.
# A single .loc assignment writes into the frame itself; chained assignment
# such as df["coef"].Intercept = 0 only modifies a temporary Series when
# pandas Copy-on-Write is active. Comment this one line out to plot them.
conf_int_DailyFecundity.loc[["Intercept", "a"], :] = 0
conf_int_DailyFecundity.plot.bar(figsize=(20,10))
Out[7]:
In [8]:
# Read the processed Development table from CSV.
data_Development = pd.read_csv("DevelopmentData_processed.csv")

# Regress Development on the 32 predictor columns named below (a, a1..a5,
# the b**, c***, d**** columns and e12345), all taken from data_Development.
# NOTE(review): the columns presumably encode single-species and higher-order
# combination indicators — confirm against the preprocessing script.
formula_Development = (
    "Development ~ a + a1 + a2 + a3 + a4 + a5 +"
    "b12 + b13 + b14 + b15 + b23 + b24 + b25 + b34 + b35 + b45 +"
    "c123 + c124 + c125 + c134 + c135 + c145 + c234 + c235 + c245 + c345 +"
    "d1234 + d1235 + d1245 + d1345 + d2345 + e12345"
)
# Ordinary least squares fit; the fitted results object is kept for the
# summary and the coefficient plot in the following cells.
lm_Development = smf.ols(
    formula=formula_Development,
    data=data_Development,
).fit()
In [9]:
# Display the statsmodels summary of the Development OLS fit as the cell output.
lm_Development.summary()
Out[9]:
In [10]:
# Build one table per model term (row index = term name) holding the bounds
# returned by conf_int() and the fitted coefficient, then plot it as grouped
# bars.
conf_int_Development = pd.DataFrame(lm_Development.conf_int())
conf_int_Development.columns = ["95% conf. int. bottom", "95% conf. int. top"]
# Assigning the params Series aligns on the term names, so every row gets its
# own coefficient. (The previous hand-written list repeated `a` and never
# listed `Intercept`, so the Intercept row silently held the coefficient of a.)
conf_int_Development["coef"] = lm_Development.params
# Zero out the Intercept and a rows before plotting. Comment this one line
# out to plot the Intercept and a as well.
# A single .loc assignment writes into the frame itself; chained assignment
# such as df["coef"].Intercept = 0 only modifies a temporary Series when
# pandas Copy-on-Write is active.
conf_int_Development.loc[["Intercept", "a"], :] = 0
conf_int_Development.plot.bar(figsize=(20,10))
Out[10]:
In [11]:
# Read the processed Survival table from CSV.
data_Survival = pd.read_csv("SurvivalData_processed.csv")

# Regress Survival on the 32 predictor columns named below (a, a1..a5,
# the b**, c***, d**** columns and e12345), all taken from data_Survival.
# NOTE(review): the columns presumably encode single-species and higher-order
# combination indicators — confirm against the preprocessing script.
formula_Survival = (
    "Survival ~ a + a1 + a2 + a3 + a4 + a5 +"
    "b12 + b13 + b14 + b15 + b23 + b24 + b25 + b34 + b35 + b45 +"
    "c123 + c124 + c125 + c134 + c135 + c145 + c234 + c235 + c245 + c345 +"
    "d1234 + d1235 + d1245 + d1345 + d2345 + e12345"
)
# Ordinary least squares fit; the fitted results object is kept for the
# summary and the coefficient plot in the following cells.
lm_Survival = smf.ols(
    formula=formula_Survival,
    data=data_Survival,
).fit()
In [12]:
# Display the statsmodels summary of the Survival OLS fit as the cell output.
lm_Survival.summary()
Out[12]:
In [13]:
# Build one table per model term (row index = term name) holding the bounds
# returned by conf_int() and the fitted coefficient, then plot it as grouped
# bars.
conf_int_Survival = pd.DataFrame(lm_Survival.conf_int())
conf_int_Survival.columns = ["95% conf. int. bottom", "95% conf. int. top"]
# Assigning the params Series aligns on the term names, so every row gets its
# own coefficient. (The previous hand-written list repeated `a` and never
# listed `Intercept`, so the Intercept row silently held the coefficient of a.)
conf_int_Survival["coef"] = lm_Survival.params
# Zero out the Intercept and a rows before plotting. Comment this one line
# out to plot the Intercept and a as well.
# A single .loc assignment writes into the frame itself; chained assignment
# such as df["coef"].Intercept = 0 only modifies a temporary Series when
# pandas Copy-on-Write is active.
conf_int_Survival.loc[["Intercept", "a"], :] = 0
conf_int_Survival.plot.bar(figsize=(20,10))
Out[13]: