To investigate the growth rates of Venturia inaequalis isolates, \gls{PDA} plates were inoculated with 10 $\mu$L of blended mycelia. The plates were scanned most days over a 29-day period. I collected a number of statistics from the scanned images using ImageJ, including perimeter, diameter, and area. Here I process the output from ImageJ.

Required software

Analysis was performed with Python 3.4 in a Jupyter notebook.

The following packages were used in this analysis:


In [1]:
# Echo the pinned package list so the environment is recorded in the notebook.
with open("requirements.txt", "r") as requirements_file:
    pinned_packages = requirements_file.read()
print(pinned_packages)


decorator==4.0.2
ipykernel==4.0.3
ipython==4.0.0
ipython-genutils==0.1.0
ipywidgets==4.0.2
Jinja2==2.7.3
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.0.0
jupyter-console==4.0.2
jupyter-core==4.0.4
jupyterhub==0.1.0
MarkupSafe==0.23
matplotlib==1.4.3
mistune==0.7.1
nbconvert==4.0.0
nbformat==4.0.0
nose==1.3.7
notebook==4.0.4
numpy==1.9.2
pandas==0.16.2
path.py==8.1
pexpect==3.3
pickleshare==0.5
ptyprocess==0.5
Pygments==2.0.2
pyparsing==2.0.3
python-dateutil==2.4.2
pytz==2015.4
pyzmq==14.7.0
qtconsole==4.0.1
requests==2.7.0
scipy==0.15.1
simplegeneric==0.8.1
simplepam==0.1.5
six==1.9.0
SQLAlchemy==1.0.6
sympy==0.7.6
terminado==0.5
tornado==4.2
traitlets==4.0.0

To run this notebook for yourself create a new virtual environment in the prompt:

$ virtualenv env
$ source env/bin/activate
$ pip install numpy
$ pip install -r requirements.txt

Then to run the notebook itself:

$ jupyter notebook

In [2]:
from collections import defaultdict
import os
from os.path import join as pjoin
import subprocess
import re
import datetime
import math

import pytz
import numpy as np
import pandas as pd
from scipy import stats
import scipy as sp
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.dates import YearLocator, MonthLocator, DayLocator, DateFormatter, drange
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import MultiComparison
# Date tick locators built with their default settings. Neither name is
# referenced again in the visible cells, which construct DayLocator directly.
months = MonthLocator()  # every month
days = DayLocator()  # default arguments


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-87ee0532187d> in <module>()
     16 from matplotlib.colors import LinearSegmentedColormap
     17 from matplotlib.dates import YearLocator, MonthLocator, DayLocator, DateFormatter, drange
---> 18 from statsmodels.formula.api import ols
     19 from statsmodels.stats.anova import anova_lm
     20 from statsmodels.stats.multicomp import MultiComparison

ImportError: No module named 'statsmodels'

In [ ]:
%matplotlib inline

In [3]:
# Directory used by each stage of the analysis, keyed by a short step name.
# Later cells build every input/output path from this mapping.
STEPS = {
    'data': 'data',
    'calc_areas': '01-areas_calculated',
    'images': '02-in_vitro_images',
    'processed_areas': '03-processed_areas_dataset',
    'plotting': '04-generated_plot',
    'found_diff': '05-test_differences',
    }

# Create any step directory that does not exist yet. ``exist_ok=True`` does
# the existence check and the creation in one call, so re-running the cell
# is safe and there is no window between "check" and "create".
for step in STEPS.values():
    os.makedirs(step, exist_ok=True)

In [4]:
# Notebook display limits: allow long cell contents and up to 999 rows.
pd.set_option("display.max_colwidth", 800)
pd.set_option("display.max_rows", 999)
THREADS = 2  # Number of CPUs available for calls to BLAST etc.


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-8ffdb5352339> in <module>()
----> 1 pd.set_option("display.max_colwidth", 800)
      2 pd.options.display.max_rows = 999
      3 THREADS = 2  # Number of CPUs available for calls to BLAST etc.

NameError: name 'pd' is not defined

In [5]:
# Segment data for the 'Zissou' colormap. Each channel is a sequence of
# (x, y0, y1) anchors as expected by LinearSegmentedColormap: ``x`` is the
# position in [0, 1], ``y0`` the channel value just below ``x`` and ``y1``
# the value just above it. Every anchor here is continuous (y0 == y1) and
# expressed as an 8-bit level divided by 256.
cdict = {
    'red':   ((0., 59/256, 59/256),
              (0.25, 120/256, 120/256),
              # Was ``235/236`` (a typo): that made y1 differ from y0 and
              # introduced a jump in the red channel at the midpoint.
              (0.5, 235/256, 235/256),
              (0.75, 225/256, 225/256),
              (1., 242/256, 242/256)),
    'green': ((0., 156/256, 156/256),
              (0.25, 183/256, 183/256),
              (0.5, 204/256, 204/256),
              (0.75, 175/256, 175/256),
              (1., 26/256, 26/256)),
    'blue':  ((0., 178/256, 178/256),
              (0.25, 197/256, 197/256),
              (0.5, 42/256, 42/256),
              (0.75, 0., 0.),
              (1., 0., 0.))
    }

# Build the colormap and register it with matplotlib under its own name.
zissou = LinearSegmentedColormap('Zissou', cdict)
plt.register_cmap(cmap=zissou)

# Categorical palette: the plotting cells index it by isolate position, so it
# must contain at least as many colours as there are isolates.
cat_colours = ["#FF0000", "#00A08A", "#F2AD00", "#F98400", "#5BBCD6", "#046C9A", '#35274A', '#B40F20', "#D69C4E"]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-1ae6bfd351f9> in <module>()
     17     }
     18 
---> 19 zissou = LinearSegmentedColormap('Zissou', cdict)
     20 plt.register_cmap(cmap=zissou)
     21 

NameError: name 'LinearSegmentedColormap' is not defined

In [6]:
# Render all figure text through LaTeX, using a sans-serif set-up so that
# units, maths and body text share the same typeface.
latex_preamble = [
    r'\usepackage{siunitx}',   # upright \micro symbols for units
    r'\usepackage{inconsolata}',
    r'\sisetup{detect-all}',   # make siunitx follow the surrounding font
    r'\usepackage{helvet}',    # body font
    r'\usepackage{sansmath}',  # sans-serif maths to match helvet
    r'\sansmath'               # sansmath must be switched on explicitly
]
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.preamble'] = latex_preamble


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-7476ea2f9c7e> in <module>()
----> 1 mpl.rcParams['text.usetex']=True
      2 mpl.rcParams['text.latex.preamble'] = [
      3        r'\usepackage{siunitx}',   # i need upright \micro symbols, but you need...
      4        r'\usepackage{inconsolata}',
      5        r'\sisetup{detect-all}',   # ...this to force siunitx to actually use your fonts

NameError: name 'mpl' is not defined

Step - processed_areas

We have the problem that because some of the samples didn't have any growth at the start, they aren't there. I need to add those points in manually with the measurements of 0 because they aren't just missing data.

I want to get the sampling time loaded in as a column. I could use the label, but since each image file's metadata records a timestamp (the code below reads the file's modification time), I'll use that so that we have the time of day too.

And I want the Venturia isolate in there as a category.


In [8]:
#  Additional notes

# Contamination type recorded for individual scans, keyed by the scan's
# image file name. Scans that are not listed here get no contamination
# value when the table is built; downstream cells drop rows whose value is
# 'Bacterial' and keep everything else.
contaminated = {
    '2015-08-05-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-06-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-07-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-09-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-09-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-09-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-10-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-10-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-10-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-11-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-11-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-11-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-12-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-12-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-12-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-13-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-13-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-13-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-13-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-14-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-14-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-14-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-14-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-17-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-17-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-17-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-17-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-17-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-17-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-19-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-19-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-19-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-19-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-19-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-19-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-20-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-20-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-20-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-20-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-20-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-20-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-21-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-21-20150731DJ-10-B.jpg': 'Bacterial',
    '2015-08-21-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-21-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-21-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-21-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-21-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-26-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-10-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-35-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-26-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-26-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-27-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-10-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-35-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-27-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-27-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-28-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-10-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-35-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-28-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-28-20150731DJ-53-B.jpg': 'Fungal',
    '2015-08-29-20150731DJ-2-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-10-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-18-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-26-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-35-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-36-B.jpg': 'Fungal',
    '2015-08-29-20150731DJ-51-B.jpg': 'Bacterial',
    '2015-08-29-20150731DJ-53-B.jpg': 'Fungal',
    }
# Per-scan colour annotations, looked up by image file name when the table
# is built. Nothing has been recorded, so every row gets None.
colour = {}
# Free-text observations for individual scans, keyed by the scan's image
# file name. Scans without an entry get None in the 'Notes' column.
notes = {
    '2015-08-05-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-06-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-07-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-09-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-09-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-09-20150731DJ-53-B.jpg': 'Early fungal contamination.',
    '2015-08-10-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-10-20150731DJ-11-B.jpg': 'Beginning of breakout growth.',
    '2015-08-10-20150731DJ-12-B.jpg': 'Beginning of breakout growth, from two sides.',
    '2015-08-10-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-10-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-11-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-11-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-11-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-12-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-12-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-12-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-13-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-13-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
    '2015-08-13-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-13-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-14-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-14-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
    '2015-08-14-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-14-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-17-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-17-20150731DJ-7-B.jpg': 'Beginning of white growth from main growth.',
    '2015-08-17-20150731DJ-10-B.jpg': 'Beginning of white growth from main growth.',
    '2015-08-17-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-17-20150731DJ-22-B.jpg': 'Beginning of white growth from main growth.',
    '2015-08-17-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
    '2015-08-17-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-17-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-17-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-19-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-19-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-19-20150731DJ-10-B.jpg': 'White growth from main growth.',
    '2015-08-19-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-19-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-19-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-19-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-19-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-19-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-20-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-20-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-20-20150731DJ-10-B.jpg': 'White growth from main growth.',
    '2015-08-20-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-20-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-20-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-20-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-20-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-20-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-21-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-21-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-21-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-21-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-21-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-21-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-21-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-21-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-21-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-26-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-26-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-26-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-26-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-26-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-26-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-26-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-26-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-26-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-26-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-27-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-27-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-27-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-27-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-27-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-27-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-27-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-27-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-27-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
    '2015-08-27-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-28-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-28-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-28-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-28-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-28-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-28-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-28-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-28-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-28-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
    '2015-08-28-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-29-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
    '2015-08-29-20150731DJ-7-B.jpg': 'White growth from main growth.',
    '2015-08-29-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-29-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-29-20150731DJ-22-B.jpg': 'White growth from main growth.',
    '2015-08-29-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
    '2015-08-29-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
    '2015-08-29-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
    '2015-08-29-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
    '2015-08-29-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
    }

In [9]:
def _image_name(label):
    """Return the scan file name (``<stem>.jpg``) for an ImageJ label.

    Labels carry one or more of the suffixes ``.jpg``, ``.resampled`` and
    ``.tif`` after the scan's stem. The suffixes are removed as whole
    strings; the previous ``str.strip('.jpg.resampled.tif')`` removed any of
    those *characters* from both ends and only worked because every stem
    happened to start with a digit and end in an upper-case letter.
    """
    stem = label
    trimmed = True
    while trimmed:
        trimmed = False
        for suffix in ('.tif', '.resampled', '.jpg'):
            if stem.endswith(suffix):
                stem = stem[:-len(suffix)]
                trimmed = True
    return stem + '.jpg'


def _annotation_column(labels, annotations):
    """Return ``annotations`` looked up per label, None where unrecorded."""
    return [annotations.get(_image_name(label)) for label in labels]


# Load the ImageJ measurements. The first row of the file holds the column
# names (``header=0``); a boolean ``header`` is rejected by current pandas.
areas = pd.read_csv(
    pjoin(STEPS['calc_areas'], 'ImageJ_measurements_of_masked_images.csv'),
    header=0,
    usecols=[
        'Label', 'Area', 'Perim.',
        'Feret', 'FeretX', 'FeretY',
        'FeretAngle', 'MinFeret'
        ]
    )

# Add 0.0 for missing data: a scan with no measured particle had no growth,
# so it gets an all-zero row instead of being absent from the table.
measured_labels = set(areas["Label"])  # built once, not once per file
temp_df = defaultdict(list)
for file_ in os.listdir(STEPS['calc_areas']):
    if os.path.splitext(file_)[1] != '.jpg':
        continue
    # For files whose third dash-separated field is '02' or '29', only the
    # '.resampled.jpg' version is considered.
    if file_.split('-')[2] in {'02', '29'} and \
            not file_.endswith('.resampled.jpg'):
        continue
    label = os.path.splitext(file_)[0] + '.tif'
    if label in measured_labels:
        continue
    for key in areas.keys():
        if key == 'Label':
            temp_df['Label'].append(label)
        else:
            temp_df[key].append(0.0)
areas = pd.concat([areas, pd.DataFrame(temp_df)], ignore_index=True)

# Create new dataframe with sum of all parameters where multiple entries
# exist for one sample at a given time, i.e. there are multiple particles.
# 'Count' records how many entries were merged.
new_rows = list()
for label, tab in areas.groupby('Label'):
    row = dict()
    for key in tab.keys():
        if key in {"Isolate", "Sample", "Label"}:
            row[key] = tab[key].values[0]
        else:
            row[key] = tab[key].sum()
    row['Count'] = len(tab)
    new_rows.append(row)
areas = pd.DataFrame(new_rows)

# Find the sample number and add it as a new column.
# Yes, taking a fixed dash-separated field is risky, but the file names
# should all follow the same pattern.
sample_numbers = [int(label.split('-')[4]) for label in areas['Label']]
areas.insert(0, 'Sample', pd.Series(sample_numbers, dtype="category"))

# Find the isolate of the sample based on the sample numbers: samples are
# numbered from 1 and each isolate owns six consecutive sample numbers.
isolate_names = [
    'I37A', 'MNH120', 'I17V',
    'USR5', 'I9A', 'B04',
    'I61A', 'I65V', 'I19A'
    ]
sample_to_isolate = list()
for isolate in isolate_names:
    sample_to_isolate.extend([isolate] * 6)
isolates = [sample_to_isolate[number - 1] for number in sample_numbers]
areas.insert(0, 'Isolate', pd.Series(isolates, dtype="category"))

# 3 started out with larger inoculum area

# Attach the hand-recorded annotations, matched on the scan's file name.
colour_col = _annotation_column(areas['Label'], colour)
contamination_col = _annotation_column(areas['Label'], contaminated)
notes_col = _annotation_column(areas['Label'], notes)

areas.insert(
    len(areas.columns),
    'Colour',
    pd.Series(colour_col, dtype="category")
    )
areas.insert(
    len(areas.columns),
    'Contamination',
    pd.Series(contamination_col, dtype="category")
    )
areas.insert(
    len(areas.columns),
    'Notes',
    pd.Series(notes_col)
    )

# Find the scan time of the sample from the image timestamp. This is the
# modification time of ``<stem>.jpg`` as a naive local datetime.
times = list()
for label in areas['Label']:
    scan_stat = os.stat(pjoin(STEPS['calc_areas'], _image_name(label)))
    times.append(datetime.datetime.fromtimestamp(scan_stat.st_mtime))
areas.insert(0, 'Time', pd.DatetimeIndex(times))
areas = areas.set_index('Time')  # Set time as index

areas.to_csv(pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv'))

In [10]:
#for day, tab in areas[areas['Area'] == 0.].groupby([pd.TimeGrouper('D')]):
#    print("\n".join(tab.sort(['Sample'])['Label'].values))

Step - generated plot


In [11]:
# Scatter of every individual area measurement over time, one colour per
# isolate. Scans flagged as bacterially contaminated are left out.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
not_bacterial = areas['Contamination'] != 'Bacterial'
areas = areas[not_bacterial]

fig, ax = plt.subplots()

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    scan_times = group.index.to_pydatetime()
    measured_area = group['Area'].values
    ax.plot_date(
        x=scan_times,
        y=measured_area,
        label=iso,
        color=cat_colours[i]
        )

plt.legend(loc=2)

# Major tick every third day, minor tick every day, ISO-formatted labels.
x_axis = ax.xaxis
x_axis.set_major_locator(DayLocator(interval=3))
x_axis.set_minor_locator(DayLocator(interval=1))
x_axis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = x_axis.get_ticklabels()
plt.setp(date_labels, rotation=90)

plt.show()



In [12]:
# Daily mean colony area per isolate, with a shaded band of +/- one
# standard error. Bacterially contaminated scans are left out.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
not_bacterial = areas['Contamination'] != 'Bacterial'
areas = areas[not_bacterial]

y_col = 'Area'

fig, ax = plt.subplots(figsize=(6, 10))

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # Bin this isolate's scans by calendar day.
    daily = group.groupby([pd.TimeGrouper('D')])
    mean = daily.mean()[y_col]
    se = daily.sem()[y_col]
    lower = mean.values - se.values
    upper = mean.values + se.values
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=lower,
        y2=upper,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
        )
    # Line only: the markers are suppressed with a zero marker size.
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
        )

x_axis = ax.xaxis
x_axis.set_major_locator(DayLocator(interval=3))
x_axis.set_minor_locator(DayLocator())
x_axis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = x_axis.get_ticklabels()
plt.setp(date_labels, rotation=60)
plt.legend(loc='upper left')
plt.show()



In [13]:
# Daily mean area-to-perimeter ratio per isolate, with a shaded band of
# +/- one standard error. Bacterially contaminated scans are left out.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

y_col = 'AreaPerim'
area_perim = (areas['Area'] / areas['Perim.'])
# Scans with no growth give 0 / 0 = NaN; count them as a ratio of zero.
# ``fillna`` returns a new Series, so the result has to be assigned (the
# previous call discarded it and the NaNs stayed in the data).
area_perim = area_perim.fillna(0.)
areas.insert(3, 'AreaPerim', area_perim)

fig, ax = plt.subplots()

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # Bin this isolate's scans by calendar day.
    daily = group.groupby([pd.TimeGrouper('D')])
    mean = daily.mean()[y_col]
    se = daily.sem()[y_col]
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
        )
    # Line only: the markers are suppressed with a zero marker size.
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
        )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
plt.legend(loc='upper left')
plt.show()



In [14]:
# Daily mean 'Feret' measurement per isolate, with a shaded band of +/- one
# standard error. Bacterially contaminated scans are left out.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
not_bacterial = areas['Contamination'] != 'Bacterial'
areas = areas[not_bacterial]

y_col = 'Feret'

fig, ax = plt.subplots()

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # Bin this isolate's scans by calendar day.
    daily = group.groupby([pd.TimeGrouper('D')])
    mean = daily.mean()[y_col]
    se = daily.sem()[y_col]
    lower = mean.values - se.values
    upper = mean.values + se.values
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=lower,
        y2=upper,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
        )
    # Line only: the markers are suppressed with a zero marker size.
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
        )

x_axis = ax.xaxis
x_axis.set_major_locator(DayLocator(interval=3))
x_axis.set_minor_locator(DayLocator())
x_axis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = x_axis.get_ticklabels()
plt.setp(date_labels, rotation=60)
# Anchor the legend's upper-left corner at the axes' upper-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()



In [15]:
# Daily mean of ('Feret' - 'MinFeret') per isolate, with a shaded band of
# +/- one standard error. Bacterially contaminated scans are left out.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
not_bacterial = areas['Contamination'] != 'Bacterial'
areas = areas[not_bacterial]

y_col = 'DiameterDifference'
diameter_difference = areas['Feret'] - areas['MinFeret']
areas.insert(3, 'DiameterDifference', diameter_difference)

fig, ax = plt.subplots()

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # Bin this isolate's scans by calendar day.
    daily = group.groupby([pd.TimeGrouper('D')])
    mean = daily.mean()[y_col]
    se = daily.sem()[y_col]
    lower = mean.values - se.values
    upper = mean.values + se.values
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=lower,
        y2=upper,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
        )
    # Line only: the markers are suppressed with a zero marker size.
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
        )

x_axis = ax.xaxis
x_axis.set_major_locator(DayLocator(interval=3))
x_axis.set_minor_locator(DayLocator())
x_axis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = x_axis.get_ticklabels()
plt.setp(date_labels, rotation=60)
# Anchor the legend's upper-left corner at the axes' upper-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()



In [16]:
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

y_col = 'Perim.'

fig, ax = plt.subplots()

i = 0
for iso, group in areas.groupby('Isolate'):
    mean = group.groupby([pd.TimeGrouper('D')]).mean()[y_col]
    se = group.groupby([pd.TimeGrouper('D')]).sem()[y_col]
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
        )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
        )
    i += 1

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()


Step - found_diff


In [17]:
# Re-express every measurement relative to each sample's first scan: numeric
# columns become "value minus value at the first scan" and an 'Elapsed'
# column holds the time since that first scan.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
# Chronological order, so position 0 within each sample is its first scan.
# (``sort_index`` is the explicit form of the old argument-less ``sort``.)
areas = areas.sort_index()

# Columns that describe the scan and are copied through unchanged.
descriptive = {
    'Sample', 'Isolate', 'Label',
    'Colour', 'Notes', 'Contamination'
    }

new_areas = list()
for sample, sample_tab in areas.groupby('Sample'):
    first_time = sample_tab.index[0]
    for position, time in enumerate(sample_tab.index):
        row = {'Elapsed': time - first_time, 'Time': time}
        for key in sample_tab.keys():
            # ``.iloc`` is explicitly positional; ``.ix`` only behaved that
            # way here because the index is not integer-valued.
            value = sample_tab[key].iloc[position]
            if key in descriptive:
                row[key] = value
            else:
                row[key] = value - sample_tab[key].iloc[0]
        new_areas.append(row)

areas = pd.DataFrame(new_areas)
areas = areas.set_index('Time')
areas['Elapsed'] = areas['Elapsed'].astype('timedelta64[s]')

diameter_difference = (areas['Feret'] - areas['MinFeret'])
areas.insert(3, 'DiameterDifference', diameter_difference)

In [18]:
# One-way ANOVA of area against isolate for every calendar day that has
# scans. Each day contributes two table rows (residual, then isolate), and
# the collected table is written out tab-separated.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
not_bacterial = areas['Contamination'] != 'Bacterial'
areas = areas[not_bacterial]

anova_tests = list()
for day, tab in areas.groupby(pd.TimeGrouper('D')):
    if len(tab) == 0:
        # No scans on this day.
        continue
    lm = ols('Area ~ C(Isolate)', tab).fit()
    a_lm = anova_lm(lm)
    axes = a_lm.axes
    matrix = a_lm.as_matrix()
    # The code takes table row 0 as the isolate term and row 1 as the
    # residual; the 'Residual'/'Isolate' flags mark which is which.
    residual_row = {'Day': day, 'Residual': 1, 'Isolate': 0}
    residual_row.update(zip(axes[1], matrix[1]))
    isolate_row = {'Day': day, 'Residual': 0, 'Isolate': 1}
    isolate_row.update(zip(axes[1], matrix[0]))
    anova_tests.extend([residual_row, isolate_row])
anova_tests = pd.DataFrame(anova_tests)
filename = pjoin(STEPS['found_diff'], 'anova_tests.csv')
anova_tests.to_csv(filename, sep='\t')
#anova_tests

In [19]:
def tukeys_group(table):
    """Greedily cluster isolates that pairwise comparisons do not separate.

    Parameters
    ----------
    table : pandas.DataFrame
        One row per pairwise comparison with columns ``group1``, ``group2``
        and ``reject`` (truthy when the two groups differ significantly).

    Returns
    -------
    list of list
        Groups of isolate names. An isolate is added to every existing group
        in which it is not rejected against any current member, and starts a
        new group when no existing group accepts it. An isolate may therefore
        appear in more than one group.
    """
    # Symmetric lookup: rejects[a][b] == rejects[b][a] == reject flag.
    rejects = defaultdict(dict)
    for _, comparison in table.iterrows():
        left = comparison['group1']
        right = comparison['group2']
        verdict = comparison['reject']
        rejects[left][right] = verdict
        rejects[right][left] = verdict

    groups = []
    for isolate, partners in rejects.items():
        if not groups:
            # Seed the first group with the first isolate encountered.
            groups.append([isolate])
        for candidate in partners:
            placed = False
            for members in groups:
                # Built as a list (not a generator) so that every pair is
                # looked up, even after a conflict has been found.
                conflicts = [
                    rejects[candidate][member]
                    for member in members
                    if member != candidate
                ]
                if any(conflicts):
                    continue
                if candidate not in members:
                    members.append(candidate)
                placed = True
            if not placed:
                groups.append([candidate])
    return groups

In [20]:
# Tukey HSD pairwise comparison of colony area between isolates, run
# separately for each calendar day that has scans. The per-day result tables
# are stacked into one frame indexed by day and written to disk.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
    )
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

tukeys_tests = list()
col_names = None  # taken from the header row of the first summary table
# pd.Grouper(freq='D') is the supported spelling of the old pd.TimeGrouper('D').
for day, tab in areas.groupby(pd.Grouper(freq='D')):
    if len(tab) == 0:
        # Days without any scan still appear as (empty) daily bins.
        continue
    mod = MultiComparison(tab['Area'], tab['Isolate'])
    hsd = mod.tukeyhsd().summary()
    # First row of the summary data is the header; the rest are comparisons.
    if col_names is None:
        col_names = ['time'] + hsd.data[0]
    stamp = day.to_pydatetime()
    tukeys_tests.extend([stamp] + list(record) for record in hsd.data[1:])

tukeys_tests = pd.DataFrame(tukeys_tests, columns=col_names)
tukeys_tests.loc[:,'time'] = pd.DatetimeIndex(
    tukeys_tests['time'].values
    )
tukeys_tests = tukeys_tests.set_index('time')  # Set time as index
filename = pjoin(STEPS['found_diff'], 'tukeys_tests.csv')
tukeys_tests.to_csv(filename, sep='\t')

In [21]:
def f_theta(se):
    """Return ``se / sqrt(se**2 + 1) + (1 / se) / (1 + se**-2)``.

    Used by ``z_beta`` as the divisor of the t quantile. Works element-wise
    on anything numpy arithmetic accepts.
    """
    direct_term = se / np.sqrt(se**2 + 1)
    # NOTE(review): unlike the first term, this denominator has no square
    # root - confirm against the formula this was taken from.
    inverse_term = (1 / se)/(1 + se**(-2))
    return direct_term + inverse_term

def z_beta(a, se, df):
    """Return the Student-t quantile at ``a / 2`` divided by ``f_theta(se)``.

    Parameters
    ----------
    a : float
        Two-sided significance level; the quantile is taken at ``a / 2``
        (the lower tail, so the result is negative for small ``a``).
    se : float or array-like
        Passed to ``f_theta``.
    df : float or array-like
        Degrees of freedom of the t distribution.
    """
    # Public ppf rather than the private _ppf: same quantile for valid input,
    # but with scipy's argument validation.
    return sp.stats.t.ppf(a/2, df) / f_theta(se)

def adj_ci(se, n, cmp, alpha=0.05):
    """Return ``z_beta(alpha / cmp, se, n - 1) * se``.

    Parameters
    ----------
    se : float or array-like
        Standard error; also forwarded to ``z_beta``.
    n : int or array-like
        Sample size; ``n - 1`` is used as the degrees of freedom.
    cmp : int
        Divisor applied to ``alpha`` before the quantile is looked up.
    alpha : float, optional
        Overall significance level (default 0.05).
    """
    per_comparison_alpha = alpha/cmp
    return z_beta(per_comparison_alpha, se, n-1) * se

def nCr(n,r):
    """Return the number of ways to choose ``r`` items from ``n`` (n choose r).

    The result is an exact ``int``. Floor division is safe because the
    quotient is always a whole number, and it avoids the rounding and
    overflow that float division hits on large factorials.

    Raises
    ------
    ValueError
        If ``n``, ``r`` or ``n - r`` is negative (from ``math.factorial``).
    """
    f = math.factorial
    return f(n) // (f(r) * f(n-r))

class myFormatter(mpl.ticker.Formatter):
    """Tick formatter that labels date ticks by time elapsed since ``start``.

    Parameters
    ----------
    start : float
        POSIX timestamp of the reference moment. It is converted with
        ``datetime.fromtimestamp`` (local wall-clock time) and the result is
        then tagged as UTC without shifting it.
    fmt : str, optional
        ``str.format`` template; may use the fields ``{days}`` and
        ``{seconds}`` (the ``days`` and ``seconds`` attributes of the elapsed
        ``timedelta``). Defaults to ``'{days}'``.
    **kwargs
        Forwarded to the parent class initialiser.
    """

    def __init__(self, start, fmt='{days}', **kwargs):
        super().__init__(**kwargs)
        wall_clock = datetime.datetime.fromtimestamp(start)
        # Tag (not convert) as UTC so it can be subtracted from the
        # timezone-aware values produced in __call__.
        self.start = wall_clock.replace(tzinfo=pytz.utc)
        self.fmt = fmt

    def __call__(self, x, pos=None):
        """Format the matplotlib date number ``x``; ``pos`` is ignored."""
        elapsed = mpl.dates.num2date(x) - self.start
        return self.fmt.format(days=elapsed.days, seconds=elapsed.seconds)

In [22]:
# Plot the daily mean colony area of each isolate over time, with a shaded
# t-based confidence band, and label each line at its final value on a
# secondary y axis.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
    )
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

y_col = 'Area'

fig, ax = plt.subplots(figsize=(8, 6))
confidence = 0.95

labels = list()

for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    line_colour = cat_colours[i]
    # Group by calendar day once and reuse it for all three statistics.
    # pd.Grouper(freq='D') is the supported spelling of pd.TimeGrouper('D').
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    n = daily.count()
    # The lower-tail quantile is negative, so `interval` is negative; the
    # band below is still bounded by mean - interval and mean + interval.
    interval = se * sp.stats.t.ppf((1 - confidence)/2, n - 1)
    timestamps = mean.index
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - interval,
        y2=mean.values + interval,
        color=line_colour,
        alpha=0.4,
        lw=0.
        )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        color=line_colour
        )
    labels.append((mean.values[-1], iso, line_colour))

# NOTE: `mean` and `timestamps` below are those of the last isolate plotted.
ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(myFormatter(
        start=mean.index.to_pydatetime()[0].timestamp()
        ))

# Start the x axis at the sixth daily bin.
ax.set_xlim(
    mpl.dates.date2num(timestamps[5]),
    mpl.dates.date2num(timestamps[-1])
    )
ax.set_ylim(0, 1000)

# Secondary axis whose ticks sit at each line's final value and carry the
# isolate names.
y_pos, labels, colour = zip(*labels)
ax2 = ax.twinx()
ax2.set_ylim(*ax.get_ylim())
ax2.set_yticks(y_pos)
ax2.set_yticklabels(labels)
ax2.yaxis.set_tick_params(direction='out')
ax2.grid(False)

ax.set_xlabel('Days post inoculation')
ax.set_ylabel('Area (mm$^2$)')

filename = pjoin(STEPS['found_diff'], 'area_time')
fig.savefig(filename + '.svg')



In [32]:
# Endpoint comparison: mean colony area per isolate on a single day, drawn
# with standard-error and t-based confidence bars on a broken y axis, and
# annotated with letter codes derived from the Tukey HSD groupings.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
    )
areas = pd.read_csv(filename, index_col=0, parse_dates=True)

date = '2015-08-29'
groups = tukeys_group(tukeys_tests[tukeys_tests.index == date])
areas = areas[areas['Contamination'] != 'Bacterial']
# Explicit row selection of that day's scans (partial-string match on the
# datetime index); plain areas[date] is ambiguous with column lookup.
areas = areas.loc[date]

confidence = 0.95

area_by_isolate = areas.groupby('Isolate')['Area']
mean = area_by_isolate.mean()
se = area_by_isolate.sem()
n = area_by_isolate.count()
print(n)
isolates = mean.index

interval = se * z_beta((1-confidence), se, n - 1)
# Upper-tail quantile so the half-width is positive: error bars must not be
# given negative lengths.
interval2 = se * sp.stats.t.ppf((1 + confidence)/2., n - 1)

# Tall main axis plus a thin strip underneath that shows the zero line.
fig,(ax,ax2) = plt.subplots(
    2, 1,
    sharex=True,
    gridspec_kw={
        'height_ratios': [33, 1],
        'hspace': 0.05
        }
    )

# Left-to-right plotting position of each isolate.
order = {
    'I17V': 4,
    'I19A': 5,
    'I37A': 0,
    'I61A': 8,
    'I65V': 7,
    'I9A': 3,
    'B04': 6,
    'MNH120': 2,
    'USR5': 1,
    }
index = np.array([order[i] for i in isolates])

# Thin bar = confidence interval, thick bar = standard error; both share the
# same marker and colours.
errorbar_style = dict(
    fmt='o',
    capsize=0,
    ecolor=cat_colours[1],
    markeredgecolor='k',
    markerfacecolor='k',
    markersize=6,
    zorder=3,
    )
for half_width, line_width in ((interval2, 2), (se, 4)):
    ax.errorbar(
        index + 0.5,
        mean,
        yerr=half_width,
        elinewidth=line_width,
        **errorbar_style
        )

## Draw Broken y-axis
# Short horizontal marks at both ends of the bottom edge of `ax` and of the
# top edge of `ax2`, in axes coordinates.
d = .01
offset = 0 #0.001
for axis, edge in ((ax, 0), (ax2, 1)):
    for corner in (0, 1):
        axis.plot(
            (corner - d - offset, corner + d - offset),
            (edge, edge),
            transform=axis.transAxes,
            color='k',
            clip_on=False
            )

# Add tukeys codes
# Largest group first; letters are assigned in reverse so that the largest
# group receives the last of the letters in use.
iso_groups = defaultdict(list)
groups.sort(key=len, reverse=True)
codes = list("abcdefghijk")[:len(groups)][::-1]
for code, group in zip(codes, groups):
    for gmem in group:
        iso_groups[gmem].append(code)
max_ngroups = max((len(group) for group in iso_groups.values()), default=0)
# x in data coordinates (isolate position), y in axes coordinates (above plot).
codes_transform = mpl.transforms.blended_transform_factory(
    ax.transData,
    ax.transAxes,
    )

base_ypos = 1.015
y_text_offset = 0.025
if max_ngroups == 1:
    # No isolate carries more than one letter: a single row is enough.
    ypos = {code: base_ypos for code in codes}
else:
    # Give each letter its own row so stacked letters do not overlap.
    ypos = {
        code: base_ypos + level * y_text_offset
        for level, code in enumerate(codes)
        }

for isolate, group in iso_groups.items():
    x = order[isolate] + 0.5
    for code in group:
        ax.text(x, ypos[code], code,
                transform=codes_transform,
                horizontalalignment='center'
                )

ax.set_xticks(index + 0.5)
ax.set_xticklabels(isolates)

ax.set_ylim(175, 1000)
ax2.set_ylim(0, 25)
ax2.set_yticks([0,])

ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.tick_top()
# Boolean rather than the string 'off', which newer matplotlib treats as true.
ax.tick_params(labeltop=False)
ax2.xaxis.tick_bottom()

isolate_labels = ax2.xaxis.get_ticklabels()
plt.setp(isolate_labels, rotation=80)
ax.yaxis.grid(True, zorder=0)
ax.set_ylabel('Colony size (mm$^2$)')

filename = pjoin(STEPS['found_diff'], 'endpoint_intervals')

fig.set_size_inches(6, 6)
fig.savefig(filename + '.svg')
fig.savefig(filename + '.pdf')
fig.savefig(filename + '.png', dpi=600)


Isolate
B04       5
I17V      5
I19A      5
I37A      5
I61A      6
I65V      6
I9A       5
MNH120    5
USR5      6
Name: Area, dtype: int64