To investigate the growth rates of Venturia inaequalis isolates, \gls{PDA} plates were inoculated with 10 $\mu$L of blended mycelia. The plates were scanned most days over a 29-day period. I extracted a number of statistics from the scanned images using ImageJ, including perimeter, diameter, and area. Here I process the output from ImageJ.
Analysis was performed with Python 3.4 in a Jupyter notebook.
The following packages were used in this analysis:
In [1]:
# Echo the pinned package list so the notebook output records the
# environment the analysis was run in.
with open("requirements.txt", "r") as requirements_file:
    requirements_text = requirements_file.read()
print(requirements_text)
To run this notebook for yourself create a new virtual environment in the prompt:
$ virtualenv env
$ source env/bin/activate
$ pip install numpy
$ pip install -r requirements.txt
Then to run the notebook itself:
$ jupyter notebook
In [2]:
from collections import defaultdict
import os
from os.path import join as pjoin
import subprocess
import re
import datetime
import math
import pytz
import numpy as np
import pandas as pd
from scipy import stats
import scipy as sp
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.dates import YearLocator, MonthLocator, DayLocator, DateFormatter, drange
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import MultiComparison
# Module-level tick locators for date axes.
months = MonthLocator() # every month
days = DayLocator() # every day (default interval of 1)
In [ ]:
%matplotlib inline
In [3]:
# Directory used by each stage of the analysis, keyed by a short step name.
STEPS = {
    'data': 'data',
    'calc_areas': '01-areas_calculated',
    'images': '02-in_vitro_images',
    'processed_areas': '03-processed_areas_dataset',
    'plotting': '04-generated_plot',
    'found_diff': '05-test_differences',
}

# Make sure every stage directory exists before any cell reads from or
# writes to it. ``exist_ok=True`` replaces the isdir()-then-mkdir() check,
# which could fail if the directory appeared between the two calls.
for step in STEPS.values():
    os.makedirs(step, exist_ok=True)
In [4]:
# Notebook display limits: wide enough to show long text cells (such as the
# free-text notes) and long tables without truncation.
pd.set_option("display.max_colwidth", 800)
pd.set_option("display.max_rows", 999)

THREADS = 2  # Number of CPUs available for calls to BLAST etc.
In [5]:
# Segment data for the "Zissou" colormap. Each row is
# (anchor position, intensity below the anchor, intensity above the anchor);
# both intensities are equal in every row so each channel is continuous.
# Intensities are written as 8-bit channel values divided by 256.
cdict = {
    'red': ((0., 59/256, 59/256),
            (0.25, 120/256, 120/256),
            # Fixed: the second value previously read 235/236, a typo that
            # introduced a jump in the red channel at the midpoint.
            (0.5, 235/256, 235/256),
            (0.75, 225/256, 225/256),
            (1., 242/256, 242/256)),
    'green': ((0., 156/256, 156/256),
              (0.25, 183/256, 183/256),
              (0.5, 204/256, 204/256),
              (0.75, 175/256, 175/256),
              (1., 26/256, 26/256)),
    'blue': ((0., 178/256, 178/256),
             (0.25, 197/256, 197/256),
             (0.5, 42/256, 42/256),
             (0.75, 0., 0.),
             (1., 0., 0.))
}
zissou = LinearSegmentedColormap('Zissou', cdict)
plt.register_cmap(cmap=zissou)

# Categorical palette; plots index into it by isolate position, so it needs
# at least as many entries as there are isolates (nine here).
cat_colours = [
    "#FF0000", "#00A08A", "#F2AD00",
    "#F98400", "#5BBCD6", "#046C9A",
    '#35274A', '#B40F20', "#D69C4E",
]
In [6]:
# Route all figure text through LaTeX and load the packages needed for
# sans-serif text and maths.
mpl.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': [
        r'\usepackage{siunitx}',      # upright \micro symbols, but this needs...
        r'\usepackage{inconsolata}',
        r'\sisetup{detect-all}',      # ...this to make siunitx use the document fonts
        r'\usepackage{helvet}',       # set the normal font here
        r'\usepackage{sansmath}',     # load sansmath so that math -> helvet
        r'\sansmath',                 # and actually switch maths to sansmath
    ],
})
processed_areas
We have the problem that because some of the samples didn't have any growth at the start, they aren't there. I need to add those points in manually with the measurements of 0 because they aren't just missing data.
I want to get the sampling time loaded in as a column. I could use the label, but since we have the creation time in the file's metadata, I'll use that so that we have the time of day too.
And I want the Venturia isolate in there as a category.
In [8]:
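# Manual annotations keyed by scan image filename: contamination type, colony colour (none recorded), and free-text observations.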
contaminated = {
'2015-08-05-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-06-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-07-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-09-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-09-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-09-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-10-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-10-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-10-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-11-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-11-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-11-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-12-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-12-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-12-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-13-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-13-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-13-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-13-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-14-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-14-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-14-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-14-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-17-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-17-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-17-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-17-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-17-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-17-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-19-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-19-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-19-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-19-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-19-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-19-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-20-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-20-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-20-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-20-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-20-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-20-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-21-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-21-20150731DJ-10-B.jpg': 'Bacterial',
'2015-08-21-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-21-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-21-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-21-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-21-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-26-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-10-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-35-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-26-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-26-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-27-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-10-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-35-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-27-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-27-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-28-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-10-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-35-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-28-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-28-20150731DJ-53-B.jpg': 'Fungal',
'2015-08-29-20150731DJ-2-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-10-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-18-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-26-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-35-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-36-B.jpg': 'Fungal',
'2015-08-29-20150731DJ-51-B.jpg': 'Bacterial',
'2015-08-29-20150731DJ-53-B.jpg': 'Fungal',
}
colour = {}
notes = {
'2015-08-05-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-06-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-07-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-09-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-09-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-09-20150731DJ-53-B.jpg': 'Early fungal contamination.',
'2015-08-10-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-10-20150731DJ-11-B.jpg': 'Beginning of breakout growth.',
'2015-08-10-20150731DJ-12-B.jpg': 'Beginning of breakout growth, from two sides.',
'2015-08-10-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-10-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-11-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-11-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-11-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-12-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-12-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-12-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-13-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-13-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
'2015-08-13-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-13-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-14-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-14-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
'2015-08-14-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-14-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-17-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-17-20150731DJ-7-B.jpg': 'Beginning of white growth from main growth.',
'2015-08-17-20150731DJ-10-B.jpg': 'Beginning of white growth from main growth.',
'2015-08-17-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-17-20150731DJ-22-B.jpg': 'Beginning of white growth from main growth.',
'2015-08-17-20150731DJ-26-B.jpg': 'Bacterial contamination near Venturia.',
'2015-08-17-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-17-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-17-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-19-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-19-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-19-20150731DJ-10-B.jpg': 'White growth from main growth.',
'2015-08-19-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-19-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-19-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-19-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-19-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-19-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-20-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-20-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-20-20150731DJ-10-B.jpg': 'White growth from main growth.',
'2015-08-20-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-20-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-20-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-20-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-20-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-20-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-21-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-21-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-21-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-21-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-21-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-21-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-21-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-21-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-21-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-26-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-26-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-26-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-26-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-26-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-26-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-26-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-26-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-26-20150731DJ-51-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-26-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-27-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-27-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-27-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-27-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-27-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-27-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-27-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-27-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-27-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
'2015-08-27-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-28-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-28-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-28-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-28-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-28-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-28-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-28-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-28-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-28-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
'2015-08-28-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-29-20150731DJ-2-B.jpg': 'Bacterial contamination at plate edge.',
'2015-08-29-20150731DJ-7-B.jpg': 'White growth from main growth.',
'2015-08-29-20150731DJ-10-B.jpg': 'White growth from main growth. Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-29-20150731DJ-18-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-29-20150731DJ-22-B.jpg': 'White growth from main growth.',
'2015-08-29-20150731DJ-26-B.jpg': 'Bacterial contamination in contact with Venturia.',
'2015-08-29-20150731DJ-35-B.jpg': 'Bacterial growth ~ halfway between edge of growth and plate.',
'2015-08-29-20150731DJ-36-B.jpg': 'Fungal contamination, appears to be Venturia.',
'2015-08-29-20150731DJ-51-B.jpg': 'Bacterial growth, close to Venturia.',
'2015-08-29-20150731DJ-53-B.jpg': 'Fungal contamination, appears to be Venturia.',
}
In [9]:
def _image_stem(label):
    """Return *label* without trailing '.tif', '.resampled' or '.jpg' parts.

    ``str.strip('.jpg.resampled.tif')`` removes any of those *characters*
    from both ends rather than the suffix, so it only worked while the sample
    names happened to start and end with characters outside that set.
    """
    suffixes = ('.tif', '.resampled', '.jpg')
    trimmed = True
    while trimmed:
        trimmed = False
        for suffix in suffixes:
            if label.endswith(suffix):
                label = label[:-len(suffix)]
                trimmed = True
    return label


# Load the ImageJ particle measurements (one row per detected particle).
areas = pd.read_csv(
    pjoin(STEPS['calc_areas'], 'ImageJ_measurements_of_masked_images.csv'),
    header=0,  # column names are on the first line (header=False is invalid)
    usecols=[
        'Label', 'Area', 'Perim.',
        'Feret', 'FeretX', 'FeretY',
        'FeretAngle', 'MinFeret'
    ]
)

# Add 0.0 for missing data: images with no detected growth have no row in
# the ImageJ output, but they are true zeros rather than missing values.
known_labels = set(areas['Label'])
temp_df = defaultdict(list)
for file_ in os.listdir(STEPS['calc_areas']):
    stem, extension = os.path.splitext(file_)
    if extension != '.jpg':
        continue
    # For files whose third '-' separated field (the day in names such as
    # 2015-08-05-...) is 02 or 29, only the '.resampled.jpg' images are used.
    if (file_.split('-')[2] in {'02', '29'}
            and not file_.endswith('.resampled.jpg')):
        continue
    label = stem + '.tif'
    if label not in known_labels:
        for key in areas.columns:
            temp_df[key].append(label if key == 'Label' else 0.0)
areas = pd.concat([areas, pd.DataFrame(temp_df)], ignore_index=True)

# Collapse to one row per image: where several particles were measured in
# the same image their measurements are summed, and 'Count' records how many
# rows were combined.
new_rows = []
for label, tab in areas.groupby('Label'):
    row = {}
    for key in tab.columns:
        if key in {"Isolate", "Sample", "Label"}:
            row[key] = tab[key].values[0]
        else:
            row[key] = tab[key].sum()
    row['Count'] = len(tab)
    new_rows.append(row)
areas = pd.DataFrame(new_rows)

# Find the sample number and add it as a new column.
# Yes, taking a fixed field of the label is risky, but the files should all
# follow the same naming scheme.
sample_numbers = [int(label.split('-')[4]) for label in areas['Label']]
areas.insert(0, 'Sample', pd.Series(sample_numbers, dtype="category"))

# Find the isolate of the sample based on the sample numbers: samples are
# numbered from 1 and each isolate owns six consecutive numbers.
isolates = [
    'I37A', 'MNH120', 'I17V',
    'USR5', 'I9A', 'B04',
    'I61A', 'I65V', 'I19A'
]
sample_to_isolate = [isolate for isolate in isolates for _ in range(6)]
isolates = [sample_to_isolate[number - 1] for number in sample_numbers]
areas.insert(0, 'Isolate', pd.Series(isolates, dtype="category"))
# 3 started out with larger inoculum area

# Attach the manual annotations, which are keyed by the '.jpg' image name.
image_names = [_image_stem(label) + '.jpg' for label in areas['Label']]
notes_col = [notes.get(name) for name in image_names]
contamination_col = [contaminated.get(name) for name in image_names]
colour_col = [colour.get(name) for name in image_names]
areas.insert(
    len(areas.columns),
    'Colour',
    pd.Series(colour_col, dtype="category")
)
areas.insert(
    len(areas.columns),
    'Contamination',
    pd.Series(contamination_col, dtype="category")
)
areas.insert(
    len(areas.columns),
    'Notes',
    pd.Series(notes_col)
)

# Find the scan time of the sample from the image file's modification time.
times = [
    datetime.datetime.fromtimestamp(
        os.stat(pjoin(STEPS['calc_areas'], name)).st_mtime
    )
    for name in image_names
]
areas.insert(0, 'Time', pd.DatetimeIndex(times))
areas = areas.set_index('Time')  # Set time as index
areas.to_csv(pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv'))
In [10]:
#for day, tab in areas[areas['Area'] == 0.].groupby([pd.TimeGrouper('D')]):
# print("\n".join(tab.sort(['Sample'])['Label'].values))
In [11]:
# Scatter of every individual area measurement over time, coloured by isolate.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

fig, ax = plt.subplots()
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    ax.plot_date(
        x=group.index.to_pydatetime(),
        y=group['Area'].values,
        label=iso,
        color=cat_colours[i]
    )
plt.legend(loc=2)

# Label every third day, with a minor tick on each day.
ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=90)
plt.show()
In [12]:
# Daily mean colony area per isolate with a +/- 1 standard error band.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'Area'

fig, ax = plt.subplots(figsize=(6, 10))
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D'). Selecting the column before aggregating avoids
    # averaging the text columns.
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
    )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
plt.legend(loc='upper left')
plt.show()
In [13]:
# Daily mean area-to-perimeter ratio per isolate with a +/- 1 standard
# error band.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'AreaPerim'

# Samples with no growth have area 0 and perimeter 0, giving 0/0 = NaN.
# fillna() returns a new Series, so its result must be kept; previously it
# was discarded and the NaNs were never replaced.
area_perim = (areas['Area'] / areas['Perim.']).fillna(0.)
areas.insert(3, 'AreaPerim', area_perim)

fig, ax = plt.subplots()
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D').
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
    )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
plt.legend(loc='upper left')
plt.show()
In [14]:
# Daily mean Feret diameter per isolate with a +/- 1 standard error band.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'Feret'

fig, ax = plt.subplots()
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D'). Selecting the column before aggregating avoids
    # averaging the text columns.
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
    )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
# Anchor the legend's upper-left corner at the axes' upper-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
In [15]:
# Daily mean difference between the Feret and minimum Feret diameters per
# isolate, with a +/- 1 standard error band.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'DiameterDifference'

diameter_difference = (areas['Feret'] - areas['MinFeret'])
areas.insert(3, 'DiameterDifference', diameter_difference)

fig, ax = plt.subplots()
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D'). Selecting the column before aggregating avoids
    # averaging the text columns.
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
    )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
# Anchor the legend's upper-left corner at the axes' upper-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
In [16]:
# Daily mean perimeter per isolate with a +/- 1 standard error band.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'Perim.'

fig, ax = plt.subplots()
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D'). Selecting the column before aggregating avoids
    # averaging the text columns.
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - se.values,
        y2=mean.values + se.values,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        markersize=0.,
        color=cat_colours[i]
    )

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
date_labels = ax.xaxis.get_ticklabels()
plt.setp(date_labels, rotation=60)
# Anchor the legend's upper-left corner at the axes' upper-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()
In [17]:
# Re-express every measurement relative to the first scan of the same
# sample, and record the time elapsed since that first scan.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
# Order by scan time so that position 0 within each sample is its earliest
# scan. sort_index() replaces the removed DataFrame.sort().
areas = areas.sort_index()

# Columns that describe the sample and are copied rather than differenced.
descriptive_columns = {
    'Sample', 'Isolate', 'Label',
    'Colour', 'Notes', 'Contamination'
}

new_areas = []
for sample, sample_tab in areas.groupby('Sample'):
    first_scan = sample_tab.index[0]
    for position in range(len(sample_tab)):
        scan_time = sample_tab.index[position]
        row = {'Elapsed': scan_time - first_scan, 'Time': scan_time}
        for key in sample_tab.columns:
            # Positional access via .iloc replaces the removed .ix indexer.
            value = sample_tab[key].iloc[position]
            if key in descriptive_columns:
                row[key] = value
            else:
                row[key] = value - sample_tab[key].iloc[0]
        new_areas.append(row)

areas = pd.DataFrame(new_areas)
areas = areas.set_index('Time')
# Store elapsed time as a number of seconds. astype('timedelta64[s]') used
# to produce numbers but keeps a timedelta dtype in current pandas.
areas['Elapsed'] = areas['Elapsed'].dt.total_seconds()

diameter_difference = (areas['Feret'] - areas['MinFeret'])
areas.insert(3, 'DiameterDifference', diameter_difference)
In [18]:
# One-way ANOVA of colony area by isolate for every day that has scans.
filename = pjoin(STEPS['processed_areas'], 'Processed_areas_table.csv')
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

anova_tests = []
# pd.Grouper(freq='D') is the supported spelling of the removed
# pd.TimeGrouper('D').
for day, tab in areas.groupby(pd.Grouper(freq='D')):
    if len(tab) == 0:
        continue  # no scans on this day
    lm = ols('Area ~ C(Isolate)', tab).fit()
    a_lm = anova_lm(lm)
    columns = a_lm.columns
    matrix = a_lm.values  # replaces the removed DataFrame.as_matrix()

    # The table is read positionally: row 0 is taken as the isolate term
    # and row 1 as the residual. The 'Residual'/'Isolate' flags mark which
    # is which in the flattened output.
    residual_row = {'Day': day, 'Residual': 1, 'Isolate': 0}
    residual_row.update(zip(columns, matrix[1]))
    isolate_row = {'Day': day, 'Residual': 0, 'Isolate': 1}
    isolate_row.update(zip(columns, matrix[0]))
    anova_tests.append(residual_row)
    anova_tests.append(isolate_row)

anova_tests = pd.DataFrame(anova_tests)
filename = pjoin(STEPS['found_diff'], 'anova_tests.csv')
anova_tests.to_csv(filename, sep='\t')
#anova_tests
In [19]:
def tukeys_group(table):
    """Group isolates whose pairwise comparisons were not rejected.

    Parameters
    ----------
    table : pandas.DataFrame
        One row per pairwise comparison, with columns ``group1``, ``group2``
        and ``reject`` (truthy when the pair was found to differ).

    Returns
    -------
    list of list
        Groups of names. A name is added to every existing group in which it
        has no rejected comparison with any other member, so a name can
        appear in more than one group. The result depends on the order of
        the rows in ``table``.
    """
    # Symmetric lookup: rejected[a][b] == rejected[b][a].
    rejected = defaultdict(dict)
    for _, comparison in table.iterrows():
        first, second = comparison['group1'], comparison['group2']
        verdict = comparison['reject']
        rejected[first][second] = verdict
        rejected[second][first] = verdict

    groups = []
    for seed, partners in rejected.items():
        if not groups:
            groups.append([seed])
        for candidate in partners:
            placed = False
            for members in groups:
                # A list rather than a generator, so every pair is looked up
                # before deciding, as in the explicit loop.
                conflicts = [
                    rejected[candidate][member]
                    for member in members
                    if member != candidate
                ]
                if any(conflicts):
                    continue
                if candidate not in members:
                    members.append(candidate)
                placed = True
            if not placed:
                # Differs from at least one member of every existing group.
                groups.append([candidate])
    return groups
In [20]:
# Tukey HSD comparison of colony area between isolates for every day that
# has scans, collected into one table indexed by day.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
)
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']

tukeys_tests = []
col_names = None
# pd.Grouper(freq='D') is the supported spelling of the removed
# pd.TimeGrouper('D').
for day, tab in areas.groupby(pd.Grouper(freq='D')):
    if len(tab) == 0:
        continue  # no scans on this day
    mod = MultiComparison(tab['Area'], tab['Isolate'])
    hsd = mod.tukeyhsd().summary()
    if col_names is None:
        # The first row of the summary table holds the column headers.
        col_names = ['time'] + hsd.data[0]
    hsd = hsd.data[1:]
    for h in hsd:
        h.insert(0, day.to_pydatetime())
    tukeys_tests.extend(hsd)

tukeys_tests = pd.DataFrame(tukeys_tests, columns=col_names)
tukeys_tests['time'] = pd.DatetimeIndex(tukeys_tests['time'].values)
tukeys_tests = tukeys_tests.set_index('time')  # Set time as index
filename = pjoin(STEPS['found_diff'], 'tukeys_tests.csv')
tukeys_tests.to_csv(filename, sep='\t')
In [21]:
def f_theta(se):
    """Return ``se / sqrt(se**2 + 1) + (1 / se) / (1 + se**-2)``.

    ``se`` may be a float or a float array/Series; it must be non-zero.
    NOTE(review): used by ``z_beta`` as the divisor that rescales a t
    quantile -- confirm the formula against its source.
    """
    return se / np.sqrt(se**2 + 1) + (1 / se) / (1 + se**(-2))


def z_beta(a, se, df):
    """Return the ``a / 2`` quantile of Student's t with ``df`` degrees of
    freedom, divided by ``f_theta(se)``.

    The quantile is a lower-tail one, so the result is negative for
    ``a < 1``. The public ``ppf`` is used instead of the private ``_ppf``,
    which skips argument validation.
    """
    return sp.stats.t.ppf(a / 2, df) / f_theta(se)


def adj_ci(se, n, cmp, alpha=0.05):
    """Return an interval half-width for ``se`` with ``alpha`` split across
    ``cmp`` comparisons.

    Computed as ``z_beta(alpha / cmp, se, n - 1) * se``, where ``n`` is the
    sample size. The sign follows ``z_beta``.
    """
    return z_beta(alpha / cmp, se, n - 1) * se
def nCr(n, r):
    """Return the number of ways to choose ``r`` items from ``n``.

    Integer division keeps the result exact; true division returned a float
    that lost precision for large ``n``. Raises ``ValueError`` (from
    ``math.factorial``) if ``r > n`` or either argument is negative.
    """
    f = math.factorial
    # f(r) * f(n - r) always divides f(n) exactly, so // loses nothing.
    return f(n) // (f(r) * f(n - r))
class myFormatter(mpl.ticker.Formatter):
    """Tick formatter that labels a date axis by time elapsed since a start.

    Parameters
    ----------
    start : float
        POSIX timestamp of the reference instant. It is converted with
        ``datetime.fromtimestamp`` and then tagged as UTC.
    fmt : str
        ``str.format`` template; may use the fields ``{days}`` and
        ``{seconds}`` of the elapsed ``timedelta``.
    **kwargs
        Passed through to ``mpl.ticker.Formatter``.
    """

    def __init__(self, start, fmt='{days}', **kwargs):
        super().__init__(**kwargs)
        reference = datetime.datetime.fromtimestamp(start)
        # Tag as UTC so it can be subtracted from the timezone-aware values
        # produced by num2date().
        self.start = reference.replace(tzinfo=pytz.utc)
        self.fmt = fmt

    def __call__(self, x, pos=None):
        """Return the label for tick value ``x`` (a matplotlib date number)."""
        elapsed = mpl.dates.num2date(x) - self.start
        return self.fmt.format(days=elapsed.days, seconds=elapsed.seconds)
In [22]:
# Daily mean colony area per isolate with a t-based confidence band, an
# x axis in days since the first scan, and isolate names on the right.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
)
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
areas = areas[areas['Contamination'] != 'Bacterial']
y_col = 'Area'

fig, ax = plt.subplots(figsize=(8, 6))
confidence = 0.95
labels = []
for i, (iso, group) in enumerate(areas.groupby('Isolate')):
    # pd.Grouper(freq='D') is the supported spelling of the removed
    # pd.TimeGrouper('D').
    daily = group.groupby(pd.Grouper(freq='D'))[y_col]
    mean = daily.mean()
    se = daily.sem()
    n = daily.count()
    # The lower-tail t quantile is negative, so ``interval`` is negative and
    # y1/y2 below are the upper/lower bounds; the filled band is the same.
    interval = se * sp.stats.t.ppf((1 - confidence)/2, n - 1)
    timestamps = mean.index
    ax.fill_between(
        x=se.index.to_pydatetime(),
        y1=mean.values - interval,
        y2=mean.values + interval,
        color=cat_colours[i],
        alpha=0.4,
        lw=0.
    )
    ax.plot_date(
        x=mean.index.to_pydatetime(),
        y=mean.values,
        linestyle='-',
        label=iso,
        #markersize=0.,
        color=cat_colours[i]
    )
    # Remember where each line ends so it can be labelled on the right axis.
    labels.append((mean.values[-1], iso, cat_colours[i]))

ax.xaxis.set_major_locator(DayLocator(interval=3))
ax.xaxis.set_minor_locator(DayLocator())
# Days are counted from the first day of the last isolate plotted.
ax.xaxis.set_major_formatter(myFormatter(
    start=mean.index.to_pydatetime()[0].timestamp()
))
# The x axis starts at the sixth daily bin.
ax.set_xlim(
    mpl.dates.date2num(timestamps[5]),
    mpl.dates.date2num(timestamps[-1])
)
ax.set_ylim(0, 1000)

# Unpack into ``label_colours`` rather than ``colour`` so the module-level
# ``colour`` annotation dict is not overwritten.
y_pos, labels, label_colours = zip(*labels)
ax2 = ax.twinx()
ax2.set_ylim(*ax.get_ylim())
ax2.set_yticks(y_pos)
ax2.set_yticklabels(labels)
ax2.yaxis.set_tick_params(direction='out')
ax2.grid(False)

ax.set_xlabel('Days post inoculation')
ax.set_ylabel('Area (mm$^2$)')
filename = pjoin(STEPS['found_diff'], 'area_time')
fig.savefig(filename + '.svg')
In [32]:
# Endpoint figure: mean colony area per isolate on the final day with
# standard error and confidence interval bars, Tukey group letters above
# the panel, and a broken y axis.
filename = pjoin(
    STEPS['processed_areas'],
    'Processed_areas_table.csv'
)
areas = pd.read_csv(filename, index_col=0, parse_dates=True)
date = '2015-08-29'
groups = tukeys_group(tukeys_tests[tukeys_tests.index == date])
areas = areas[areas['Contamination'] != 'Bacterial']
# Select the rows of that day; plain ``areas[date]`` row selection has been
# removed from pandas.
areas = areas.loc[date]

confidence = 0.95
area_by_isolate = areas.groupby('Isolate')['Area']
mean = area_by_isolate.mean()
se = area_by_isolate.sem()
n = area_by_isolate.count()
print(n)
isolates = mean.index
# ``interval`` is computed but not plotted; only ``interval2`` is drawn.
interval = se * z_beta((1-confidence), se, n - 1)
interval2 = se * sp.stats.t.ppf((1 - confidence)/2., n - 1)

# Tall upper panel for the data, thin lower panel carrying the zero tick.
fig, (ax, ax2) = plt.subplots(
    2, 1,
    sharex=True,
    gridspec_kw={
        'height_ratios': [33, 1],
        'hspace': 0.05
    }
)

# Left-to-right position of each isolate on the x axis.
order = {
    'I17V': 4,
    'I19A': 5,
    'I37A': 0,
    'I61A': 8,
    'I65V': 7,
    'I9A': 3,
    'B04': 6,
    'MNH120': 2,
    'USR5': 1,
}
index = np.array([order[i] for i in isolates])

# Thin bar: t-based confidence interval.
error = ax.errorbar(
    index + 0.5,
    mean,
    yerr=interval2,
    fmt='o',
    capsize=0,
    ecolor=cat_colours[1],
    markeredgecolor='k',
    markerfacecolor='k',
    markersize=6,
    elinewidth=2,
    zorder=3,
)
# Thick bar on top: standard error.
error = ax.errorbar(
    index + 0.5,
    mean,
    yerr=se,
    fmt='o',
    capsize=0,
    ecolor=cat_colours[1],
    markeredgecolor='k',
    markerfacecolor='k',
    markersize=6,
    elinewidth=4,
    zorder=3,
)

## Draw Broken y-axis
# Short horizontal marks, in axes coordinates, at the corners where the two
# panels face each other.
d = .01
offset = 0  # 0.001
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot(
    (-d - offset, +d - offset),
    (0, 0),
    **kwargs
)  # upper panel, bottom-left corner
ax.plot(
    (1 - d - offset, 1 + d - offset),
    (0, 0),
    **kwargs
)  # upper panel, bottom-right corner
kwargs.update(transform=ax2.transAxes)
ax2.plot(
    (-d - offset, +d - offset),
    (1, 1),
    **kwargs
)  # lower panel, top-left corner
ax2.plot(
    (1 - d - offset, 1 + d - offset),
    (1, 1),
    **kwargs
)  # lower panel, top-right corner

# Add tukeys codes: one letter per group, handed out in reverse so the
# largest group receives the last letter used.
iso_groups = defaultdict(list)
groups.sort(key=len, reverse=True)
codes = list("abcdefghijk")[:len(groups)][::-1]
for code, group in zip(codes, groups):
    for gmem in group:
        iso_groups[gmem].append(code)

max_ngroups = max(
    (len(group) for group in iso_groups.values()),
    default=0
)

# x in data coordinates, y in axes coordinates, so the letters sit just
# above the panel whatever the y limits are.
codes_transform = mpl.transforms.blended_transform_factory(
    ax.transData,
    ax.transAxes,
)
base_ypos = 1.015
y_text_offset = 0.025
if max_ngroups == 1:
    # No isolate has more than one letter: a single row is enough.
    ypos = [base_ypos] * len(codes)
else:
    # Otherwise give each letter its own row.
    ypos = list()
    for code in codes:
        ypos.append(base_ypos)
        base_ypos += y_text_offset
ypos = dict(zip(codes, ypos))

for isolate, group in iso_groups.items():
    x = order[isolate] + 0.5
    for code in group:
        y = ypos[code]
        ax.text(
            x, y, code,
            transform=codes_transform,
            horizontalalignment='center'
        )

ax.set_xticks(index + 0.5)
ax.set_xticklabels(isolates)
ax.set_ylim(175, 1000)
ax2.set_ylim(0, 25)
ax2.set_yticks([0,])
ax.spines['bottom'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax.xaxis.tick_top()
# Must be a boolean: the string 'off' is truthy in current matplotlib and
# would leave the top labels visible.
ax.tick_params(labeltop=False)
ax2.xaxis.tick_bottom()
isolate_labels = ax2.xaxis.get_ticklabels()
plt.setp(isolate_labels, rotation=80)
ax.yaxis.grid(True, zorder=0)
ax.set_ylabel('Colony size (mm$^2$)')

filename = pjoin(STEPS['found_diff'], 'endpoint_intervals')
fig.set_size_inches(6, 6)
fig.savefig(filename + '.svg')
fig.savefig(filename + '.pdf')
fig.savefig(filename + '.png', dpi=600)