notebook.community

Edit and run



In [1]:

    
import sys
import numpy as np
import pandas as pd

sys.path.append('/home/will/PySeqUtils/')
import PlottingTools

fname = '/home/will/KrebbsData/Results.csv'



In [2]:

    
from sklearn.covariance import EllipticEnvelope

data = pd.read_csv(fname, sep = '\t')
def safe_float(val):
    try:
        return float(val)
    except ValueError:
        return np.nan

cytos = ['VEGF','IL-1beta','G-CSF','EGF','IL-10','HGF','FGF-basic',
'IFN-alpha','IL-6','IL-12','Rantes','Eotaxin','IL-13','IL-15',
'IL-17','MIP-1alpha','GM-CSF','MIP-1beta','MCP-1','IL-5',
'IFN-gamma','TNF-alpha','IL-RA','IL-2','IL-7',
'IP-10','IL-2R','MIG','IL-4','IL-8']

for col in cytos:
    data[col] = data[col].map(safe_float)
    try:
        env = EllipticEnvelope().fit(data[col].dropna().values.reshape(-1,1))
        mask = env.predict(data[col].values.reshape(-1,1))
        data[col][mask == -1] = np.nan
    except:
        pass
        
    
    #print mask
    #break



In [3]:

    
pos = dict(zip('ABCDEFGH', range(8)))
def xpos(val):
    _, p = val.split('(')
    return pos[p.split(',')[1][0]]

def ypos(val):
    _, p = val.split('(')
    _, r = p.split(',')
    return int(''.join(l for l in r[:-1] if not l.isalpha()))
    
data['xpos'] = data['Location'].map(xpos)
data['ypos'] = data['Location'].map(ypos)



In [4]:

    
data['NumBad'] = data[cytos].applymap(np.isnan).sum(axis=1)



In [5]:

    
counts = pd.pivot_table(data, rows = 'xpos', cols = 'ypos', values = 'NumBad', aggfunc = 'sum').fillna(len(cytos))



In [6]:

    
import matplotlib.pyplot as plt

fig = PlottingTools.make_heatmap_df(counts, figsize = (10,4), colormap = 'copper_r')
plt.colorbar()









    Out[6]:





<matplotlib.colorbar.Colorbar instance at 0x513a0e0>



In [7]:

    
data.head().T









    Out[7]:






  
    
      
      0
      1
      2
      3
      4
    
  
  
    
      Location
        1(1,A1)
        2(1,B1)
        3(1,C1)
        4(1,D1)
         5(1,E1)
    
    
      Sample
       Standard
       Standard
       Standard
       Standard
        Standard
    
    
      Age
           skip
           skip
           skip
           skip
            skip
    
    
      Patient
           skip
           skip
           skip
           skip
            skip
    
    
      Exposed
           skip
           skip
           skip
           skip
            skip
    
    
      CellLine
           skip
           skip
           skip
           skip
            skip
    
    
      WellSide
           skip
           skip
           skip
           skip
            skip
    
    
      TimePoint
           skip
           skip
           skip
           skip
            skip
    
    
      VEGF
            NaN
            NaN
       53.18195
       5.638355
        1.565731
    
    
      IL-1beta
            NaN
            NaN
            NaN
            NaN
         39.5339
    
    
      G-CSF
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      EGF
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      IL-10
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      HGF
            NaN
       797.8447
       212.5651
       19.65983
        5.840041
    
    
      FGF-basic
            NaN
            NaN
            NaN
       40.03513
        16.44776
    
    
      IFN-alpha
            NaN
            NaN
            NaN
       162.2822
        59.11853
    
    
      IL-6
            NaN
            NaN
            NaN
       68.45924
          19.877
    
    
      IL-12
            NaN
            NaN
            NaN
       169.1865
        64.43608
    
    
      Rantes
            NaN
            NaN
            NaN
       74.29918
        27.08544
    
    
      Eotaxin
            NaN
            NaN
            NaN
       18.17663
        5.845376
    
    
      IL-13
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      IL-15
            NaN
            NaN
            NaN
       332.4042
         127.094
    
    
      IL-17
            NaN
            NaN
            NaN
            NaN
        109.8124
    
    
      MIP-1alpha
            NaN
            NaN
            NaN
            NaN
        65.67461
    
    
      GM-CSF
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      MIP-1beta
            NaN
            NaN
            NaN
       81.03217
        23.25415
    
    
      MCP-1
            NaN
            NaN
       2819.832
       264.4408
        83.74506
    
    
      IL-5
            NaN
            NaN
            NaN
       72.03586
        21.33605
    
    
      IFN-gamma
            NaN
            NaN
            NaN
            NaN
        67.72917
    
    
      TNF-alpha
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      IL-RA
            NaN
            NaN
            NaN
       492.8668
        191.1588
    
    
      IL-2
            NaN
            NaN
            NaN
            NaN
        82.19325
    
    
      IL-7
            NaN
            NaN
       755.6191
       84.30869
        26.54168
    
    
      IP-10
            NaN
       67.14169
       27.58456
       2.790785
       0.9082555
    
    
      IL-2R
            NaN
            NaN
            NaN
       227.0333
        75.35008
    
    
      MIG
            NaN
       433.8882
       151.1393
            NaN
             NaN
    
    
      IL-4
            NaN
            NaN
            NaN
            NaN
             NaN
    
    
      IL-8
            NaN
            NaN
       1187.953
       144.4454
        45.30223
    
    
      Total Events
           4698
           3905
           4423
           4878
            4395
    
    
      xpos
              0
              1
              2
              3
               4
    
    
      ypos
              1
              1
              1
              1
               1
    
    
      NumBad
             30
             27
             23
             13
               8



In [8]:

    
grouped = data.groupby(['CellLine', 'Age', 'Patient', 'Exposed', 'WellSide', 'TimePoint'], as_index=False).mean()



In [9]:

    
from itertools import combinations
from scipy.stats import ttest_ind
sf_data = data[(data['CellLine'] == 'Seminal Fluid')] 
old_sf =  sf_data['Age'] == 'Aged'
young_sf = sf_data['Age'] == 'Young'
pooled_sf = sf_data['Age'] == 'Pooled'

comp = [('Old', old_sf),
        ('Young', young_sf),
        ('Pooled', pooled_sf)]

results = []
for (g1_name, g1), (g2_name, g2) in combinations(comp, 2):
    for cyto in cytos:
        g1vals = sf_data[g1][cyto].dropna()
        g2vals = sf_data[g2][cyto].dropna()
        if (len(g1vals) > 2) and (len(g2vals) > 2):
            tstat, pval = ttest_ind(g1vals, g2vals)
            results.append({
                            'Group1':g1_name,
                            'Group2':g2_name,
                            'Tstat': tstat,
                            'Pval': pval,
                            'G1N':len(g1vals),
                            'G2N':len(g2vals),
                            'G1mean':g1vals.mean(),
                            'G2mean':g2vals.mean(),
                            'Cyto': cyto
                            })
    
res = pd.DataFrame(results)



In [10]:

    
from statsmodels.graphics.boxplots import beanplot
labels = ['Old', 'Young', 'Pooled']
for cyto in cytos:
    bins = []
    labs = []
    for gname, gmask in comp:
        td = sf_data[cyto][gmask].dropna()
    
        if len(td) > 2:
            bins.append(td.copy())
            labs.append(gname)
    if len(bins) > 1:
        fig = plt.figure()
        ax = plt.subplot(111)
        beanplot(bins, labels=labs, ax=ax)
        ax.set_ylim([0, ax.get_ylim()[1]])
        ax.set_ylabel(cyto)
        ax.set_title(cyto)
        plt.savefig('/home/will/KrebbsData/draft_figures/violin_plot_%s.png' % cyto)
        plt.close()



In [11]:

    
print res[(res['Pval'] < 0.1)][['Cyto','Group1', 'G1mean','Group2','G2mean','Pval']]









    



    Cyto Group1       G1mean  Group2       G2mean      Pval
11  IL-5    Old    53.775253   Young    33.959186  0.084954
16   MIG    Old   769.645050   Young   494.497445  0.070580
17  IL-8    Old  2513.067616   Young  1611.837118  0.080708
47  IL-5  Young    33.959186  Pooled    88.801680  0.000028



In [12]:

    
wanted_cl = set(['Ect1', 'End1', 'VK2'])
cl_data = data[data['CellLine'].map(lambda x: x in wanted_cl)]
exposed = cl_data['Exposed'].unique()
exposed_colors = 'kbrgm'
wellsides = ['Top', 'Bottom']



In [13]:

    
for cyto in cytos:
    fig, axs = plt.subplots(3,2, figsize = (10,10), sharey = True)
    for row, cl in enumerate(wanted_cl):
        for col, side in enumerate(wellsides):
            ax = axs[row, col]
            if ax.is_first_row():
                ax.set_title('%s %s' % (cyto, side))
            if ax.is_first_col():
                ax.set_ylabel('log(%s exp)' % cl)
            tmask = data['CellLine'] == cl
            tmask &= data['WellSide'] == side
            data['TimePoint'][tmask] = data['TimePoint'][tmask].map(safe_float)
            tmp = pd.pivot_table(data[tmask], cols = ['Exposed', 'TimePoint'], values = cyto, rows = 'Location')
            tmp.applymap(np.log10).boxplot(ax=ax, rot = 90)
            #data[tmask].pivot('Exposed', 'TimePoint', cyto).boxplot(ax=ax)
            #data[tmask].boxplot(column = cyto, by = ['Exposed', 'TimePoint'], ax = ax)
    fig.tight_layout()
    plt.savefig('/home/will/KrebbsData/draft_figures/boxplot_%s.png' % cyto)
    plt.close()



In [14]:

    
cl_data['nTimePoint'] = cl_data['TimePoint'].map(str)
gdata = cl_data.groupby(['nTimePoint', 'Exposed', 'CellLine', 'WellSide']).mean()



In [15]:

    
from scipy.stats import norm
effect = np.log2(gdata.ix['4']/gdata.ix['24'])[cytos]
mock_effects = effect.ix['Mock'].values.reshape(-1,1)
mock_effects[np.isnan(mock_effects)] = []
rvs = norm(*norm.fit(np.abs(mock_effects)))

pval_func = lambda x: rvs.sf(abs(x))
pvals = effect.applymap(pval_func)



In [16]:

    
from statsmodels.stats.multitest import multipletests
tpvals = pvals.values.flatten()
tpvals[np.isnan(tpvals)] = []
_, bh_pvals, _, _ = multipletests(tpvals, alpha = 0.05, method = 'bonferroni')
tdict = dict(zip(tpvals, bh_pvals))
tdict[np.nan] = np.nan

bon_pvals = pvals.applymap(tdict.get)
wmask = bon_pvals < 0.1

print effect.where(wmask)['IFN-gamma'].dropna()
print bon_pvals.where(wmask)['IFN-gamma'].dropna()









    



Exposed  CellLine  WellSide
Mock     Ect1      Bottom     -11.611741
         End1      Top         -9.283438
         VK2       Top         -9.698107
SF15     Ect1      Bottom      11.505369
SFV      Ect1      Bottom       8.699213
Name: IFN-gamma, dtype: float64
Exposed  CellLine  WellSide
Mock     Ect1      Bottom      0.000011
         End1      Top         0.005511
         VK2       Top         0.002012
SF15     Ect1      Bottom      0.000014
SFV      Ect1      Bottom      0.021059
Name: IFN-gamma, dtype: float64



In [18]:

    
fname = '/home/will/KrebbsData/MouseResults.csv'
mouse_data = pd.read_csv(fname, sep = '\t')

mouse_cytos = ['IL-1beta','IL-10','IL-6','IL-12','GM-CSF','IL-5', 'IFN-gamma','TNF-alpha','IL-2','IL-4']

for col in mouse_cytos:
    mouse_data[col] = mouse_data[col].map(safe_float)
    try:
        env = EllipticEnvelope().fit(mouse_data[col].dropna().values.reshape(-1,1))
        mask = env.predict(mouse_data[col].values.reshape(-1,1))
        mouse_data[col][mask == -1] = np.nan
    except:
        pass



In [37]:

    
check_cytos = ['IL-1beta','IL-6','IL-5','TNF-alpha','IL-2','IL-4']
wanted_reag = ['CpG', 'N-9', 'Imiquimod'] #PBS on all
wanted_time = ['2', '6', '12', '24']
control_mask = mouse_data['Reagent'] == 'PBS'
for cyto in check_cytos:
    fig, axs = plt.subplots(1,3, figsize = (10,5), sharey = True)
    for ax, reag in zip(axs.flatten(), wanted_reag):
        boxes = [mouse_data[cyto][control_mask].dropna().values]
        reag_mask = mouse_data['Reagent'] == reag
        for tm in wanted_time:
            boxes.append(mouse_data[cyto][reag_mask & (mouse_data['TimeGroup']==tm)].values)
        ax.boxplot(boxes)
        if ax.is_first_col():
            ax.set_ylabel(cyto)
        ax.set_title(reag)
        ax.set_xticklabels(['PBS']+wanted_time)
        
    fig.tight_layout()
    plt.savefig('/home/will/KrebbsData/draft_figures/mouse_boxplot_%s.png' % cyto)
    plt.close()



In [33]:









    Out[33]:





True



In [ ]:

	0	1	2	3	4
Location	1(1,A1)	2(1,B1)	3(1,C1)	4(1,D1)	5(1,E1)
Sample	Standard	Standard	Standard	Standard	Standard
Age	skip	skip	skip	skip	skip
Patient	skip	skip	skip	skip	skip
Exposed	skip	skip	skip	skip	skip
CellLine	skip	skip	skip	skip	skip
WellSide	skip	skip	skip	skip	skip
TimePoint	skip	skip	skip	skip	skip
VEGF	NaN	NaN	53.18195	5.638355	1.565731
IL-1beta	NaN	NaN	NaN	NaN	39.5339
G-CSF	NaN	NaN	NaN	NaN	NaN
EGF	NaN	NaN	NaN	NaN	NaN
IL-10	NaN	NaN	NaN	NaN	NaN
HGF	NaN	797.8447	212.5651	19.65983	5.840041
FGF-basic	NaN	NaN	NaN	40.03513	16.44776
IFN-alpha	NaN	NaN	NaN	162.2822	59.11853
IL-6	NaN	NaN	NaN	68.45924	19.877
IL-12	NaN	NaN	NaN	169.1865	64.43608
Rantes	NaN	NaN	NaN	74.29918	27.08544
Eotaxin	NaN	NaN	NaN	18.17663	5.845376
IL-13	NaN	NaN	NaN	NaN	NaN
IL-15	NaN	NaN	NaN	332.4042	127.094
IL-17	NaN	NaN	NaN	NaN	109.8124
MIP-1alpha	NaN	NaN	NaN	NaN	65.67461
GM-CSF	NaN	NaN	NaN	NaN	NaN
MIP-1beta	NaN	NaN	NaN	81.03217	23.25415
MCP-1	NaN	NaN	2819.832	264.4408	83.74506
IL-5	NaN	NaN	NaN	72.03586	21.33605
IFN-gamma	NaN	NaN	NaN	NaN	67.72917
TNF-alpha	NaN	NaN	NaN	NaN	NaN
IL-RA	NaN	NaN	NaN	492.8668	191.1588
IL-2	NaN	NaN	NaN	NaN	82.19325
IL-7	NaN	NaN	755.6191	84.30869	26.54168
IP-10	NaN	67.14169	27.58456	2.790785	0.9082555
IL-2R	NaN	NaN	NaN	227.0333	75.35008
MIG	NaN	433.8882	151.1393	NaN	NaN
IL-4	NaN	NaN	NaN	NaN	NaN
IL-8	NaN	NaN	1187.953	144.4454	45.30223
Total Events	4698	3905	4423	4878	4395
xpos	0	1	2	3	4
ypos	1	1	1	1	1
NumBad	30	27	23	13	8