In [31]:
from pandas import *
import os, os.path
import matplotlib.pyplot as plt

os.chdir('/home/will/BloodBrainBarierProject/')

In [32]:
data = read_csv('AdhesionDonerData_newer.csv', sep = ',')
print data.to_string()


   CellType  Untreated          24         48          72       IL-1ß
0       CD3 -18.159659   35.624619  35.124924   53.138330  162.352224
1       CD3   0.268129   20.706886  30.761731   81.889092  192.760512
2       CD3  17.891530   33.808653  25.716027   81.462523   89.018891
3       CD3   1.516389   32.135775  17.146856    9.086667  201.504724
4       CD3  -7.465298   21.521054  47.696256   47.742914  128.799720
5       CD3   5.948909   27.691590  37.863058   18.150006  147.877320
6       CD3  16.707022   90.460048  51.315577   52.413236  178.127522
7       CD3  -9.136400  153.123487  56.400323   -1.888620   76.481033
8       CD3  -7.570621  112.558515  42.841001   15.932203         NaN
9      CD14 -16.505653   17.882837  38.348750  111.880781  202.418636
10     CD14  20.829051   -9.640288  34.114423  114.607742  234.224049
11     CD14  -4.323398   42.953066  26.255567  125.844467   54.799589
12     CD14  -5.763270  -11.782409 -17.301081    9.325158   60.778656
13     CD14   3.410846  -22.816371  -8.248701  -11.292086   25.847357
14     CD14   2.352424  -16.614629  -3.000552   61.611888  111.956286
15     CD14  40.538562   19.427270  26.863000    2.497559  121.124851
16     CD14   4.027009   37.325089  35.429548   19.909969   81.066276
17     CD14 -44.565571         NaN  47.280074         NaN         NaN

In [33]:
fig, axes = plt.subplots(1,2, figsize = (10,5), sharey = True)

for ax, ct in zip(axes.flatten(), ['CD3', 'CD14']):
    data[data['CellType'] == ct].boxplot(ax = ax)
    ax.set_title(ct)



In [76]:
cor_vals


Out[76]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 432 entries, 0 to 431
Data columns:
MFI	Number of adhering cells	Grouping	Plot Title    432  non-null values
dtypes: object(1)

In [77]:
cor_vals = read_csv('CorrData.csv', sep = '\t')
cor_vals['NGrouping'] = cor_vals['Grouping'].map(lambda x: x.split('-')[0])
cor_vals['Donor'] = cor_vals['Grouping'].map(lambda x: x.split('-')[1])
print cor_vals
print cor_vals['Grouping'].unique()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 432 entries, 0 to 431
Data columns:
MFI                         426  non-null values
Number of adhering cells    432  non-null values
Grouping                    432  non-null values
Plot Title                  432  non-null values
NGrouping                   432  non-null values
Donor                       432  non-null values
dtypes: float64(1), int64(1), object(4)
[untreated-1 untreated-2 untreated-3 m24-1 m24-2 m24-3 m48-1 m48-2 m48-3
 m72-1 m72-2 m72-3]

In [54]:
tmp = cor_vals.groupby(['Plot Title', 'Grouping']).agg({'MFI':'mean', 'Number of adhering cells':'mean', 'NGrouping':'first', 'Donor':'first'})
print tmp.head()


                        Donor         MFI NGrouping  Number of adhering cells
Plot Title     Grouping                                                      
CD14 ALCAM MFI m24-1        1  816.000000       m24              17085.666667
               m24-2        2  701.666667       m24              24524.000000
               m24-3        3  851.333333       m24              48832.333333
               m48-1        1  797.000000       m48              19397.666667
               m48-2        2  682.333333       m48              26758.000000

In [71]:
clist = dict([('untreated', 'o'),
         ('m24', '+'),
         ('m48', '*'),
         ('m72', 'D'),
        ('1', 'r'),
        ('2', 'g'),
        ('3', 'b')])

plots = sorted(cor_vals['Plot Title'].unique())
results = []
for p in plots:
    plt.figure(figsize = (10,10))
    
    plt.hold(True)
    tdata = tmp.ix[p]
    for _, row in tdata.iterrows():
        
        plt.scatter(row['MFI'], row['Number of adhering cells'], 
                    marker = clist[row['NGrouping']], color = clist[row['Donor']],
                    s = 100)
    for donor, df in tdata.groupby('Donor'):
        m, b, rqs, pval, _ = linregress(df['MFI'], df['Number of adhering cells'])
        results.append((p, donor, m, b, rqs, pval))
        xpos = np.linspace(df['MFI'].min()*0.9, df['MFI'].max()*1.1, 10)
        ypos = m*xpos+b
        plt.plot(xpos, ypos, color = clist[donor])
    
    plt.title(p)
    fname = 'TrendLines-'  + p.replace(' ', '-') + '.png'
    plt.hold(False)
    
    plt.savefig(fname)



In [11]:
from scipy.stats import linregress

res = linregress(tmp['Expressors'].values, tmp['Adhesor'].values)
print res


(0.27565964253649666, 6042.5838424335188, 0.5958938254447731, 0.040884470555590073, 0.11747732357486251)

In [68]:
resdf = DataFrame(results, columns = ['Anal', 'Donor', 'm', 'b', 'RSquared', 'Pval'])

In [70]:
resdf.to_excel('trend_results.xls')

In [86]:
four_point = cor_vals.groupby(['Plot Title', 'NGrouping']).mean()
plots = sorted(cor_vals['Plot Title'].unique())
four_res = []
for p in plots:
    m, b, rqs, pval, _ = linregress(four_point.ix[p]['MFI'], four_point.ix[p]['Number of adhering cells'])
    four_res.append((p, rqs, pval))
four_df = DataFrame(four_res, columns = ['Anal', 'R^2', 'Pval'])
print four_df
four_df.to_excel('FourPoints.xls')


                     Anal       R^2      Pval
0          CD14 ALCAM MFI  0.638014  0.361986
1   CD14 ALCAM expression  0.682367  0.317633
2           CD14 ICAM MFI  0.528092  0.471908
3    CD14 ICAM expression  0.558902  0.441098
4           CD14 VCAM MFI -0.210707  0.789293
5    CD14 VCAM expression  0.605162  0.394838
6           CD3 ALCAM MFI  0.982413  0.017587
7    CD3 ALCAM expression  0.988345  0.011655
8            CD3 ICAM MFI  0.961525  0.038475
9     CD3 ICAM expression  0.982530  0.017470
10           CD3 VCAM MFI -0.257713  0.742287
11    CD3 VCAM expression  0.997783  0.002217

In [ ]: