Goal: Find out number of isomers in husermet data

Then look into how many can be found redundant. Start with positive ion mode


In [17]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import seaborn as sns
%matplotlib inline

In [2]:
# import the data
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = ('/revo_healthcare/data/processed/Husermet_MTBLS97/'+
                'Husermet_UPLCMS_positive_ion_mode.xlsx')
metadata = pd.read_excel(local_path+project_path, sheetname=1,
                        index_col = 0)
peaks = pd.read_excel(local_path+project_path, sheetname=2, index_col=0)
# samples x features
df = pd.read_excel(local_path+project_path, sheetname=3,
                  dtype=np.float64)
# Replace X from df column labels
df.columns = pd.Series([i.replace('X', '') for i in df.columns], 
                       dtype='Int64')

In [243]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1189 entries, 0 to 1188
Columns: 1024 entries, 1000 to 4160
dtypes: float64(1024)
memory usage: 9.3 MB

In [244]:
# Peaks is all the features detected, pre-QC, it seems. Select
# Only those that made it to the dataframe.
print 'df columns', df.columns
print 'peak index', peaks.index

# Sanity check that all the peaks are accounted for in the
# peaklist
for i in df.columns:
    if i not in peaks.index:
        print ("Oh shit, you couldn't find one of the"+
               "df columns in the peaklist index")
        raise hell
    else:
        print "Found {i}".format(i=i)


df columns Int64Index([1000, 1008, 1010, 1013, 1017, 1022, 1026, 1028,  103, 1030,
            ...
            4138,  414, 4140, 4141, 4144, 4147, 4151, 4152, 4153, 4160],
           dtype='int64', length=1024)
peak index Int64Index([  48,   59,   63,   76,   79,   82,   91,   93,  103,  109,
            ...
            7757, 7758, 7761, 7765, 7776, 7780, 7784, 7787, 7798, 7805],
           dtype='int64', name=u'idx', length=2178)
Found 1000
Found 1008
Found 1010
Found 1013
Found 1017
Found 1022
Found 1026
Found 1028
Found 103
Found 1030
Found 1036
Found 1037
Found 1042
Found 1048
Found 1055
Found 1056
Found 1062
Found 1069
Found 1072
Found 1073
Found 1079
Found 1083
Found 1087
Found 1089
Found 109
Found 1090
Found 1093
Found 1099
Found 1100
Found 1101
Found 1102
Found 1108
Found 1110
Found 112
Found 1123
Found 1124
Found 1126
Found 1133
Found 1134
Found 1141
Found 1142
Found 1147
Found 1149
Found 1150
Found 1161
Found 1169
Found 1172
Found 1178
Found 1184
Found 1191
Found 1203
Found 1207
Found 1208
Found 1210
Found 1219
Found 1221
Found 1226
Found 1238
Found 124
Found 1260
Found 1262
Found 1263
Found 1268
Found 1271
Found 1274
Found 1275
Found 1286
Found 1287
Found 1288
Found 129
Found 1296
Found 1297
Found 1299
Found 1301
Found 1303
Found 1304
Found 1305
Found 1306
Found 1311
Found 1314
Found 1317
Found 1321
Found 1322
Found 1323
Found 1326
Found 1331
Found 1336
Found 1339
Found 134
Found 1344
Found 1345
Found 1351
Found 1359
Found 1362
Found 1363
Found 1364
Found 1365
Found 1368
Found 1374
Found 1379
Found 1381
Found 1385
Found 1387
Found 1388
Found 1389
Found 139
Found 1397
Found 1403
Found 1409
Found 142
Found 1423
Found 1425
Found 1426
Found 143
Found 1436
Found 1448
Found 1455
Found 1456
Found 1457
Found 1459
Found 1464
Found 1467
Found 1468
Found 147
Found 1472
Found 1473
Found 1478
Found 1480
Found 1481
Found 1484
Found 1485
Found 1486
Found 1490
Found 1491
Found 1493
Found 1494
Found 1500
Found 1501
Found 1502
Found 1503
Found 1506
Found 1507
Found 1509
Found 1510
Found 1512
Found 1516
Found 1517
Found 1519
Found 1521
Found 1522
Found 1525
Found 1532
Found 1534
Found 1537
Found 1542
Found 1544
Found 1546
Found 1548
Found 1550
Found 1552
Found 1554
Found 1558
Found 1559
Found 1560
Found 1563
Found 1571
Found 1573
Found 1579
Found 1583
Found 1584
Found 1585
Found 1586
Found 1590
Found 1595
Found 1596
Found 1598
Found 1606
Found 1607
Found 1609
Found 1610
Found 1614
Found 1615
Found 1617
Found 1619
Found 1624
Found 1625
Found 1628
Found 1629
Found 1634
Found 1638
Found 1645
Found 1651
Found 1652
Found 1654
Found 1656
Found 1657
Found 166
Found 1666
Found 1667
Found 1674
Found 1676
Found 1680
Found 1686
Found 1688
Found 1689
Found 1692
Found 1694
Found 1697
Found 1698
Found 1700
Found 1711
Found 1712
Found 1715
Found 1716
Found 172
Found 1727
Found 1729
Found 1736
Found 1737
Found 1738
Found 1739
Found 1740
Found 1741
Found 1746
Found 1748
Found 1752
Found 1756
Found 1760
Found 1764
Found 1766
Found 1774
Found 1775
Found 1777
Found 178
Found 1781
Found 1784
Found 1786
Found 179
Found 1792
Found 1796
Found 1797
Found 1799
Found 1810
Found 1815
Found 1825
Found 1826
Found 1830
Found 1832
Found 1839
Found 1840
Found 1843
Found 1847
Found 1849
Found 1850
Found 1851
Found 1852
Found 1857
Found 1860
Found 1865
Found 1866
Found 1870
Found 1873
Found 1875
Found 1879
Found 188
Found 1885
Found 1886
Found 1895
Found 1898
Found 1900
Found 1904
Found 1905
Found 1906
Found 1909
Found 1910
Found 1915
Found 1916
Found 1917
Found 1919
Found 1923
Found 1933
Found 1934
Found 1935
Found 1936
Found 1940
Found 1943
Found 1945
Found 1948
Found 195
Found 1950
Found 1951
Found 1952
Found 1955
Found 1956
Found 1957
Found 1961
Found 1962
Found 1970
Found 1973
Found 1977
Found 1983
Found 1987
Found 1998
Found 1999
Found 2004
Found 2005
Found 2007
Found 201
Found 2013
Found 2014
Found 2015
Found 2018
Found 2019
Found 2020
Found 2028
Found 2033
Found 2037
Found 2039
Found 205
Found 2052
Found 2053
Found 2055
Found 2059
Found 2060
Found 2061
Found 2064
Found 2065
Found 2068
Found 2069
Found 2087
Found 2098
Found 2101
Found 2102
Found 2105
Found 2111
Found 2117
Found 2129
Found 2131
Found 2136
Found 2139
Found 214
Found 2144
Found 2145
Found 2146
Found 2147
Found 215
Found 2151
Found 2152
Found 2156
Found 2158
Found 2160
Found 2161
Found 2163
Found 2170
Found 2171
Found 2174
Found 2175
Found 2186
Found 2189
Found 2196
Found 2203
Found 2205
Found 2208
Found 221
Found 2212
Found 2214
Found 2220
Found 2222
Found 2223
Found 2224
Found 2231
Found 2235
Found 2238
Found 2239
Found 2243
Found 2246
Found 2247
Found 2250
Found 2253
Found 2256
Found 2259
Found 226
Found 2263
Found 2269
Found 227
Found 2280
Found 2285
Found 2289
Found 2290
Found 2297
Found 2299
Found 2306
Found 2307
Found 2310
Found 2311
Found 2312
Found 2315
Found 2318
Found 2323
Found 2324
Found 2326
Found 2328
Found 2332
Found 2333
Found 2335
Found 2337
Found 234
Found 2348
Found 2354
Found 2356
Found 2359
Found 236
Found 2369
Found 2370
Found 2374
Found 2375
Found 2376
Found 2377
Found 2388
Found 239
Found 2394
Found 2399
Found 240
Found 2400
Found 2402
Found 2403
Found 2405
Found 2412
Found 2414
Found 2415
Found 2416
Found 2426
Found 2432
Found 2437
Found 2438
Found 2439
Found 2442
Found 2443
Found 2444
Found 2446
Found 2450
Found 2458
Found 2461
Found 2462
Found 2464
Found 2470
Found 2473
Found 2475
Found 2479
Found 2482
Found 2485
Found 2488
Found 2491
Found 2492
Found 2494
Found 2497
Found 2503
Found 2506
Found 2507
Found 2508
Found 2509
Found 2516
Found 2517
Found 2521
Found 2527
Found 2531
Found 2533
Found 2537
Found 2541
Found 2548
Found 2550
Found 2551
Found 2555
Found 2558
Found 2559
Found 256
Found 2561
Found 2564
Found 2567
Found 2568
Found 257
Found 2571
Found 2579
Found 258
Found 2582
Found 2583
Found 259
Found 2593
Found 2595
Found 2596
Found 2598
Found 2599
Found 2602
Found 2606
Found 2607
Found 2608
Found 2610
Found 2612
Found 2615
Found 2616
Found 2618
Found 2619
Found 2620
Found 2626
Found 2632
Found 2633
Found 2636
Found 2642
Found 2643
Found 2644
Found 2645
Found 2646
Found 2651
Found 2653
Found 2654
Found 2657
Found 2659
Found 2665
Found 2666
Found 2667
Found 2669
Found 2670
Found 2679
Found 2681
Found 2682
Found 2683
Found 2687
Found 2689
Found 269
Found 2691
Found 2692
Found 2694
Found 2698
Found 2699
Found 2701
Found 2702
Found 2704
Found 2707
Found 2708
Found 2709
Found 2717
Found 2718
Found 2725
Found 2728
Found 2729
Found 2730
Found 2733
Found 274
Found 2743
Found 2745
Found 2747
Found 2750
Found 2752
Found 2754
Found 2755
Found 2762
Found 2766
Found 2777
Found 2779
Found 2782
Found 2784
Found 2787
Found 2788
Found 2789
Found 2791
Found 2794
Found 2797
Found 2798
Found 2799
Found 2801
Found 2802
Found 2810
Found 2811
Found 2814
Found 2818
Found 2821
Found 2822
Found 2826
Found 2828
Found 283
Found 2832
Found 2834
Found 2837
Found 2838
Found 2839
Found 2841
Found 2844
Found 2845
Found 2848
Found 2850
Found 2855
Found 2857
Found 2858
Found 2861
Found 2863
Found 2865
Found 2867
Found 2869
Found 2870
Found 2872
Found 2874
Found 2875
Found 2876
Found 2881
Found 2884
Found 2885
Found 2888
Found 2891
Found 2892
Found 2900
Found 2903
Found 2905
Found 2906
Found 2907
Found 2909
Found 291
Found 2913
Found 2917
Found 2918
Found 2921
Found 2923
Found 2925
Found 2929
Found 2930
Found 2932
Found 2935
Found 2940
Found 2942
Found 2944
Found 2947
Found 2950
Found 2953
Found 2954
Found 2957
Found 2958
Found 296
Found 2963
Found 2966
Found 2971
Found 2977
Found 2980
Found 2982
Found 2991
Found 2992
Found 2999
Found 3000
Found 3004
Found 3009
Found 3010
Found 3012
Found 3015
Found 3021
Found 3022
Found 3024
Found 3025
Found 3029
Found 3030
Found 3031
Found 304
Found 3045
Found 3046
Found 3047
Found 3052
Found 3058
Found 3060
Found 3064
Found 3069
Found 3074
Found 3076
Found 3077
Found 3078
Found 3080
Found 3082
Found 3085
Found 3094
Found 3096
Found 3097
Found 3103
Found 3104
Found 3110
Found 3111
Found 3113
Found 3118
Found 3120
Found 3122
Found 3124
Found 3125
Found 3127
Found 3133
Found 3135
Found 3136
Found 3138
Found 3143
Found 3144
Found 3145
Found 3154
Found 3158
Found 3162
Found 3163
Found 3164
Found 3166
Found 3169
Found 317
Found 3172
Found 3174
Found 3176
Found 318
Found 3180
Found 3181
Found 3182
Found 3184
Found 3189
Found 319
Found 3192
Found 3197
Found 3200
Found 3201
Found 3203
Found 3204
Found 3206
Found 3207
Found 321
Found 3210
Found 3212
Found 3219
Found 3221
Found 3225
Found 3239
Found 3240
Found 3244
Found 3248
Found 3252
Found 3255
Found 3257
Found 3259
Found 3263
Found 3266
Found 3269
Found 3279
Found 3280
Found 3286
Found 3291
Found 3296
Found 330
Found 3300
Found 3304
Found 3305
Found 3307
Found 3308
Found 3309
Found 3312
Found 3321
Found 3322
Found 3323
Found 3325
Found 3337
Found 3341
Found 3343
Found 3348
Found 3350
Found 3354
Found 3355
Found 3357
Found 3361
Found 3365
Found 3366
Found 3367
Found 3370
Found 3376
Found 3378
Found 3379
Found 338
Found 3383
Found 3388
Found 3390
Found 3391
Found 3399
Found 3403
Found 3410
Found 3417
Found 3419
Found 3428
Found 3433
Found 3437
Found 3441
Found 3442
Found 3443
Found 3448
Found 3451
Found 3453
Found 3456
Found 3467
Found 347
Found 3475
Found 3480
Found 3487
Found 3488
Found 3492
Found 3493
Found 3495
Found 3500
Found 3503
Found 3505
Found 351
Found 3513
Found 3515
Found 3518
Found 352
Found 3522
Found 3523
Found 3525
Found 3534
Found 3536
Found 3540
Found 3542
Found 3547
Found 3549
Found 355
Found 3550
Found 3554
Found 3555
Found 3559
Found 3563
Found 3566
Found 3567
Found 3568
Found 3569
Found 3572
Found 3573
Found 3574
Found 3576
Found 3579
Found 3585
Found 3586
Found 3591
Found 3592
Found 3597
Found 3599
Found 3601
Found 3602
Found 3605
Found 3608
Found 3611
Found 3612
Found 3615
Found 3623
Found 3628
Found 3637
Found 3638
Found 3640
Found 3642
Found 3643
Found 3645
Found 3646
Found 3650
Found 3655
Found 3656
Found 366
Found 3660
Found 3661
Found 3663
Found 3667
Found 3668
Found 3669
Found 3672
Found 3674
Found 3682
Found 3683
Found 3688
Found 3689
Found 3691
Found 3702
Found 3704
Found 3709
Found 371
Found 3711
Found 3713
Found 3722
Found 3724
Found 3730
Found 3733
Found 3735
Found 3736
Found 3741
Found 3742
Found 3743
Found 3745
Found 3747
Found 3748
Found 3749
Found 3753
Found 3756
Found 3757
Found 3758
Found 3765
Found 3772
Found 3777
Found 3778
Found 3780
Found 3783
Found 3786
Found 3788
Found 3791
Found 3794
Found 3796
Found 3799
Found 3807
Found 3817
Found 3818
Found 382
Found 3820
Found 3823
Found 3825
Found 3826
Found 3833
Found 3840
Found 3841
Found 3842
Found 3844
Found 3849
Found 3850
Found 3855
Found 3862
Found 3871
Found 3873
Found 3875
Found 388
Found 3880
Found 3881
Found 3883
Found 389
Found 3891
Found 3899
Found 3900
Found 3901
Found 3902
Found 3904
Found 391
Found 3910
Found 3914
Found 3915
Found 3921
Found 3923
Found 3927
Found 3928
Found 393
Found 3932
Found 3936
Found 3937
Found 3938
Found 3940
Found 395
Found 3957
Found 3958
Found 396
Found 3967
Found 3972
Found 3975
Found 3976
Found 3983
Found 3986
Found 3987
Found 3990
Found 3994
Found 3995
Found 3996
Found 3997
Found 3998
Found 3999
Found 400
Found 4001
Found 4002
Found 4003
Found 4007
Found 4010
Found 4012
Found 4013
Found 4014
Found 4025
Found 4027
Found 403
Found 4032
Found 4033
Found 4035
Found 4037
Found 4043
Found 4044
Found 4048
Found 4055
Found 4056
Found 4061
Found 4062
Found 4063
Found 4069
Found 4072
Found 4078
Found 4080
Found 4081
Found 4086
Found 4094
Found 4101
Found 4105
Found 4106
Found 4113
Found 4114
Found 4115
Found 4116
Found 4121
Found 4122
Found 4123
Found 4125
Found 4129
Found 4132
Found 4134
Found 4138
Found 414
Found 4140
Found 4141
Found 4144
Found 4147
Found 4151
Found 4152
Found 4153
Found 4160

In [20]:
# Make a matrix of the pairwise-ppm difference between peaks
def pairwise_ppm_matrix(peak_mz):
    '''
    GOAL - Make a matrix containing pairwise ppm differences
           from a pandas series
    INPUT - peak_mz: pandas series with index as feature identifier
    OUTPUT - matrix of pairwise ppm values. half-full with comparisons
            Other (redundant) half is nan values. Using nans so 
            you can ask "ppm_matrix < 20" and sum rows/columns
            to get an answer
    '''
    ppm_pairwise_matrix = pd.DataFrame(
        np.full([len(peak_mz), len(peak_mz)], np.nan),
        index=peak_mz.index, columns=peak_mz.index)
    for i, mz in enumerate(peak_mz):
        for idx, mz2 in enumerate(peak_mz[i+1:]):
            j=i+1+idx # 
            min_ppm = abs(
                        (float(mz-mz2)/max(mz,mz2)) * 10**6)
            ppm_pairwise_matrix.iloc[j,i] = min_ppm
    return ppm_pairwise_matrix

test_mz = pd.Series([1,2,3], index=['a', 'b', 'c'], dtype='float64')
print test_mz
test_val = pairwise_ppm_matrix(test_mz)
should_val = pd.DataFrame({'a': [np.nan, 0.5*10**6, (2.0/3)*10**6],
                          'b': [np.nan, np.nan, (1.0/3)*10**6],
                          'c': [np.nan, np.nan, np.nan]},
                         index=['a', 'b', 'c'])

print '\nOutput from test_vals:\n', test_val
print '\nShould be this:\n', should_val

assert(test_val.all() == should_val.all()).all()
if (test_val.all() == should_val.all()).all():
    print '\n\nYou passed the test! (might be other bugs, but idk)'


a    1.0
b    2.0
c    3.0
dtype: float64

Output from test_vals:
               a              b   c
a            NaN            NaN NaN
b  500000.000000            NaN NaN
c  666666.666667  333333.333333 NaN

Should be this:
               a              b   c
a            NaN            NaN NaN
b  500000.000000            NaN NaN
c  666666.666667  333333.333333 NaN


You passed the test! (might be other bugs, but idk)

In [21]:
# Select out the peaks from dataframe (those that presumably passed QC)
features = peaks.loc[df.columns]
feature_mz = features['mz']

In [15]:
def plot_mz_rt(df, save=False,path=None, rt_bounds=[-1e5,-1e5]):
    # the random data
    x = df['rt']
    y = df['mz']
    print np.max(x)
    print np.max(y)
    nullfmt = NullFormatter()         # no labels

    # definitions for the axes
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02

    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom_h, width, 0.2]
    rect_histy = [left_h, bottom, 0.2, height]

    # start with a rectangular Figure
    #fig = plt.figure(1, figsize=(8, 8))
    fig = plt.figure(1, figsize=(10,10))
    
    axScatter = plt.axes(rect_scatter)
    axHistx = plt.axes(rect_histx)
    axHisty = plt.axes(rect_histy)

    # no labels
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)

    # the scatter plot:
    axScatter.scatter(x, y, s=1)

    # now determine nice limits by hand:
    binwidth = 0.25

    #xymax = np.max([np.max(np.fabs(x)), np.max(np.fabs(y))])

    #lim = (int(xymax/binwidth) + 1) * binwidth

    x_min = np.min(x)-50
    x_max = np.max(x)+50
    axScatter.set_xlim(x_min, x_max )
    y_min = np.min(y)-50
    y_max = np.max(y)+50
    axScatter.set_ylim(y_min, y_max)

    # Add vertical red line between 750-1050 retention time
    '''
    plt.plot([0,1], [0,1], linestyle = '--', lw=2, color='r',
                        label='Luck', alpha=0.5)
    '''
    print 'ymin: ', y_min
    
    # Add vertical/horizontal lines to scatter and histograms
    axScatter.axvline(x=rt_bounds[0], lw=2, color='r', alpha=0.5)
    axScatter.axvline(x=rt_bounds[1], lw=2, color='r', alpha=0.5)

    #bins = np.arange(-lim, lim + binwidth, binwidth)
    bins = 100
    axHistx.hist(x, bins=bins)
    axHisty.hist(y, bins=bins, orientation='horizontal')

    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())

    axScatter.set_ylabel('m/z', fontsize=30)
    axScatter.set_xlabel('Retention Time', fontsize=30)

    axHistx.set_ylabel('# of Features', fontsize=20)
    axHisty.set_xlabel('# of Features', fontsize=20)
    if save:
        plt.savefig(path, 
                format='pdf')
    plt.show()

In [16]:
plot_mz_rt(features)


1189.217979
531.234175
ymin:  46.972084

In [22]:
# Do a quick test on own data
ppm_matrix = pairwise_ppm_matrix(feature_mz)

Only 11 features are within 5ppm of one-another :)

Unsure how this will stack up between datasets, but 52 possibly isomeric features out of 1000 is not bad...

Also, there's a distinct possibility that they removed other redundant features... Check on that below


In [23]:
def plot_ppm_overlaps(ppm_matrix, x_vals):
    x = x_vals
    y = (np.array([(ppm_matrix < i).sum().sum() for i in x]) / 
         float(ppm_matrix.shape[0]))*100
    plt.scatter(x,y)
    plt.xlabel('ppm')
    plt.ylabel('% of overlapping m/z')
    plt.title('Few overlapping m/z values in Husermet dataset'
             + ' (# Features = %s)' % ppm_matrix.shape[0])
    plt.axvline(5, color='red', alpha=0.2, label='Instrument precision')
    plt.legend()
    plt.show()
    
plot_ppm_overlaps(ppm_matrix, range(1,20))



In [167]:
# Check how many of the annotated features (peaks?)
# have overlapping m/z
# This takes a long time. Maybe try to matrix-ify some of the code?
all_feats = pairwise_ppm_matrix(peaks['mz'])

Even for all the features (not just those that were in the dataframe and passed QC), only ~1% are indistinguishable by mass


In [173]:
plot_ppm_overlaps(all_feats, range(1,20))


Now let's see if the distributions of these m/z overlapping features are distinct

Just work with the QC'd features (those found in the feature table, not those found only in the peaklist)


In [28]:
# Get the overlapping features...
# Stack() pivots values and drops nan values - yay!
overlapping_mz_pairs = list(ppm_matrix[ppm_matrix < 6].stack().index)
len(overlapping_mz_pairs)
print overlapping_mz_pairs

# write a function to get intensities for these features
def plot_overlapping_mz_intensities(df, feature_pair):
    '''
    GOAL - Take in tuple of feature indices, return Intensity values
        for that pair.
    INPUT - 
        df - pandas dataframe. A feature table with 
            (samples x features), with column
            index that has same index as feature_pair
        feature_pair - Tuple. Contains indexes to get intensity vals
    OUTPUT - 
        Dataframe of (sample, intensity) for each feature pair
    '''
    feats = df[list(feature_pair)]
    
    # convert to tidy data
    tidy_feats = feats.melt(id_vars=feats.index,
              value_vars=feats.columns,
              var_name='feature',
              value_name='intensity').dropna(axis=1, how='all')
    
    # Get mann-whitney values
    u, pval_u = stats.mannwhitneyu(df[feature_pair[0]], df[feature_pair[1]])
    
    # Convert dtype of intensity values! float..?
    sns.boxplot(x='feature', y='intensity',
               data=tidy_feats)
    ax = sns.stripplot(data=tidy_feats,
                       x='feature', y='intensity',
                      jitter=True)
    plt.title("mann-whitney: {u}, pval: {pval:.2e}".format(
        u=u, pval=pval_u))
    plt.show()
    

#TODO fix bug here that says 
# I'm using different-length arrays
for i in range(0,len(overlapping_mz_pairs)):
    plot_overlapping_mz_intensities(df , overlapping_mz_pairs[i])


[(1288, 1287), (1522, 1521), (1740, 1739), (1999, 1998), (2019, 2018), (2020, 2019), (2438, 2437), (3308, 3307), (389, 388), (3928, 3927), (3995, 3994), (4003, 4002)]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-036e998821d2> in <module>()
     41 
     42 for i in range(0,len(overlapping_mz_pairs)):
---> 43     plot_overlapping_mz_intensities(df , overlapping_mz_pairs[i])
     44 

<ipython-input-28-036e998821d2> in plot_overlapping_mz_intensities(df, feature_pair)
     24               value_vars=feats.columns,
     25               var_name='feature',
---> 26               value_name='intensity').dropna(axis=1, how='all')
     27 
     28     # Get mann-whitney values

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/pandas/core/frame.pyc in melt(self, id_vars, value_vars, var_name, value_name, col_level)
   4056         return melt(self, id_vars=id_vars, value_vars=value_vars,
   4057                     var_name=var_name, value_name=value_name,
-> 4058                     col_level=col_level)
   4059 
   4060     # ----------------------------------------------------------------------

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/pandas/core/reshape/reshape.pyc in melt(frame, id_vars, value_vars, var_name, value_name, col_level)
    771                                    ._get_level_values(i)).repeat(N)
    772 
--> 773     return DataFrame(mdata, columns=mcolumns)
    774 
    775 

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/pandas/core/frame.pyc in __init__(self, data, index, columns, dtype, copy)
    273                                  dtype=dtype, copy=copy)
    274         elif isinstance(data, dict):
--> 275             mgr = self._init_dict(data, index, columns, dtype=dtype)
    276         elif isinstance(data, ma.MaskedArray):
    277             import numpy.ma.mrecords as mrecords

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/pandas/core/frame.pyc in _init_dict(self, data, index, columns, dtype)
    367             # raise ValueError if only scalars in dict
    368             if index is None:
--> 369                 extract_index(list(data.values()))
    370 
    371             # prefilter if columns passed

/home/irockafe/miniconda2/envs/isaac_revo_healthcare/lib/python2.7/site-packages/pandas/core/frame.pyc in extract_index(data)
   5542             lengths = list(set(raw_lengths))
   5543             if len(lengths) > 1:
-> 5544                 raise ValueError('arrays must all be same length')
   5545 
   5546             if have_dicts:

ValueError: arrays must all be same length

In [252]:
test = pd.DataFrame({'A': [1,2,3], 'B':[10,20,30], 'C':[100,200,300]})
print test
test.melt(id_vars=test.index, value_vars=test.columns,
         var_name='feature', value_name='intensity').dropna(axis=1)


   A   B    C
0  1  10  100
1  2  20  200
2  3  30  300
Out[252]:
feature intensity
0 A 1
1 A 2
2 A 3
3 B 10
4 B 20
5 B 30
6 C 100
7 C 200
8 C 300