This notebook explores different ways to assess the quality of PSM (peptide-spectrum match) quantifications.



In [1]:

    
from itertools import chain

# Hand-picked (peptide sequence, scan number) pairs used as labelled examples.
# NOTE(review): "bad" presumably marks PSMs whose quantification was judged
# poor and "good" those judged reliable -- confirm with the author.
bad_data = [
    ('ELcSAAITMSDNTAANLLLTTIGGPk', 8846),
    ('FVESVDVAVNLGIDAR', 7466),
    ('ELcSAAITMSDNTAANLLLTTIGGPK', 9209),
    ('FVESVDVAVNLGIDAR', 9213),
    ('FVESVDVAVNLGIDAR', 9426),
    ('AVTLYLGAVAATVR', 6660),
    ('AVTLYLGAVAATVR', 8958),
    ('IVVIYTTGSQATMDER', 4505),
    ('VGYIELDLNSGk', 5624),
    ('LLTGELLTLASR', 6942),
    ('FVESVDVAVNLGIDAr', 9184),
    ('ELcSAAITMSDNTAANLLLTTIGGPk', 9458),
    ('VGYIELDLNSGk', 5238),
    ('IVVIYTTGSQATMDERNR', 4024),
    ('AVTLYLGAVAATVR', 9652),
    ('ELcSAAITMSDNTAANLLLTTIGGPk', 8883),
    ('IVVIYTTGSQATMDERNR', 4005),
    ('FVESVDVAVNLGIDAR', 9950),
    ('AQHSALDDIPR', 2510),
    ('FVESVDVAVNLGIDAR', 9980),
    ('VGYIELDLNSGk', 9546),
    ('IVVIYTTGSQATMDER', 9933),
    ('HFESTPDTPEIIATIHGEGYR', 4488),
    ('YYLGNADEIAAK', 3703),
    ('FVESVDVAVNLGIDAR', 6879),
    ('RDDSILLAQHTR', 1849),
    ('EQGYALDSEENEQGVR', 2536),
    ('VLLcGAVLSR', 4541),
    ('LGYPITDDLDIYTr', 5790),
    ('VGYIELDLNSGk', 8965),
    ('FVESVDVAVNLGIDAR', 7796),
]

good_data = [
    ('VHIINLEK', 2373),
    ('HITDRDVR', 863),
    ('GATVLPHGTGr', 1244),
    ('GATVLPHGTGR', 1238),
    ('EQGLHFYAAGHHATER', 1570),
    ('VPLHTLr', 1371),
    ('IHVAVAQEVPGTGVDTPEDLER', 4157),
    ('cIFDNISLTVPR', 6174),
    ('HLTDGmTVR', 974),
    ('AGVHFGHQTR', 1002),
    ('AHHYPSELSGGQQQR', 1142),
    ('HYGALQGLNk', 1738),
    ('HITGLHYNPITNTFk', 3590),
    ('IGLLEHANR', 2008),
    ('ALEINSQSLDNNAAFIR', 5217),
    ('RIYGVLER', 2188),
    ('FQDVGSFDYGR', 3734),
    ('AVQNAMR', 995),
    ('IGVGGTITYPR', 3358),
    ('GmGESNPVTGNTcDNVk', 1558),
    ('MVEEDPAHPr', 1177),
    ('AIENQAYVAGcNr', 1914),
    ('FIAQQLGVSR', 3332),
    ('MPEDLLTr', 3424),
    ('mVEEDPAHPr', 1016),
    ('GFSVNFER', 3790),
    ('TPVGNTAAIcIYPR', 4031),
    ('IDAILVDR', 3375),
    ('LVAVGNTFVYPIAGYSk', 5966),
]

# Space-separated argument strings, bad examples first and then good ones;
# the two strings stay position-aligned (n-th peptide <-> n-th scan).
peptides = ' '.join(sequence for sequence, _scan in bad_data + good_data)
scans = ' '.join(str(scan) for _sequence, scan in bad_data + good_data)

# Name of the output file; the feature-building cell reads it back with
# pd.read_table(out).
out = 'ml_train'



In [24]:

    
# %%bash -s "$peptides" "$scans" "$out"
# pyQuant --search-file "/home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/PD/Chris_Ecoli_1-2-4-(01).msf" \
#     --scan-file "/home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/Chris_Ecoli_1-2-4.mzML" \
#     --peptide $1 --scan $2 \
#     -o $3 \
#     -p 9









    



INFO:pyQuant:Reader done






    



msparser not found, Mascot DAT files unable to be parsed
Loading Scans:
.
Scans loaded.
Beginning quantification.
Processing /home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/Chris_Ecoli_1-2-4.mzML.
........................................................................................................../home/chris/Devel/pyquant/pyquant/worker.py:49: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  self.msn_rt_map.sort()
Chris_Ecoli_1-2-4 processed and placed into queue.
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
16.67% Completed/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:675: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
100.00% CompletedUnable to calculate statistics for Heavy/Light.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Heavy/Medium.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Light/Heavy.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Light/Medium.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Medium/Heavy.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Medium/Light.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11



In [33]:

    
# %%bash -s "$peptides" "$scans" "$out"
# pyQuant --search-file "/home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/PD/Chris_Ecoli_1-2-4-(01).msf" \
#     --scan-file "/home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/Chris_Ecoli_1-2-4.mzML" \
#     -o $3 \
#     -p 9









    



ERROR ON IGSDAYNQGLSER: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/worker.py", line 702, in quantify_peaks
    peak_index = peaks.find_nearest_index(merged_x, valid_peaks[0]['mean'])
IndexError: list index out of range

INFO:pyQuant:Reader done






    



msparser not found, Mascot DAT files unable to be parsed
In file included from /home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1777:0,
                 from /home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:18,
                 from /home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:266:
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:15:2: warning: #warning "Using deprecated NumPy API, disable it by " "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp]
 #warning "Using deprecated NumPy API, disable it by " \
  ^
In file included from /home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:27:0,
                 from /home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from /home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:266:
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/include/numpy/__multiarray_api.h:1448:1: warning: ‘_import_array’ defined but not used [-Wunused-function]
 _import_array(void)
 ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c: In function ‘__pyx_pf_7pyquant_6cpeaks_12gauss_jac_old.isra.52’:
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6003:37: warning: ‘__pyx_v_mu’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_1 = PyFloat_FromDouble(__pyx_v_mu); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 125, __pyx_L1_error)
                                     ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:5702:9: note: ‘__pyx_v_mu’ was declared here
   float __pyx_v_mu;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6056:37: warning: ‘__pyx_v_amp’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_4 = PyFloat_FromDouble(__pyx_v_amp); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 126, __pyx_L1_error)
                                     ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:5701:9: note: ‘__pyx_v_amp’ was declared here
   float __pyx_v_amp;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c: In function ‘__pyx_pf_7pyquant_6cpeaks_14gauss_jac.isra.50’:
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6789:39: warning: ‘__pyx_v_mu’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_1 = PyFloat_FromDouble((-__pyx_v_mu)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 144, __pyx_L1_error)
                                       ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6440:9: note: ‘__pyx_v_mu’ was declared here
   float __pyx_v_mu;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6897:38: warning: ‘__pyx_v_amp’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_15 = PyFloat_FromDouble(__pyx_v_amp); if (unlikely(!__pyx_t_15)) __PYX_ERR(0, 146, __pyx_L1_error)
                                      ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:6439:9: note: ‘__pyx_v_amp’ was declared here
   float __pyx_v_amp;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c: In function ‘__pyx_pf_7pyquant_6cpeaks_10bigauss_jac.isra.54’:
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:5062:37: warning: ‘__pyx_v_amp’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_4 = PyFloat_FromDouble(__pyx_v_amp); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 104, __pyx_L1_error)
                                     ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:4510:9: note: ‘__pyx_v_amp’ was declared here
   float __pyx_v_amp;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:4931:49: warning: ‘__pyx_v_sigma1’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_3 = PyFloat_FromDouble((2.0 * powf(__pyx_v_sigma1, 2.0))); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 101, __pyx_L1_error)
                                                 ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:4512:9: note: ‘__pyx_v_sigma1’ was declared here
   float __pyx_v_sigma1;
         ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:4920:39: warning: ‘__pyx_v_mu’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       __pyx_t_3 = PyFloat_FromDouble((-__pyx_v_mu)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 101, __pyx_L1_error)
                                       ^
/home/chris/.pyxbld/temp.linux-x86_64-2.7/pyrex/pyquant/cpeaks.c:4511:9: note: ‘__pyx_v_mu’ was declared here
   float __pyx_v_mu;
         ^
Loading Scans:
..
Scans loaded.
Beginning quantification.
Processing /home/chris/gdrive/Dropbox/Manuscripts/SILAC Fix/EColi/Chris_Ecoli_1-2-4.mzML.
........................................................................................................../home/chris/Devel/pyquant/pyquant/worker.py:49: FutureWarning: sort is deprecated, use sort_values(inplace=True) for INPLACE sorting
  self.msn_rt_map.sort()
Chris_Ecoli_1-2-4 processed and placed into queue.
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:675: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
0.78% Completed/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
1.55% Completed/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
3.10% Completed/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
99.30% CompletedUnable to calculate statistics for Heavy/Light.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Heavy/Medium.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Light/Heavy.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Light/Medium.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Medium/Heavy.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11 
Unable to calculate statistics for Medium/Light.
 Traceback: Traceback (most recent call last):
  File "/home/chris/Devel/pyquant/pyquant/command_line.py", line 1148, in run_pyquant
    conf_ass = classifier.predict_proba(fit_predictors)[:,1]*10
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 537, in predict_proba
    X = self._validate_X_predict(X)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/ensemble/forest.py", line 319, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "/home/chris/.virtualenvs/pyquant/local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 376, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must  match the input. Model n_features is 19 and  input n_features is 11



In [ ]:



In [74]:

    
%matplotlib inline
from tpot import TPOT
from sklearn.cross_validation import train_test_split
import numpy as np
from scipy.special import logit
import pandas as pd
pd.options.display.max_columns = None
from patsy import dmatrix

# Per-label feature columns; in the pyQuant table each one is prefixed with
# the label name ('Heavy ', 'Medium ', 'Light ').
cols = ['Isotopes Found', 'Intensity', 'RT Width', 'Mean Offset', 'Residual', 'R^2', 'SNR']

# Columns of the pyQuant table that are not used when building features.
_UNUSED_COLUMNS = [
    'Modifications', 'Raw File', 'Accession', 'MS1 Spectrum ID', 'Charge',
    'Medium Calibrated Precursor', 'Medium Precursor', 'Heavy/Medium',
    'Heavy Calibrated Precursor', 'Heavy Precursor',
    'Light Calibrated Precursor', 'Light Precursor', 'Retention Time',
    'Heavy/Light Confidence', 'Medium/Heavy', 'Medium/Heavy Confidence',
    'Medium/Light Confidence', 'Light/Medium Confidence',
    'Heavy/Medium Confidence', 'Light/Heavy Confidence',
]


def _ratio_features(quant, residue, numerator, denominator, feature_cols=cols):
    """Build the feature table for one residue class and one label ratio.

    Parameters
    ----------
    quant : DataFrame indexed by (Peptide, MS2 Spectrum ID) with a 'Class'
        column, a '<numerator>/<denominator>' ratio column and
        '<label> <feature>' columns for every entry of ``feature_cols``.
    residue : value of 'Class' to select ('R' or 'K').
    numerator, denominator : label names, e.g. 'Heavy' and 'Light'.
    feature_cols : per-label feature names to copy.

    Returns
    -------
    DataFrame with one row per selected PSM and columns
    'Label1 <feature>' (numerator), 'Label2 <feature>' (denominator),
    'Deviation' (log2 ratio minus the class median) and 'Class'
    (1 when |Deviation| is below the standard deviation of the log2 ratios
    of that class, else 0). Intensities are log2-transformed and R^2 values
    are logit-transformed.
    """
    ratio = '{}/{}'.format(numerator, denominator)
    # Computed once; the median of the selected class is the expected value.
    log_ratio = np.log2(quant.loc[quant['Class'] == residue, ratio])
    median, std = log_ratio.median(), log_ratio.std()
    deviation = log_ratio - median

    columns = ['{} {}'.format(tag, col)
               for tag in ('Label1', 'Label2')
               for col in feature_cols] + ['Deviation', 'Class']
    features = pd.DataFrame(index=log_ratio.index, columns=columns)
    features['Deviation'] = deviation
    # NaN deviations (or a NaN std) compare False and are therefore labelled 0.
    features['Class'] = (deviation.abs() < std).astype(int)

    for label, tag in ((numerator, 'Label1'), (denominator, 'Label2')):
        for col in feature_cols:
            # Index-aligned copy: only rows of the selected class are kept.
            features['{} {}'.format(tag, col)] = quant['{} {}'.format(label, col)]
        features[tag + ' Intensity'] = np.log2(features[tag + ' Intensity'])
        features[tag + ' R^2'] = logit(features[tag + ' R^2'])
    return features


dat = pd.read_table(out)
# Keep peptides containing exactly one R/K (upper or lower case), then tag
# each row by which of the two residues it contains.
# NOTE(review): lower-case letters presumably mark labelled/modified
# residues in the search output -- confirm.
# .copy() so the assignments below do not write into a slice of the raw table.
dat = dat[dat['Peptide'].str.count('[RKrk]') == 1].copy()
dat['Class'] = None
dat.loc[dat['Peptide'].str.count('[Rr]') == 1, 'Class'] = 'R'
dat.loc[dat['Peptide'].str.count('[Kk]') == 1, 'Class'] = 'K'
dat = dat.set_index(['Peptide', 'MS2 Spectrum ID']).drop(_UNUSED_COLUMNS, axis=1)

# Author's reference values for the ratios (the code itself uses the
# per-class median as the expected value):
# Arg H/L -> -1.86
# Arg M/L = -1
# Lys H/L -> 1.89
# Lys M/L = 0.72
nds = []
# Order matters for the concatenated output: all R tables, then all K tables,
# each with Heavy/Light before Medium/Light.
for residue in ('R', 'K'):
    for numerator, denominator in (('Heavy', 'Light'), ('Medium', 'Light')):
        nds.append(_ratio_features(dat, residue, numerator, denominator))
pd.concat(nds)









    Out[74]:






  
    
      
      
      Label1 Isotopes Found
      Label1 Intensity
      Label1 RT Width
      Label1 Mean Offset
      Label1 Residual
      Label1 R^2
      Label1 SNR
      Label2 Isotopes Found
      Label2 Intensity
      Label2 RT Width
      Label2 Mean Offset
      Label2 Residual
      Label2 R^2
      Label2 SNR
      Deviation
      Class
    
    
      Peptide
      MS2 Spectrum ID
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      GcImGSAHQr
      779
      24.0
      14.985734
      0.053936
      9.931775e-01
      0.085489
      3.528190
      21.114721
      32.0
      16.703665
      0.056111
      1.434795e+00
      0.416770
      2.361091
      9.658139
      0.094592
      1
    
    
      GcImGSAHQR
      783
      17.0
      12.924555
      0.042203
      2.686645e+00
      0.492796
      3.038507
      8.359946
      26.0
      14.711507
      0.055908
      4.517510e-01
      1.063376
      2.148553
      6.236617
      0.000523
      1
    
    
      777
      28.0
      14.996194
      0.043803
      2.569664e+00
      0.084761
      3.823541
      20.474859
      34.0
      16.708224
      0.056213
      1.406006e+00
      0.412675
      2.329501
      9.082110
      0.094830
      1
    
    
      TQDATHGNSLSHR
      811
      39.0
      13.991486
      0.048815
      2.128489e+00
      0.972111
      1.881489
      6.062496
      50.0
      15.962065
      0.061264
      4.103946e-01
      0.238537
      3.268813
      2.282480
      -0.101995
      1
    
    
      IEQAPGQHGAR
      887
      7.0
      16.167331
      0.050244
      7.230988e-02
      0.012362
      13.026475
      1.012433
      11.0
      17.872230
      0.045521
      6.600722e-02
      0.034781
      11.525348
      1.143215
      0.290681
      1
    
    
      AGVTGAENr
      904
      8.0
      17.969158
      0.067676
      2.493468e-01
      0.025712
      5.443686
      1.053801
      10.0
      19.819117
      0.055905
      1.362417e-01
      0.015580
      8.858486
      1.034260
      -0.065885
      1
    
    
      AGVTGAENR
      903
      8.0
      17.969158
      0.067676
      2.493468e-01
      0.025712
      5.443686
      1.053801
      10.0
      19.819117
      0.055905
      1.362417e-01
      0.015580
      8.858486
      1.034260
      -0.065885
      1
    
    
      GTAmNPVDHPHGGGEGR
      917
      8.0
      16.550535
      0.050721
      1.833391e-01
      0.019163
      7.692895
      1.021964
      12.0
      18.294151
      0.051766
      9.084471e-02
      0.014866
      9.459570
      1.234008
      0.085492
      1
    
    
      ALVSHPR
      933
      4.0
      15.729831
      0.090572
      1.991482e-01
      0.059474
      3.887081
      NaN
      8.0
      17.857216
      0.070174
      9.030660e-01
      0.067431
      4.512238
      1.013494
      0.111110
      1
    
    
      mTGDNPDAPR
      944
      2.0
      13.843877
      0.054663
      1.666761e-01
      0.034627
      8.161228
      NaN
      7.0
      15.876904
      0.063909
      6.023946e-01
      0.034698
      4.959971
      1.001678
      0.007644
      1
    
    
      VHPNGIR
      898
      6.0
      15.131996
      0.056340
      4.975481e-01
      0.247207
      3.758966
      NaN
      11.0
      17.245997
      0.058525
      1.444357e-01
      0.052027
      5.027183
      1.406236
      -0.135307
      1
    
    
      SVANAEQmDR
      959
      9.0
      16.980617
      0.069780
      3.405412e-01
      0.119328
      4.002708
      1.026194
      12.0
      18.786625
      0.075726
      4.197732e-01
      0.104451
      3.851186
      0.931681
      0.071929
      1
    
    
      SVANAEQmDr
      962
      9.0
      16.980617
      0.069780
      3.405412e-01
      0.119328
      4.002708
      1.026194
      12.0
      18.786625
      0.075726
      4.197732e-01
      0.104451
      3.851186
      0.931681
      0.071929
      1
    
    
      AAASHLVR
      961
      14.0
      17.324065
      0.058804
      1.981803e+00
      0.189402
      8.559042
      15.804382
      27.0
      19.420032
      0.075272
      1.019746e+00
      1.396346
      2.584104
      19.286076
      0.091354
      1
    
    
      AAASHLVr
      964
      15.0
      17.324065
      0.058804
      1.981799e+00
      0.203778
      8.558997
      10.880751
      28.0
      19.424549
      0.075731
      9.812200e-01
      1.535988
      2.584007
      15.079136
      0.083129
      1
    
    
      HLTDGmTVr
      975
      28.0
      23.746521
      0.093142
      4.456008e-01
      0.021245
      5.482953
      15.104723
      35.0
      25.416597
      0.084778
      1.511559e+00
      0.072245
      5.677385
      16.507001
      0.080686
      1
    
    
      HLTDGmTVR
      974
      28.0
      23.746521
      0.093142
      4.456008e-01
      0.021245
      5.482953
      15.104723
      35.0
      25.416597
      0.084778
      1.511559e+00
      0.072245
      5.677385
      16.507001
      0.080686
      1
    
    
      AVQNAMR
      995
      9.0
      17.682340
      0.061553
      1.262045e+00
      0.062885
      4.914437
      1.126810
      18.0
      19.446729
      0.067331
      9.486762e-01
      0.099772
      5.123132
      3.522732
      0.142120
      1
    
    
      AGVHFGHQTR
      1002
      12.0
      21.008434
      0.083990
      5.339253e-01
      0.055843
      4.154514
      1.224810
      20.0
      22.994444
      0.075378
      2.275157e-01
      0.024981
      5.482815
      1.207646
      -0.189688
      1
    
    
      mVEEDPAHPr
      1016
      13.0
      18.121198
      0.092191
      7.536466e-01
      0.071224
      4.389010
      6.852894
      20.0
      20.084497
      0.093785
      3.310712e-01
      0.046965
      4.964579
      10.238337
      -0.049676
      1
    
    
      mVEEDPAHPR
      1020
      12.0
      18.117252
      0.092013
      7.461029e-01
      0.072749
      4.389664
      1.142899
      20.0
      20.084497
      0.093785
      3.310712e-01
      0.046965
      4.964579
      10.238337
      -0.227445
      1
    
    
      AGVHFGHQTR
      992
      23.0
      23.282827
      0.076255
      1.544298e-01
      0.013500
      7.192932
      15.622094
      30.0
      25.061076
      0.080505
      3.480352e-01
      0.063421
      4.765415
      14.052629
      0.021684
      1
    
    
      AGVHFGHQTr
      994
      23.0
      23.282827
      0.076255
      1.544298e-01
      0.013500
      7.192932
      15.622094
      31.0
      25.061076
      0.080505
      3.483843e-01
      0.064882
      4.765081
      14.052629
      0.021684
      1
    
    
      mVEEDPAHPr
      1025
      6.0
      15.698337
      0.069287
      4.783882e-01
      0.046070
      5.193861
      2.638442
      11.0
      18.316519
      0.079541
      1.710395e-01
      0.137727
      5.748694
      1.114434
      -0.720857
      1
    
    
      AGVHFGHQTr
      1004
      12.0
      21.008434
      0.083990
      5.339253e-01
      0.055843
      4.154514
      1.224810
      20.0
      22.994444
      0.075378
      2.275157e-01
      0.024981
      5.482815
      1.207646
      -0.189688
      1
    
    
      GTAMNPVDHPHGGGEGR
      1087
      6.0
      16.798970
      0.052111
      2.076855e-01
      0.042727
      4.882561
      NaN
      12.0
      18.816326
      0.051097
      5.237172e-01
      0.099033
      5.016891
      1.092491
      -0.049417
      1
    
    
      TDLHGTAVR
      1078
      12.0
      18.357713
      0.060047
      6.581272e-01
      0.053875
      4.749313
      1.380350
      8.0
      19.553958
      0.051518
      1.114874e-01
      0.032910
      7.299707
      1.038474
      1.064196
      0
    
    
      AHHYPSELSGGQQQR
      1137
      19.0
      20.740120
      0.069004
      7.869672e-01
      0.034991
      5.765859
      1.344427
      28.0
      22.611332
      0.080211
      3.786946e-01
      0.056959
      4.790247
      1.299816
      0.020353
      1
    
    
      1142
      13.0
      18.501803
      0.071385
      6.950947e-01
      0.036237
      5.550402
      1.138348
      22.0
      20.373860
      0.069400
      3.310561e-01
      0.041349
      5.465852
      1.339840
      0.052759
      1
    
    
      1145
      5.0
      18.857965
      0.087171
      7.221403e-01
      0.016279
      4.229913
      1.098967
      23.0
      21.817347
      0.073249
      3.987897e-01
      0.079853
      4.701050
      1.190595
      0.222085
      1
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      LAQmQIPADDYFIWITGEGk
      7850
      9.0
      18.757221
      0.077344
      1.544988e+00
      0.442809
      2.354658
      1.138290
      5.0
      17.276999
      0.116156
      8.156097e-01
      1.656970
      NaN
      1.651758
      -0.327431
      1
    
    
      EGAFVPFVTLGDPGIEQSLK
      7849
      43.0
      23.650085
      0.193217
      1.206463e+00
      0.729791
      1.709437
      6.358218
      41.0
      23.037434
      0.207459
      2.615308e-01
      0.228140
      3.322470
      6.108227
      -0.002491
      1
    
    
      ELcSAAITmSDNTAANLLLTTIGGPk
      7866
      23.0
      20.358045
      0.143019
      9.635800e-01
      1.476684
      1.071712
      10.838309
      17.0
      18.233260
      0.076696
      4.096748e+00
      1.956180
      1.403417
      221.445566
      1.077706
      0
    
    
      TQGAAAFEGAVIAYEPVWAIGTGk
      7884
      49.0
      23.298585
      0.219064
      1.977094e-01
      0.204182
      3.837238
      4.173747
      39.0
      22.537990
      0.180326
      1.371329e+00
      0.624656
      2.240398
      6.524064
      0.063856
      1
    
    
      ELcSAAITmSDNTAANLLLTTIGGPk
      7874
      49.0
      24.919943
      0.206087
      2.303474e-01
      0.853150
      1.620717
      4.859298
      47.0
      24.124365
      0.212079
      1.652292e-01
      0.197110
      3.682382
      4.434152
      0.058754
      1
    
    
      7886
      21.0
      19.999417
      0.114682
      3.075215e-01
      1.977056
      1.436670
      7.416190
      34.0
      19.573735
      0.056227
      1.608774e+00
      6.064123
      2.089884
      6.699860
      0.739104
      1
    
    
      VLALAENYQPLYAALGLHPGMLEk
      7915
      18.0
      19.815205
      0.095440
      5.583934e-01
      0.903778
      1.903067
      120.276338
      12.0
      19.124448
      0.071011
      1.931950e+00
      0.545626
      2.040685
      15.321044
      0.767542
      1
    
    
      FGASSLLASLLk
      8084
      34.0
      23.119274
      0.148651
      2.196329e+00
      0.141995
      3.944355
      5.234995
      35.0
      22.498940
      0.188970
      4.770330e-01
      0.213794
      3.080469
      4.816335
      -0.080325
      1
    
    
      TqGAAAFEGAVIAYEPVWAIGTGk
      8094
      23.0
      16.250988
      0.041308
      4.430558e-01
      0.092574
      11.158354
      -0.743560
      27.0
      18.138290
      0.040326
      2.672755e+01
      1.164123
      12.381305
      0.775548
      -1.095661
      0
    
    
      DGVGLLPTVLDVVENPk
      8559
      38.0
      21.899157
      0.169400
      1.452420e-01
      0.516881
      3.197701
      7.688767
      27.0
      21.228191
      0.184760
      3.650728e-01
      0.534657
      2.116104
      6.450245
      -0.088636
      1
    
    
      ELcSAAITMSDNTAANLLLTTIGGPk
      8695
      29.0
      22.895231
      0.273095
      3.034958e-01
      0.280452
      2.329319
      4.004378
      52.0
      23.293799
      0.232344
      4.787828e-01
      1.796918
      0.960049
      4.702896
      -0.195882
      1
    
    
      8686
      32.0
      23.059216
      0.201174
      5.944131e+00
      2.126078
      0.805061
      4.347489
      39.0
      22.595297
      0.129570
      2.685297e+00
      3.346062
      1.681354
      2.784739
      0.025548
      1
    
    
      VGYIELDLNSGk
      8965
      36.0
      18.805564
      0.332078
      2.099859e+00
      7.771240
      NaN
      6.158339
      29.0
      17.510583
      0.145803
      2.394400e+00
      4.397509
      -0.054590
      2.641730
      0.394377
      1
    
    
      SLDDAQIALAVINTTYASQIGLTPAk
      9089
      26.0
      20.788147
      0.102492
      2.694714e-01
      0.586520
      3.116046
      15.176564
      31.0
      20.371373
      0.114255
      6.046525e-01
      1.046882
      2.549166
      6.614207
      -0.124315
      1
    
    
      ELcSAAITMSDNTAANLLLTTIGGPk
      8846
      216.0
      23.835211
      0.850754
      3.325143e-01
      1.893201
      NaN
      -0.022767
      216.0
      23.428056
      0.756642
      2.127841e+00
      2.621094
      -3.001260
      0.851808
      -0.127975
      1
    
    
      8883
      264.0
      24.355262
      0.235376
      1.270539e-01
      2.377913
      1.738366
      4.411485
      268.0
      24.149613
      0.279444
      2.810016e-01
      1.998608
      1.920195
      3.695578
      -0.300475
      1
    
    
      9458
      71.0
      19.814885
      0.067602
      1.056360e+00
      4.460319
      1.553299
      8.495942
      55.0
      19.899775
      0.081068
      7.621572e-01
      4.665191
      2.026509
      16.984387
      -1.687464
      0
    
    
      LANEGIFTQQELYDELLTLADEAk
      9522
      4.0
      18.594230
      0.047298
      9.488388e-02
      0.128509
      3.940116
      NaN
      5.0
      18.353222
      0.046340
      7.130651e-01
      0.196605
      3.942563
      NaN
      0.062293
      1
    
    
      AIHTLWNVLDELDQAWLPVEk
      9560
      11.0
      22.513893
      0.058378
      2.969589e-01
      0.072579
      4.455892
      1.022964
      11.0
      21.834118
      0.062201
      4.901800e-01
      0.119166
      3.774949
      1.024549
      -0.038950
      1
    
    
      9586
      6.0
      21.728629
      0.058892
      3.079504e-01
      0.046923
      4.161881
      0.963363
      8.0
      21.320748
      0.054593
      9.095046e-01
      0.092096
      4.274263
      1.208662
      -0.070930
      1
    
    
      ELcSAAITMSDNTAANLLLTTIGGPK
      9148
      104.0
      18.329059
      0.041806
      6.509279e+00
      2.474097
      3.799134
      2.287613
      182.0
      21.163595
      0.263017
      9.294961e-01
      3.052862
      0.857173
      1.730295
      -3.928888
      0
    
    
      TAPDGEHGVNLVHLEDVIGAITLLLQAPk
      9656
      4.0
      17.701533
      0.040175
      6.339672e-04
      0.016543
      inf
      NaN
      6.0
      17.895217
      0.040175
      6.339672e-04
      0.024815
      inf
      NaN
      -0.150357
      1
    
    
      NADGLGMLVAqAAHAFLLWHGVLPDVEPVIk
      9688
      3.0
      15.521233
      0.052455
      1.120198e-01
      0.068335
      9.743359
      NaN
      1.0
      13.981914
      0.052455
      1.120198e-01
      0.022778
      9.743359
      NaN
      -0.548328
      1
    
    
      FLQFMVSPAFQNAIPTGnWMYPVANVTLPAGFEK
      9696
      0.0
      -inf
      NaN
      NaN
      NaN
      NaN
      NaN
      4.0
      16.293770
      0.067028
      1.165689e-01
      0.042940
      4.309249
      NaN
      NaN
      0
    
    
      ELcSAAITMSDNTAANLLLTTIGGPK
      9209
      216.0
      20.944411
      0.236307
      9.643571e+00
      3.447382
      0.541612
      2.741174
      142.0
      20.709059
      0.299294
      1.026466e+01
      5.649428
      -2.158764
      8.410005
      -0.675744
      1
    
    
      VLAPINDFINTLNAFFSAGGk
      9765
      6.0
      19.314618
      0.053537
      1.655816e-01
      0.013354
      6.934276
      NaN
      6.0
      18.662179
      0.053537
      2.025805e-01
      0.013438
      6.934400
      NaN
      -0.090769
      1
    
    
      9767
      5.0
      18.794496
      0.058808
      2.308393e-01
      0.013254
      6.645728
      1.003997
      7.0
      18.154701
      0.054971
      1.785362e-01
      0.006698
      7.625821
      NaN
      0.235751
      1
    
    
      VGYIELDLNSGk
      9546
      93.0
      19.127481
      0.184934
      1.256568e+01
      12.800832
      -1.205556
      3.856789
      83.0
      17.154424
      0.098624
      3.966709e+00
      9.656604
      -3.448946
      8.693567
      1.260810
      0
    
    
      FVQAYQSDEVYEAANk
      10015
      1.0
      12.242579
      0.030985
      1.733472e-07
      0.036631
      3.687593
      NaN
      1.0
      12.242579
      0.030985
      1.733472e-07
      0.036631
      3.687593
      NaN
      -0.721485
      1
    
    
      ELcSAAITMSDNTAAnLLLTTIGGPk
      9818
      65.0
      17.013110
      0.113016
      5.642632e+00
      0.907013
      0.174813
      0.286011
      39.0
      16.018433
      0.050737
      1.074356e+01
      0.859082
      10.182012
      0.436856
      1.423368
      0
    
  

2138 rows × 16 columns



In [223]:









    Out[223]:





Peptide                                      MS2 Spectrum ID
GcImGSAHQr                                   779                1
GcImGSAHQR                                   783                1
                                             777                1
TQDATHGNSLSHR                                811                1
IEQAPGQHGAR                                  887                1
AGVTGAENr                                    904                1
AGVTGAENR                                    903                1
GTAmNPVDHPHGGGEGR                            917                1
ALVSHPR                                      933                1
mTGDNPDAPR                                   944                1
VHPNGIR                                      898                1
SVANAEQmDR                                   959                1
SVANAEQmDr                                   962                1
AAASHLVR                                     961                1
AAASHLVr                                     964                1
HLTDGmTVr                                    975                1
HLTDGmTVR                                    974                1
AVQNAMR                                      995                1
AGVHFGHQTR                                   1002               1
mVEEDPAHPr                                   1016               1
mVEEDPAHPR                                   1020               1
AGVHFGHQTR                                   992                1
AGVHFGHQTr                                   994                1
mVEEDPAHPr                                   1025               1
AGVHFGHQTr                                   1004               1
GTAMNPVDHPHGGGEGR                            1087               1
TDLHGTAVR                                    1078               0
AHHYPSELSGGQQQR                              1137               1
                                             1142               1
                                             1145               1
                                                               ..
TIPSVLTALFcAR                                8606               0
AMLTLIVFSFTVSVYSSATVTPGSLnLAPIAIADMDQSqLSnr  8994               0
GVLLPLLSLDcAVTITNR                           8966               1
ILELAGFLDSYIPEPER                            8999               0
FVESVDVAVNLGIDAr                             9057               0
IEGGEWLVETVQmLTER                            9375               1
                                             9363               1
IEGGEWLVETVQmLTEr                            9381               1
                                             9377               1
GDMLSMEDVLEILR                               9469               1
IEGGEWLVETVQmLTEr                            9388               1
GDMLSMEDVLEILr                               9471               1
HLEFFNTQPFVAAPILGVTLALEEQR                   9514               1
SVPGYSNIISMIGmLAER                           9526               1
SVPGYSNIISMIGMLAER                           9593               1
IEGGEWLVETVQMLTER                            9611               1
IEGGEWLVETVQMLTEr                            9612               1
IEGGEWLVETVQMLTER                            9613               1
                                             9625               0
FVESVDVAVnLGIDAR                             9677               0
ATFVVDPQGIIQAIEVTAEGIGR                      9724               1
                                             9729               1
AVTLYLGAVAATVR                               9652               0
                                             8958               0
IVVIYTTGSQATMDER                             9933               1
FVESVDVAVNLGIDAR                             9213               1
                                             9426               0
FVESVDVAVNLGIDAr                             9184               0
FVESVDVAVNLGIDAR                             9950               0
                                             9980               0
Name: Heavy/Light, dtype: int64



In [75]:

    
# Stack every frame collected in `nds`, then discard any row that holds an
# infinite or missing value so the estimators below only see finite numbers.
df = pd.concat(nds).replace([np.inf, -np.inf], np.nan).dropna()

from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Features: every column except the two targets, passed through preprocessing.scale.
# NOTE(review): scaling happens on the full table *before* the train/test split,
# so held-out rows contribute to the scaling statistics -- confirm this is intended.
X = preprocessing.scale(df.drop(['Deviation', 'Class'], axis=1).values)
# Targets: column 0 is 'Deviation' (regression target), column 1 is 'Class'.
y = df[['Deviation', 'Class']].values

# 60/40 split with a fixed seed; both targets travel together and are separated below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
y_train_reg, y_train_class = y_train[:, 0], y_train[:, 1]
y_test_reg, y_test_class = y_test[:, 0], y_test[:, 1]



In [82]:

    
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC as Classifier

# Fit a default-parameter SVC on the training split and report its accuracy
# on the held-out split.
clf = Classifier().fit(X_train, y_train_class)
print(accuracy_score(y_test_class, clf.predict(X_test)))









    



0.884615384615



In [341]:

    
# `sklearn.qda` is a deprecated module that was removed in later scikit-learn
# releases; the same estimator lives in `sklearn.discriminant_analysis`.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as Classifier
from sklearn.metrics import accuracy_score

# Fit a default-parameter QDA model on the training split and report its
# accuracy on the held-out split.
clf = Classifier()
clf = clf.fit(X_train, y_train_class)
print(accuracy_score(y_test_class, clf.predict(X_test)))









    



0.926799007444



In [349]:

    
from sklearn.gaussian_process import GaussianProcessClassifier as Classifier
from sklearn.metrics import accuracy_score

# Fit a default-parameter Gaussian process classifier on the training split
# and report its accuracy on the held-out split.
clf = Classifier().fit(X_train, y_train_class)
print(accuracy_score(y_test_class, clf.predict(X_test)))









    



0.918114143921



In [83]:

    
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier as Classifier

# Fit a default-parameter multi-layer perceptron on the training split and
# report its accuracy on the held-out split.
clf = Classifier().fit(X_train, y_train_class)
print(accuracy_score(y_test_class, clf.predict(X_test)))









    



0.890818858561



In [84]:

    
import pickle

# Persist the estimator currently bound to `clf`.
# NOTE(review): `clf` is whichever model was fitted last in this kernel --
# confirm it is the intended one before overwriting the file.
# The `with` block guarantees the file is flushed and closed once the dump
# finishes, instead of leaving an open handle for the garbage collector.
with open('/home/chris/Devel/pyquant/pyquant/static/new_classifier2.pickle', 'wb') as classifier_file:
    pickle.dump(clf, classifier_file)



In [375]:

    
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.ensemble import AdaBoostRegressor as Regressor

# Fit a default-parameter AdaBoost regressor on the 'Deviation' target and
# report the median absolute error on the held-out split.
clf = Regressor().fit(X_train, y_train_reg)
print(metrics.median_absolute_error(y_test_reg, clf.predict(X_test)))

# Observed vs. predicted deviation, with the identity line drawn in red.
plt.scatter(y_test_reg, clf.predict(X_test))
plt.plot([-6, 6], [-6, 6], 'r-')









    



0.217232977277






    Out[375]:





[<matplotlib.lines.Line2D at 0x7f2df55edf50>]



In [24]:

    
from sklearn.neural_network import MLPRegressor

# Default-parameter MLP regressor wrapped in GridSearchCV with an empty
# parameter grid, fitted on the 'Deviation' target.
# Relies on `GridSearchCV`, `metrics` and `plt` imported by other cells.
reg = MLPRegressor()
clf = GridSearchCV(reg, {}).fit(X_train, y_train_reg)
print(metrics.median_absolute_error(y_test_reg, clf.predict(X_test)))

# Observed vs. predicted deviation, with the identity line drawn in red.
plt.scatter(y_test_reg, clf.predict(X_test))
plt.plot([-6, 6], [-6, 6], 'r-')









    



0.206629642862






    Out[24]:





[<matplotlib.lines.Line2D at 0x7f6fe8eda150>]



In [377]:

    
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Grid-search a gradient boosting regressor for the 'Deviation' target over
# loss function, learning rate and number of estimators.
# Relies on `GridSearchCV` and `plt` imported by other cells.
reg = GradientBoostingRegressor()
parameters = {
    'loss': ['ls', 'lad'],
    'learning_rate': [0.01, 0.1, 0.5],
    'n_estimators': [50, 100, 200],
}
clf = GridSearchCV(reg, parameters)
clf.fit(X_train, y_train_reg)

# Report the held-out R^2. Previously the score was computed as a bare
# expression in the middle of the cell, so its value was silently discarded.
print(r2_score(y_test_reg, clf.predict(X_test)))

# Observed vs. predicted deviation, with the identity line drawn in red.
plt.scatter(y_test_reg, clf.predict(X_test))
plt.plot([-6, 6], [-6, 6], 'r-')









    Out[377]:





[<matplotlib.lines.Line2D at 0x7f2df570ad10>]



In [379]:

    
from sklearn.tree import DecisionTreeRegressor as Regressor

# Fit a default-parameter decision tree on the 'Deviation' target and report
# the held-out R^2. Relies on `r2_score` and `plt` imported by other cells.
clf = Regressor().fit(X_train, y_train_reg)
print(r2_score(y_test_reg, clf.predict(X_test)))

# Observed vs. predicted deviation, with the identity line drawn in red.
plt.scatter(y_test_reg, clf.predict(X_test))
plt.plot([-6, 6], [-6, 6], 'r-')









    



-0.302797515251






    Out[379]:





[<matplotlib.lines.Line2D at 0x7f2df53bee10>]



In [50]:

    
# Histogram of log2(Heavy/Light) restricted to rows whose 'Class' is 'R'.
np.log2(dat.loc[dat['Class'].eq('R'), 'Heavy/Light']).plot(kind='hist')









    Out[50]:





<matplotlib.axes._subplots.AxesSubplot at 0x7feb98e65b90>



In [41]:

    
# Show the column labels of `dat` as a plain Python list.
list(dat.columns)









    Out[41]:





['Heavy Isotopes Found',
 'Heavy Intensity',
 'Heavy RT Width',
 'Heavy Mean Offset',
 'Heavy Residual',
 'Heavy R^2',
 'Heavy SNR',
 'Heavy/Light',
 'Light Isotopes Found',
 'Light Intensity',
 'Light RT Width',
 'Light Mean Offset',
 'Light Residual',
 'Light R^2',
 'Light SNR',
 'Medium Isotopes Found',
 'Medium Intensity',
 'Medium RT Width',
 'Medium Mean Offset',
 'Medium Residual',
 'Medium R^2',
 'Medium SNR',
 'Medium/Light',
 'Class']



In [5]:

    
from tpot import TPOT
# `sklearn.cross_validation` is deprecated; `sklearn.model_selection` provides
# the same `train_test_split` and is what the rest of this notebook imports.
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from patsy import dmatrix

# Load the quantification table at `out` and key each row by (peptide, MS2 scan)
# so the hand-labelled (peptide, scan) tuples can be looked up directly.
dat = pd.read_table(out)
dat.set_index(['Peptide', 'MS2 Spectrum ID'], inplace=True)

# Remove identifying metadata and every ratio / ratio-confidence column.
dat.drop(['Modifications', 'Raw File', 'Accession', 'MS1 Spectrum ID', 'Charge', 'Retention Time', 'Heavy/Light', 'Heavy/Light Confidence', 'Medium/Light', 'Medium/Heavy', 'Medium/Heavy Confidence', 'Medium/Light Confidence', 'Light/Medium', 'Light/Medium Confidence', 'Heavy/Medium', 'Heavy/Medium Confidence', 'Light/Heavy Confidence', 'Light/Heavy'], inplace=True, axis=1)

# Remove the precursor m/z columns of every label.
for i in ['Heavy', 'Medium', 'Light']:
    for j in ['Precursor', 'Calibrated Precursor']:
        dat.drop(i + ' ' + j, inplace=True, axis=1)

# Remove every remaining column whose name starts with 'Heavy'.
to_drop = []
for j in dat.columns:
    if j.startswith('Heavy'):
        to_drop.append(j)
dat.drop(to_drop, inplace=True, axis=1)

# Attach the manual labels: 0 for entries of `bad_data`, 1 for `good_data`.
# Rows left without a label keep Class=None and are removed by dropna() below.
dat['Class'] = None
for i in bad_data:
    dat.loc[i, 'Class'] = 0
for i in good_data:
    dat.loc[i, 'Class'] = 1

dat.dropna(inplace=True)
labels = dat['Class']

# # preprocess
dat['Medium Intensity'] = np.log2(dat['Medium Intensity'])
dat['Light Intensity'] = np.log2(dat['Light Intensity'])

# extra info: Medium-to-Light ratios of peak width and isotope count
for i in ['RT Width', 'Isotopes Found']:
    dat['Medium/Light {}'.format(i)] = dat['Medium {}'.format(i)]/dat['Light {}'.format(i)]

# dat = dat.loc[:, ['Medium R^2', 'Light R^2', 'Class']]
dat.reset_index(drop=True, inplace=True)

# Stratified 50/50 split on the positional index. The seed makes the split
# repeatable across kernel restarts, matching the seeded split used elsewhere
# in this notebook; the TPOT search itself is still stochastic.
training_indices, testing_indices = train_test_split(dat.index, stratify=labels.values, train_size=0.5, test_size=0.5, random_state=0)

tpot = TPOT(verbosity=2, generations=10)
tpot.fit(dat.drop('Class',axis=1).loc[training_indices].values, dat.loc[training_indices,'Class'].values.astype(int))
# Last expression of the cell: score of the selected pipeline on the held-out half.
tpot.score(dat.drop('Class',axis=1).loc[testing_indices].values, dat.loc[testing_indices, 'Class'].values.astype(int))









    



GP Progress:   9%|▉         | 100/1100 [00:00<04:56,  3.37pipeline/s]





    



Generation 1 - Current best internal CV score: 1.00000






    



GP Progress:  18%|█▊        | 196/1100 [00:00<06:04,  2.48pipeline/s]





    



Generation 2 - Current best internal CV score: 1.00000






    



GP Progress:  26%|██▋       | 290/1100 [00:00<07:17,  1.85pipeline/s]





    



Generation 3 - Current best internal CV score: 1.00000






    



GP Progress:  36%|███▌      | 394/1100 [00:00<05:00,  2.35pipeline/s]





    



Generation 4 - Current best internal CV score: 1.00000






    



GP Progress:  44%|████▍     | 488/1100 [00:00<03:11,  3.20pipeline/s]





    



Generation 5 - Current best internal CV score: 1.00000






    



GP Progress:  54%|█████▍    | 594/1100 [00:00<03:19,  2.54pipeline/s]





    



Generation 6 - Current best internal CV score: 1.00000






    



GP Progress:  63%|██████▎   | 692/1100 [00:00<03:19,  2.04pipeline/s]





    



Generation 7 - Current best internal CV score: 1.00000






    



GP Progress:  72%|███████▏  | 789/1100 [00:00<01:52,  2.76pipeline/s]





    



Generation 8 - Current best internal CV score: 1.00000






    



GP Progress:  81%|████████▏ | 894/1100 [00:00<01:26,  2.39pipeline/s]





    



Generation 9 - Current best internal CV score: 1.00000






    



GP Progress:  90%|█████████ | 990/1100 [00:00<00:50,  2.19pipeline/s]





    



Generation 10 - Current best internal CV score: 1.00000






    



                                                                      





    



Best pipeline: _linear_svc(input_df, 0.97999999999999998, 60, False)






    









    Out[5]:





1.0



In [17]:

    
# %matplotlib inline
# from sklearn.svm import SVC

# predictor = SVC()
# predictor.fit(dat.drop('Class',axis=1).loc[training_indices].values, dat.loc[training_indices,'Class'].values.astype(int))
# predictor.score(dat.drop('Class',axis=1).loc[training_indices].values, dat.loc[training_indices,'Class'].values.astype(int))
# # plt.scatter(dat.iloc[:, 0], dat.iloc[:, 1], c=dat.iloc[:, 2])









    Out[17]:





0.90740740740740744



In [4]:

    
# Ask the `tpot` object to export its result to the file 'pipe.py'
# (presumably the source code of the best pipeline found -- verify against the TPOT docs).
# NOTE(review): this cell's execution count (4) precedes the cell that creates
# `tpot` (5), so it will fail on a fresh top-to-bottom run unless reordered.
tpot.export('pipe.py')



In [85]:

    
# Load the statistics table and keep only peptides whose sequence contains
# exactly one R/K/k/r character in total.
dat = pd.read_table('/home/chris/Devel/pyquant/ml_test_cl2_stats')
dat = dat[
    dat['Peptide'].str.count('R')
    .add(dat['Peptide'].str.count('K'))
    .add(dat['Peptide'].str.count('k'))
    .add(dat['Peptide'].str.count('r'))
    .eq(1)
]

# Label each remaining row by which residue it carries: 'R' (R or r) or 'K' (K or k).
dat['Class'] = None
dat.loc[
    dat['Peptide'].str.count('R').add(dat['Peptide'].str.count('r')).eq(1),
    'Class',
] = 'R'
dat.loc[
    dat['Peptide'].str.count('K').add(dat['Peptide'].str.count('k')).eq(1),
    'Class',
] = 'K'



In [98]:

    
# Density of log2(Heavy/Light) for 'R' rows: all rows (c='r'), then only rows
# whose 'Heavy/Light Confidence' exceeds 5 (c='g') and 8 (c='k').
np.log2(
    dat.loc[dat['Class'].eq('R'), 'Heavy/Light']
).plot(kind='density', c='r')
np.log2(
    dat.loc[dat['Class'].eq('R') & dat['Heavy/Light Confidence'].gt(5), 'Heavy/Light']
).plot(kind='density', c='g')
np.log2(
    dat.loc[dat['Class'].eq('R') & dat['Heavy/Light Confidence'].gt(8), 'Heavy/Light']
).plot(kind='density', c='k')









    Out[98]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f6fe2a4a210>



In [97]:

    
# Box plots of log2(ratio) for one isotope class: all rows, then rows whose
# ratio confidence exceeds 5 and 9.
isotope = 'K'
ratio = 'Heavy/Light'
df_1 = np.log2(dat.loc[dat['Class']==isotope,ratio])
df_2 = np.log2(dat.loc[(dat['Class']==isotope) & (dat['{} Confidence'.format(ratio)]>5),ratio])
df_3 = np.log2(dat.loc[(dat['Class']==isotope) & (dat['{} Confidence'.format(ratio)]>9),ratio])
df = pd.concat([df_1, df_2, df_3], axis=1)
# The third series is filtered at confidence > 9, so it is labelled '9'
# (it was previously mislabelled '8', which did not match the filter above).
df.columns=['All', '5', '9']
df.plot(kind='box')









    Out[97]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f6fe2bb5fd0>



In [94]:

    
# Density of the Heavy/Light confidence score over rows whose 'Class' is 'K'.
dat.loc[
    dat['Class'].eq('K'),
    '{} Confidence'.format('Heavy/Light'),
].plot(kind='density')









    Out[94]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f6fe2d99ad0>

		Label1 Isotopes Found	Label1 Intensity	Label1 RT Width	Label1 Mean Offset	Label1 Residual	Label1 R^2	Label1 SNR	Label2 Isotopes Found	Label2 Intensity	Label2 RT Width	Label2 Mean Offset	Label2 Residual	Label2 R^2	Label2 SNR	Deviation	Class
Peptide	MS2 Spectrum ID
GcImGSAHQr	779	24.0	14.985734	0.053936	9.931775e-01	0.085489	3.528190	21.114721	32.0	16.703665	0.056111	1.434795e+00	0.416770	2.361091	9.658139	0.094592	1
GcImGSAHQR	783	17.0	12.924555	0.042203	2.686645e+00	0.492796	3.038507	8.359946	26.0	14.711507	0.055908	4.517510e-01	1.063376	2.148553	6.236617	0.000523	1
GcImGSAHQR	777	28.0	14.996194	0.043803	2.569664e+00	0.084761	3.823541	20.474859	34.0	16.708224	0.056213	1.406006e+00	0.412675	2.329501	9.082110	0.094830	1
TQDATHGNSLSHR	811	39.0	13.991486	0.048815	2.128489e+00	0.972111	1.881489	6.062496	50.0	15.962065	0.061264	4.103946e-01	0.238537	3.268813	2.282480	-0.101995	1
IEQAPGQHGAR	887	7.0	16.167331	0.050244	7.230988e-02	0.012362	13.026475	1.012433	11.0	17.872230	0.045521	6.600722e-02	0.034781	11.525348	1.143215	0.290681	1
AGVTGAENr	904	8.0	17.969158	0.067676	2.493468e-01	0.025712	5.443686	1.053801	10.0	19.819117	0.055905	1.362417e-01	0.015580	8.858486	1.034260	-0.065885	1
AGVTGAENR	903	8.0	17.969158	0.067676	2.493468e-01	0.025712	5.443686	1.053801	10.0	19.819117	0.055905	1.362417e-01	0.015580	8.858486	1.034260	-0.065885	1
GTAmNPVDHPHGGGEGR	917	8.0	16.550535	0.050721	1.833391e-01	0.019163	7.692895	1.021964	12.0	18.294151	0.051766	9.084471e-02	0.014866	9.459570	1.234008	0.085492	1
ALVSHPR	933	4.0	15.729831	0.090572	1.991482e-01	0.059474	3.887081	NaN	8.0	17.857216	0.070174	9.030660e-01	0.067431	4.512238	1.013494	0.111110	1
mTGDNPDAPR	944	2.0	13.843877	0.054663	1.666761e-01	0.034627	8.161228	NaN	7.0	15.876904	0.063909	6.023946e-01	0.034698	4.959971	1.001678	0.007644	1
VHPNGIR	898	6.0	15.131996	0.056340	4.975481e-01	0.247207	3.758966	NaN	11.0	17.245997	0.058525	1.444357e-01	0.052027	5.027183	1.406236	-0.135307	1
SVANAEQmDR	959	9.0	16.980617	0.069780	3.405412e-01	0.119328	4.002708	1.026194	12.0	18.786625	0.075726	4.197732e-01	0.104451	3.851186	0.931681	0.071929	1
SVANAEQmDr	962	9.0	16.980617	0.069780	3.405412e-01	0.119328	4.002708	1.026194	12.0	18.786625	0.075726	4.197732e-01	0.104451	3.851186	0.931681	0.071929	1
AAASHLVR	961	14.0	17.324065	0.058804	1.981803e+00	0.189402	8.559042	15.804382	27.0	19.420032	0.075272	1.019746e+00	1.396346	2.584104	19.286076	0.091354	1
AAASHLVr	964	15.0	17.324065	0.058804	1.981799e+00	0.203778	8.558997	10.880751	28.0	19.424549	0.075731	9.812200e-01	1.535988	2.584007	15.079136	0.083129	1
HLTDGmTVr	975	28.0	23.746521	0.093142	4.456008e-01	0.021245	5.482953	15.104723	35.0	25.416597	0.084778	1.511559e+00	0.072245	5.677385	16.507001	0.080686	1
HLTDGmTVR	974	28.0	23.746521	0.093142	4.456008e-01	0.021245	5.482953	15.104723	35.0	25.416597	0.084778	1.511559e+00	0.072245	5.677385	16.507001	0.080686	1
AVQNAMR	995	9.0	17.682340	0.061553	1.262045e+00	0.062885	4.914437	1.126810	18.0	19.446729	0.067331	9.486762e-01	0.099772	5.123132	3.522732	0.142120	1
AGVHFGHQTR	1002	12.0	21.008434	0.083990	5.339253e-01	0.055843	4.154514	1.224810	20.0	22.994444	0.075378	2.275157e-01	0.024981	5.482815	1.207646	-0.189688	1
mVEEDPAHPr	1016	13.0	18.121198	0.092191	7.536466e-01	0.071224	4.389010	6.852894	20.0	20.084497	0.093785	3.310712e-01	0.046965	4.964579	10.238337	-0.049676	1
mVEEDPAHPR	1020	12.0	18.117252	0.092013	7.461029e-01	0.072749	4.389664	1.142899	20.0	20.084497	0.093785	3.310712e-01	0.046965	4.964579	10.238337	-0.227445	1
AGVHFGHQTR	992	23.0	23.282827	0.076255	1.544298e-01	0.013500	7.192932	15.622094	30.0	25.061076	0.080505	3.480352e-01	0.063421	4.765415	14.052629	0.021684	1
AGVHFGHQTr	994	23.0	23.282827	0.076255	1.544298e-01	0.013500	7.192932	15.622094	31.0	25.061076	0.080505	3.483843e-01	0.064882	4.765081	14.052629	0.021684	1
mVEEDPAHPr	1025	6.0	15.698337	0.069287	4.783882e-01	0.046070	5.193861	2.638442	11.0	18.316519	0.079541	1.710395e-01	0.137727	5.748694	1.114434	-0.720857	1
AGVHFGHQTr	1004	12.0	21.008434	0.083990	5.339253e-01	0.055843	4.154514	1.224810	20.0	22.994444	0.075378	2.275157e-01	0.024981	5.482815	1.207646	-0.189688	1
GTAMNPVDHPHGGGEGR	1087	6.0	16.798970	0.052111	2.076855e-01	0.042727	4.882561	NaN	12.0	18.816326	0.051097	5.237172e-01	0.099033	5.016891	1.092491	-0.049417	1
TDLHGTAVR	1078	12.0	18.357713	0.060047	6.581272e-01	0.053875	4.749313	1.380350	8.0	19.553958	0.051518	1.114874e-01	0.032910	7.299707	1.038474	1.064196	0
AHHYPSELSGGQQQR	1137	19.0	20.740120	0.069004	7.869672e-01	0.034991	5.765859	1.344427	28.0	22.611332	0.080211	3.786946e-01	0.056959	4.790247	1.299816	0.020353	1
	1142	13.0	18.501803	0.071385	6.950947e-01	0.036237	5.550402	1.138348	22.0	20.373860	0.069400	3.310561e-01	0.041349	5.465852	1.339840	0.052759	1
	1145	5.0	18.857965	0.087171	7.221403e-01	0.016279	4.229913	1.098967	23.0	21.817347	0.073249	3.987897e-01	0.079853	4.701050	1.190595	0.222085	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
LAQmQIPADDYFIWITGEGk	7850	9.0	18.757221	0.077344	1.544988e+00	0.442809	2.354658	1.138290	5.0	17.276999	0.116156	8.156097e-01	1.656970	NaN	1.651758	-0.327431	1
EGAFVPFVTLGDPGIEQSLK	7849	43.0	23.650085	0.193217	1.206463e+00	0.729791	1.709437	6.358218	41.0	23.037434	0.207459	2.615308e-01	0.228140	3.322470	6.108227	-0.002491	1
ELcSAAITmSDNTAANLLLTTIGGPk	7866	23.0	20.358045	0.143019	9.635800e-01	1.476684	1.071712	10.838309	17.0	18.233260	0.076696	4.096748e+00	1.956180	1.403417	221.445566	1.077706	0
TQGAAAFEGAVIAYEPVWAIGTGk	7884	49.0	23.298585	0.219064	1.977094e-01	0.204182	3.837238	4.173747	39.0	22.537990	0.180326	1.371329e+00	0.624656	2.240398	6.524064	0.063856	1
ELcSAAITmSDNTAANLLLTTIGGPk	7874	49.0	24.919943	0.206087	2.303474e-01	0.853150	1.620717	4.859298	47.0	24.124365	0.212079	1.652292e-01	0.197110	3.682382	4.434152	0.058754	1
ELcSAAITmSDNTAANLLLTTIGGPk	7886	21.0	19.999417	0.114682	3.075215e-01	1.977056	1.436670	7.416190	34.0	19.573735	0.056227	1.608774e+00	6.064123	2.089884	6.699860	0.739104	1
VLALAENYQPLYAALGLHPGMLEk	7915	18.0	19.815205	0.095440	5.583934e-01	0.903778	1.903067	120.276338	12.0	19.124448	0.071011	1.931950e+00	0.545626	2.040685	15.321044	0.767542	1
FGASSLLASLLk	8084	34.0	23.119274	0.148651	2.196329e+00	0.141995	3.944355	5.234995	35.0	22.498940	0.188970	4.770330e-01	0.213794	3.080469	4.816335	-0.080325	1
TqGAAAFEGAVIAYEPVWAIGTGk	8094	23.0	16.250988	0.041308	4.430558e-01	0.092574	11.158354	-0.743560	27.0	18.138290	0.040326	2.672755e+01	1.164123	12.381305	0.775548	-1.095661	0
DGVGLLPTVLDVVENPk	8559	38.0	21.899157	0.169400	1.452420e-01	0.516881	3.197701	7.688767	27.0	21.228191	0.184760	3.650728e-01	0.534657	2.116104	6.450245	-0.088636	1
ELcSAAITMSDNTAANLLLTTIGGPk	8695	29.0	22.895231	0.273095	3.034958e-01	0.280452	2.329319	4.004378	52.0	23.293799	0.232344	4.787828e-01	1.796918	0.960049	4.702896	-0.195882	1
ELcSAAITMSDNTAANLLLTTIGGPk	8686	32.0	23.059216	0.201174	5.944131e+00	2.126078	0.805061	4.347489	39.0	22.595297	0.129570	2.685297e+00	3.346062	1.681354	2.784739	0.025548	1
VGYIELDLNSGk	8965	36.0	18.805564	0.332078	2.099859e+00	7.771240	NaN	6.158339	29.0	17.510583	0.145803	2.394400e+00	4.397509	-0.054590	2.641730	0.394377	1
SLDDAQIALAVINTTYASQIGLTPAk	9089	26.0	20.788147	0.102492	2.694714e-01	0.586520	3.116046	15.176564	31.0	20.371373	0.114255	6.046525e-01	1.046882	2.549166	6.614207	-0.124315	1
ELcSAAITMSDNTAANLLLTTIGGPk	8846	216.0	23.835211	0.850754	3.325143e-01	1.893201	NaN	-0.022767	216.0	23.428056	0.756642	2.127841e+00	2.621094	-3.001260	0.851808	-0.127975	1
	8883	264.0	24.355262	0.235376	1.270539e-01	2.377913	1.738366	4.411485	268.0	24.149613	0.279444	2.810016e-01	1.998608	1.920195	3.695578	-0.300475	1
	9458	71.0	19.814885	0.067602	1.056360e+00	4.460319	1.553299	8.495942	55.0	19.899775	0.081068	7.621572e-01	4.665191	2.026509	16.984387	-1.687464	0
LANEGIFTQQELYDELLTLADEAk	9522	4.0	18.594230	0.047298	9.488388e-02	0.128509	3.940116	NaN	5.0	18.353222	0.046340	7.130651e-01	0.196605	3.942563	NaN	0.062293	1
AIHTLWNVLDELDQAWLPVEk	9560	11.0	22.513893	0.058378	2.969589e-01	0.072579	4.455892	1.022964	11.0	21.834118	0.062201	4.901800e-01	0.119166	3.774949	1.024549	-0.038950	1
AIHTLWNVLDELDQAWLPVEk	9586	6.0	21.728629	0.058892	3.079504e-01	0.046923	4.161881	0.963363	8.0	21.320748	0.054593	9.095046e-01	0.092096	4.274263	1.208662	-0.070930	1
ELcSAAITMSDNTAANLLLTTIGGPK	9148	104.0	18.329059	0.041806	6.509279e+00	2.474097	3.799134	2.287613	182.0	21.163595	0.263017	9.294961e-01	3.052862	0.857173	1.730295	-3.928888	0
TAPDGEHGVNLVHLEDVIGAITLLLQAPk	9656	4.0	17.701533	0.040175	6.339672e-04	0.016543	inf	NaN	6.0	17.895217	0.040175	6.339672e-04	0.024815	inf	NaN	-0.150357	1
NADGLGMLVAqAAHAFLLWHGVLPDVEPVIk	9688	3.0	15.521233	0.052455	1.120198e-01	0.068335	9.743359	NaN	1.0	13.981914	0.052455	1.120198e-01	0.022778	9.743359	NaN	-0.548328	1
FLQFMVSPAFQNAIPTGnWMYPVANVTLPAGFEK	9696	0.0	-inf	NaN	NaN	NaN	NaN	NaN	4.0	16.293770	0.067028	1.165689e-01	0.042940	4.309249	NaN	NaN	0
ELcSAAITMSDNTAANLLLTTIGGPK	9209	216.0	20.944411	0.236307	9.643571e+00	3.447382	0.541612	2.741174	142.0	20.709059	0.299294	1.026466e+01	5.649428	-2.158764	8.410005	-0.675744	1
VLAPINDFINTLNAFFSAGGk	9765	6.0	19.314618	0.053537	1.655816e-01	0.013354	6.934276	NaN	6.0	18.662179	0.053537	2.025805e-01	0.013438	6.934400	NaN	-0.090769	1
VLAPINDFINTLNAFFSAGGk	9767	5.0	18.794496	0.058808	2.308393e-01	0.013254	6.645728	1.003997	7.0	18.154701	0.054971	1.785362e-01	0.006698	7.625821	NaN	0.235751	1
VGYIELDLNSGk	9546	93.0	19.127481	0.184934	1.256568e+01	12.800832	-1.205556	3.856789	83.0	17.154424	0.098624	3.966709e+00	9.656604	-3.448946	8.693567	1.260810	0
FVQAYQSDEVYEAANk	10015	1.0	12.242579	0.030985	1.733472e-07	0.036631	3.687593	NaN	1.0	12.242579	0.030985	1.733472e-07	0.036631	3.687593	NaN	-0.721485	1
ELcSAAITMSDNTAAnLLLTTIGGPk	9818	65.0	17.013110	0.113016	5.642632e+00	0.907013	0.174813	0.286011	39.0	16.018433	0.050737	1.074356e+01	0.859082	10.182012	0.436856	1.423368	0