In [7]:
import numpy as np
import pandas as pd

In [91]:
cns_all = pd.read_csv("CNS_all.csv")

In [92]:
cns_all["Age"] = cns_all["Age"].apply(lambda x : 1 if x > 60 else 0)

In [93]:
cns_all["Race"] = cns_all["Race"].apply(lambda x : "White" if x == "White" else "Other")

In [94]:
def stage_mask(stage):
    """Collapse Ann Arbor stage into early (I/II) vs. advanced (III/IV)."""
    if stage == "I" or stage == "II":
        return "I/II"
    elif stage == "III" or stage == "IV":
        return "III/IV"
    else:
        return "Missing"

def LDH_mask(ldh):
    """Map YES/NO flags to 1/0; anything else becomes a missing value."""
    if ldh == "NO":
        return 0
    elif ldh == "YES":
        return 1
    else:
        return None
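
The same mappings can be written with Series.map and plain dicts, where unmapped values come back as NaN instead of needing an explicit else branch. A sketch, assuming the same stage and YES/NO coding:

In [ ]:
# Sketch: dict-based recoding with Series.map (unmapped values become NaN)
stage_map = {"I": "I/II", "II": "I/II", "III": "III/IV", "IV": "III/IV"}
yes_no_map = {"NO": 0, "YES": 1}
cns_all["Stage"] = cns_all["Stage"].map(stage_map).fillna("Missing")
cns_all["LDH"] = cns_all["LDH"].map(yes_no_map)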

In [95]:
cns_all["Stage"] = cns_all["Stage"].apply(stage_mask)

In [96]:
cns_all["IPI Score"] = cns_all["IPI Score"].apply(lambda x: "L/LI" if x < 3 else "H/HI")

In [97]:
cns_all["PS"] = cns_all["PS"].apply(lambda x: 1 if x > 1 else 0)

In [98]:
cns_all["LDH"] = cns_all["LDH"].apply(LDH_mask)

In [99]:
cns_all["B Symp 1"] = cns_all["B Symp 1"].apply(lambda x : 0 if x == "NO" else 1)
cns_all["B symp 2"] = cns_all["B symp 2"].apply(lambda x : 0 if x == "NO" else 1)
cns_all["B symp 3"] = cns_all["B symp 3"].apply(lambda x : 0 if x == "NO" else 1)

In [103]:
# Sum of the three indicators; a value > 0 means at least one B symptom
b_symp_any = cns_all["B Symp 1"] + cns_all["B symp 2"] + cns_all["B symp 3"]

In [106]:
cns_all["B Symp"] = b_symp_any.apply(lambda x: 1 if x > 0 else 0)

In [108]:
del cns_all["B Symp 1"]
del cns_all["B symp 2"]
del cns_all["B symp 3"]

In [112]:
cns_all[">1 extranodal"] = cns_all[">1 extranodal"].apply(lambda x : 0 if x == "NO" else 1)

In [115]:
cns_all["BM Involv"] = cns_all["BM Involv"].apply(LDH_mask)

In [120]:
cns_all["PB Involv"] = cns_all["PB Involv"].apply(LDH_mask)

In [129]:
cns = cns_all["CNS "].apply(LDH_mask)

In [131]:
cns_all["cns"] = cns
del cns_all["CNS "]

In [135]:
cns_all["cns"] = cns_all["cns"].apply(lambda x: 1 if x == 1 else 0)

In [136]:
cns_all


Out[136]:
Age Gender Race Stage IPI Score PS LDH >1 extranodal BM Involv PB Involv B Symp cns
0 1 M White III/IV L/LI 0 NaN 0 1 NaN 0 0
1 0 F White III/IV H/HI 1 0 1 0 NaN 1 0
2 0 M White I/II L/LI 0 0 0 0 0 0 0
3 0 F Other Missing L/LI 0 1 0 1 1 0 0
4 0 M White III/IV H/HI 1 1 0 1 0 1 0
5 0 M Other III/IV H/HI 0 1 1 1 NaN 1 0
6 0 M Other III/IV L/LI 0 NaN 1 0 NaN 1 0
7 1 M White III/IV L/LI 0 NaN 0 0 NaN 1 0
8 1 F White III/IV L/LI 0 NaN 0 1 1 1 0
9 1 M Other I/II L/LI 0 0 0 NaN 1 0 0
10 1 M White I/II L/LI 0 0 0 NaN NaN 1 0
11 0 F White Missing L/LI 0 NaN 1 NaN NaN 1 0
12 1 F White I/II L/LI 0 0 0 NaN NaN 0 0
13 0 M White III/IV L/LI 0 1 0 1 1 0 0
14 0 M White III/IV H/HI 0 1 1 1 1 1 0
15 0 F White I/II L/LI 0 0 0 NaN 1 1 0
16 0 M White III/IV L/LI 0 NaN 0 0 NaN 0 0
17 1 M White III/IV H/HI 1 1 0 1 NaN 1 0
18 0 M Other III/IV L/LI 0 NaN 1 1 NaN 1 0
19 0 F White I/II L/LI 0 NaN 0 NaN NaN 1 0
20 0 F Other Missing L/LI 0 NaN 0 1 1 0 0
21 1 M White I/II L/LI 0 NaN 0 NaN NaN 0 0
22 1 M White III/IV L/LI 0 NaN 0 0 NaN 0 1
23 1 M Other I/II L/LI 0 NaN 0 0 0 0 0
24 0 F White I/II L/LI 0 0 0 0 0 0 0
25 1 M White III/IV H/HI 0 NaN 1 1 1 0 0
26 1 F White III/IV H/HI 0 1 0 0 0 0 0
27 0 M Other III/IV L/LI 0 0 1 0 0 0 1
28 0 F Other I/II L/LI 0 0 0 0 0 1 0
29 0 M White Missing L/LI 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ...
229 1 M Other III/IV L/LI 0 0 0 1 1 0 0
230 1 M Other Missing L/LI 0 0 0 1 1 1 0
231 1 M White Missing L/LI 0 NaN 0 1 1 0 0
232 0 M Other Missing L/LI 0 0 0 NaN 0 1 0
233 0 M Other III/IV L/LI 0 0 1 1 1 1 1
234 1 M White III/IV H/HI 0 1 0 0 NaN 1 0
235 1 F White III/IV H/HI 0 0 1 0 0 1 0
236 0 M White III/IV L/LI 0 1 0 0 NaN 0 0
237 0 F Other Missing L/LI 0 NaN 0 0 NaN 0 0
238 0 F Other III/IV L/LI 0 1 0 1 1 1 0
239 0 F Other III/IV L/LI 0 1 0 NaN 1 1 0
240 0 F White Missing L/LI 0 1 0 1 NaN 0 0
241 0 M White I/II L/LI 0 1 0 0 NaN 1 0
242 1 M Other III/IV H/HI 0 NaN 1 0 NaN 0 0
243 0 F White I/II L/LI 0 0 0 0 0 1 0
244 0 M White III/IV H/HI 0 1 1 0 NaN 1 0
245 1 F White III/IV H/HI 0 1 1 NaN NaN 1 0
246 0 M Other III/IV H/HI 0 1 1 0 NaN 1 0
247 1 F White III/IV L/LI 0 0 0 NaN 1 1 0
248 1 M Other III/IV H/HI 0 1 1 1 NaN 1 0
249 0 M White III/IV L/LI 0 0 1 0 NaN 0 0
250 0 M White III/IV L/LI 0 NaN 0 1 1 0 0
251 1 F Other I/II L/LI 0 0 0 0 0 1 0
252 0 M White I/II L/LI 0 0 0 0 NaN 1 0
253 0 M Other III/IV L/LI 0 1 0 1 1 1 0
254 1 F White III/IV H/HI 0 1 0 1 NaN 1 0
255 1 M Other Missing L/LI 0 0 0 NaN NaN 1 1
256 0 M Other III/IV L/LI 0 0 0 NaN 1 1 0
257 1 F White III/IV L/LI 0 0 0 0 NaN 1 0
258 1 M White I/II L/LI 0 NaN 0 NaN NaN 1 0

259 rows × 12 columns


In [137]:
import matplotlib
from sklearn import ensemble, cross_validation
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing

In [138]:
le = preprocessing.LabelEncoder()

In [139]:
def fit_and_transform(column_name, df):
    """Label-encode one column in place (refits the shared encoder)."""
    le.fit(df[column_name])
    df[column_name] = le.transform(df[column_name])

columns = ["Gender", "Race", "Stage", "IPI Score"]

for column in columns:
    fit_and_transform(column, cns_all)
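
Refitting one shared LabelEncoder works here, but each column's mapping is discarded as soon as the next column is fit. Keeping one encoder per column preserves the ability to invert the encoding later; a sketch of that alternative, starting from the raw labels:

In [ ]:
# Sketch: one LabelEncoder per column so each mapping can be inverted later
encoders = {}
for column in ["Gender", "Race", "Stage", "IPI Score"]:
    enc = preprocessing.LabelEncoder()
    cns_all[column] = enc.fit_transform(cns_all[column])
    encoders[column] = enc
# e.g. encoders["Race"].inverse_transform(cns_all["Race"]) recovers the labels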

In [168]:
# Two ways to handle missing values: drop incomplete rows, or fill with a
# sentinel so the encoder can treat "missing" as its own category
cns_dropped = cns_all.dropna()
cns_filled = cns_all.fillna("missing")
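
Before choosing between the two, it is worth checking how much data each column is missing; a sketch:

In [ ]:
# Sketch: per-column missing counts, to judge dropna() vs. fillna()
print(cns_all.isnull().sum())
print("rows kept by dropna(): %d of %d" % (len(cns_dropped), len(cns_all)))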

In [170]:
# Label-encode every column of the filled frame (columns are already unique,
# so no .unique() is needed); the "missing" sentinel becomes its own code
for column in cns_filled.columns:
    fit_and_transform(column, cns_filled)

In [343]:
del cns_filled[">1 extranodal"]

In [344]:
# 'cns' is the last column, so everything before it is a feature
target = cns_filled['cns']
main_data = cns_filled.ix[:, :-1]

In [345]:
# Shuffle once, then hold out the last 109 of 259 rows as a test set
X, y = shuffle(main_data, target, random_state=13)
offset = 150
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
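
The same split can be produced with cross_validation.train_test_split, which shuffles and slices in one call; a sketch:

In [ ]:
# Sketch: equivalent hold-out split via train_test_split
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    main_data, target, test_size=109, random_state=13)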

In [346]:
# Gradient boosting with logistic (deviance) loss: shallow trees, few stages
params = {'n_estimators': 10, 'learning_rate': .001,
          'max_depth': 2, 'random_state': 0, 'loss': 'deviance'}
clf = ensemble.GradientBoostingClassifier(**params)

clf.fit(X_train, y_train)
# For a 0/1 target, MSE of hard predictions equals the misclassification rate
mse = mean_squared_error(y_test, clf.predict(X_test))
clf_score = clf.score(X_test, y_test)
print("MSE: %.4f" % mse)
print("Accuracy: %.4f" % clf_score)


MSE: 0.0367
Accuracy: 0.9633
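
An accuracy of 0.9633 should be read against the class balance: positive cns cases are rare, so a model that always predicts 0 scores nearly the same. A quick baseline check, as a sketch:

In [ ]:
# Sketch: accuracy of always predicting the majority class on the test set
majority_acc = max(y_test.mean(), 1 - y_test.mean())
print("majority-class baseline accuracy: %.4f" % majority_acc)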

In [347]:
import pylab as pl
# Plot feature importance
feature_importance = clf.feature_importances_
# make importances relative to max importance
feature_importance = 100.0 * (feature_importance / feature_importance.max())
sorted_idx = np.argsort(feature_importance)
pos = np.arange(sorted_idx.shape[0]) + .5
pl.subplot(1, 2, 2)
pl.barh(pos, feature_importance[sorted_idx], align='center')
pl.yticks(pos, main_data.columns[sorted_idx])
pl.xlabel('Relative Importance')
pl.title('Variable Importance')


Out[347]:
<matplotlib.text.Text at 0x10bd6afd0>

In [342]:
# Plot training deviance
test_score = np.zeros((params['n_estimators'],))

for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
    test_score[i] = clf.loss_(y_test, y_pred)

pl.figure(figsize=(12, 6))
pl.subplot(1, 2, 1)
pl.title('Deviance')
pl.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
        label='Training Set Deviance')
pl.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
        label='Test Set Deviance')
pl.legend(loc='upper right')
pl.xlabel('Boosting Iterations')
pl.ylabel('Deviance')


Out[342]:
<matplotlib.text.Text at 0x10bd17690>

In [261]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

svm_clf = SVC()
neighbors_clf = KNeighborsClassifier()
random_forest_clf = RandomForestClassifier()
clfs = [("SVM", svm_clf), ("KNN", neighbors_clf), ("Random Forest", random_forest_clf)]

# Loop variable is named 'model' so it does not clobber the gradient-boosting
# 'clf' used by the deviance plot above
for name, model in clfs:
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    model_score = model.score(X_test, y_test)
    print(name + ": -----------------")
    print("MSE: %.4f" % mse)
    print("Accuracy: %.4f" % model_score)
    # Cross-validated F1/precision/recall, computed on the held-out set only
    f1scores = cross_validation.cross_val_score(model, X_test, y_test, cv=3, scoring='f1')
    print("F1 Score: %0.2f (+/- %0.2f)" % (f1scores.mean(), f1scores.std() * 2))
    precision = cross_validation.cross_val_score(model, X_test, y_test, cv=3, scoring='precision')
    print("Precision Score: %0.2f (+/- %0.2f)" % (precision.mean(), precision.std() * 2))
    recall = cross_validation.cross_val_score(model, X_test, y_test, cv=3, scoring='recall')
    print("Recall Score: %0.2f (+/- %0.2f)" % (recall.mean(), recall.std() * 2))


SVM: -----------------
MSE: 0.0367
Accuracy: 0.9633
F1 Score: 0.00 (+/- 0.00)
Precision Score: 0.00 (+/- 0.00)
Recall Score: 0.00 (+/- 0.00)
KNN: -----------------
MSE: 0.0367
Accuracy: 0.9633
F1 Score: 0.00 (+/- 0.00)
Precision Score: 0.00 (+/- 0.00)
Recall Score: 0.00 (+/- 0.00)
Random Forest: -----------------
MSE: 0.0642
Accuracy: 0.9358
F1 Score: 0.00 (+/- 0.00)
Precision Score: 0.00 (+/- 0.00)
Recall Score: 0.00 (+/- 0.00)
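
Zero F1, precision, and recall across the board confirm that all three models are simply predicting the majority class. Class weighting is one standard response; a sketch (class_weight='auto' in sklearn of this vintage, renamed 'balanced' in later releases):

In [ ]:
# Sketch: reweight classes so the rare positive class is not ignored
weighted_svm = SVC(class_weight='auto')
weighted_svm.fit(X_train, y_train)
print("weighted SVM accuracy: %.4f" % weighted_svm.score(X_test, y_test))
f1 = cross_validation.cross_val_score(weighted_svm, X_test, y_test, cv=3, scoring='f1')
print("weighted SVM F1: %0.2f (+/- %0.2f)" % (f1.mean(), f1.std() * 2))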

In [210]:
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.lda import LDA

In [267]:
pca = PCA(n_components=11)
X_r = pca.fit(X).transform(X)

lda = LDA(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

target_names = cns_filled.columns.unique()

# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))


plt.figure(figsize=(12, 8))
for c, i, target_name in zip("brgcmy", [0, 1, 2,3, 4, 5, 6, 7, 8, 9, 10], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
plt.legend(loc="upper left")
plt.title('PCA')

plt.figure(figsize=(12, 8))
for c, i, target_name in zip("brgcmy", [0, 1, 2,3, 4, 5, 6], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
plt.legend(loc="upper left")
plt.title('LDA')

plt.show()


explained variance ratio: [ 0.2259346   0.17002742  0.1201034   0.1041173   0.09072686  0.07485279
  0.06850171  0.06528011  0.05315184  0.01826943  0.00903455]
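
The first two components carry under 40% of the variance, so the 2-D scatter above is a lossy view of the data. A quick cumulative check, as a sketch:

In [ ]:
# Sketch: cumulative explained variance across all 11 components
print(np.cumsum(pca.explained_variance_ratio_))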

In [268]:
from sklearn import linear_model

In [363]:
logreg = linear_model.LogisticRegression(C=1)

In [364]:
logclf = logreg.fit(X_train, y_train)

In [365]:
logclf.score(X_test, y_test)


Out[365]:
0.96330275229357798

In [366]:
logclf.coef_


Out[366]:
array([[ 0.08445391,  0.08189909, -0.66691392, -0.34062076]])
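The raw coefficient array is hard to read without feature names; pairing them up, as a sketch (assumes main_data still holds the four columns used in this fit):

In [ ]:
# Sketch: pair each logistic-regression coefficient with its feature name
for name, coef in zip(main_data.columns, logclf.coef_[0]):
    print("%s: %.4f" % (name, coef))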

In [289]:
from __future__ import print_function
import numpy as np
import statsmodels.api as sm

In [357]:
# statsmodels does not add an intercept automatically, so this logit is fit
# through the origin; see sm.add_constant for the usual fix
logit_mod = sm.Logit(y_train, X_train)
logit_res = logit_mod.fit(method='bfgs', disp=0)
print('Parameters: ', logit_res.params)


Parameters:  [-0.44677339  0.01126037 -1.09695065 -1.07596921]
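
The negative pseudo R-squared in the summary two cells below is a symptom of the missing intercept. Refitting with a constant term, as a sketch:

In [ ]:
# Sketch: include an intercept via sm.add_constant before fitting
logit_res_c = sm.Logit(y_train, sm.add_constant(X_train)).fit(disp=0)
print(logit_res_c.summary())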

In [358]:
margeff = logit_res.get_margeff()
print(margeff.summary())


        Logit Marginal Effects       
=====================================
Dep. Variable:                      y
Method:                          dydx
At:                           overall
==============================================================================
                dy/dx    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1            -0.0412      0.030     -1.394      0.163        -0.099     0.017
x2             0.0010      0.038      0.028      0.978        -0.073     0.075
x3            -0.1012      0.026     -3.856      0.000        -0.153    -0.050
x4            -0.0993      0.043     -2.318      0.020        -0.183    -0.015
==============================================================================

In [359]:
print(logit_res.summary())


                           Logit Regression Results                           
==============================================================================
Dep. Variable:                      y   No. Observations:                  150
Model:                          Logit   Df Residuals:                      146
Method:                           MLE   Df Model:                            3
Date:                Thu, 29 Jan 2015   Pseudo R-squ.:                -0.06124
Time:                        03:29:56   Log-Likelihood:                -46.921
converged:                       True   LL-Null:                       -44.214
                                        LLR p-value:                     1.000
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1            -0.4468      0.330     -1.354      0.176        -1.093     0.200
x2             0.0113      0.407      0.028      0.978        -0.787     0.810
x3            -1.0970      0.308     -3.556      0.000        -1.702    -0.492
x4            -1.0760      0.486     -2.215      0.027        -2.028    -0.124
==============================================================================

In [371]:
cns_filled


Out[371]:
Stage BM Involv PB Involv B Symp cns
0 1 1 2 0 0
1 1 0 2 1 0
2 0 0 0 0 0
3 2 1 1 0 0
4 1 1 0 1 0
5 1 1 2 1 0
6 1 0 2 1 0
7 1 0 2 1 0
8 1 1 1 1 0
9 0 2 1 0 0
10 0 2 2 1 0
11 2 2 2 1 0
12 0 2 2 0 0
13 1 1 1 0 0
14 1 1 1 1 0
15 0 2 1 1 0
16 1 0 2 0 0
17 1 1 2 1 0
18 1 1 2 1 0
19 0 2 2 1 0
20 2 1 1 0 0
21 0 2 2 0 0
22 1 0 2 0 1
23 0 0 0 0 0
24 0 0 0 0 0
25 1 1 1 0 0
26 1 0 0 0 0
27 1 0 0 0 1
28 0 0 0 1 0
29 2 0 0 0 0
... ... ... ... ... ...
229 1 1 1 0 0
230 2 1 1 1 0
231 2 1 1 0 0
232 2 2 0 1 0
233 1 1 1 1 1
234 1 0 2 1 0
235 1 0 0 1 0
236 1 0 2 0 0
237 2 0 2 0 0
238 1 1 1 1 0
239 1 2 1 1 0
240 2 1 2 0 0
241 0 0 2 1 0
242 1 0 2 0 0
243 0 0 0 1 0
244 1 0 2 1 0
245 1 2 2 1 0
246 1 0 2 1 0
247 1 2 1 1 0
248 1 1 2 1 0
249 1 0 2 0 0
250 1 1 1 0 0
251 0 0 0 1 0
252 0 0 2 1 0
253 1 1 1 1 0
254 1 1 2 1 0
255 2 2 2 1 1
256 1 2 1 1 0
257 1 0 2 1 0
258 0 2 2 1 0

259 rows × 5 columns


In [ ]: