In [113]:

    
%matplotlib qt
import pandas as pd
import numpy as np
import re
import mia



In [104]:

    
# Load the per-image texture features of the real (Hologic) mammograms.
# The first CSV column holds the image file name and becomes the index.
# `DataFrame.from_csv` is deprecated/removed in pandas; `read_csv` with
# `index_col=0` is the supported equivalent (the index is kept as plain
# strings, which is what the file-name labels are anyway).
hologic_cluster = pd.read_csv('../2015-03-28-real-texture-cluster.csv', index_col=0)
hologic_cluster.head()









    Out[104]:






  
    
      
      contrast_cluster_1
      dissimilarity_cluster_1
      homogeneity_cluster_1
      energy_cluster_1
      contrast_cluster_2
      dissimilarity_cluster_2
      homogeneity_cluster_2
      energy_cluster_2
      contrast_cluster_3
      dissimilarity_cluster_3
      homogeneity_cluster_3
      energy_cluster_3
      contrast_cluster_4
      dissimilarity_cluster_4
      homogeneity_cluster_4
      energy_cluster_4
    
  
  
    
      p214-010-60001-cl.png
       2.931535e+08
       7003850.25
       8130257.674942
       51371.507991
       8.260166e+08
       12106920.25
       8178692.790514
       51247.278008
       1.189669e+09
       10889315.75
       8309986.114009
       46728.453963
       7.767933e+08
       5886436.25
       8380532.207002
       43973.709887
    
    
      p214-010-60001-cr.png
       1.856295e+08
       4623518.75
       8241054.817176
       52008.002383
       6.412453e+08
        9141035.75
       8250194.492365
       57204.728899
       1.151640e+09
       10339648.50
       8311361.858274
       48833.499865
       7.751633e+08
       5625592.50
       8402698.450330
       47068.058470
    
    
      p214-010-60001-ml.png
       2.225075e+08
       5867047.50
       8121710.629886
       53939.606377
       8.938307e+08
       12497527.25
       8178878.867447
       60888.448918
       1.785641e+09
       16247420.50
       8210113.855566
       39805.116295
       1.237097e+09
       9180578.50
       8338267.205284
       40480.385535
    
    
      p214-010-60001-mr.png
       2.280928e+08
       5802305.25
       8143748.833719
       56363.712378
       1.047522e+09
       13875836.25
       8165493.907820
       53059.072536
       1.850879e+09
       16737395.50
       8209670.079957
       47579.096741
       1.073341e+09
       7801042.00
       8371460.064432
       57173.612063
    
    
      p214-010-60005-cl.png
       1.563874e+08
       4584042.25
       8163100.805695
       37606.734376
       6.720121e+08
        9797939.50
       8198951.000702
       36979.949572
       1.548208e+09
       13951816.50
       8220116.309021
       32055.018330
       1.131708e+09
       8562726.25
       8306607.852877
       55253.097523



In [105]:

    
# Build the per-image metadata table for the real images from the feature
# frame and the BIRADS CSV; the result is indexed by image file name like
# `hologic_cluster` and carries the BIRADS class used for colouring plots.
birads_csv = '../data/BIRADS.csv'
hologic_meta = mia.analysis.create_hologic_meta_data(hologic_cluster, birads_csv)
hologic_meta.head()









    Out[105]:






  
    
      
      patient_id
      side
      view
      img_name
      BIRADS
      img_number
    
  
  
    
      p214-010-60001-cl.png
       21401060001
       c
       l
       p214-010-60001-cl.png
       3
       1
    
    
      p214-010-60001-cr.png
       21401060001
       c
       r
       p214-010-60001-cr.png
       3
       1
    
    
      p214-010-60001-ml.png
       21401060001
       m
       l
       p214-010-60001-ml.png
       3
       1
    
    
      p214-010-60001-mr.png
       21401060001
       m
       r
       p214-010-60001-mr.png
       3
       1
    
    
      p214-010-60005-cl.png
       21401060005
       c
       l
       p214-010-60005-cl.png
       4
       5

Real Dataset Analysis



In [185]:

    
# Names of the homogeneity feature columns (one per cluster).
# Built as a concrete list: on Python 3 `filter` returns a one-shot iterator,
# which cannot be reused as a DataFrame column selector in later cells.
columns = [name for name in hologic_cluster.columns if 'homogeneity' in name]

Scatter matrix of the clusters. From this it is noted that homogeneity is the major cause of the splitting, as it is bimodal across all clusters. Contrast and dissimilarity show correlations from high to low risk. Energy also shows this trend, but its correlation is weaker.



In [194]:

    
# Scatter matrix of the features whose column name contains '1'
# (i.e. the *_cluster_1 columns), coloured by BIRADS class.
# A list comprehension replaces `filter`, whose Python 3 iterator result is
# not a valid DataFrame indexer.
cluster_1_columns = [name for name in hologic_cluster.columns if '1' in name]
hc = hologic_cluster[cluster_1_columns].copy()
hc['BIRADS'] = hologic_meta.BIRADS
mia.plotting.plot_scattermatrix(hc, 'BIRADS')

The bimodality of homogeneity can be seen more clearly when it is plotted as a histogram for each cluster:



In [193]:

    
%matplotlib inline
hc = hologic_cluster.copy()
hc['class'] = hologic_meta.BIRADS
mia.plotting.plot_risk_classes(hc, 'homogeneity_cluster_4')



In [177]:

    
# Project the selected feature columns of the real images to 2-D with t-SNE.
# NOTE(review): `columns` must already exist when this cell runs; the recorded
# execution counts suggest the cells were run out of order - confirm the
# intended column subset before relying on a Restart & Run All.
# NOTE(review): no random seed is passed, so the embedding may differ between
# runs - confirm whether mia.analysis.tSNE accepts one.
tsne_input = hologic_cluster[columns]
mapping = mia.analysis.tSNE(tsne_input, learning_rate=300, n_components=2, verbose=2)









    



[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 360 / 360
[t-SNE] Mean sigma: 0.574917
[t-SNE] Iteration 10: error = 16.1541876, gradient norm = 0.1719006
[t-SNE] Iteration 20: error = 13.2540182, gradient norm = 0.1536431
[t-SNE] Iteration 30: error = 12.5407164, gradient norm = 0.1586372
[t-SNE] Iteration 40: error = 12.8821844, gradient norm = 0.1474866
[t-SNE] Iteration 50: error = 12.8103744, gradient norm = 0.1423375
[t-SNE] Iteration 60: error = 12.7278322, gradient norm = 0.1492145
[t-SNE] Iteration 70: error = 12.9535844, gradient norm = 0.1467182
[t-SNE] Iteration 80: error = 12.9187282, gradient norm = 0.1383605
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 12.640844
[t-SNE] Iteration 90: error = 0.8356086, gradient norm = 0.0233477
[t-SNE] Iteration 100: error = 0.5918354, gradient norm = 0.0087953
[t-SNE] Iteration 110: error = 0.5427384, gradient norm = 0.0032808
[t-SNE] Iteration 120: error = 0.5263414, gradient norm = 0.0016312
[t-SNE] Iteration 130: error = 0.5209433, gradient norm = 0.0008191
[t-SNE] Iteration 140: error = 0.5183772, gradient norm = 0.0006766
[t-SNE] Iteration 150: error = 0.5169651, gradient norm = 0.0006389
[t-SNE] Iteration 160: error = 0.5161954, gradient norm = 0.0006213
[t-SNE] Iteration 170: error = 0.5157535, gradient norm = 0.0006135
[t-SNE] Iteration 180: error = 0.5154955, gradient norm = 0.0006089
[t-SNE] Iteration 190: error = 0.5153432, gradient norm = 0.0006062
[t-SNE] Iteration 200: error = 0.5152528, gradient norm = 0.0006047
[t-SNE] Iteration 210: error = 0.5151990, gradient norm = 0.0006037
[t-SNE] Iteration 220: error = 0.5151668, gradient norm = 0.0006032
[t-SNE] Iteration 230: error = 0.5151476, gradient norm = 0.0006028
[t-SNE] Iteration 240: error = 0.5151362, gradient norm = 0.0006026
[t-SNE] Iteration 250: error = 0.5151293, gradient norm = 0.0006025
[t-SNE] Iteration 260: error = 0.5151252, gradient norm = 0.0006025
[t-SNE] Iteration 270: error = 0.5151227, gradient norm = 0.0006024
[t-SNE] Iteration 280: error = 0.5151212, gradient norm = 0.0006024
[t-SNE] Iteration 283: error difference 0.000000. Finished.
[t-SNE] Error after 283 iterations: 0.515121



In [178]:

    
# Scatter plot of the two embedding dimensions, labelled by BIRADS class.
mia.plotting.plot_scatter_2d(mapping, [0, 1], hologic_meta['BIRADS'])









    Out[178]:





<matplotlib.axes._subplots.AxesSubplot at 0x133f1bf50>



In [196]:

    
# Mean of the closeness measure computed from the embedding and the image
# names (semantics of the measure are defined in mia.analysis).
closeness = mia.analysis.measure_closeness(mapping, hologic_meta['img_name'])
closeness.mean()









    Out[196]:





5.3192683969678507



In [106]:

    
# Split the real images by the sign of embedding dimension 0.
left_cluster = hologic_cluster[mapping[0] < 0]
right_cluster = hologic_cluster[mapping[0] >= 0]

# Images whose file name contains '61246' (presumably one patient's images -
# confirm), selected as a concrete list of labels: a Python 3 `filter`
# iterator is not a valid `.loc` indexer.
mask = [name for name in mapping.index if '61246' in name]

# Difference between the summary statistics of that subset and those of all
# BIRADS 1 images.
hologic_cluster.loc[mask].describe() - hologic_cluster[hologic_meta.BIRADS == 1].describe()









    Out[106]:






  
    
      
      contrast_cluster_1
      dissimilarity_cluster_1
      homogeneity_cluster_1
      energy_cluster_1
      contrast_cluster_2
      dissimilarity_cluster_2
      homogeneity_cluster_2
      energy_cluster_2
      contrast_cluster_3
      dissimilarity_cluster_3
      homogeneity_cluster_3
      energy_cluster_3
      contrast_cluster_4
      dissimilarity_cluster_4
      homogeneity_cluster_4
      energy_cluster_4
    
  
  
    
      count
      -5.200000e+01
            -52.000000
           -52.000000
         -52.000000
      -5.200000e+01
            -52.000000
           -52.000000
          -52.000000
      -5.200000e+01
            -52.000000
           -52.000000
          -52.000000
      -5.200000e+01
            -52.000000
           -52.000000
          -52.000000
    
    
      mean
       9.160567e+07
          74524.982143
      -4061161.758205
      -18304.689283
      -5.051321e+08
       -7603719.178571
      -4147655.094432
       -20643.333663
      -3.373641e+09
      -36808132.767857
      -3666362.056107
       -42376.793559
      -3.327056e+09
      -31426773.834821
      -3804590.423466
       -29025.715724
    
    
      std
      -1.603533e+08
       -5406764.273350
      -1618317.125137
      -19830.220933
      -7.159395e+08
      -12883083.536642
      -1569686.264484
       -25174.014670
      -1.874185e+09
      -22761707.482165
      -1522252.920157
       -30145.025223
      -1.554493e+09
      -15425890.091613
      -1602577.047234
       -19292.470167
    
    
      min
       3.823331e+08
        9378632.750000
             0.000000
       22302.068686
       1.215520e+09
       20742859.000000
             0.000000
        22709.714106
       0.000000e+00
              0.000000
         20266.891924
         5965.814005
       0.000000e+00
              0.000000
        218008.899588
         4024.187474
    
    
      25%
       2.387171e+08
        6330557.437500
      -4194017.424574
       -1511.915397
      -5.778795e+06
         755407.062500
      -4202539.132652
        -1039.978464
      -1.904371e+09
      -18989405.625000
      -3815734.235083
       -21753.060392
      -2.172337e+09
      -17971034.187500
      -4219773.376121
       -15959.627871
    
    
      50%
       8.880562e+07
        -190636.000000
      -4574191.429087
      -18499.407759
      -4.143155e+08
       -6934196.375000
      -4664886.311245
       -14429.145909
      -3.429784e+09
      -33118396.125000
      -4180971.192237
       -38970.849494
      -3.478431e+09
      -31447243.125000
      -4406834.832800
       -30190.599009
    
    
      75%
       2.835360e+07
       -3909092.812500
      -4986291.515469
      -30188.766977
      -8.355635e+08
      -14812328.625000
      -5032462.009876
       -32851.642817
      -4.576160e+09
      -51285987.125000
      -4581666.947618
       -56118.123489
      -4.471933e+09
      -42422963.500000
      -4554812.161786
       -40418.875428
    
    
      max
      -5.056405e+08
      -15772208.000000
      -5508030.187789
      -71767.455048
      -2.995128e+09
      -43548365.000000
      -5568131.421848
      -109680.219475
      -7.532141e+09
      -92935277.000000
      -4947226.463877
      -159337.533306
      -6.141837e+09
      -66472843.250000
      -4989668.123093
      -100752.339516

Looking at the most significant features



In [156]:

    
# RadViz plot of the normalised features, coloured by BIRADS class.
hc = mia.analysis.normalize_data_frame(hologic_cluster)
# Restore the original feature names on the normalised frame.
hc.columns = hologic_cluster.columns
hc['BIRADS'] = hologic_meta.BIRADS

# pandas moved the plotting helpers from `pd.tools.plotting` to `pd.plotting`
# and later removed the old location; resolve whichever one this pandas has.
try:
    radviz = pd.plotting.radviz
except AttributeError:
    radviz = pd.tools.plotting.radviz
radviz(hc, 'BIRADS')









    Out[156]:





<matplotlib.axes._subplots.AxesSubplot at 0x129410b90>



In [73]:

    
# Attach the BIRADS label to the embedding under 'class' and export the two
# embedding dimensions as JSON for the mapping visualisation.
# NOTE(review): this adds a column to `mapping` in place, so cells that run
# afterwards see the extra 'class' column.
mapping['class'] = hologic_meta.BIRADS
json_path = '../mapping_viz/data.json'
mia.io_tools.dump_mapping_to_json(mapping, [0, 1], json_path)



In [49]:

    
# Persist the embedding of the real images next to the feature CSV.
mapping_csv = '../2015-03-28-real-texture-cluster-mapping.csv'
mapping.to_csv(mapping_csv)

Including the Synthetic Data



In [109]:

    
# Load the texture features of the synthetic phantom images.
# The first CSV column (the .dcm file name) becomes the index.
# `DataFrame.from_csv` is deprecated/removed in pandas; `read_csv` with
# `index_col=0` is the supported equivalent.
phantoms = pd.read_csv('../2015-03-28-phantom-texture-cluster.csv', index_col=0)
phantoms.head()









    Out[109]:






  
    
      
      contrast_cluster_1
      dissimilarity_cluster_1
      homogeneity_cluster_1
      energy_cluster_1
      contrast_cluster_2
      dissimilarity_cluster_2
      homogeneity_cluster_2
      energy_cluster_2
      contrast_cluster_3
      dissimilarity_cluster_3
      homogeneity_cluster_3
      energy_cluster_3
      contrast_cluster_4
      dissimilarity_cluster_4
      homogeneity_cluster_4
      energy_cluster_4
    
  
  
    
      test_Mix_DPerc0_c_0.dcm
       7.805321e+07
       337040.75
       3664686.896810
       49792.877875
       2.137532e+08
       899534.25
       3659636.929234
       37305.672427
       3.012525e+08
       1239525.25
       3656372.640303
       60874.139738
       2.190925e+08
        934585.25
       3637426.757401
        91539.044083
    
    
      test_Mix_DPerc0_c_1.dcm
       1.123977e+08
       483231.25
       3662776.650960
       58883.868513
       2.284522e+08
       955230.00
       3658457.557997
       37682.485071
       4.226799e+08
       1717695.00
       3655296.549928
       41519.417587
       3.340117e+08
       1389453.25
       3637492.324049
        94920.560177
    
    
      test_Mix_DPerc0_c_10.dcm
       1.746862e+07
        90670.75
       3665781.117732
       49048.932066
       7.524988e+07
       337097.25
       3662244.233806
       53688.129002
       2.188807e+08
        914860.75
       3656727.883353
       31368.812801
       1.995978e+08
        864599.25
       3635457.378337
        92797.242762
    
    
      test_Mix_DPerc0_c_11.dcm
       8.080798e+06
        49274.25
       3666552.061988
       46708.531551
       5.140643e+07
       240891.50
       3662899.062626
       41445.544031
       1.732943e+08
        735558.00
       3656810.989204
       40871.497863
       1.718443e+08
        756757.25
       3634511.683852
       101160.576150
    
    
      test_Mix_DPerc0_c_12.dcm
       1.394344e+07
        77445.00
       3665783.596805
       47152.382667
       7.345143e+07
       331874.00
       3661879.744782
       41461.886764
       2.239596e+08
        936619.50
       3656330.531212
       57281.910172
       2.009572e+08
        869104.50
       3635644.891045
        98646.047275



In [117]:

    
# Build the metadata table for the synthetic phantoms.
# NOTE(review): absolute path to an external volume - this cell only runs on a
# machine where that drive is mounted.
phantom_meta = mia.analysis.create_synthetic_meta_data(phantoms, '/Volumes/Seagate/2015-03-26/synthetic_meta_data_cleaned.csv')

# Replace the non-specific BIRADS labels with a single numeric class:
# '3 or 4' becomes 4 and '2 (<letters>)' becomes 2, then the column is cast
# to float. The result is assigned back to the frame instead of calling
# `replace(..., inplace=True)` on `phantom_meta.BIRADS`: an in-place update
# through an attribute-accessed column is a chained assignment and is not
# guaranteed to reach the parent frame.
phantom_meta['BIRADS'] = (
    phantom_meta['BIRADS']
    .replace('3 or 4', 4)
    .replace(re.compile(r'2 \([a-z]+\)'), 2)
    .astype(float)
)
phantom_meta.head()









    Out[117]:






  
    
      
      Vol
      CmprTh
      SkTh
      LigThCrs
      LigThFn
      #cmprts
      #cmprts.1
      Dperc
      VBD
      VBD.1
      BIRADS
      min_speed
      max_speed
      min_ratio
      max_ratio
      phantom_name
    
  
  
    
      test_Mix_DPerc0_c_0.dcm
       436
       5
       0.5
       400
       200
       333
       1000
       0
       24
       21
       1
       0.5
       2
       0.5
       2
       test_Mix_DPerc0_c
    
    
      test_Mix_DPerc0_c_1.dcm
       436
       5
       0.5
       400
       200
       333
       1000
       0
       24
       21
       1
       0.5
       2
       0.5
       2
       test_Mix_DPerc0_c
    
    
      test_Mix_DPerc0_c_10.dcm
       436
       5
       0.5
       400
       200
       333
       1000
       0
       24
       21
       1
       0.5
       2
       0.5
       2
       test_Mix_DPerc0_c
    
    
      test_Mix_DPerc0_c_11.dcm
       436
       5
       0.5
       400
       200
       333
       1000
       0
       24
       21
       1
       0.5
       2
       0.5
       2
       test_Mix_DPerc0_c
    
    
      test_Mix_DPerc0_c_12.dcm
       436
       5
       0.5
       400
       200
       333
       1000
       0
       24
       21
       1
       0.5
       2
       0.5
       2
       test_Mix_DPerc0_c

Select a random subset of the phantoms for use with the t-SNE algorithm



In [122]:

    
import random
# Group the phantom feature rows by the name of the phantom they belong to.
group = phantoms.groupby(phantom_meta['phantom_name'])

def select_random(x):
    """Return one randomly chosen row of ``x`` as a single-row DataFrame.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame to draw from; must contain at least one row.

    Returns
    -------
    pandas.DataFrame
        The row(s) of ``x`` carrying the sampled index label, with the
        original label preserved.

    Raises
    ------
    ValueError
        If ``x`` is empty (raised by ``random.sample``).
    """
    # random.sample needs a real sequence on recent Python versions, so the
    # index is materialised as a list before sampling.
    chosen = random.sample(list(x.index), 1)
    # `.ix` was removed from pandas; label-based `.loc` with a one-element
    # list keeps the result a DataFrame rather than a Series.
    return x.loc[chosen]
# Draw one random image per phantom, then drop the outer index level added by
# the group-by so the frame is indexed by image file name only.
# NOTE(review): no random seed is set, so the selection changes on every run.
random_synthetic_features = (
    group.apply(select_random)
         .reset_index(level=0, drop=True)
)
random_synthetic_features









    Out[122]:






  
    
      
      contrast_cluster_1
      dissimilarity_cluster_1
      homogeneity_cluster_1
      energy_cluster_1
      contrast_cluster_2
      dissimilarity_cluster_2
      homogeneity_cluster_2
      energy_cluster_2
      contrast_cluster_3
      dissimilarity_cluster_3
      homogeneity_cluster_3
      energy_cluster_3
      contrast_cluster_4
      dissimilarity_cluster_4
      homogeneity_cluster_4
      energy_cluster_4
    
  
  
    
      test_Mix_DPerc0_c_6.dcm
       1.289264e+08
       551006.50
       3662803.274767
       53445.460117
       2.306973e+08
       962567.50
       3659318.739485
       42593.927039
       3.455569e+08
       1411829.00
       3657185.743758
       57941.727834
       2.559077e+08
       1080401.25
       3637437.591758
        76573.368273
    
    
      test_Mix_DPerc10_c_3.dcm
       1.382765e+08
       588710.75
       3662005.961962
       28960.985685
       2.301554e+08
       956916.00
       3658477.753351
       39581.689743
       1.096317e+09
       4418884.25
       3639789.584119
       55585.581781
       9.496640e+08
       3818397.00
       3642493.255595
        83856.953167
    
    
      test_Mix_DPerc20_c_3.dcm
       5.541825e+07
       240940.50
       3665167.187222
       52121.902079
       1.394425e+08
       584554.75
       3660939.875182
       41342.833212
       8.630619e+08
       3480343.00
       3643674.748048
       97355.501648
       7.266742e+08
       2927117.25
       3644305.585281
        75286.592959
    
    
      test_Mix_DPerc35_c_7.dcm
       4.736188e+07
       210227.75
       3663797.672162
       45032.226662
       2.066614e+08
       854896.25
       3658942.095173
       43871.778160
       6.274249e+08
       2529067.00
       3647462.072949
       92228.524380
       4.550624e+08
       1828349.75
       3652664.802134
        79720.458224
    
    
      test_Mix_DPerc5_c_6.dcm
       4.005631e+07
       180438.50
       3664749.354619
       38294.706842
       1.566761e+08
       660404.50
       3660755.434569
       42336.236493
       3.231703e+08
       1323028.00
       3656627.025249
       38625.867028
       2.629593e+08
       1100958.50
       3641405.868696
       103933.893189
    
    
      test_Mix_DPerc75_c_10.dcm
       3.715249e+07
       163717.50
       3665041.626076
       49269.152625
       1.884157e+08
       777045.25
       3660657.189942
       44628.508039
       4.566236e+08
       1843255.50
       3650387.653658
       96029.157381
       3.009384e+08
       1202946.25
       3658487.012120
        75580.943172



In [132]:

    
# Stack the real features on top of the sampled synthetic ones.
features = pd.concat([hologic_cluster, random_synthetic_features])

# Matching BIRADS labels, in the same order: real images first, then the
# sampled phantoms (looked up by their image file name).
synthetic_birads = phantom_meta.loc[random_synthetic_features.index, 'BIRADS']
class_labels = pd.concat([hologic_meta['BIRADS'], synthetic_birads])
class_labels.shape









    Out[132]:





(366,)



In [213]:

    
# Every feature column except the homogeneity ones.
# NOTE(review): this rebinds `columns`, which earlier in the notebook held the
# homogeneity columns only - cells run out of order will pick up the wrong set.
# Built as a list: a Python 3 `filter` iterator is exhausted after one use and
# cannot serve as a reusable DataFrame column selector.
columns = [name for name in features.columns if 'homogeneity' not in name]



In [203]:

    
# t-SNE embedding of real and synthetic images together, using the currently
# selected feature columns.
# NOTE(review): no random seed is passed, so the embedding may differ between
# runs - confirm whether mia.analysis.tSNE accepts one.
joint_input = features[columns]
joint_mapping = mia.analysis.tSNE(joint_input, learning_rate=300, verbose=2)









    



[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 0.575752
[t-SNE] Iteration 10: error = 16.4174517, gradient norm = 0.1587858
[t-SNE] Iteration 20: error = 13.4212003, gradient norm = 0.1597509
[t-SNE] Iteration 30: error = 13.1282000, gradient norm = 0.1497105
[t-SNE] Iteration 40: error = 12.6614001, gradient norm = 0.1495442
[t-SNE] Iteration 50: error = 12.9267517, gradient norm = 0.1458217
[t-SNE] Iteration 60: error = 12.4563635, gradient norm = 0.1550798
[t-SNE] Iteration 70: error = 12.5049876, gradient norm = 0.1562701
[t-SNE] Iteration 80: error = 12.7761028, gradient norm = 0.1421605
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 13.093702
[t-SNE] Iteration 90: error = 0.8760613, gradient norm = 0.0257260
[t-SNE] Iteration 100: error = 0.5608583, gradient norm = 0.0093508
[t-SNE] Iteration 110: error = 0.5177783, gradient norm = 0.0030459
[t-SNE] Iteration 120: error = 0.5073383, gradient norm = 0.0012317
[t-SNE] Iteration 130: error = 0.5029114, gradient norm = 0.0007389
[t-SNE] Iteration 140: error = 0.5006705, gradient norm = 0.0006484
[t-SNE] Iteration 150: error = 0.4994553, gradient norm = 0.0006244
[t-SNE] Iteration 160: error = 0.4987725, gradient norm = 0.0006113
[t-SNE] Iteration 170: error = 0.4983793, gradient norm = 0.0006043
[t-SNE] Iteration 180: error = 0.4981494, gradient norm = 0.0006002
[t-SNE] Iteration 190: error = 0.4980136, gradient norm = 0.0005978
[t-SNE] Iteration 200: error = 0.4979330, gradient norm = 0.0005963
[t-SNE] Iteration 210: error = 0.4978849, gradient norm = 0.0005955
[t-SNE] Iteration 220: error = 0.4978563, gradient norm = 0.0005950
[t-SNE] Iteration 230: error = 0.4978391, gradient norm = 0.0005947
[t-SNE] Iteration 240: error = 0.4978289, gradient norm = 0.0005945
[t-SNE] Iteration 250: error = 0.4978227, gradient norm = 0.0005944
[t-SNE] Iteration 260: error = 0.4978191, gradient norm = 0.0005943
[t-SNE] Iteration 270: error = 0.4978169, gradient norm = 0.0005943
[t-SNE] Iteration 280: error = 0.4978155, gradient norm = 0.0005943
[t-SNE] Iteration 281: error difference 0.000000. Finished.
[t-SNE] Error after 281 iterations: 0.497815

Plotting both reals and phantoms together as a single scatter plot:



In [212]:

    
%matplotlib qt
joint_mapping['BIRADS'] = class_labels
hol_map = joint_mapping[:-6]
hol_map.shape

syn_map = joint_mapping[-6:]
syn_map.head()

ax = mia.plotting.plot_scatter_2d(hol_map, [0,1], 'BIRADS')
ax = mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', ax=ax, marker='^', s=50)



In [198]:

    
# Difference between the summary statistics of the sampled synthetic images
# and those of the real images (synthetic minus real).
synthetic_summary = random_synthetic_features.describe()
real_summary = hologic_cluster.describe()
synthetic_summary - real_summary









    Out[198]:






  
    
      
      contrast_cluster_1
      dissimilarity_cluster_1
      homogeneity_cluster_1
      energy_cluster_1
      contrast_cluster_2
      dissimilarity_cluster_2
      homogeneity_cluster_2
      energy_cluster_2
      contrast_cluster_3
      dissimilarity_cluster_3
      homogeneity_cluster_3
      energy_cluster_3
      contrast_cluster_4
      dissimilarity_cluster_4
      homogeneity_cluster_4
      energy_cluster_4
    
  
  
    
      count
      -3.540000e+02
           -354.000000
          -354.000000
        -354.000000
      -3.540000e+02
           -354.000000
          -354.000000
         -354.000000
      -3.540000e+02
      -3.540000e+02
          -354.000000
         -354.000000
      -3.540000e+02
           -354.000000
          -354.000000
        -354.000000
    
    
      mean
      -3.471572e+08
      -10930283.923611
      -6402789.438092
      -28498.201271
      -1.785893e+09
      -30031024.609028
      -6284383.394894
       -40080.248296
      -3.131587e+09
      -3.897421e+07
      -6335356.628016
        -5954.835599
      -1.915045e+09
      -19744053.645139
      -6784295.703629
       24317.489556
    
    
      std
      -2.303295e+08
       -6669743.006260
      -2302256.699216
      -14454.358176
      -1.226421e+09
      -19925311.553204
      -2188673.993117
       -34683.839547
      -2.211510e+09
      -2.823480e+07
      -2137806.775196
       -13787.374858
      -1.582827e+09
      -16642677.608562
      -2349208.283240
       -9909.520978
    
    
      min
      -3.780830e+07
       -1970660.750000
      -3611006.038227
        -224.249771
      -1.700863e+08
       -4646772.750000
      -3666852.205031
         4265.839270
      -3.668226e+08
      -6.096884e+06
      -3720603.120880
         9479.079436
      -1.833975e+08
       -2659986.250000
      -4314395.676846
       50184.657298
    
    
      25%
      -1.680347e+08
       -5644954.187500
      -4320432.937471
      -15904.433861
      -7.481320e+08
      -12961745.250000
      -4351030.258263
       -14096.681674
      -1.299226e+09
      -1.544254e+07
      -4440650.296657
         4739.420079
      -7.914160e+08
       -7837961.125000
      -4652986.150354
       30268.965165
    
    
      50%
      -2.893096e+08
       -8969151.625000
      -4598166.617793
      -19669.673215
      -1.447143e+09
      -24490327.000000
      -4578058.258306
       -27598.871381
      -2.123659e+09
      -2.649811e+07
      -4631287.500777
        11405.435315
      -1.104483e+09
      -11165600.875000
      -4732512.514484
       25266.269413
    
    
      75%
      -4.722470e+08
      -15021015.187500
      -8862648.149257
      -35709.537162
      -2.474048e+09
      -41708742.687500
      -8590099.385389
       -55294.361379
      -4.627798e+09
      -5.631630e+07
      -8655609.223456
        -1732.795363
      -2.869220e+09
      -28466116.000000
      -9350053.244120
       18897.068134
    
    
      max
      -1.346652e+09
      -32273665.250000
      -9721225.033498
      -92768.273617
      -5.757936e+09
      -94930068.000000
      -9590192.566618
      -184062.020468
      -1.034807e+10
      -1.279552e+08
      -9545922.986910
      -139384.968335
      -8.751343e+09
      -82078711.750000
      -9820911.412866
      -49430.650173



In [207]:

    
# Scatter matrix of the currently selected feature columns whose name contains
# '4' (i.e. the *_cluster_4 columns), coloured by BIRADS class, for real and
# synthetic images together.
# A list comprehension replaces `filter`, whose Python 3 iterator result is
# not a valid DataFrame indexer.
cluster_4_columns = [name for name in features[columns].columns if '4' in name]
f = features[cluster_4_columns].copy()
f['BIRADS'] = class_labels
mia.plotting.plot_scattermatrix(f, 'BIRADS')

	contrast_cluster_1	dissimilarity_cluster_1	homogeneity_cluster_1	energy_cluster_1	contrast_cluster_2	dissimilarity_cluster_2	homogeneity_cluster_2	energy_cluster_2	contrast_cluster_3	dissimilarity_cluster_3	homogeneity_cluster_3	energy_cluster_3	contrast_cluster_4	dissimilarity_cluster_4	homogeneity_cluster_4	energy_cluster_4
p214-010-60001-cl.png	2.931535e+08	7003850.25	8130257.674942	51371.507991	8.260166e+08	12106920.25	8178692.790514	51247.278008	1.189669e+09	10889315.75	8309986.114009	46728.453963	7.767933e+08	5886436.25	8380532.207002	43973.709887
p214-010-60001-cr.png	1.856295e+08	4623518.75	8241054.817176	52008.002383	6.412453e+08	9141035.75	8250194.492365	57204.728899	1.151640e+09	10339648.50	8311361.858274	48833.499865	7.751633e+08	5625592.50	8402698.450330	47068.058470
p214-010-60001-ml.png	2.225075e+08	5867047.50	8121710.629886	53939.606377	8.938307e+08	12497527.25	8178878.867447	60888.448918	1.785641e+09	16247420.50	8210113.855566	39805.116295	1.237097e+09	9180578.50	8338267.205284	40480.385535
p214-010-60001-mr.png	2.280928e+08	5802305.25	8143748.833719	56363.712378	1.047522e+09	13875836.25	8165493.907820	53059.072536	1.850879e+09	16737395.50	8209670.079957	47579.096741	1.073341e+09	7801042.00	8371460.064432	57173.612063
p214-010-60005-cl.png	1.563874e+08	4584042.25	8163100.805695	37606.734376	6.720121e+08	9797939.50	8198951.000702	36979.949572	1.548208e+09	13951816.50	8220116.309021	32055.018330	1.131708e+09	8562726.25	8306607.852877	55253.097523

	patient_id	side	view	img_name	BIRADS	img_number
p214-010-60001-cl.png	21401060001	c	l	p214-010-60001-cl.png	3	1
p214-010-60001-cr.png	21401060001	c	r	p214-010-60001-cr.png	3	1
p214-010-60001-ml.png	21401060001	m	l	p214-010-60001-ml.png	3	1
p214-010-60001-mr.png	21401060001	m	r	p214-010-60001-mr.png	3	1
p214-010-60005-cl.png	21401060005	c	l	p214-010-60005-cl.png	4	5

	contrast_cluster_1	dissimilarity_cluster_1	homogeneity_cluster_1	energy_cluster_1	contrast_cluster_2	dissimilarity_cluster_2	homogeneity_cluster_2	energy_cluster_2	contrast_cluster_3	dissimilarity_cluster_3	homogeneity_cluster_3	energy_cluster_3	contrast_cluster_4	dissimilarity_cluster_4	homogeneity_cluster_4	energy_cluster_4
count	-5.200000e+01	-52.000000	-52.000000	-52.000000	-5.200000e+01	-52.000000	-52.000000	-52.000000	-5.200000e+01	-52.000000	-52.000000	-52.000000	-5.200000e+01	-52.000000	-52.000000	-52.000000
mean	9.160567e+07	74524.982143	-4061161.758205	-18304.689283	-5.051321e+08	-7603719.178571	-4147655.094432	-20643.333663	-3.373641e+09	-36808132.767857	-3666362.056107	-42376.793559	-3.327056e+09	-31426773.834821	-3804590.423466	-29025.715724
std	-1.603533e+08	-5406764.273350	-1618317.125137	-19830.220933	-7.159395e+08	-12883083.536642	-1569686.264484	-25174.014670	-1.874185e+09	-22761707.482165	-1522252.920157	-30145.025223	-1.554493e+09	-15425890.091613	-1602577.047234	-19292.470167
min	3.823331e+08	9378632.750000	0.000000	22302.068686	1.215520e+09	20742859.000000	0.000000	22709.714106	0.000000e+00	0.000000	20266.891924	5965.814005	0.000000e+00	0.000000	218008.899588	4024.187474
25%	2.387171e+08	6330557.437500	-4194017.424574	-1511.915397	-5.778795e+06	755407.062500	-4202539.132652	-1039.978464	-1.904371e+09	-18989405.625000	-3815734.235083	-21753.060392	-2.172337e+09	-17971034.187500	-4219773.376121	-15959.627871
50%	8.880562e+07	-190636.000000	-4574191.429087	-18499.407759	-4.143155e+08	-6934196.375000	-4664886.311245	-14429.145909	-3.429784e+09	-33118396.125000	-4180971.192237	-38970.849494	-3.478431e+09	-31447243.125000	-4406834.832800	-30190.599009
75%	2.835360e+07	-3909092.812500	-4986291.515469	-30188.766977	-8.355635e+08	-14812328.625000	-5032462.009876	-32851.642817	-4.576160e+09	-51285987.125000	-4581666.947618	-56118.123489	-4.471933e+09	-42422963.500000	-4554812.161786	-40418.875428
max	-5.056405e+08	-15772208.000000	-5508030.187789	-71767.455048	-2.995128e+09	-43548365.000000	-5568131.421848	-109680.219475	-7.532141e+09	-92935277.000000	-4947226.463877	-159337.533306	-6.141837e+09	-66472843.250000	-4989668.123093	-100752.339516

	contrast_cluster_1	dissimilarity_cluster_1	homogeneity_cluster_1	energy_cluster_1	contrast_cluster_2	dissimilarity_cluster_2	homogeneity_cluster_2	energy_cluster_2	contrast_cluster_3	dissimilarity_cluster_3	homogeneity_cluster_3	energy_cluster_3	contrast_cluster_4	dissimilarity_cluster_4	homogeneity_cluster_4	energy_cluster_4
test_Mix_DPerc0_c_0.dcm	7.805321e+07	337040.75	3664686.896810	49792.877875	2.137532e+08	899534.25	3659636.929234	37305.672427	3.012525e+08	1239525.25	3656372.640303	60874.139738	2.190925e+08	934585.25	3637426.757401	91539.044083
test_Mix_DPerc0_c_1.dcm	1.123977e+08	483231.25	3662776.650960	58883.868513	2.284522e+08	955230.00	3658457.557997	37682.485071	4.226799e+08	1717695.00	3655296.549928	41519.417587	3.340117e+08	1389453.25	3637492.324049	94920.560177
test_Mix_DPerc0_c_10.dcm	1.746862e+07	90670.75	3665781.117732	49048.932066	7.524988e+07	337097.25	3662244.233806	53688.129002	2.188807e+08	914860.75	3656727.883353	31368.812801	1.995978e+08	864599.25	3635457.378337	92797.242762
test_Mix_DPerc0_c_11.dcm	8.080798e+06	49274.25	3666552.061988	46708.531551	5.140643e+07	240891.50	3662899.062626	41445.544031	1.732943e+08	735558.00	3656810.989204	40871.497863	1.718443e+08	756757.25	3634511.683852	101160.576150
test_Mix_DPerc0_c_12.dcm	1.394344e+07	77445.00	3665783.596805	47152.382667	7.345143e+07	331874.00	3661879.744782	41461.886764	2.239596e+08	936619.50	3656330.531212	57281.910172	2.009572e+08	869104.50	3635644.891045	98646.047275

	Vol	CmprTh	SkTh	LigThCrs	LigThFn	#cmprts	#cmprts.1	VBD	VBD.1	BIRADS	min_speed	max_speed	min_ratio	max_ratio	phantom_name
test_Mix_DPerc0_c_0.dcm	436	5	0.5	400	200	333	1000	24	21	1	0.5	2	0.5	2	test_Mix_DPerc0_c
test_Mix_DPerc0_c_1.dcm	436	5	0.5	400	200	333	1000	24	21	1	0.5	2	0.5	2	test_Mix_DPerc0_c
test_Mix_DPerc0_c_10.dcm	436	5	0.5	400	200	333	1000	24	21	1	0.5	2	0.5	2	test_Mix_DPerc0_c
test_Mix_DPerc0_c_11.dcm	436	5	0.5	400	200	333	1000	24	21	1	0.5	2	0.5	2	test_Mix_DPerc0_c
test_Mix_DPerc0_c_12.dcm	436	5	0.5	400	200	333	1000	24	21	1	0.5	2	0.5	2	test_Mix_DPerc0_c

	contrast_cluster_1	dissimilarity_cluster_1	homogeneity_cluster_1	energy_cluster_1	contrast_cluster_2	dissimilarity_cluster_2	homogeneity_cluster_2	energy_cluster_2	contrast_cluster_3	dissimilarity_cluster_3	homogeneity_cluster_3	energy_cluster_3	contrast_cluster_4	dissimilarity_cluster_4	homogeneity_cluster_4	energy_cluster_4
test_Mix_DPerc0_c_6.dcm	1.289264e+08	551006.50	3662803.274767	53445.460117	2.306973e+08	962567.50	3659318.739485	42593.927039	3.455569e+08	1411829.00	3657185.743758	57941.727834	2.559077e+08	1080401.25	3637437.591758	76573.368273
test_Mix_DPerc10_c_3.dcm	1.382765e+08	588710.75	3662005.961962	28960.985685	2.301554e+08	956916.00	3658477.753351	39581.689743	1.096317e+09	4418884.25	3639789.584119	55585.581781	9.496640e+08	3818397.00	3642493.255595	83856.953167
test_Mix_DPerc20_c_3.dcm	5.541825e+07	240940.50	3665167.187222	52121.902079	1.394425e+08	584554.75	3660939.875182	41342.833212	8.630619e+08	3480343.00	3643674.748048	97355.501648	7.266742e+08	2927117.25	3644305.585281	75286.592959
test_Mix_DPerc35_c_7.dcm	4.736188e+07	210227.75	3663797.672162	45032.226662	2.066614e+08	854896.25	3658942.095173	43871.778160	6.274249e+08	2529067.00	3647462.072949	92228.524380	4.550624e+08	1828349.75	3652664.802134	79720.458224
test_Mix_DPerc5_c_6.dcm	4.005631e+07	180438.50	3664749.354619	38294.706842	1.566761e+08	660404.50	3660755.434569	42336.236493	3.231703e+08	1323028.00	3656627.025249	38625.867028	2.629593e+08	1100958.50	3641405.868696	103933.893189
test_Mix_DPerc75_c_10.dcm	3.715249e+07	163717.50	3665041.626076	49269.152625	1.884157e+08	777045.25	3660657.189942	44628.508039	4.566236e+08	1843255.50	3650387.653658	96029.157381	3.009384e+08	1202946.25	3658487.012120	75580.943172

	contrast_cluster_1	dissimilarity_cluster_1	homogeneity_cluster_1	energy_cluster_1	contrast_cluster_2	dissimilarity_cluster_2	homogeneity_cluster_2	energy_cluster_2	contrast_cluster_3	dissimilarity_cluster_3	homogeneity_cluster_3	energy_cluster_3	contrast_cluster_4	dissimilarity_cluster_4	homogeneity_cluster_4	energy_cluster_4
count	-3.540000e+02	-354.000000	-354.000000	-354.000000	-3.540000e+02	-354.000000	-354.000000	-354.000000	-3.540000e+02	-3.540000e+02	-354.000000	-354.000000	-3.540000e+02	-354.000000	-354.000000	-354.000000
mean	-3.471572e+08	-10930283.923611	-6402789.438092	-28498.201271	-1.785893e+09	-30031024.609028	-6284383.394894	-40080.248296	-3.131587e+09	-3.897421e+07	-6335356.628016	-5954.835599	-1.915045e+09	-19744053.645139	-6784295.703629	24317.489556
std	-2.303295e+08	-6669743.006260	-2302256.699216	-14454.358176	-1.226421e+09	-19925311.553204	-2188673.993117	-34683.839547	-2.211510e+09	-2.823480e+07	-2137806.775196	-13787.374858	-1.582827e+09	-16642677.608562	-2349208.283240	-9909.520978
min	-3.780830e+07	-1970660.750000	-3611006.038227	-224.249771	-1.700863e+08	-4646772.750000	-3666852.205031	4265.839270	-3.668226e+08	-6.096884e+06	-3720603.120880	9479.079436	-1.833975e+08	-2659986.250000	-4314395.676846	50184.657298
25%	-1.680347e+08	-5644954.187500	-4320432.937471	-15904.433861	-7.481320e+08	-12961745.250000	-4351030.258263	-14096.681674	-1.299226e+09	-1.544254e+07	-4440650.296657	4739.420079	-7.914160e+08	-7837961.125000	-4652986.150354	30268.965165
50%	-2.893096e+08	-8969151.625000	-4598166.617793	-19669.673215	-1.447143e+09	-24490327.000000	-4578058.258306	-27598.871381	-2.123659e+09	-2.649811e+07	-4631287.500777	11405.435315	-1.104483e+09	-11165600.875000	-4732512.514484	25266.269413
75%	-4.722470e+08	-15021015.187500	-8862648.149257	-35709.537162	-2.474048e+09	-41708742.687500	-8590099.385389	-55294.361379	-4.627798e+09	-5.631630e+07	-8655609.223456	-1732.795363	-2.869220e+09	-28466116.000000	-9350053.244120	18897.068134
max	-1.346652e+09	-32273665.250000	-9721225.033498	-92768.273617	-5.757936e+09	-94930068.000000	-9590192.566618	-184062.020468	-1.034807e+10	-1.279552e+08	-9545922.986910	-139384.968335	-8.751343e+09	-82078711.750000	-9820911.412866	-49430.650173