In [1]:
    
%matplotlib qt
import mia
import pandas as pd
import numpy as np
import re
import random
import matplotlib.pyplot as plt
from pandas.tools import plotting
    
In [17]:
    
# Load the raw blob detections for the two Hologic batches and the synthetic
# images. pd.read_csv(index_col=0, parse_dates=True) is the documented
# equivalent of the deprecated DataFrame.from_csv and keeps its defaults.
batch1 = pd.read_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch1_blobs.csv', index_col=0, parse_dates=True)
batch2 = pd.read_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv', index_col=0, parse_dates=True)
synthetics = pd.read_csv('/Volumes/Seagate/mmp_data/2015-03-26/synthetic_blobs.csv', index_col=0, parse_dates=True)
# Re-index each frame by its img_name column so rows can be looked up per image.
# NOTE(review): the traceback saved with this cell comes from a run that read
# '2015-04-11/phantom-blobs1.csv' instead, and failed because that frame had no
# img_name attribute; re-run the cell so the stored output matches this source.
synthetics.index = synthetics.img_name
hologic = pd.concat([batch1, batch2])
hologic.index = hologic.img_name
hologic.head()
    
    
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-fbe373e01dd1> in <module>()
      2 batch2 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv')
      3 synthetics = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-04-11/phantom-blobs1.csv')
----> 4 synthetics.index = synthetics.img_name
      5 
      6 hologic = pd.concat([batch1, batch2])
/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   1945                 return self[name]
   1946             raise AttributeError("'%s' object has no attribute '%s'" %
-> 1947                                  (type(self).__name__, name))
   1948 
   1949     def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'img_name'
In [3]:
    
# Build the Hologic metadata frame from the blob detections and the BIRADS CSV,
# then preview its first rows.
hologic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/BIRADS.csv'
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, hologic_meta_path)
hologic_meta.head()
    
    Out[3]:
  
    
       
      patient_id 
      side 
      view 
      img_name 
      BIRADS 
      img_number 
     
    
      img_name 
       
       
       
       
       
       
     
  
  
    
      p214-010-60001-cl.png 
       21401060001 
       c 
       l 
       p214-010-60001-cl.png 
       3 
       1 
     
    
      p214-010-60001-cl.png 
       21401060001 
       c 
       l 
       p214-010-60001-cl.png 
       3 
       1 
     
    
      p214-010-60001-cl.png 
       21401060001 
       c 
       l 
       p214-010-60001-cl.png 
       3 
       1 
     
    
      p214-010-60001-cl.png 
       21401060001 
       c 
       l 
       p214-010-60001-cl.png 
       3 
       1 
     
    
      p214-010-60001-cl.png 
       21401060001 
       c 
       l 
       p214-010-60001-cl.png 
       3 
       1 
     
  
In [4]:
    
# Build the synthetic metadata frame from the synthetic blob detections and the
# cleaned metadata CSV, then preview its first rows.
synthetic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/synthetic_meta_data_cleaned.csv'
synthetic_meta = mia.analysis.create_synthetic_meta_data(synthetics, synthetic_meta_path)
synthetic_meta.head()
    
    Out[4]:
  
    
       
      Vol 
      CmprTh 
      SkTh 
      LigThCrs 
      LigThFn 
      #cmprts 
      #cmprts.1 
      Dperc 
      VBD 
      VBD.1 
      BIRADS 
      min_speed 
      max_speed 
      min_ratio 
      max_ratio 
      phantom_name 
     
    
      img_name 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
       0 
       24 
       21 
       1 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
       0 
       24 
       21 
       1 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
       0 
       24 
       21 
       1 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
       0 
       24 
       21 
       1 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
       0 
       24 
       21 
       1 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc0_c 
     
  
In [5]:
    
# Collect the BIRADS label column of both datasets, Hologic rows first.
hologic_class_labels = hologic_meta['BIRADS']
synth_class_labels = synthetic_meta['BIRADS']
class_labels = pd.concat([hologic_class_labels, synth_class_labels])
# Map the non-specific BIRADS labels onto a single numeric class:
# '3 or 4' becomes 4 and the annotated '2 (...)' variants become 2.
class_labels = (
    class_labels
    .replace('3 or 4', 4)
    .replace(re.compile(r'2 \([a-z]+\)'), 2)
    .astype(float)
)
    
Compute shape-based features from the raw blob detections
In [6]:
    
# Derive the blob feature table for the Hologic images and summarise each
# feature column.
hologic_blob_features = mia.analysis.features_from_blobs(hologic)
hologic_blob_features.describe()
    
    Out[6]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
     
  
  
    
      count 
       360.000000 
       360.000000 
       360.000000 
       360 
       360.000000 
       360.000000 
       360.000000 
       360.000000 
       360.000000 
       360.000000 
       360.000000 
       360.000000 
     
    
      mean 
       251.777778 
        17.679679 
        20.295388 
         8 
       154.272791 
       243.975000 
         3.927778 
         3.875000 
        42.546985 
         8.050626 
        17.575128 
        66.580556 
     
    
      std 
       182.143063 
         3.290097 
         7.305919 
         0 
        40.349384 
       179.730972 
         5.394241 
         2.559256 
         6.866999 
         0.392667 
         3.161404 
        57.047643 
     
    
      min 
        26.000000 
        11.633776 
         5.269543 
         8 
        45.254834 
        22.000000 
         0.000000 
         1.000000 
        32.678451 
         8.000000 
        11.313708 
         5.000000 
     
    
      25% 
       113.000000 
        15.375540 
        14.601573 
         8 
       128.000000 
       109.000000 
         1.000000 
         2.000000 
        37.506466 
         8.000000 
        16.000000 
        27.000000 
     
    
      50% 
       203.500000 
        17.154971 
        20.158492 
         8 
       181.019336 
       196.000000 
         3.000000 
         3.000000 
        41.412974 
         8.000000 
        16.000000 
        49.000000 
     
    
      75% 
       328.250000 
        19.297899 
        25.607779 
         8 
       181.019336 
       314.000000 
         4.000000 
         5.000000 
        46.069529 
         8.000000 
        16.000000 
        83.000000 
     
    
      max 
       978.000000 
        34.201332 
        45.057882 
         8 
       181.019336 
       958.000000 
        44.000000 
        14.000000 
        79.518497 
        11.313708 
        32.000000 
       337.000000 
     
  
In [7]:
    
# Derive the blob feature table for the synthetic images and summarise each
# feature column.
synthetic_blob_features = mia.analysis.features_from_blobs(synthetics)
synthetic_blob_features.describe()
    
    Out[7]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
     
  
  
    
      count 
        60.000000 
       60.000000 
       60.000000 
       60 
        60.000000 
        60.000000 
       60.000000 
       60.000000 
       60.000000 
       60.000000 
       60.000000 
       60.000000 
     
    
      mean 
        80.733333 
       21.982949 
       24.371230 
        8 
       130.588167 
        74.033333 
        1.666667 
        5.033333 
       35.482570 
        8.358985 
       22.714521 
       22.316667 
     
    
      std 
        15.154422 
        2.342589 
        2.647525 
        0 
        26.605668 
        15.483142 
        1.445820 
        2.863959 
        4.033106 
        1.004434 
        4.996060 
        6.474177 
     
    
      min 
        40.000000 
       18.645281 
       19.316414 
        8 
        90.509668 
        33.000000 
        0.000000 
        2.000000 
       29.205906 
        8.000000 
       16.000000 
        8.000000 
     
    
      25% 
        72.750000 
       20.293316 
       22.697181 
        8 
       128.000000 
        66.000000 
        1.000000 
        3.750000 
       33.215826 
        8.000000 
       22.627417 
       19.000000 
     
    
      50% 
        84.500000 
       21.347224 
       23.683670 
        8 
       128.000000 
        78.000000 
        1.000000 
        4.000000 
       34.529411 
        8.000000 
       22.627417 
       22.000000 
     
    
      75% 
        92.000000 
       23.621765 
       26.496192 
        8 
       128.000000 
        84.250000 
        2.250000 
        5.000000 
       37.044697 
        8.000000 
       22.627417 
       28.000000 
     
    
      max 
       109.000000 
       28.319596 
       31.413381 
        8 
       181.019336 
       103.000000 
        6.000000 
       13.000000 
       53.889814 
       11.313708 
       35.313708 
       33.000000 
     
  
Select a subset of the synthetic images to use: one randomly sampled image from each `DPerc*` phantom group.
In [8]:
    
# De-duplicate the synthetic metadata index, then tag every synthetic feature
# row with its phantom name so the rows can be grouped per phantom.
syn_feature_meta = mia.analysis.remove_duplicate_index(synthetic_meta)
# NOTE(review): .tolist() assigns by position, so this presumably relies on
# both frames listing the images in the same order - confirm.
synthetic_blob_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
# random_synthetic_features is bound from the group.apply(...) result below;
# the earlier placeholder alias to synthetic_blob_features was never read.
group = synthetic_blob_features.groupby('phantom_name')
def select_random(x):
    """Return one randomly chosen row of ``x`` as a single-row DataFrame.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame to sample from; it must contain at least one row.

    Returns
    -------
    pandas.DataFrame
        The row of ``x`` whose index label was drawn, selected by label.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``x`` has no rows.
    """
    # random.sample needs a real sequence, so materialise the index first.
    chosen_label = random.sample(list(x.index), 1)
    # .loc is the label-based indexer; the old .ix indexer is gone from
    # current pandas releases.
    return x.loc[chosen_label]
# Draw one row per phantom_name group using select_random.
# NOTE(review): the random module is not seeded in this notebook, so the rows
# picked here presumably change from run to run - confirm if that is intended.
random_synthetic_features = group.apply(select_random)
# The grouping column is not a feature, so remove it again.
random_synthetic_features.drop('phantom_name', axis=1, inplace=True)
# Discard the outermost index level added by the group-wise apply.
random_synthetic_features.reset_index(drop=True, level=0, inplace=True)
random_synthetic_features
    
    Out[8]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
     
    
      img_name 
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      test_Mix_DPerc0_c_2.dcm 
       71 
       27.085040 
       23.624541 
       8 
        90.509668 
       54 
       5 
       12 
       40.916216 
       11.313708 
       32.000000 
       24 
     
    
      test_Mix_DPerc10_c_3.dcm 
       89 
       21.499210 
       22.294951 
       8 
       128.000000 
       82 
       2 
        5 
       32.310669 
        8.000000 
       22.627417 
       29 
     
    
      test_Mix_DPerc20_c_4.dcm 
       82 
       20.877556 
       27.635799 
       8 
       181.019336 
       79 
       0 
        3 
       34.492907 
        8.000000 
       22.627417 
       22 
     
    
      test_Mix_DPerc35_c_9.dcm 
       53 
       25.991843 
       28.518937 
       8 
       128.000000 
       47 
       1 
        5 
       41.964861 
       11.313708 
       22.627417 
       13 
     
    
      test_Mix_DPerc5_c_6.dcm 
       76 
       25.187286 
       24.186133 
       8 
       128.000000 
       69 
       4 
        3 
       37.381472 
        8.000000 
       32.000000 
       20 
     
    
      test_Mix_DPerc75_c_6.dcm 
       62 
       22.021813 
       27.265855 
       8 
       128.000000 
       57 
       1 
        4 
       35.358633 
        8.828427 
       16.000000 
       15 
     
  
Join datasets. Results will include both hologic and synthetic images
In [9]:
    
# Stack the Hologic feature rows and the sampled synthetic feature rows into
# one frame (Hologic rows first), then summarise each feature column.
blob_features = pd.concat([hologic_blob_features, random_synthetic_features])
blob_features.describe()
    
    Out[9]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
     
  
  
    
      count 
       366.000000 
       366.000000 
       366.000000 
       366 
       366.000000 
       366.000000 
       366.000000 
       366.000000 
       366.000000 
       366.000000 
       366.000000 
       366.000000 
     
    
      mean 
       248.833333 
        17.779638 
        20.382147 
         8 
       153.884519 
       241.035519 
         3.898907 
         3.898907 
        42.457211 
         8.070167 
        17.691061 
        65.825137 
     
    
      std 
       182.084376 
         3.367862 
         7.282877 
         0 
        40.271853 
       179.707621 
         5.359222 
         2.575575 
         6.860038 
         0.459665 
         3.342671 
        56.883606 
     
    
      min 
        26.000000 
        11.633776 
         5.269543 
         8 
        45.254834 
        22.000000 
         0.000000 
         1.000000 
        32.310669 
         8.000000 
        11.313708 
         5.000000 
     
    
      25% 
       111.250000 
        15.385483 
        14.632588 
         8 
       128.000000 
       105.000000 
         1.000000 
         2.000000 
        37.481327 
         8.000000 
        16.000000 
        27.000000 
     
    
      50% 
       200.000000 
        17.227884 
        20.350684 
         8 
       181.019336 
       192.500000 
         3.000000 
         3.000000 
        41.291876 
         8.000000 
        16.000000 
        48.000000 
     
    
      75% 
       326.750000 
        19.356498 
        25.621973 
         8 
       181.019336 
       310.750000 
         4.000000 
         5.000000 
        45.941928 
         8.000000 
        18.485281 
        82.000000 
     
    
      max 
       978.000000 
        34.201332 
        45.057882 
         8 
       181.019336 
       958.000000 
        44.000000 
        14.000000 
        79.518497 
        11.313708 
        32.000000 
       337.000000 
     
  
In [20]:
    
# Export the feature matrix without the image-name index and without a header
# row. reset_index(drop=True) returns a new frame with a plain integer index,
# which replaces the earlier copy / reset_index / drop('img_name') sequence and
# does not depend on what the index is named.
bf = blob_features.reset_index(drop=True)
bf.to_csv('/Users/samuel/Downloads/blobs_features.csv', header=False)
    
Filter the columns that we want to run with
In [13]:
    
# Feature columns to leave out of the t-SNE input; empty means keep them all.
excluded_columns = []
# A list comprehension yields a real list on both Python 2 and Python 3,
# whereas filter() returns a lazy iterator on Python 3 and would display as
# a filter object instead of the column names.
columns = [c for c in blob_features.columns if c not in excluded_columns]
selected_features = blob_features[columns]
columns
    
    Out[13]:
['blob_count',
 'avg_radius',
 'std_radius',
 'min_radius',
 'max_radius',
 'small_radius_count',
 'med_radius_count',
 'large_radius_count',
 'density',
 'lower_radius_qt',
 'upper_radius_qt',
 'upper_dist_count']
Run t-SNE on features to obtain mapping
In [15]:
    
# Run t-SNE on the selected feature columns to obtain the mapping.
# NOTE(review): no random seed is passed here, so the embedding presumably
# differs between runs - confirm whether mia.analysis.tSNE accepts one.
mapping = mia.analysis.tSNE(selected_features, perplexity=45, learning_rate=400, verbose=2)
    
    
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 1.149708
[t-SNE] Iteration 10: error = 14.6560135, gradient norm = 0.1483312
[t-SNE] Iteration 20: error = 12.6319582, gradient norm = 0.1472542
[t-SNE] Iteration 30: error = 12.3499517, gradient norm = 0.1307045
[t-SNE] Iteration 40: error = 12.1685876, gradient norm = 0.1312051
[t-SNE] Iteration 50: error = 12.4944632, gradient norm = 0.1242800
[t-SNE] Iteration 60: error = 12.2596019, gradient norm = 0.1363835
[t-SNE] Iteration 70: error = 12.2971293, gradient norm = 0.1272094
[t-SNE] Iteration 80: error = 12.5891942, gradient norm = 0.1242036
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 12.455086
[t-SNE] Iteration 90: error = 0.6541182, gradient norm = 0.0209965
[t-SNE] Iteration 100: error = 0.3727205, gradient norm = 0.0083266
[t-SNE] Iteration 110: error = 0.3364563, gradient norm = 0.0030272
[t-SNE] Iteration 120: error = 0.3262779, gradient norm = 0.0014945
[t-SNE] Iteration 130: error = 0.3212489, gradient norm = 0.0007918
[t-SNE] Iteration 140: error = 0.3201583, gradient norm = 0.0003092
[t-SNE] Iteration 150: error = 0.3197391, gradient norm = 0.0002451
[t-SNE] Iteration 160: error = 0.3195229, gradient norm = 0.0002321
[t-SNE] Iteration 170: error = 0.3194043, gradient norm = 0.0002254
[t-SNE] Iteration 180: error = 0.3193372, gradient norm = 0.0002220
[t-SNE] Iteration 190: error = 0.3192983, gradient norm = 0.0002199
[t-SNE] Iteration 200: error = 0.3192755, gradient norm = 0.0002187
[t-SNE] Iteration 210: error = 0.3192620, gradient norm = 0.0002180
[t-SNE] Iteration 220: error = 0.3192539, gradient norm = 0.0002176
[t-SNE] Iteration 230: error = 0.3192492, gradient norm = 0.0002174
[t-SNE] Iteration 240: error = 0.3192463, gradient norm = 0.0002172
[t-SNE] Iteration 250: error = 0.3192446, gradient norm = 0.0002171
[t-SNE] Iteration 256: error difference 0.000000. Finished.
[t-SNE] Error after 256 iterations: 0.319244
In [16]:
    
# Attach the BIRADS label to the t-SNE mapping.
labels = mia.analysis.remove_duplicate_index(class_labels)
mapping['BIRADS'] = labels['BIRADS']
# blob_features was built as [Hologic rows, sampled synthetic rows], so the
# synthetic points are the trailing rows. Derive the split point from the
# sample size instead of hard-coding 6.
# NOTE(review): assumes mapping keeps the row order of selected_features - confirm.
n_synthetic = len(random_synthetic_features)
n_hologic = len(mapping) - n_synthetic
hol_map = mapping[:n_hologic]
syn_map = mapping[n_hologic:]
# Draw both groups on the same axes; synthetic points use larger triangles.
ax = mia.plotting.plot_scatter_2d(hol_map, [0,1], 'BIRADS')
ax = mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', ax=ax, marker='^', s=50)
plt.show()
    
In [90]:
    
# Split the feature rows by whether column 0 of the t-SNE mapping is below 6,
# then show how the summary statistics of the two sides differ.
# NOTE(review): the threshold 6 is presumably read off one particular
# embedding; it may not carry over to a re-run of t-SNE - confirm.
left = blob_features[mapping[0] < 6]
right = blob_features[mapping[0] >= 6]
left.describe() - right.describe()
    
    Out[90]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
      BIRADS 
     
  
  
    
      count 
       190.000000 
       190.000000 
       190.000000 
       190 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
       190.000000 
     
    
      mean 
       180.265615 
        -5.222050 
        -7.616788 
         0 
       -13.321925 
       177.449886 
         2.103826 
         0.711903 
        -5.140102 
        -0.232369 
        -7.014925 
        47.131949 
        -0.917920 
     
    
      std 
       133.087397 
        -1.093255 
        -0.044200 
         0 
        13.514981 
       131.926625 
         4.002086 
         0.894572 
        -2.085017 
        -0.792425 
        -1.198303 
        44.549201 
         0.175433 
     
    
      min 
        27.000000 
        -5.149766 
        -7.542499 
         0 
       -45.254834 
        27.000000 
         0.000000 
         0.000000 
        -0.940616 
         0.000000 
        -8.000000 
         4.000000 
         0.000000 
     
    
      25% 
        70.500000 
        -4.369145 
        -7.638793 
         0 
         0.000000 
        68.500000 
         0.000000 
         0.000000 
        -5.343887 
         0.000000 
        -6.627417 
        13.250000 
        -1.000000 
     
    
      50% 
       146.500000 
        -4.656474 
        -8.864071 
         0 
         0.000000 
       142.000000 
         1.000000 
         0.000000 
        -4.804400 
         0.000000 
        -6.627417 
        35.500000 
        -1.000000 
     
    
      75% 
       258.000000 
        -5.195835 
        -6.098009 
         0 
         0.000000 
       254.750000 
         2.000000 
         1.750000 
        -3.253729 
         0.000000 
        -6.627417 
        60.500000 
        -1.000000 
     
    
      max 
       677.000000 
       -10.318461 
        -7.218612 
         0 
         0.000000 
       668.000000 
        33.000000 
         2.000000 
       -14.123230 
        -2.485281 
        -9.372583 
       255.000000 
         0.000000 
     
  
In [95]:
    
# Same comparison as the split at 6, but for rows whose column 0 of the t-SNE
# mapping is above 15 versus all remaining rows.
# NOTE(review): the threshold 15 is presumably specific to one embedding - confirm.
s = blob_features[mapping[0] > 15]
s.describe() - blob_features[mapping[0] <= 15].describe()
    
    Out[95]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
      BIRADS 
     
  
  
    
      count 
      -354.000000 
      -354.000000 
      -354.000000 
      -354 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
      -354.000000 
     
    
      mean 
      -184.119444 
         8.462616 
        10.389491 
         0 
         3.266872 
      -183.138889 
        -1.755556 
         0.775000 
         9.421449 
         3.309106 
         6.632795 
       -46.755556 
         0.477778 
     
    
      std 
      -159.150286 
         1.404357 
         2.819064 
         0 
        -1.435178 
      -156.533161 
        -3.260285 
         1.379287 
         6.049804 
        -0.061661 
         0.675160 
       -47.561481 
         0.139854 
     
    
      min 
         7.000000 
         9.045703 
        16.948537 
         0 
        45.254834 
         7.000000 
         0.000000 
         0.000000 
         5.892578 
         3.313708 
        11.313708 
         2.000000 
         0.000000 
     
    
      25% 
       -59.000000 
         6.921742 
         8.875299 
         0 
        13.254834 
       -62.250000 
         0.000000 
         0.250000 
         6.773016 
         3.313708 
         6.627417 
       -14.000000 
         1.000000 
     
    
      50% 
      -128.500000 
         9.397639 
         5.812960 
         0 
         0.000000 
      -134.000000 
        -1.500000 
         1.000000 
         5.550381 
         3.313708 
         6.627417 
       -28.000000 
         0.000000 
     
    
      75% 
      -247.250000 
        10.136183 
        12.269981 
         0 
         0.000000 
      -238.250000 
        -1.250000 
         0.000000 
        13.896739 
         3.313708 
         6.627417 
       -55.500000 
         0.750000 
     
    
      max 
      -884.000000 
        -2.746227 
         2.819096 
         0 
         0.000000 
      -867.000000 
       -38.000000 
        -2.000000 
        -8.563845 
         2.485281 
         0.000000 
      -307.000000 
         0.000000 
     
  
In [79]:
    
# Add the labels to the feature frame as a BIRADS column. This mutates
# blob_features, so cells run afterwards also see that column.
# NOTE(review): other cells index labels['BIRADS']; here the whole `labels`
# object is assigned - confirm that is intended.
blob_features['BIRADS'] = labels
# Columns shown in the scatter matrix; 'BIRADS' is also passed as the second
# argument of plot_scattermatrix.
scatter_columns = [
    'avg_radius',
    'max_radius',
    'blob_count',
    'std_radius',
    'small_radius_count',
    'density',
    'large_radius_count',
    'BIRADS',
]
mia.plotting.plot_scattermatrix(blob_features[scatter_columns], 'BIRADS')
    
In [14]:
    
# Pass the mapping and the BIRADS labels to mia.analysis.measure_closeness;
# the stored output is a float Series indexed by class.
mia.analysis.measure_closeness(mapping, labels['BIRADS'])
    
    Out[14]:
3    10.770177
4    10.036130
1     8.987999
2     7.796480
dtype: float64
In [15]:
    
# Plot only the synthetic points of the mapping, with annotate=True.
mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', marker='^', s=50, annotate=True)
plt.show()
    
In [16]:
    
# Look up the metadata rows whose index labels match the selected synthetic
# feature rows.
syn_feature_meta.loc[random_synthetic_features.index]
    
    Out[16]:
  
    
       
      Vol 
      CmprTh 
      SkTh 
      LigThCrs 
      LigThFn 
      #cmprts 
      #cmprts.1 
      Dperc 
      VBD 
      VBD.1 
      BIRADS 
      min_speed 
      max_speed 
      min_ratio 
      max_ratio 
      phantom_name 
     
    
      img_name 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      test_Mix_DPerc0_c_0.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_1.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_2.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_3.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_4.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_5.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_6.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_7.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_8.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc0_c_9.dcm 
       436 
       5 
       0.5 
       400 
       200 
       333 
       1000 
        0 
       24 
       21 
             1 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc0_c 
     
    
      test_Mix_DPerc10_c_0.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_1.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_2.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_3.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_4.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_5.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_6.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_7.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_8.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc10_c_9.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       10 
       40 
       33 
       2 (med) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc10_c 
     
    
      test_Mix_DPerc20_c_0.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_1.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_2.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_3.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_4.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_5.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_6.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_7.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_8.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc20_c_9.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       20 
       46 
       38 
        2 (hi) 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc20_c 
     
    
      test_Mix_DPerc35_c_0.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_1.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_2.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_3.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_4.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_5.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_6.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_7.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_8.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc35_c_9.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       35 
       55 
       47 
             3 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc35_c 
     
    
      test_Mix_DPerc5_c_0.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_1.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_2.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_3.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_4.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_5.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_6.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_7.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_8.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc5_c_9.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       1000 
        5 
       35 
       27 
       2 (low) 
       0.5 
       2 
       0.5 
       2 
        test_Mix_DPerc5_c 
     
    
      test_Mix_DPerc75_c_0.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_1.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_2.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_3.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_4.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_5.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_6.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_7.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_8.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
    
      test_Mix_DPerc75_c_9.dcm 
       436 
       5 
       1.5 
       600 
       200 
       333 
       2000 
       75 
       67 
       59 
        3 or 4 
       0.5 
       2 
       0.5 
       2 
       test_Mix_DPerc75_c 
     
  
In [17]:
    
# Write the `mapping` table out as a CSV file.
# NOTE(review): the other data files in this notebook are read from
# /Volumes/Seagate/mmp_data/...; confirm that this path (without `mmp_data`)
# is the intended destination.
mapping_csv_path = '/Volumes/Seagate/2015-03-26/mapping-with-both.csv'
mapping.to_csv(mapping_csv_path)
    
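Compare the synthetic images with the Hologic dataset by BIRADS class. The table below shows the difference in summary statistics (synthetic minus Hologic) for the blob features of BIRADS class 1.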
In [18]:
    
# Boolean mask that is True wherever the BIRADS label equals 1.
birads_class = labels['BIRADS'] == 1

# Apply the same mask to the synthetic and to the Hologic blob features.
# NOTE(review): the recorded output of this cell contains pandas' "Boolean
# Series key will be reindexed to match DataFrame index" warning, so the mask
# is being realigned to at least one of these frames -- confirm that the rows
# selected are the ones intended.
syn_class = random_synthetic_features[birads_class]
hol_class = hologic_blob_features[birads_class]

# Summary statistics of each subset; their difference (synthetic minus
# Hologic) is the value displayed by the cell.
syn_summary = syn_class.describe()
hol_summary = hol_class.describe()
syn_summary - hol_summary
    
    
/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/frame.py:1808: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
    Out[18]:
  
    
       
      blob_count 
      avg_radius 
      std_radius 
      min_radius 
      max_radius 
      small_radius_count 
      med_radius_count 
      large_radius_count 
      density 
      lower_radius_qt 
      upper_radius_qt 
      upper_dist_count 
     
  
  
    
      count 
       -46.000000 
      -46.000000 
      -46.000000 
      -46 
      -46.000000 
       -46.000000 
      -46.000000 
      -46.000000 
      -46.000000 
      -46.000000 
      -46.000000 
       -46.000000 
     
    
      mean 
      -256.210714 
        6.854786 
        0.445133 
        0 
      -73.988703 
      -260.278571 
       -0.625000 
        4.692857 
       -3.262426 
        1.325483 
       12.809197 
       -54.417857 
     
    
      std 
      -139.370870 
        0.281655 
       -4.794043 
        0 
      -38.634888 
      -136.698561 
       -3.282583 
       -0.777543 
       -2.506746 
        1.711192 
        3.280094 
       -48.560634 
     
    
      min 
       -31.000000 
        8.328287 
       13.598249 
        0 
       45.254834 
       -42.000000 
        2.000000 
        6.000000 
        0.867475 
        0.000000 
       11.313708 
        -2.000000 
     
    
      25% 
      -187.500000 
        5.930656 
        2.900174 
        0 
      -90.509668 
      -189.500000 
        0.000000 
        5.250000 
       -1.220780 
        0.000000 
        7.798990 
       -30.500000 
     
    
      50% 
      -219.000000 
        6.637435 
       -1.409568 
        0 
      -90.509668 
      -224.000000 
        0.000000 
        6.000000 
       -2.646244 
        0.000000 
       16.000000 
       -43.000000 
     
    
      75% 
      -316.500000 
        7.947400 
       -3.140442 
        0 
      -90.509668 
      -321.500000 
        0.750000 
        4.000000 
       -4.419133 
        3.313708 
       16.000000 
       -53.000000 
     
    
      max 
      -757.000000 
        3.607515 
       -8.962121 
        0 
      -90.509668 
      -758.000000 
      -26.000000 
       -1.000000 
      -15.833031 
        3.313708 
       12.686292 
      -246.000000 
     
  
In [19]:
    
# Normalise the blob features and copy the original column names onto the
# normalised frame.
blob_norm = mia.analysis.normalize_data_frame(blob_features)
blob_norm.columns = blob_features.columns.values

# Attach the class labels. The 'BIRADS' column is selected explicitly so a
# single Series is assigned, rather than a whole DataFrame.
blob_norm['BIRADS'] = labels['BIRADS']

# Plot every column except the excluded ones. A list comprehension is used
# instead of filter(): on Python 3 filter() returns a lazy iterator, which is
# not a valid column selector for `blob_norm[columns]`.
excluded_columns = ['min_radius']
columns = [c for c in blob_norm.columns if c not in excluded_columns]
plotting.parallel_coordinates(blob_norm[columns], 'BIRADS')
plt.show()
    
Content source: samueljackson92/major-project-data
Similar notebooks: