In [1]:
%matplotlib qt
import mia
import pandas as pd
import numpy as np
import re
import random
import matplotlib.pyplot as plt
from pandas.tools import plotting

In [17]:
batch1 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch1_blobs.csv')
batch2 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv')
synthetics = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/synthetic_blobs.csv')
synthetics.index = synthetics.img_name

hologic = pd.concat([batch1, batch2])
hologic.index = hologic.img_name
hologic.head()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-fbe373e01dd1> in <module>()
      2 batch2 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv')
      3 synthetics = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-04-11/phantom-blobs1.csv')
----> 4 synthetics.index = synthetics.img_name
      5 
      6 hologic = pd.concat([batch1, batch2])

/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   1945                 return self[name]
   1946             raise AttributeError("'%s' object has no attribute '%s'" %
-> 1947                                  (type(self).__name__, name))
   1948 
   1949     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'img_name'
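
The AttributeError above occurred because that particular blobs CSV has no img_name column. A minimal defensive sketch (hypothetical; the real column layout of the failing file is not shown here) is to check for the column before re-indexing:

# Hypothetical guard: only re-index when the blobs CSV actually contains an
# 'img_name' column; otherwise keep whatever index the reader produced.
synthetics = pd.read_csv('/Volumes/Seagate/mmp_data/2015-03-26/synthetic_blobs.csv')
if 'img_name' in synthetics.columns:
    synthetics.index = synthetics['img_name']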

In [3]:
hologic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/BIRADS.csv'
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, hologic_meta_path)
hologic_meta.head()


Out[3]:
patient_id side view img_name BIRADS img_number
img_name
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1

In [4]:
synthetic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/synthetic_meta_data_cleaned.csv'
synthetic_meta = mia.analysis.create_synthetic_meta_data(synthetics, synthetic_meta_path)
synthetic_meta.head()


Out[4]:
Vol CmprTh SkTh LigThCrs LigThFn #cmprts #cmprts.1 Dperc VBD VBD.1 BIRADS min_speed max_speed min_ratio max_ratio phantom_name
img_name
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c

In [5]:
synth_class_labels = synthetic_meta['BIRADS']
hologic_class_labels = hologic_meta['BIRADS']
class_labels = pd.concat([hologic_class_labels, synth_class_labels])

# Replace non-specific BIRADS labels with numeric classes: '3 or 4' becomes 4
# and the qualified grade 2 labels ('2 (low)', '2 (med)', '2 (hi)') become 2.
class_labels.replace('3 or 4', 4, inplace=True)
class_labels.replace(re.compile(r'2 \([a-z]+\)'), 2, inplace=True)
class_labels = class_labels.astype(float)

Compute shape-based features from the raw blob detections.


In [6]:
hologic_blob_features = mia.analysis.features_from_blobs(hologic)
hologic_blob_features.describe()


Out[6]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count
count 360.000000 360.000000 360.000000 360 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000
mean 251.777778 17.679679 20.295388 8 154.272791 243.975000 3.927778 3.875000 42.546985 8.050626 17.575128 66.580556
std 182.143063 3.290097 7.305919 0 40.349384 179.730972 5.394241 2.559256 6.866999 0.392667 3.161404 57.047643
min 26.000000 11.633776 5.269543 8 45.254834 22.000000 0.000000 1.000000 32.678451 8.000000 11.313708 5.000000
25% 113.000000 15.375540 14.601573 8 128.000000 109.000000 1.000000 2.000000 37.506466 8.000000 16.000000 27.000000
50% 203.500000 17.154971 20.158492 8 181.019336 196.000000 3.000000 3.000000 41.412974 8.000000 16.000000 49.000000
75% 328.250000 19.297899 25.607779 8 181.019336 314.000000 4.000000 5.000000 46.069529 8.000000 16.000000 83.000000
max 978.000000 34.201332 45.057882 8 181.019336 958.000000 44.000000 14.000000 79.518497 11.313708 32.000000 337.000000

In [7]:
synthetic_blob_features = mia.analysis.features_from_blobs(synthetics)
synthetic_blob_features.describe()


Out[7]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count
count 60.000000 60.000000 60.000000 60 60.000000 60.000000 60.000000 60.000000 60.000000 60.000000 60.000000 60.000000
mean 80.733333 21.982949 24.371230 8 130.588167 74.033333 1.666667 5.033333 35.482570 8.358985 22.714521 22.316667
std 15.154422 2.342589 2.647525 0 26.605668 15.483142 1.445820 2.863959 4.033106 1.004434 4.996060 6.474177
min 40.000000 18.645281 19.316414 8 90.509668 33.000000 0.000000 2.000000 29.205906 8.000000 16.000000 8.000000
25% 72.750000 20.293316 22.697181 8 128.000000 66.000000 1.000000 3.750000 33.215826 8.000000 22.627417 19.000000
50% 84.500000 21.347224 23.683670 8 128.000000 78.000000 1.000000 4.000000 34.529411 8.000000 22.627417 22.000000
75% 92.000000 23.621765 26.496192 8 128.000000 84.250000 2.250000 5.000000 37.044697 8.000000 22.627417 28.000000
max 109.000000 28.319596 31.413381 8 181.019336 103.000000 6.000000 13.000000 53.889814 11.313708 35.313708 33.000000

Select a subset of the synthetic images to use by taking one random sample from each DPerc* group.


In [8]:
syn_feature_meta = mia.analysis.remove_duplicate_index(synthetic_meta)

# Attach the phantom name so the images can be grouped by phantom.
synthetic_blob_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()

group = synthetic_blob_features.groupby('phantom_name')

def select_random(x):
    # Pick a single random image from each phantom group.
    return x.ix[random.sample(x.index, 1)]

random_synthetic_features = group.apply(select_random)

random_synthetic_features.drop('phantom_name', axis=1, inplace=True)
random_synthetic_features.reset_index(drop=True, level=0, inplace=True)
random_synthetic_features


Out[8]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count
img_name
test_Mix_DPerc0_c_2.dcm 71 27.085040 23.624541 8 90.509668 54 5 12 40.916216 11.313708 32.000000 24
test_Mix_DPerc10_c_3.dcm 89 21.499210 22.294951 8 128.000000 82 2 5 32.310669 8.000000 22.627417 29
test_Mix_DPerc20_c_4.dcm 82 20.877556 27.635799 8 181.019336 79 0 3 34.492907 8.000000 22.627417 22
test_Mix_DPerc35_c_9.dcm 53 25.991843 28.518937 8 128.000000 47 1 5 41.964861 11.313708 22.627417 13
test_Mix_DPerc5_c_6.dcm 76 25.187286 24.186133 8 128.000000 69 4 3 37.381472 8.000000 32.000000 20
test_Mix_DPerc75_c_6.dcm 62 22.021813 27.265855 8 128.000000 57 1 4 35.358633 8.828427 16.000000 15
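
As an aside, random.sample over an index and .ix are Python 2 / older-pandas idioms; with pandas 0.16.1 or later the same per-group selection could be written with DataFrame.sample (a sketch, not the code used in this notebook):

# Sample one image from each phantom group and drop the grouping column.
random_synthetic_features = (synthetic_blob_features
                             .groupby('phantom_name', group_keys=False)
                             .apply(lambda g: g.sample(n=1))
                             .drop('phantom_name', axis=1))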

Join the datasets. The result includes both the Hologic and the synthetic images.


In [9]:
blob_features = pd.concat([hologic_blob_features, random_synthetic_features])
blob_features.describe()


Out[9]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count
count 366.000000 366.000000 366.000000 366 366.000000 366.000000 366.000000 366.000000 366.000000 366.000000 366.000000 366.000000
mean 248.833333 17.779638 20.382147 8 153.884519 241.035519 3.898907 3.898907 42.457211 8.070167 17.691061 65.825137
std 182.084376 3.367862 7.282877 0 40.271853 179.707621 5.359222 2.575575 6.860038 0.459665 3.342671 56.883606
min 26.000000 11.633776 5.269543 8 45.254834 22.000000 0.000000 1.000000 32.310669 8.000000 11.313708 5.000000
25% 111.250000 15.385483 14.632588 8 128.000000 105.000000 1.000000 2.000000 37.481327 8.000000 16.000000 27.000000
50% 200.000000 17.227884 20.350684 8 181.019336 192.500000 3.000000 3.000000 41.291876 8.000000 16.000000 48.000000
75% 326.750000 19.356498 25.621973 8 181.019336 310.750000 4.000000 5.000000 45.941928 8.000000 18.485281 82.000000
max 978.000000 34.201332 45.057882 8 181.019336 958.000000 44.000000 14.000000 79.518497 11.313708 32.000000 337.000000

In [20]:
bf = blob_features.copy()
bf = bf.reset_index()
bf.drop('img_name', axis=1, inplace=True)
bf.to_csv('/Users/samuel/Downloads/blobs_features.csv', header=False)

Filter the feature columns that we want to run t-SNE with.


In [13]:
# No columns are excluded here; add feature names to the list to drop them.
columns = filter(lambda c: c not in [], blob_features.columns)
selected_features = blob_features[columns]
columns


Out[13]:
['blob_count',
 'avg_radius',
 'std_radius',
 'min_radius',
 'max_radius',
 'small_radius_count',
 'med_radius_count',
 'large_radius_count',
 'density',
 'lower_radius_qt',
 'upper_radius_qt',
 'upper_dist_count']

Run t-SNE on the selected features to obtain a two-dimensional mapping.


In [15]:
mapping = mia.analysis.tSNE(selected_features, perplexity=45, learning_rate=400, verbose=2)


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 1.149708
[t-SNE] Iteration 10: error = 14.6560135, gradient norm = 0.1483312
[t-SNE] Iteration 20: error = 12.6319582, gradient norm = 0.1472542
[t-SNE] Iteration 30: error = 12.3499517, gradient norm = 0.1307045
[t-SNE] Iteration 40: error = 12.1685876, gradient norm = 0.1312051
[t-SNE] Iteration 50: error = 12.4944632, gradient norm = 0.1242800
[t-SNE] Iteration 60: error = 12.2596019, gradient norm = 0.1363835
[t-SNE] Iteration 70: error = 12.2971293, gradient norm = 0.1272094
[t-SNE] Iteration 80: error = 12.5891942, gradient norm = 0.1242036
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 12.455086
[t-SNE] Iteration 90: error = 0.6541182, gradient norm = 0.0209965
[t-SNE] Iteration 100: error = 0.3727205, gradient norm = 0.0083266
[t-SNE] Iteration 110: error = 0.3364563, gradient norm = 0.0030272
[t-SNE] Iteration 120: error = 0.3262779, gradient norm = 0.0014945
[t-SNE] Iteration 130: error = 0.3212489, gradient norm = 0.0007918
[t-SNE] Iteration 140: error = 0.3201583, gradient norm = 0.0003092
[t-SNE] Iteration 150: error = 0.3197391, gradient norm = 0.0002451
[t-SNE] Iteration 160: error = 0.3195229, gradient norm = 0.0002321
[t-SNE] Iteration 170: error = 0.3194043, gradient norm = 0.0002254
[t-SNE] Iteration 180: error = 0.3193372, gradient norm = 0.0002220
[t-SNE] Iteration 190: error = 0.3192983, gradient norm = 0.0002199
[t-SNE] Iteration 200: error = 0.3192755, gradient norm = 0.0002187
[t-SNE] Iteration 210: error = 0.3192620, gradient norm = 0.0002180
[t-SNE] Iteration 220: error = 0.3192539, gradient norm = 0.0002176
[t-SNE] Iteration 230: error = 0.3192492, gradient norm = 0.0002174
[t-SNE] Iteration 240: error = 0.3192463, gradient norm = 0.0002172
[t-SNE] Iteration 250: error = 0.3192446, gradient norm = 0.0002171
[t-SNE] Iteration 256: error difference 0.000000. Finished.
[t-SNE] Error after 256 iterations: 0.319244
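
The verbose log above comes from scikit-learn's TSNE, so mia.analysis.tSNE is presumably a thin wrapper around it. A minimal sketch of an equivalent call, assuming the features are standardised before embedding (the exact preprocessing inside mia is not shown here):

from sklearn.manifold import TSNE
from sklearn.preprocessing import scale

def tsne_sketch(features, perplexity=45, learning_rate=400, verbose=2):
    # Standardise each feature, embed into two dimensions, and keep the image
    # index so the mapping can be joined back onto the meta data.
    model = TSNE(n_components=2, perplexity=perplexity,
                 learning_rate=learning_rate, verbose=verbose)
    fitted = model.fit_transform(scale(features))
    return pd.DataFrame(fitted, index=features.index)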

In [16]:
labels = mia.analysis.remove_duplicate_index(class_labels)
mapping['BIRADS'] = labels['BIRADS']

# The six synthetic images were appended last in blob_features, so they occupy
# the final six rows of the mapping.
hol_map = mapping[:-6]
hol_map.shape

syn_map = mapping[-6:]
syn_map.head()

# Overlay the synthetic images (marker='^') on the same axes as the Hologic scatter.
ax = mia.plotting.plot_scatter_2d(hol_map, [0,1], 'BIRADS')
ax = mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', ax=ax, marker='^', s=50)

plt.show()

In [90]:
left = blob_features[mapping[0] < 6]
right = blob_features[mapping[0] >= 6]
left.describe() - right.describe()


Out[90]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count BIRADS
count 190.000000 190.000000 190.000000 190 190.000000 190.000000 190.000000 190.000000 190.000000 190.000000 190.000000 190.000000 190.000000
mean 180.265615 -5.222050 -7.616788 0 -13.321925 177.449886 2.103826 0.711903 -5.140102 -0.232369 -7.014925 47.131949 -0.917920
std 133.087397 -1.093255 -0.044200 0 13.514981 131.926625 4.002086 0.894572 -2.085017 -0.792425 -1.198303 44.549201 0.175433
min 27.000000 -5.149766 -7.542499 0 -45.254834 27.000000 0.000000 0.000000 -0.940616 0.000000 -8.000000 4.000000 0.000000
25% 70.500000 -4.369145 -7.638793 0 0.000000 68.500000 0.000000 0.000000 -5.343887 0.000000 -6.627417 13.250000 -1.000000
50% 146.500000 -4.656474 -8.864071 0 0.000000 142.000000 1.000000 0.000000 -4.804400 0.000000 -6.627417 35.500000 -1.000000
75% 258.000000 -5.195835 -6.098009 0 0.000000 254.750000 2.000000 1.750000 -3.253729 0.000000 -6.627417 60.500000 -1.000000
max 677.000000 -10.318461 -7.218612 0 0.000000 668.000000 33.000000 2.000000 -14.123230 -2.485281 -9.372583 255.000000 0.000000

In [95]:
s = blob_features[mapping[0] > 15]
s.describe() - blob_features[mapping[0] <= 15].describe()


Out[95]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count BIRADS
count -354.000000 -354.000000 -354.000000 -354 -354.000000 -354.000000 -354.000000 -354.000000 -354.000000 -354.000000 -354.000000 -354.000000 -354.000000
mean -184.119444 8.462616 10.389491 0 3.266872 -183.138889 -1.755556 0.775000 9.421449 3.309106 6.632795 -46.755556 0.477778
std -159.150286 1.404357 2.819064 0 -1.435178 -156.533161 -3.260285 1.379287 6.049804 -0.061661 0.675160 -47.561481 0.139854
min 7.000000 9.045703 16.948537 0 45.254834 7.000000 0.000000 0.000000 5.892578 3.313708 11.313708 2.000000 0.000000
25% -59.000000 6.921742 8.875299 0 13.254834 -62.250000 0.000000 0.250000 6.773016 3.313708 6.627417 -14.000000 1.000000
50% -128.500000 9.397639 5.812960 0 0.000000 -134.000000 -1.500000 1.000000 5.550381 3.313708 6.627417 -28.000000 0.000000
75% -247.250000 10.136183 12.269981 0 0.000000 -238.250000 -1.250000 0.000000 13.896739 3.313708 6.627417 -55.500000 0.750000
max -884.000000 -2.746227 2.819096 0 0.000000 -867.000000 -38.000000 -2.000000 -8.563845 2.485281 0.000000 -307.000000 0.000000

In [79]:
blob_features['BIRADS'] = labels
mia.plotting.plot_scattermatrix(blob_features[['avg_radius', 'max_radius', 'blob_count', 'std_radius', 'small_radius_count', 'density', 'large_radius_count', 'BIRADS']], 'BIRADS')

In [14]:
mia.analysis.measure_closeness(mapping, labels['BIRADS'])


Out[14]:
3    10.770177
4    10.036130
1     8.987999
2     7.796480
dtype: float64

In [15]:
mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', marker='^', s=50, annotate=True)
plt.show()

In [16]:
syn_feature_meta.loc[random_synthetic_features.index]


Out[16]:
Vol CmprTh SkTh LigThCrs LigThFn #cmprts #cmprts.1 Dperc VBD VBD.1 BIRADS min_speed max_speed min_ratio max_ratio phantom_name
img_name
test_Mix_DPerc0_c_0.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_1.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_2.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_3.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_4.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_5.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_6.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_7.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_8.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc0_c_9.dcm 436 5 0.5 400 200 333 1000 0 24 21 1 0.5 2 0.5 2 test_Mix_DPerc0_c
test_Mix_DPerc10_c_0.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_1.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_2.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_3.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_4.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_5.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_6.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_7.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_8.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc10_c_9.dcm 436 5 1.5 600 200 333 2000 10 40 33 2 (med) 0.5 2 0.5 2 test_Mix_DPerc10_c
test_Mix_DPerc20_c_0.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_1.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_2.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_3.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_4.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_5.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_6.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_7.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_8.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc20_c_9.dcm 436 5 1.5 600 200 333 2000 20 46 38 2 (hi) 0.5 2 0.5 2 test_Mix_DPerc20_c
test_Mix_DPerc35_c_0.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_1.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_2.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_3.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_4.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_5.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_6.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_7.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_8.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc35_c_9.dcm 436 5 1.5 600 200 333 2000 35 55 47 3 0.5 2 0.5 2 test_Mix_DPerc35_c
test_Mix_DPerc5_c_0.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_1.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_2.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_3.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_4.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_5.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_6.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_7.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_8.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc5_c_9.dcm 436 5 1.5 600 200 333 1000 5 35 27 2 (low) 0.5 2 0.5 2 test_Mix_DPerc5_c
test_Mix_DPerc75_c_0.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_1.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_2.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_3.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_4.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_5.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_6.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_7.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_8.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c
test_Mix_DPerc75_c_9.dcm 436 5 1.5 600 200 333 2000 75 67 59 3 or 4 0.5 2 0.5 2 test_Mix_DPerc75_c

In [17]:
mapping.to_csv('/Volumes/Seagate/2015-03-26/mapping-with-both.csv')

Compare the synthetic images with the Hologic dataset for a single BIRADS class.


In [18]:
birads_class = labels['BIRADS'] == 1
syn_class = random_synthetic_features[birads_class]
hol_class = hologic_blob_features[birads_class]
syn_class.describe() - hol_class.describe()


/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/frame.py:1808: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
Out[18]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count
count -46.000000 -46.000000 -46.000000 -46 -46.000000 -46.000000 -46.000000 -46.000000 -46.000000 -46.000000 -46.000000 -46.000000
mean -256.210714 6.854786 0.445133 0 -73.988703 -260.278571 -0.625000 4.692857 -3.262426 1.325483 12.809197 -54.417857
std -139.370870 0.281655 -4.794043 0 -38.634888 -136.698561 -3.282583 -0.777543 -2.506746 1.711192 3.280094 -48.560634
min -31.000000 8.328287 13.598249 0 45.254834 -42.000000 2.000000 6.000000 0.867475 0.000000 11.313708 -2.000000
25% -187.500000 5.930656 2.900174 0 -90.509668 -189.500000 0.000000 5.250000 -1.220780 0.000000 7.798990 -30.500000
50% -219.000000 6.637435 -1.409568 0 -90.509668 -224.000000 0.000000 6.000000 -2.646244 0.000000 16.000000 -43.000000
75% -316.500000 7.947400 -3.140442 0 -90.509668 -321.500000 0.750000 4.000000 -4.419133 3.313708 16.000000 -53.000000
max -757.000000 3.607515 -8.962121 0 -90.509668 -758.000000 -26.000000 -1.000000 -15.833031 3.313708 12.686292 -246.000000
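
The UserWarning above is raised because the boolean mask is indexed over both datasets while each feature frame only holds its own images, so pandas reindexes the mask. A sketch of the same comparison with explicit index alignment (assuming labels is indexed by img_name, as elsewhere in this notebook):

# Restrict each feature frame to the images labelled BIRADS 1 by intersecting
# indices, which avoids the implicit reindex of the boolean mask.
birads1 = labels[labels['BIRADS'] == 1].index
syn_class = random_synthetic_features.loc[random_synthetic_features.index.intersection(birads1)]
hol_class = hologic_blob_features.loc[hologic_blob_features.index.intersection(birads1)]
syn_class.describe() - hol_class.describe()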

Higher-dimensional plot

This can show which features contribute most across the dataset.


In [19]:
blob_norm = mia.analysis.normalize_data_frame(blob_features)
# Restore the original column names after normalisation.
blob_norm.columns = blob_features.columns.values
blob_norm['BIRADS'] = labels

# min_radius is constant (8) across every image, so it is excluded from the plot.
columns = filter(lambda c: c not in ['min_radius'], blob_norm.columns)
plotting.parallel_coordinates(blob_norm[columns], 'BIRADS')
plt.show()
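
mia.analysis.normalize_data_frame presumably rescales each feature column onto a common range so the parallel coordinates share one axis; a minimal min-max sketch under that assumption (not the library's actual implementation):

def normalize_data_frame_sketch(df):
    # Rescale every column to [0, 1]. A constant column such as min_radius
    # becomes NaN here (zero range), but it is excluded from the plot anyway.
    return (df - df.min()) / (df.max() - df.min())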