In [1]:
%matplotlib qt
import mia
import pandas as pd
import numpy as np
import re
import random
import matplotlib.pyplot as plt
from pandas.tools import plotting
In [17]:
batch1 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch1_blobs.csv')
batch2 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv')
synthetics = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/synthetic_blobs.csv')
synthetics.index = synthetics.img_name
hologic = pd.concat([batch1, batch2])
hologic.index = hologic.img_name
hologic.head()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-17-fbe373e01dd1> in <module>()
2 batch2 = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-03-26/batch2_blobs.csv')
3 synthetics = pd.DataFrame.from_csv('/Volumes/Seagate/mmp_data/2015-04-11/phantom-blobs1.csv')
----> 4 synthetics.index = synthetics.img_name
5
6 hologic = pd.concat([batch1, batch2])
/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
1945 return self[name]
1946 raise AttributeError("'%s' object has no attribute '%s'" %
-> 1947 (type(self).__name__, name))
1948
1949 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'img_name'
In [3]:
hologic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/BIRADS.csv'
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, hologic_meta_path)
hologic_meta.head()
Out[3]:
patient_id
side
view
img_name
BIRADS
img_number
img_name
p214-010-60001-cl.png
21401060001
c
l
p214-010-60001-cl.png
3
1
p214-010-60001-cl.png
21401060001
c
l
p214-010-60001-cl.png
3
1
p214-010-60001-cl.png
21401060001
c
l
p214-010-60001-cl.png
3
1
p214-010-60001-cl.png
21401060001
c
l
p214-010-60001-cl.png
3
1
p214-010-60001-cl.png
21401060001
c
l
p214-010-60001-cl.png
3
1
In [4]:
synthetic_meta_path = '/Volumes/Seagate/mmp_data/2015-03-26/synthetic_meta_data_cleaned.csv'
synthetic_meta = mia.analysis.create_synthetic_meta_data(synthetics, synthetic_meta_path)
synthetic_meta.head()
Out[4]:
Vol
CmprTh
SkTh
LigThCrs
LigThFn
#cmprts
#cmprts.1
Dperc
VBD
VBD.1
BIRADS
min_speed
max_speed
min_ratio
max_ratio
phantom_name
img_name
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
In [5]:
synth_class_labels = synthetic_meta['BIRADS']
hologic_class_labels = hologic_meta['BIRADS']
class_labels = pd.concat([hologic_class_labels, synth_class_labels])
#replace BIRADS inspecific BIRADS classes
class_labels.replace('3 or 4', 4, inplace=True)
class_labels.replace(re.compile(r'2 \([a-z]+\)'), 2, inplace=True)
class_labels = class_labels.astype(float)
Compute shape based features from raw blob detections
In [6]:
hologic_blob_features = mia.analysis.features_from_blobs(hologic)
hologic_blob_features.describe()
Out[6]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
count
360.000000
360.000000
360.000000
360
360.000000
360.000000
360.000000
360.000000
360.000000
360.000000
360.000000
360.000000
mean
251.777778
17.679679
20.295388
8
154.272791
243.975000
3.927778
3.875000
42.546985
8.050626
17.575128
66.580556
std
182.143063
3.290097
7.305919
0
40.349384
179.730972
5.394241
2.559256
6.866999
0.392667
3.161404
57.047643
min
26.000000
11.633776
5.269543
8
45.254834
22.000000
0.000000
1.000000
32.678451
8.000000
11.313708
5.000000
25%
113.000000
15.375540
14.601573
8
128.000000
109.000000
1.000000
2.000000
37.506466
8.000000
16.000000
27.000000
50%
203.500000
17.154971
20.158492
8
181.019336
196.000000
3.000000
3.000000
41.412974
8.000000
16.000000
49.000000
75%
328.250000
19.297899
25.607779
8
181.019336
314.000000
4.000000
5.000000
46.069529
8.000000
16.000000
83.000000
max
978.000000
34.201332
45.057882
8
181.019336
958.000000
44.000000
14.000000
79.518497
11.313708
32.000000
337.000000
In [7]:
synthetic_blob_features = mia.analysis.features_from_blobs(synthetics)
synthetic_blob_features.describe()
Out[7]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
count
60.000000
60.000000
60.000000
60
60.000000
60.000000
60.000000
60.000000
60.000000
60.000000
60.000000
60.000000
mean
80.733333
21.982949
24.371230
8
130.588167
74.033333
1.666667
5.033333
35.482570
8.358985
22.714521
22.316667
std
15.154422
2.342589
2.647525
0
26.605668
15.483142
1.445820
2.863959
4.033106
1.004434
4.996060
6.474177
min
40.000000
18.645281
19.316414
8
90.509668
33.000000
0.000000
2.000000
29.205906
8.000000
16.000000
8.000000
25%
72.750000
20.293316
22.697181
8
128.000000
66.000000
1.000000
3.750000
33.215826
8.000000
22.627417
19.000000
50%
84.500000
21.347224
23.683670
8
128.000000
78.000000
1.000000
4.000000
34.529411
8.000000
22.627417
22.000000
75%
92.000000
23.621765
26.496192
8
128.000000
84.250000
2.250000
5.000000
37.044697
8.000000
22.627417
28.000000
max
109.000000
28.319596
31.413381
8
181.019336
103.000000
6.000000
13.000000
53.889814
11.313708
35.313708
33.000000
Select a subset of synthetics to be used. This takes a random sample from each group DPerc*.
In [8]:
syn_feature_meta = mia.analysis.remove_duplicate_index(synthetic_meta)
synthetic_blob_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
random_synthetic_features = synthetic_blob_features
group = synthetic_blob_features.groupby('phantom_name')
def select_random(x):
return x.ix[random.sample(x.index, 1)]
random_synthetic_features = group.apply(select_random)
random_synthetic_features.drop('phantom_name', axis=1, inplace=True)
random_synthetic_features.reset_index(drop=True, level=0, inplace=True)
random_synthetic_features
Out[8]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
img_name
test_Mix_DPerc0_c_2.dcm
71
27.085040
23.624541
8
90.509668
54
5
12
40.916216
11.313708
32.000000
24
test_Mix_DPerc10_c_3.dcm
89
21.499210
22.294951
8
128.000000
82
2
5
32.310669
8.000000
22.627417
29
test_Mix_DPerc20_c_4.dcm
82
20.877556
27.635799
8
181.019336
79
0
3
34.492907
8.000000
22.627417
22
test_Mix_DPerc35_c_9.dcm
53
25.991843
28.518937
8
128.000000
47
1
5
41.964861
11.313708
22.627417
13
test_Mix_DPerc5_c_6.dcm
76
25.187286
24.186133
8
128.000000
69
4
3
37.381472
8.000000
32.000000
20
test_Mix_DPerc75_c_6.dcm
62
22.021813
27.265855
8
128.000000
57
1
4
35.358633
8.828427
16.000000
15
Join datasets. Results will include both hologic and synthetic images
In [9]:
blob_features = pd.concat([hologic_blob_features, random_synthetic_features])
blob_features.describe()
Out[9]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
count
366.000000
366.000000
366.000000
366
366.000000
366.000000
366.000000
366.000000
366.000000
366.000000
366.000000
366.000000
mean
248.833333
17.779638
20.382147
8
153.884519
241.035519
3.898907
3.898907
42.457211
8.070167
17.691061
65.825137
std
182.084376
3.367862
7.282877
0
40.271853
179.707621
5.359222
2.575575
6.860038
0.459665
3.342671
56.883606
min
26.000000
11.633776
5.269543
8
45.254834
22.000000
0.000000
1.000000
32.310669
8.000000
11.313708
5.000000
25%
111.250000
15.385483
14.632588
8
128.000000
105.000000
1.000000
2.000000
37.481327
8.000000
16.000000
27.000000
50%
200.000000
17.227884
20.350684
8
181.019336
192.500000
3.000000
3.000000
41.291876
8.000000
16.000000
48.000000
75%
326.750000
19.356498
25.621973
8
181.019336
310.750000
4.000000
5.000000
45.941928
8.000000
18.485281
82.000000
max
978.000000
34.201332
45.057882
8
181.019336
958.000000
44.000000
14.000000
79.518497
11.313708
32.000000
337.000000
In [20]:
bf = blob_features.copy()
bf = bf.reset_index()
bf.drop('img_name', axis=1, inplace=True)
bf.to_csv('/Users/samuel/Downloads/blobs_features.csv', header=False)
Filter the columns that we want to run with
In [13]:
columns = filter(lambda c: c not in [], blob_features.columns)
selected_features = blob_features[columns]
columns
Out[13]:
['blob_count',
'avg_radius',
'std_radius',
'min_radius',
'max_radius',
'small_radius_count',
'med_radius_count',
'large_radius_count',
'density',
'lower_radius_qt',
'upper_radius_qt',
'upper_dist_count']
Run t-SNE on features to obtain mapping
In [15]:
mapping = mia.analysis.tSNE(selected_features, perplexity=45, learning_rate=400, verbose=2)
[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 1.149708
[t-SNE] Iteration 10: error = 14.6560135, gradient norm = 0.1483312
[t-SNE] Iteration 20: error = 12.6319582, gradient norm = 0.1472542
[t-SNE] Iteration 30: error = 12.3499517, gradient norm = 0.1307045
[t-SNE] Iteration 40: error = 12.1685876, gradient norm = 0.1312051
[t-SNE] Iteration 50: error = 12.4944632, gradient norm = 0.1242800
[t-SNE] Iteration 60: error = 12.2596019, gradient norm = 0.1363835
[t-SNE] Iteration 70: error = 12.2971293, gradient norm = 0.1272094
[t-SNE] Iteration 80: error = 12.5891942, gradient norm = 0.1242036
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 12.455086
[t-SNE] Iteration 90: error = 0.6541182, gradient norm = 0.0209965
[t-SNE] Iteration 100: error = 0.3727205, gradient norm = 0.0083266
[t-SNE] Iteration 110: error = 0.3364563, gradient norm = 0.0030272
[t-SNE] Iteration 120: error = 0.3262779, gradient norm = 0.0014945
[t-SNE] Iteration 130: error = 0.3212489, gradient norm = 0.0007918
[t-SNE] Iteration 140: error = 0.3201583, gradient norm = 0.0003092
[t-SNE] Iteration 150: error = 0.3197391, gradient norm = 0.0002451
[t-SNE] Iteration 160: error = 0.3195229, gradient norm = 0.0002321
[t-SNE] Iteration 170: error = 0.3194043, gradient norm = 0.0002254
[t-SNE] Iteration 180: error = 0.3193372, gradient norm = 0.0002220
[t-SNE] Iteration 190: error = 0.3192983, gradient norm = 0.0002199
[t-SNE] Iteration 200: error = 0.3192755, gradient norm = 0.0002187
[t-SNE] Iteration 210: error = 0.3192620, gradient norm = 0.0002180
[t-SNE] Iteration 220: error = 0.3192539, gradient norm = 0.0002176
[t-SNE] Iteration 230: error = 0.3192492, gradient norm = 0.0002174
[t-SNE] Iteration 240: error = 0.3192463, gradient norm = 0.0002172
[t-SNE] Iteration 250: error = 0.3192446, gradient norm = 0.0002171
[t-SNE] Iteration 256: error difference 0.000000. Finished.
[t-SNE] Error after 256 iterations: 0.319244
In [16]:
labels = mia.analysis.remove_duplicate_index(class_labels)
mapping['BIRADS'] = labels['BIRADS']
hol_map = mapping[:-6]
hol_map.shape
syn_map = mapping[-6:]
syn_map.head()
ax = mia.plotting.plot_scatter_2d(hol_map, [0,1], 'BIRADS')
ax = mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', ax=ax, marker='^', s=50)
plt.show()
In [90]:
left = blob_features[mapping[0] < 6]
right = blob_features[mapping[0] >= 6]
left.describe() - right.describe()
Out[90]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
BIRADS
count
190.000000
190.000000
190.000000
190
190.000000
190.000000
190.000000
190.000000
190.000000
190.000000
190.000000
190.000000
190.000000
mean
180.265615
-5.222050
-7.616788
0
-13.321925
177.449886
2.103826
0.711903
-5.140102
-0.232369
-7.014925
47.131949
-0.917920
std
133.087397
-1.093255
-0.044200
0
13.514981
131.926625
4.002086
0.894572
-2.085017
-0.792425
-1.198303
44.549201
0.175433
min
27.000000
-5.149766
-7.542499
0
-45.254834
27.000000
0.000000
0.000000
-0.940616
0.000000
-8.000000
4.000000
0.000000
25%
70.500000
-4.369145
-7.638793
0
0.000000
68.500000
0.000000
0.000000
-5.343887
0.000000
-6.627417
13.250000
-1.000000
50%
146.500000
-4.656474
-8.864071
0
0.000000
142.000000
1.000000
0.000000
-4.804400
0.000000
-6.627417
35.500000
-1.000000
75%
258.000000
-5.195835
-6.098009
0
0.000000
254.750000
2.000000
1.750000
-3.253729
0.000000
-6.627417
60.500000
-1.000000
max
677.000000
-10.318461
-7.218612
0
0.000000
668.000000
33.000000
2.000000
-14.123230
-2.485281
-9.372583
255.000000
0.000000
In [95]:
s = blob_features[mapping[0] > 15]
s.describe() - blob_features[mapping[0] <= 15].describe()
Out[95]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
BIRADS
count
-354.000000
-354.000000
-354.000000
-354
-354.000000
-354.000000
-354.000000
-354.000000
-354.000000
-354.000000
-354.000000
-354.000000
-354.000000
mean
-184.119444
8.462616
10.389491
0
3.266872
-183.138889
-1.755556
0.775000
9.421449
3.309106
6.632795
-46.755556
0.477778
std
-159.150286
1.404357
2.819064
0
-1.435178
-156.533161
-3.260285
1.379287
6.049804
-0.061661
0.675160
-47.561481
0.139854
min
7.000000
9.045703
16.948537
0
45.254834
7.000000
0.000000
0.000000
5.892578
3.313708
11.313708
2.000000
0.000000
25%
-59.000000
6.921742
8.875299
0
13.254834
-62.250000
0.000000
0.250000
6.773016
3.313708
6.627417
-14.000000
1.000000
50%
-128.500000
9.397639
5.812960
0
0.000000
-134.000000
-1.500000
1.000000
5.550381
3.313708
6.627417
-28.000000
0.000000
75%
-247.250000
10.136183
12.269981
0
0.000000
-238.250000
-1.250000
0.000000
13.896739
3.313708
6.627417
-55.500000
0.750000
max
-884.000000
-2.746227
2.819096
0
0.000000
-867.000000
-38.000000
-2.000000
-8.563845
2.485281
0.000000
-307.000000
0.000000
In [79]:
blob_features['BIRADS'] = labels
mia.plotting.plot_scattermatrix(blob_features[['avg_radius', 'max_radius', 'blob_count', 'std_radius', 'small_radius_count', 'density', 'large_radius_count', 'BIRADS']], 'BIRADS')
In [14]:
mia.analysis.measure_closeness(mapping, labels['BIRADS'])
Out[14]:
3 10.770177
4 10.036130
1 8.987999
2 7.796480
dtype: float64
In [15]:
mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', marker='^', s=50, annotate=True)
plt.show()
In [16]:
syn_feature_meta.loc[random_synthetic_features.index]
Out[16]:
Vol
CmprTh
SkTh
LigThCrs
LigThFn
#cmprts
#cmprts.1
Dperc
VBD
VBD.1
BIRADS
min_speed
max_speed
min_ratio
max_ratio
phantom_name
img_name
test_Mix_DPerc0_c_0.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_1.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_2.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_3.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_4.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_5.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_6.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_7.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_8.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc0_c_9.dcm
436
5
0.5
400
200
333
1000
0
24
21
1
0.5
2
0.5
2
test_Mix_DPerc0_c
test_Mix_DPerc10_c_0.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_1.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_2.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_3.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_4.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_5.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_6.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_7.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_8.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc10_c_9.dcm
436
5
1.5
600
200
333
2000
10
40
33
2 (med)
0.5
2
0.5
2
test_Mix_DPerc10_c
test_Mix_DPerc20_c_0.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_1.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_2.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_3.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_4.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_5.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_6.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_7.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_8.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc20_c_9.dcm
436
5
1.5
600
200
333
2000
20
46
38
2 (hi)
0.5
2
0.5
2
test_Mix_DPerc20_c
test_Mix_DPerc35_c_0.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_1.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_2.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_3.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_4.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_5.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_6.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_7.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_8.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc35_c_9.dcm
436
5
1.5
600
200
333
2000
35
55
47
3
0.5
2
0.5
2
test_Mix_DPerc35_c
test_Mix_DPerc5_c_0.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_1.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_2.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_3.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_4.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_5.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_6.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_7.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_8.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc5_c_9.dcm
436
5
1.5
600
200
333
1000
5
35
27
2 (low)
0.5
2
0.5
2
test_Mix_DPerc5_c
test_Mix_DPerc75_c_0.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_1.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_2.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_3.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_4.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_5.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_6.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_7.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_8.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
test_Mix_DPerc75_c_9.dcm
436
5
1.5
600
200
333
2000
75
67
59
3 or 4
0.5
2
0.5
2
test_Mix_DPerc75_c
In [17]:
mapping.to_csv('/Volumes/Seagate/2015-03-26/mapping-with-both.csv')
Compare synthetics with the hologic dataset by class.
In [18]:
birads_class = labels['BIRADS'] == 1
syn_class = random_synthetic_features[birads_class]
hol_class = hologic_blob_features[birads_class]
syn_class.describe() - hol_class.describe()
/Users/samuel/git/major-project/lib/python2.7/site-packages/pandas/core/frame.py:1808: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
"DataFrame index.", UserWarning)
Out[18]:
blob_count
avg_radius
std_radius
min_radius
max_radius
small_radius_count
med_radius_count
large_radius_count
density
lower_radius_qt
upper_radius_qt
upper_dist_count
count
-46.000000
-46.000000
-46.000000
-46
-46.000000
-46.000000
-46.000000
-46.000000
-46.000000
-46.000000
-46.000000
-46.000000
mean
-256.210714
6.854786
0.445133
0
-73.988703
-260.278571
-0.625000
4.692857
-3.262426
1.325483
12.809197
-54.417857
std
-139.370870
0.281655
-4.794043
0
-38.634888
-136.698561
-3.282583
-0.777543
-2.506746
1.711192
3.280094
-48.560634
min
-31.000000
8.328287
13.598249
0
45.254834
-42.000000
2.000000
6.000000
0.867475
0.000000
11.313708
-2.000000
25%
-187.500000
5.930656
2.900174
0
-90.509668
-189.500000
0.000000
5.250000
-1.220780
0.000000
7.798990
-30.500000
50%
-219.000000
6.637435
-1.409568
0
-90.509668
-224.000000
0.000000
6.000000
-2.646244
0.000000
16.000000
-43.000000
75%
-316.500000
7.947400
-3.140442
0
-90.509668
-321.500000
0.750000
4.000000
-4.419133
3.313708
16.000000
-53.000000
max
-757.000000
3.607515
-8.962121
0
-90.509668
-758.000000
-26.000000
-1.000000
-15.833031
3.313708
12.686292
-246.000000
In [19]:
blob_norm = mia.analysis.normalize_data_frame(blob_features)
blob_norm.columns = blob_features.columns.values
blob_norm['BIRADS'] = labels
columns = filter(lambda c: c not in ['min_radius'], blob_norm.columns)
plotting.parallel_coordinates(blob_norm[columns], 'BIRADS')
plt.show()
Content source: samueljackson92/major-project-data
Similar notebooks: