In [1]:
%matplotlib qt
import pandas as pd
import numpy as np
import mia

In [3]:
b1 = pd.DataFrame.from_csv("/Volumes/Seagate/mmp_data/2015-03-29/2015-03-29-real-blobs1.csv")
b2 = pd.DataFrame.from_csv("/Volumes/Seagate/mmp_data/2015-03-29/2015-03-29-real-blobs2.csv")
hologic = pd.concat([b1, b2])
hologic.head()


Out[3]:
x y radius count mean std min 25% 50% 75% max skew kurtosis
p214-010-60001-cl.png 1842 546 128.000000 65536 0.562008 0.152742 0.149020 0.447059 0.564706 0.670588 0.956863 0.092693 -0.556121
p214-010-60001-cl.png 1482 424 128.000000 65536 0.555893 0.151427 0.066667 0.466667 0.580392 0.662745 0.937255 -0.564972 -0.210093
p214-010-60001-cl.png 1355 386 128.000000 65536 0.537767 0.187392 0.066667 0.435294 0.572549 0.678431 0.937255 -0.692950 -0.311739
p214-010-60001-cl.png 2072 658 45.254834 8100 0.596765 0.150992 0.082353 0.521569 0.631373 0.701961 0.858824 -1.038894 0.729220
p214-010-60001-cl.png 1955 737 45.254834 8100 0.543748 0.176320 0.160784 0.396078 0.533333 0.690196 0.909804 0.096619 -1.081598

In [4]:
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, "/Volumes/Seagate/mmp_data/meta_data/BIRADS.csv")
hologic_meta.head()


Out[4]:
patient_id side view img_name BIRADS img_number
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1
p214-010-60001-cl.png 21401060001 c l p214-010-60001-cl.png 3 1

In [5]:
hologic_intensity = hologic[hologic.columns[3:]]
hologic.head()


Out[5]:
x y radius count mean std min 25% 50% 75% max skew kurtosis
p214-010-60001-cl.png 1842 546 128.000000 65536 0.562008 0.152742 0.149020 0.447059 0.564706 0.670588 0.956863 0.092693 -0.556121
p214-010-60001-cl.png 1482 424 128.000000 65536 0.555893 0.151427 0.066667 0.466667 0.580392 0.662745 0.937255 -0.564972 -0.210093
p214-010-60001-cl.png 1355 386 128.000000 65536 0.537767 0.187392 0.066667 0.435294 0.572549 0.678431 0.937255 -0.692950 -0.311739
p214-010-60001-cl.png 2072 658 45.254834 8100 0.596765 0.150992 0.082353 0.521569 0.631373 0.701961 0.858824 -1.038894 0.729220
p214-010-60001-cl.png 1955 737 45.254834 8100 0.543748 0.176320 0.160784 0.396078 0.533333 0.690196 0.909804 0.096619 -1.081598

Group intensity by just the image

Taking the mean of all intensity features, across all scales.


In [6]:
group = hologic_intensity.groupby(hologic_meta.index)
features = group.apply(lambda x: x.mean())
features.head()


Out[6]:
count mean std min 25% 50% 75% max skew kurtosis
p214-010-60001-cl.png 6301.454545 0.565157 0.113734 0.243405 0.494831 0.577718 0.649242 0.803832 -0.416347 -0.120056
p214-010-60001-cr.png 2347.259259 0.581199 0.095119 0.299976 0.521980 0.590753 0.649879 0.785815 -0.430101 0.177652
p214-010-60001-ml.png 3254.320000 0.564710 0.092151 0.292196 0.505412 0.570647 0.631696 0.780902 -0.326400 -0.012157
p214-010-60001-mr.png 2130.638298 0.538615 0.096085 0.271367 0.476505 0.545418 0.606911 0.759282 -0.291700 0.021185
p214-010-60005-cl.png 3766.416667 0.569159 0.080461 0.309600 0.519700 0.577083 0.627237 0.745425 -0.482015 0.229172

In [7]:
mapping = mia.analysis.tSNE(features, verbose=2, learning_rate=300, perplexity=30)


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 360 / 360
[t-SNE] Mean sigma: 0.914008
[t-SNE] Iteration 10: error = 15.9791306, gradient norm = 0.1726786
[t-SNE] Iteration 20: error = 12.8774800, gradient norm = 0.1636577
[t-SNE] Iteration 30: error = 12.6499411, gradient norm = 0.1538650
[t-SNE] Iteration 40: error = 12.8271935, gradient norm = 0.1489896
[t-SNE] Iteration 50: error = 13.1328890, gradient norm = 0.1379995
[t-SNE] Iteration 60: error = 13.4444826, gradient norm = 0.1573096
[t-SNE] Iteration 70: error = 13.0325209, gradient norm = 0.1509758
[t-SNE] Iteration 80: error = 13.1104162, gradient norm = 0.1374574
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 13.216586
[t-SNE] Iteration 90: error = 0.8800121, gradient norm = 0.0234803
[t-SNE] Iteration 100: error = 0.6197095, gradient norm = 0.0080950
[t-SNE] Iteration 110: error = 0.5870259, gradient norm = 0.0027154
[t-SNE] Iteration 120: error = 0.5782899, gradient norm = 0.0010706
[t-SNE] Iteration 130: error = 0.5749701, gradient norm = 0.0006860
[t-SNE] Iteration 140: error = 0.5733588, gradient norm = 0.0005958
[t-SNE] Iteration 150: error = 0.5724412, gradient norm = 0.0005811
[t-SNE] Iteration 160: error = 0.5718658, gradient norm = 0.0005860
[t-SNE] Iteration 170: error = 0.5715125, gradient norm = 0.0005724
[t-SNE] Iteration 180: error = 0.5713154, gradient norm = 0.0005631
[t-SNE] Iteration 190: error = 0.5712017, gradient norm = 0.0005585
[t-SNE] Iteration 200: error = 0.5711347, gradient norm = 0.0005563
[t-SNE] Iteration 210: error = 0.5710949, gradient norm = 0.0005551
[t-SNE] Iteration 220: error = 0.5710712, gradient norm = 0.0005544
[t-SNE] Iteration 230: error = 0.5710571, gradient norm = 0.0005540
[t-SNE] Iteration 240: error = 0.5710486, gradient norm = 0.0005538
[t-SNE] Iteration 250: error = 0.5710435, gradient norm = 0.0005537
[t-SNE] Iteration 260: error = 0.5710405, gradient norm = 0.0005536
[t-SNE] Iteration 270: error = 0.5710387, gradient norm = 0.0005535
[t-SNE] Iteration 277: error difference 0.000000. Finished.
[t-SNE] Error after 277 iterations: 0.571038

In [7]:
mia.plotting.plot_scatter_2d(mapping, [0,1], hologic_meta.drop_duplicates().BIRADS)


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x118f8c110>

Grouping by the scale


In [18]:
group = hologic_intensity.groupby([hologic.radius, hologic.index])
img_by_scale = group.apply(lambda x: x.mean())
img_by_scale


Out[18]:
count mean std min 25% 50% 75% max skew kurtosis
radius
8.000000 p214-010-60001-cl.png 256 0.541175 0.099843 0.299020 0.471160 0.546895 0.619036 0.738562 -0.229121 -0.497344
p214-010-60001-cr.png 256 0.577706 0.080117 0.362465 0.527731 0.586415 0.637185 0.737955 -0.386385 -0.152674
p214-010-60001-ml.png 256 0.567857 0.071945 0.358456 0.523070 0.573591 0.619730 0.725735 -0.383011 0.072440
p214-010-60001-mr.png 256 0.533514 0.092172 0.288391 0.477575 0.544102 0.598693 0.722565 -0.403191 0.154246
p214-010-60005-cl.png 256 0.562343 0.067850 0.362092 0.520752 0.568758 0.610850 0.698824 -0.454204 0.201144
p214-010-60005-cr.png 256 0.607468 0.067159 0.405333 0.565255 0.616235 0.656078 0.749647 -0.492056 0.086902
p214-010-60005-ml.png 256 0.560256 0.071800 0.343863 0.517901 0.567102 0.611747 0.710167 -0.498128 0.530424
p214-010-60005-mr.png 256 0.520246 0.084436 0.284477 0.471262 0.527247 0.578309 0.693709 -0.396797 0.172751
p214-010-60008-cl.png 256 0.443514 0.056570 0.291128 0.407989 0.445040 0.481285 0.587886 -0.063950 0.006274
p214-010-60008-cr.png 256 0.424105 0.061033 0.262950 0.387024 0.426528 0.465462 0.574471 -0.103632 0.032360
p214-010-60008-ml.png 256 0.421016 0.057272 0.272885 0.386204 0.422823 0.459212 0.560360 -0.082283 0.000739
p214-010-60008-mr.png 256 0.430683 0.064713 0.263161 0.388417 0.431931 0.475237 0.592105 -0.095415 -0.025644
p214-010-60012-cl.png 256 0.415088 0.069021 0.237393 0.368696 0.415755 0.462766 0.593206 -0.062281 -0.136841
p214-010-60012-cr.png 256 0.537504 0.080374 0.318697 0.486730 0.543592 0.596691 0.707143 -0.365248 0.073103
p214-010-60012-ml.png 256 0.492951 0.075240 0.296379 0.442657 0.496379 0.546819 0.671289 -0.178246 -0.247849
p214-010-60012-mr.png 256 0.532371 0.073184 0.330710 0.482850 0.535819 0.585701 0.703424 -0.242034 -0.162680
p214-010-60013-cl.png 256 0.527141 0.069084 0.337212 0.480158 0.530946 0.577323 0.685934 -0.233861 -0.215146
p214-010-60013-cr.png 256 0.532728 0.076566 0.317576 0.494955 0.541462 0.583725 0.690980 -0.347531 0.089974
p214-010-60013-ml.png 256 0.475882 0.072580 0.278489 0.427759 0.479431 0.528023 0.645809 -0.225998 -0.194242
p214-010-60013-mr.png 256 0.502266 0.082885 0.274875 0.448518 0.511917 0.562798 0.672139 -0.427612 0.060877
p214-010-60020-cl.png 256 0.393171 0.047735 0.268403 0.360511 0.393814 0.426528 0.515996 -0.042920 -0.181987
p214-010-60020-cr.png 256 0.390073 0.049312 0.261187 0.355996 0.390244 0.424375 0.520763 -0.006968 -0.168276
p214-010-60020-ml.png 256 0.391385 0.036081 0.296680 0.366605 0.392460 0.416169 0.483252 -0.036200 -0.142244
p214-010-60020-mr.png 256 0.390972 0.040667 0.284742 0.364382 0.392276 0.419192 0.491717 -0.084466 -0.127795
p214-010-60026-cl.png 256 0.526447 0.070978 0.356401 0.480565 0.539331 0.579123 0.661130 -0.318147 -0.325279
p214-010-60026-cr.png 256 0.514322 0.058829 0.357108 0.482721 0.522181 0.555331 0.641176 -0.355155 -0.025118
p214-010-60026-ml.png 256 0.538515 0.060301 0.352381 0.501401 0.543697 0.581478 0.672129 -0.438997 0.168541
p214-010-60026-mr.png 256 0.567348 0.056484 0.401032 0.530857 0.573787 0.607430 0.693292 -0.413658 0.002706
p214-010-60029-cl.png 256 0.514629 0.058869 0.340946 0.479700 0.518627 0.555421 0.648097 -0.280829 0.028592
p214-010-60029-cr.png 256 0.538468 0.049113 0.387364 0.511275 0.542593 0.571351 0.646405 -0.488463 0.558199
... ... ... ... ... ... ... ... ... ... ... ...
181.019336 p214-010-61062-cr.png 131044 0.423493 0.053798 0.225770 0.386555 0.421849 0.457143 0.686275 0.229873 0.166671
p214-010-61062-ml.png 131044 0.467729 0.061150 0.245752 0.426144 0.465359 0.508497 0.724183 0.166462 -0.072041
p214-010-61062-mr.png 131044 0.454281 0.063189 0.210980 0.412549 0.453333 0.495686 0.752157 0.025026 0.197683
p214-010-61236-cl.png 131044 0.489532 0.068783 0.235294 0.443137 0.490196 0.537255 0.784314 -0.000317 -0.157551
p214-010-61236-cr.png 131044 0.451307 0.079533 0.166667 0.404575 0.452288 0.501961 0.753595 -0.316477 1.127885
p214-010-61236-ml.png 131044 0.477158 0.069419 0.224510 0.428431 0.475490 0.523529 0.768627 0.151063 -0.111429
p214-010-61236-mr.png 131044 0.505840 0.067155 0.211765 0.458824 0.504314 0.550588 0.778039 0.095942 0.031065
p214-010-61445-cl.png 131044 0.424306 0.065117 0.188235 0.380392 0.423529 0.466667 0.694118 0.060990 0.071049
p214-010-61445-cr.png 131044 0.439780 0.061065 0.227451 0.396078 0.435294 0.478431 0.709804 0.226033 -0.048578
p214-010-61445-ml.png 131044 0.436651 0.051564 0.250980 0.401961 0.433333 0.468627 0.668627 0.273889 0.107146
p214-010-61626-cl.png 131044 0.368310 0.060872 0.172549 0.325490 0.360784 0.403922 0.669281 0.540429 0.409959
p214-010-61626-cr.png 131044 0.362193 0.057352 0.173529 0.323529 0.357843 0.398039 0.658824 0.431218 0.329665
p214-010-61626-ml.png 131044 0.428251 0.058135 0.117647 0.388235 0.425490 0.466667 0.686275 0.166555 0.771183
p214-010-61626-mr.png 131044 0.434014 0.065696 0.141176 0.390850 0.431373 0.474510 0.747712 -0.130535 2.799299
p214-010-61823-cl.png 131044 0.449410 0.078618 0.169748 0.395518 0.447059 0.500840 0.775350 0.144123 0.051804
p214-010-61823-cr.png 131044 0.464370 0.095706 0.135294 0.421569 0.471569 0.522549 0.786275 -0.401553 0.810778
p214-010-61823-ml.png 131044 0.468087 0.059323 0.250980 0.427451 0.468627 0.507843 0.719608 0.042925 -0.096569
p214-010-61823-mr.png 131044 0.444546 0.073938 0.163922 0.395294 0.441569 0.491765 0.776471 -0.098477 1.704905
p214-010-62144-cl.png 131044 0.468940 0.068753 0.221078 0.420588 0.467157 0.513725 0.767157 0.196672 0.015201
p214-010-62144-cr.png 131044 0.436896 0.065930 0.203361 0.389916 0.433053 0.480112 0.729412 0.241605 -0.063924
p214-010-62144-ml.png 131044 0.469036 0.061524 0.194510 0.426667 0.465882 0.507451 0.779608 0.205827 0.634529
p214-010-62144-mr.png 131044 0.442979 0.081389 0.176471 0.406078 0.449412 0.494118 0.771765 -0.185726 0.457939
p214-010-62326-cl.png 131044 0.424817 0.082457 0.177451 0.365686 0.417647 0.476471 0.799020 0.408020 -0.019690
p214-010-62326-cr.png 131044 0.479123 0.087026 0.194958 0.419048 0.475070 0.535574 0.826891 0.237127 0.041858
p214-010-62326-ml.png 131044 0.458899 0.088714 0.104575 0.396078 0.452288 0.516340 0.810458 0.293467 -0.080665
p214-010-62326-mr.png 131044 0.525966 0.091743 0.223529 0.461438 0.518954 0.586928 0.861438 0.250958 -0.120667
p214-010-62465-cl.png 131044 0.470173 0.062720 0.235294 0.429412 0.470098 0.512745 0.726471 -0.001126 0.032793
p214-010-62465-cr.png 131044 0.469661 0.074176 0.167507 0.425770 0.468908 0.514846 0.776471 -0.373294 2.825870
p214-010-62465-ml.png 131044 0.466810 0.059551 0.221176 0.428235 0.465882 0.504706 0.729804 -0.192587 1.685467
p214-010-62465-mr.png 131044 0.443260 0.064651 0.194510 0.401569 0.442353 0.484706 0.741176 -0.143590 1.330631

3208 rows × 10 columns


In [19]:
scale_groups = img_by_scale.groupby(level=0)
intensity_by_scale = pd.DataFrame(index=img_by_scale.index.levels[1])

for i,x in scale_groups:
    x = x.reset_index(level=0)
    intensity_by_scale = intensity_by_scale.join(x, rsuffix='_%f' % i)

intensity_by_scale.fillna(0, inplace=True)
intensity_by_scale.head()


Out[19]:
radius count mean std min 25% 50% 75% max skew ... count_181.019336 mean_181.019336 std_181.019336 min_181.019336 25%_181.019336 50%_181.019336 75%_181.019336 max_181.019336 skew_181.019336 kurtosis_181.019336
p214-010-60001-cl.png 8 256 0.541175 0.099843 0.299020 0.471160 0.546895 0.619036 0.738562 -0.229121 ... 0 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
p214-010-60001-cr.png 8 256 0.577706 0.080117 0.362465 0.527731 0.586415 0.637185 0.737955 -0.386385 ... 0 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
p214-010-60001-ml.png 8 256 0.567857 0.071945 0.358456 0.523070 0.573591 0.619730 0.725735 -0.383011 ... 0 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
p214-010-60001-mr.png 8 256 0.533514 0.092172 0.288391 0.477575 0.544102 0.598693 0.722565 -0.403191 ... 0 0.0000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
p214-010-60005-cl.png 8 256 0.562343 0.067850 0.362092 0.520752 0.568758 0.610850 0.698824 -0.454204 ... 131044 0.6213 0.126696 0.086275 0.545098 0.639216 0.717647 0.898039 -0.848023 0.758542

5 rows × 110 columns


In [20]:
scale_intensity_mapping = mia.analysis.tSNE(intensity_by_scale, verbose=2, early_exaggeration=4.0, learning_rate=300, n_components=2)


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 360 / 360
[t-SNE] Mean sigma: 3.756919
[t-SNE] Iteration 10: error = 16.4016013, gradient norm = 0.1688268
[t-SNE] Iteration 20: error = 14.7862626, gradient norm = 0.1617319
[t-SNE] Iteration 30: error = 14.7653099, gradient norm = 0.1496908
[t-SNE] Iteration 40: error = 14.9401848, gradient norm = 0.1421683
[t-SNE] Iteration 50: error = 14.5767463, gradient norm = 0.1430706
[t-SNE] Iteration 60: error = 14.3711297, gradient norm = 0.1565211
[t-SNE] Iteration 70: error = 14.3838250, gradient norm = 0.1528340
[t-SNE] Iteration 80: error = 14.8325966, gradient norm = 0.1385605
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 15.138149
[t-SNE] Iteration 90: error = 1.1324730, gradient norm = 0.0276323
[t-SNE] Iteration 100: error = 0.6782669, gradient norm = 0.0121122
[t-SNE] Iteration 110: error = 0.6024729, gradient norm = 0.0040551
[t-SNE] Iteration 120: error = 0.5854538, gradient norm = 0.0017706
[t-SNE] Iteration 130: error = 0.5761196, gradient norm = 0.0011355
[t-SNE] Iteration 140: error = 0.5706134, gradient norm = 0.0010904
[t-SNE] Iteration 150: error = 0.5666443, gradient norm = 0.0011026
[t-SNE] Iteration 160: error = 0.5638926, gradient norm = 0.0008925
[t-SNE] Iteration 170: error = 0.5627418, gradient norm = 0.0007677
[t-SNE] Iteration 180: error = 0.5622281, gradient norm = 0.0007349
[t-SNE] Iteration 190: error = 0.5619537, gradient norm = 0.0007282
[t-SNE] Iteration 200: error = 0.5617920, gradient norm = 0.0007300
[t-SNE] Iteration 210: error = 0.5616925, gradient norm = 0.0007347
[t-SNE] Iteration 220: error = 0.5616306, gradient norm = 0.0007395
[t-SNE] Iteration 230: error = 0.5615924, gradient norm = 0.0007432
[t-SNE] Iteration 240: error = 0.5615688, gradient norm = 0.0007457
[t-SNE] Iteration 250: error = 0.5615545, gradient norm = 0.0007474
[t-SNE] Iteration 260: error = 0.5615459, gradient norm = 0.0007485
[t-SNE] Iteration 270: error = 0.5615406, gradient norm = 0.0007492
[t-SNE] Iteration 280: error = 0.5615375, gradient norm = 0.0007496
[t-SNE] Iteration 290: error = 0.5615356, gradient norm = 0.0007498
[t-SNE] Iteration 298: error difference 0.000000. Finished.
[t-SNE] Error after 298 iterations: 0.561535

In [30]:
mia.plotting.plot_scatter_2d(scale_intensity_mapping, [0,1], hologic_meta.drop_duplicates().BIRADS)


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x1210f9e90>

In [29]:
scale_intensity_mapping.to_csv('/Volumes/Seagate/mmp_data/2015-03-29/2015-03-29-real-blobs-mapping.csv')

In [35]:
left_cluster = intensity_by_scale[scale_intensity_mapping[0] < 0]
right_cluster = intensity_by_scale[scale_intensity_mapping[1] >= 0]

left_cluster.describe() - right_cluster.describe()


Out[35]:
radius count mean std min 25% 50% 75% max skew ... count_181.019336 mean_181.019336 std_181.019336 min_181.019336 25%_181.019336 50%_181.019336 75%_181.019336 max_181.019336 skew_181.019336 kurtosis_181.019336
count 10 10 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 ... 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
mean 0 0 -0.041455 -0.009067 -0.015293 -0.035778 -0.043643 -0.048935 -0.051689 0.118502 ... 49676.810458 0.156187 0.020916 0.079722 0.142632 0.154198 0.167900 0.272427 0.089696 0.332090
std 0 0 -0.018827 -0.005417 -0.011185 -0.017092 -0.019940 -0.022112 -0.022432 -0.036502 ... 3661.199205 -0.010322 -0.011943 0.032754 -0.000943 -0.013129 -0.022539 -0.007514 0.120809 0.563803
min 0 0 -0.023204 0.003641 0.000000 -0.018029 -0.023208 -0.027822 -0.031980 0.208785 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -0.228587 0.318250
25% 0 0 -0.023505 -0.005327 -0.004373 -0.015647 -0.022595 -0.024271 -0.039922 0.128846 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -0.006782 -0.022706
50% 0 0 -0.038009 -0.007710 -0.012497 -0.034340 -0.037971 -0.043126 -0.040652 0.118385 ... 131044.000000 0.425070 0.062707 0.135784 0.370588 0.424020 0.474020 0.719468 0.000000 0.000000
75% 0 0 -0.053795 -0.013147 -0.025892 -0.045028 -0.057560 -0.065187 -0.072254 0.091100 ... 0.000000 0.019604 0.003122 0.140294 0.049020 0.029575 0.014216 0.060131 0.190089 0.197829
max 0 0 -0.114578 -0.044181 -0.039558 -0.107378 -0.122456 -0.125365 -0.121307 0.014102 ... 0.000000 -0.087184 -0.044180 0.060784 -0.066667 -0.096078 -0.113725 -0.003922 0.660110 1.925030

8 rows × 110 columns