In [1]:
%matplotlib inline
import pandas as pd
from pandas.tools import plotting
import mia

In [2]:
# Load the per-blob detection results (one row per detected blob).
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv(..., index_col=0)` reads the same file with the first column as the
# index. The date parsing `from_csv` applied to that index is not needed
# because the index is replaced on the next line.
raw = pd.read_csv('../results/2015-03-15-results-blobs.csv', index_col=0)
# Index rows by image name; `image_name` also stays available as a column.
raw.index = raw.image_name
raw.head()


Out[2]:
x y radius image_name patient_id view side class
image_name
p214-010-60001-cl.png 1639 536 128.000000 p214-010-60001-cl.png 21401060001 c l 3
p214-010-60001-cl.png 1335 328 128.000000 p214-010-60001-cl.png 21401060001 c l 3
p214-010-60001-cl.png 1867 636 64.000000 p214-010-60001-cl.png 21401060001 c l 3
p214-010-60001-cl.png 2072 648 45.254834 p214-010-60001-cl.png 21401060001 c l 3
p214-010-60001-cl.png 2016 839 45.254834 p214-010-60001-cl.png 21401060001 c l 3

In [3]:
# Reduce the per-blob table to a table of summary features using mia's
# feature_statistics helper.
# NOTE(review): presumably one row per image (the describe() output reports a
# count of 360) -- confirm against mia.reduction.feature_statistics.
features = mia.reduction.feature_statistics(raw)
# Show summary statistics for every feature column.
features.describe()


Out[3]:
blob_count avg_radius std_radius min_radius max_radius small_radius_count med_radius_count large_radius_count density lower_radius_qt upper_radius_qt upper_dist_count patient_id class
count 360.000000 360.000000 360.000000 360 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000 360.000000 3.600000e+02 360.000000
mean 156.852778 18.693714 21.523458 8 153.189790 151.250000 2.725000 2.877778 60.871859 8.197902 18.691750 41.675000 2.140106e+10 2.533333
std 99.265677 4.080252 7.966148 0 39.602474 98.051511 3.780088 1.566201 13.039158 0.764154 4.394923 32.620762 5.295273e+02 0.958178
min 26.000000 12.256703 6.094591 8 45.254834 23.000000 0.000000 1.000000 42.650978 8.000000 11.313708 5.000000 2.140106e+10 1.000000
25% 82.000000 15.775176 15.420732 8 128.000000 77.750000 1.000000 2.000000 51.612142 8.000000 16.000000 20.000000 2.140106e+10 2.000000
50% 132.500000 17.922227 21.204195 8 181.019336 127.500000 2.000000 2.000000 57.892584 8.000000 16.000000 31.000000 2.140106e+10 3.000000
75% 200.250000 20.493649 26.711628 8 181.019336 190.500000 3.000000 4.000000 66.955485 8.000000 22.627417 52.000000 2.140106e+10 3.000000
max 631.000000 37.188077 51.391117 8 181.019336 628.000000 34.000000 8.000000 111.438717 11.313708 45.254834 241.000000 2.140106e+10 4.000000

In [4]:
# Keep only the blobs whose radius is strictly greater than 30, then hand the
# filtered rows to mia's risk-class plot for the 'radius' column.
r = raw.loc[raw['radius'].gt(30)]
mia.plotting.plot_risk_classes(r, 'radius')



In [16]:
# Plot the 'upper_dist_count' feature with mia's risk-class plot.
# NOTE(review): presumably grouped by the 'class' column of `features` -- confirm
# against mia.plotting.plot_risk_classes.
mia.plotting.plot_risk_classes(features, 'upper_dist_count')



In [30]:
# Subset of feature columns to work with; shown with summary statistics below.
selected_columns = [
    'blob_count',
    'avg_radius',
    'std_radius',
    'upper_radius_qt',
]
selected_features = features.loc[:, selected_columns]
selected_features.describe()


Out[30]:
blob_count avg_radius std_radius upper_radius_qt
count 360.000000 360.000000 360.000000 360.000000
mean 156.852778 18.693714 21.523458 18.691750
std 99.265677 4.080252 7.966148 4.394923
min 26.000000 12.256703 6.094591 11.313708
25% 82.000000 15.775176 15.420732 16.000000
50% 132.500000 17.922227 21.204195 16.000000
75% 200.250000 20.493649 26.711628 22.627417
max 631.000000 37.188077 51.391117 45.254834

In [37]:
# Switch matplotlib to the Qt GUI backend for the plot produced in this cell.
# NOTE(review): this changes the backend selected with `%matplotlib inline` at
# the top of the notebook and needs a Qt-capable environment to run.
%matplotlib qt
# Embed the selected features into 3 components with mia's t-SNE wrapper.
# NOTE(review): no random seed is passed here; if the wrapper is stochastic the
# embedding will differ between runs -- confirm whether mia.analysis.tSNE
# accepts a seed argument.
mapping = mia.analysis.tSNE(selected_features, n_components=3, learning_rate=200, perplexity=35,
                            early_exaggeration=4.0, verbose=2)
# Attach the class label of each row so it can be used to label the points.
# NOTE(review): this assignment aligns on index, so it assumes `mapping` keeps
# the same index as `features` -- confirm.
mapping['class'] = features['class']
# Alternative 2D view over the first two components (left disabled).
# mia.plotting.plot_scatter_2d(mapping, [0,1], labels='class')
# 3D scatter over components 0, 1 and 2, labelled by class.
mia.plotting.plot_scatter_3d(mapping, [0,1,2], labels=mapping['class'])


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 360 / 360
[t-SNE] Mean sigma: 0.488665
[t-SNE] Iteration 10: error = 16.3209470, gradient norm = 0.1531477
[t-SNE] Iteration 20: error = 12.1055069, gradient norm = 0.1728686
[t-SNE] Iteration 30: error = 11.4114051, gradient norm = 0.1705614
[t-SNE] Iteration 40: error = 11.6730105, gradient norm = 0.1540901
[t-SNE] Iteration 50: error = 11.7546230, gradient norm = 0.1419830
[t-SNE] Iteration 60: error = 12.2627918, gradient norm = 0.1569128
[t-SNE] Iteration 70: error = 11.2874802, gradient norm = 0.1600637
[t-SNE] Iteration 80: error = 11.4941722, gradient norm = 0.1550822
[t-SNE] Iteration 83: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 83 iterations with early exaggeration: 11.176040
[t-SNE] Iteration 90: error = 0.5433609, gradient norm = 0.0200394
[t-SNE] Iteration 100: error = 0.3091525, gradient norm = 0.0080954
[t-SNE] Iteration 110: error = 0.2608823, gradient norm = 0.0041306
[t-SNE] Iteration 120: error = 0.2478559, gradient norm = 0.0025223
[t-SNE] Iteration 130: error = 0.2406393, gradient norm = 0.0024766
[t-SNE] Iteration 140: error = 0.2368743, gradient norm = 0.0010233
[t-SNE] Iteration 150: error = 0.2378585, gradient norm = 0.0007322
[t-SNE] Iteration 160: error = 0.2381354, gradient norm = 0.0006945
[t-SNE] Iteration 170: did not make any progress during the last 30 episodes. Finished.
[t-SNE] Error after 170 iterations: 0.238122