In [1]:
# %matplotlib inline
import pandas as pd
from pandas.tools import plotting
import mia

In [2]:
# Load the detected blobs for the synthetic mammograms: one row per blob with
# its x/y position, its radius and the name of the image it was found in.
# `read_csv(..., index_col=0, parse_dates=True)` reproduces the defaults of the
# deprecated `DataFrame.from_csv`, which newer pandas releases no longer ship.
raw = pd.read_csv('../results/synthetics2/2015-03-15-results_blobs.csv',
                  index_col=0, parse_dates=True)

# Index the rows by image name and order them by that index so that all blobs
# of one image are contiguous. `sort_index()` is what the old argument-less
# `DataFrame.sort()` did.
raw.index = raw.img_name
raw = raw.sort_index()
raw.head()


Out[2]:
x y radius img_name
img_name
test_Mix_DPerc0_c_0.dcm 1619 569 8.000000 test_Mix_DPerc0_c_0.dcm
test_Mix_DPerc0_c_0.dcm 1654 636 64.000000 test_Mix_DPerc0_c_0.dcm
test_Mix_DPerc0_c_0.dcm 1634 1564 64.000000 test_Mix_DPerc0_c_0.dcm
test_Mix_DPerc0_c_0.dcm 1581 837 90.509668 test_Mix_DPerc0_c_0.dcm
test_Mix_DPerc0_c_0.dcm 1604 730 64.000000 test_Mix_DPerc0_c_0.dcm

In [3]:
# Load the label table for the synthetic images and expose its index as an
# explicit `group_id` column so later cells can filter on it.
# NOTE(review): the recorded run of this cell raised
# "AttributeError: 'module' object has no attribute 'load_synthetic_meta_data'",
# i.e. the loader is not available on `mia.reduction` in that environment.
# Confirm where the function lives; every cell below depends on `labels`.
labels_path = '../synthetic_labels.csv'
labels = mia.reduction.load_synthetic_meta_data(labels_path)
labels['group_id'] = labels.index.values
labels


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-3-d82c596344b1> in <module>()
----> 1 labels = mia.reduction.load_synthetic_meta_data('../synthetic_labels.csv')
      2 labels['group_id'] = labels.index.values
      3 labels

AttributeError: 'module' object has no attribute 'load_synthetic_meta_data'

In [ ]:
import re

# Image names look like "test_Mix_DPerc<group>_c_<n>.dcm"; the captured number
# is the group id used to look the image up in `labels`. The dot before the
# extension is escaped so that it only matches a literal ".".
regex_string = r"test_Mix_DPerc(\d+)_c_\d+\.dcm"
name_regex = re.compile(regex_string)


def _group_id_from_name(name):
    """Return the integer group id embedded in a synthetic image file name.

    Raises ValueError when the name does not follow the expected pattern,
    instead of failing with an AttributeError on a missing match object.
    """
    match = name_regex.match(name)
    if match is None:
        raise ValueError("Unexpected image name: %r" % (name,))
    return int(match.group(1))


# One group id per row of `raw` (i.e. per blob), in row order.
group_ids = [_group_id_from_name(name) for name in raw.index.values]

# Fail early, with the offending ids, if some images have no label row.
unknown_ids = sorted(set(group_ids) - set(labels['group_id']))
if unknown_ids:
    raise KeyError("No label rows for group ids: %s" % (unknown_ids,))

# Pick the label row for every blob in a single indexed lookup rather than
# scanning `labels` once per blob and concatenating many one-row frames.
labels_by_group = labels.set_index('group_id', drop=False)
meta_data = labels_by_group.loc[group_ids]
meta_data.index = raw.index.values
meta_data.head()

# Alternative kept from the original notebook (library helper, not used here):
# meta_data = mia.reduction.create_meta_data_for_synthetic_mammogram(raw, labels)
# meta_data.head()

In [ ]:
# Tag every blob row with the group id of its image, then plot the `radius`
# column with the mia plotting helper.
raw['class'] = meta_data.group_id
# Fixed: the frame was passed as `r`, a name that is never defined in this
# notebook (NameError on a fresh kernel); `raw` is the frame that carries both
# the 'class' and the 'radius' columns.
mia.plotting.plot_risk_classes_single(raw, 'radius')

In [ ]:
# Build one row of blob summary properties per image.
# The per-image results are collected in a list and concatenated once;
# growing the frame with `pd.concat` inside the loop copies all previous rows
# on every iteration.
# NOTE(review): assumes `blob_props` returns a one-row DataFrame per image.
# Otherwise the index assignment below fails, as it did before — confirm.
image_names = []
per_image_props = []
for img_name, frame in raw.groupby('img_name'):
    image_names.append(img_name)
    per_image_props.append(mia.features.blobs.blob_props(frame))

if per_image_props:
    features = pd.concat(per_image_props, ignore_index=True)
else:
    # No images at all: keep the original behaviour of an empty frame.
    features = pd.DataFrame()

# Label rows with the group keys in the exact order the groups were visited,
# rather than relying on `unique()` happening to yield the same order.
features.index = image_names
features.head()

In [ ]:
# %matplotlib qt
%matplotlib inline

meta_data['index'] = meta_data.index
md = meta_data.drop_duplicates(subset=['index'])

features['class'] = md.group_id

In [ ]:
# Plot `avg_radius` for the images whose class (group id) is 5 or 10 only.
in_selected_groups = features['class'].isin([5, 10])
mia.plotting.plot_risk_classes_single(features[in_selected_groups], 'avg_radius')

In [ ]:
# Blob properties used for the rest of the analysis.
selected_columns = [
    'blob_count',
    'avg_radius',
    'std_radius',
    'max_radius',
    'upper_radius_qt',
    'large_radius_count',
    'med_radius_count',
    'density',
]

# Take an explicit copy: a later cell assigns the t-SNE coordinates into
# `selected_features`, and writing into a frame that was sliced out of
# `features` triggers pandas' SettingWithCopyWarning.
selected_features = features[selected_columns].copy()

# The bare `selected_features.describe()` that used to sit here was dropped:
# only the last expression of a cell is displayed, so its result was discarded.

# Optional scatter-matrix view kept from the original notebook (disabled):
# selected_features['class'] = md['class']
# mia.plotting.plot_scattermatrix(selected_features, label_name='class')
selected_features.head()

In [ ]:
# Normalise the feature frame with the mia helper and restore the original
# column labels on the result.
features_norm = mia.analysis.normalize_data_frame(features)
features_norm.columns = features.columns

# `features` already carries a 'class' column (the group id); here it is
# replaced by the BIRADS value of each image for the plots below.
features_norm['class'] = md.BIRADS
features_norm.head()

Radviz of Shape Features

The graph below shows that the features are most dependent on:

  • max_radius
  • s.d. of the radius
  • avg_radius

Other things of note are:

  • the upper quartile of the radius has a far smaller pull on the data than it does for the real mammograms.
  • average radius and blob count play a larger role

In [ ]:
%matplotlib qt
columns = [c for c in features_norm.columns if c not in []]
plotting.parallel_coordinates(features_norm[columns], 'class')

In [ ]:
%matplotlib qt
mapping = mia.analysis.tSNE(selected_features, learning_rate=100, perplexity=10,
                            early_exaggeration=5.0, verbose=2)
mapping['class'] = md.group_id
mia.plotting.plot_scatter_2d(mapping, [0,1], labels='class')

In [ ]:
# Append the two embedding columns (0 and 1) from `mapping` to the features.
# NOTE: this modifies `selected_features` in place, so re-running the t-SNE
# cell afterwards would feed the embedding columns back in as inputs.
embedding_columns = [0, 1]
selected_features[embedding_columns] = mapping[embedding_columns]

# Split the BIRADS 2 images into two clusters at a value of 10 on embedding
# column 1.
is_birads_2 = md['BIRADS'] == 2
class_2 = selected_features[is_birads_2]
second_axis = class_2[1]
upper_cluster = class_2[second_axis > 10]
lower_cluster = class_2[second_axis <= 10]

In [ ]:
# Difference between the summary statistics of the two BIRADS 2 clusters
# (upper minus lower), statistic by statistic and column by column.
upper_summary = upper_cluster.describe()
lower_summary = lower_cluster.describe()
upper_summary - lower_summary

In [ ]:
# Pull out the feature row of one specific image for inspection.
point_name = 'test_Mix_DPerc75_c_6.dcm'
point = selected_features[selected_features.index.values == point_name]

# NOTE(review): the variable is called `class_3` but the filter selects
# BIRADS == 4 — confirm which of the two is intended.
class_3 = selected_features[md['BIRADS'] == 4]
class_3.describe()

In [ ]:
# Display the feature row(s) selected for image 'test_Mix_DPerc75_c_6.dcm'.
point

In [ ]:
# Preview the selected features, which at this point also carry the embedding
# columns 0 and 1 added from `mapping`.
selected_features.head()