In [1]:
# %matplotlib inline
import pandas as pd
from pandas.tools import plotting
import mia
In [2]:
# Load the blob results CSV. The first CSV column is read as a throwaway index
# that is replaced on the next line. `pd.read_csv(..., index_col=0)` stands in
# for the removed `pd.DataFrame.from_csv`; the date parsing that `from_csv`
# applied to that index is dropped because the index is overwritten immediately.
raw = pd.read_csv('../results/synthetics2/2015-03-15-results_blobs.csv', index_col=0)
# Index the rows by image file name (the `img_name` column itself is kept).
raw.index = raw.img_name
# `DataFrame.sort()` without arguments ordered rows by index label;
# `sort_index()` is the equivalent call and also exists in current pandas.
raw = raw.sort_index()
raw.head()
Out[2]:
In [3]:
# Load the label table for the synthetic images through the project helper,
# then mirror its index values into an ordinary `group_id` column so that rows
# can be selected with a boolean filter on that column.
labels_csv = '../synthetic_labels.csv'
labels = mia.reduction.load_synthetic_meta_data(labels_csv)
labels['group_id'] = labels.index.values
labels
In [ ]:
import re

# Image names are expected to look like "test_Mix_DPerc<digits>_c_<digits>.dcm";
# the first run of digits is captured and compared against labels['group_id'].
# The dot before "dcm" is escaped so that it only matches a literal ".".
regex_string = r"test_Mix_DPerc(\d+)_c_\d+\.dcm"
name_regex = re.compile(regex_string)


def _labels_for_image(name):
    """Return the single row of `labels` whose group_id is encoded in `name`.

    Raises ValueError if `name` does not match `name_regex`, or if `labels`
    does not hold exactly one row for the extracted group id (anything else
    would leave `meta_data` misaligned with `raw`).
    """
    match = name_regex.match(name)
    if match is None:
        raise ValueError("image name %r does not match pattern %r"
                         % (name, regex_string))
    gid = int(match.group(1))
    meta_info = labels[labels['group_id'] == gid]
    if len(meta_info) != 1:
        raise ValueError("expected exactly one label row for group_id %d "
                         "(image %r), found %d" % (gid, name, len(meta_info)))
    return meta_info


# One label row per row of `raw`, in the same order, so that the index of
# `raw` can be copied over positionally afterwards.
m = [_labels_for_image(name) for name in raw.index.values]
meta_data = pd.concat(m, ignore_index=True)
meta_data.index = raw.index.values
meta_data.head()
# Alternative that was left disabled in the original cell:
# meta_data = mia.reduction.create_meta_data_for_synthetic_mammogram(raw, labels)
# meta_data.head()
In [ ]:
# Attach each row's group id as its class label. `meta_data` was given the
# index values of `raw`, in the same order, when it was built.
raw['class'] = meta_data.group_id
# The original call passed `r`, a name that is never assigned in this notebook
# (NameError on a fresh kernel). `raw` is the frame that received the 'class'
# column above; presumably it also holds the 'radius' column -- confirm.
mia.plotting.plot_risk_classes_single(raw, 'radius')
In [ ]:
# Build one block of blob summary features per image.
# Group on the column *values*: `raw` carries 'img_name' both as its index name
# and as a column, which newer pandas rejects as ambiguous when grouping by
# label. The groups themselves are the same either way.
image_names = []
per_image_props = []
for img_name, frame in raw.groupby(raw['img_name'].values):
    image_names.append(img_name)
    per_image_props.append(mia.features.blobs.blob_props(frame))

# Concatenate once instead of re-copying a growing frame on every iteration;
# fall back to an empty frame when there is nothing to concatenate.
features = (pd.concat(per_image_props, ignore_index=True)
            if per_image_props else pd.DataFrame())
# Label the rows with the group keys actually iterated, so the names cannot
# drift from the data if `raw` is ever not sorted by image name.
# Assumes blob_props yields exactly one row per image -- TODO confirm.
features.index = image_names
features.head()
In [ ]:
# %matplotlib qt
%matplotlib inline
meta_data['index'] = meta_data.index
md = meta_data.drop_duplicates(subset=['index'])
features['class'] = md.group_id
In [ ]:
# Compare the average blob radius between the rows labelled class 5 and class 10.
classes_to_compare = [5, 10]
in_compared_classes = features['class'].isin(classes_to_compare)
mia.plotting.plot_risk_classes_single(features[in_compared_classes], 'avg_radius')
In [ ]:
# Feature columns used for the embedding and cluster inspection further down.
selected_columns = [
    'blob_count',
    'avg_radius',
    'std_radius',
    'max_radius',
    'upper_radius_qt',
    'large_radius_count',
    'med_radius_count',
    'density',
]
# Take an explicit copy: a later cell writes extra columns into
# `selected_features`, and doing that on a plain column slice of `features`
# triggers pandas' SettingWithCopyWarning.
selected_features = features[selected_columns].copy()
# Under default IPython settings only the last expression of a cell is echoed,
# so this summary is computed but not displayed; kept from the original.
selected_features.describe()
# selected_features['class'] = md['class']
# mia.plotting.plot_scattermatrix(selected_features, label_name='class')
selected_features.head()
In [ ]:
# Normalise the feature table with the project helper and put the original
# column labels back on the result.
features_norm = mia.analysis.normalize_data_frame(features)
features_norm.columns = features.columns
# Overwrite the 'class' column with the BIRADS value of each image.
# NOTE(review): this assignment aligns on index labels, so it relies on
# features_norm keeping the index of `features` -- confirm against
# normalize_data_frame.
features_norm['class'] = md.BIRADS
features_norm.head()
In [ ]:
%matplotlib qt
columns = [c for c in features_norm.columns if c not in []]
plotting.parallel_coordinates(features_norm[columns], 'class')
In [ ]:
%matplotlib qt
mapping = mia.analysis.tSNE(selected_features, learning_rate=100, perplexity=10,
early_exaggeration=5.0, verbose=2)
mapping['class'] = md.group_id
mia.plotting.plot_scatter_2d(mapping, [0,1], labels='class')
In [ ]:
# Copy columns 0 and 1 of `mapping` into `selected_features` under the same labels.
for coord in (0, 1):
    selected_features[coord] = mapping[coord]

# Rows whose BIRADS value is 2, split in two at the value 10 of column 1.
is_birads_2 = md.BIRADS == 2
class_2 = selected_features[is_birads_2]
split_at = 10
upper_cluster = class_2[class_2[1] > split_at]
lower_cluster = class_2[class_2[1] <= split_at]
In [ ]:
# Element-wise difference between the summary statistics of the two clusters.
upper_stats = upper_cluster.describe()
lower_stats = lower_cluster.describe()
upper_stats - lower_stats
In [ ]:
# Pull out the row(s) for one specific image by comparing the index values
# against its file name.
point_name = 'test_Mix_DPerc75_c_6.dcm'
is_point = selected_features.index.values == point_name
point = selected_features[is_point]
# NOTE(review): the variable is called class_3 but the filter is BIRADS == 4 --
# confirm which of the two is intended.
is_birads_4 = md.BIRADS == 4
class_3 = selected_features[is_birads_4]
class_3.describe()
In [ ]:
# Display the row(s) selected into `point` in the previous cell.
point
In [ ]:
# Show the first rows of selected_features, which by this point also carries
# columns 0 and 1 copied over from `mapping`.
selected_features.head()