In [14]:
%matplotlib qt
import pandas as pd
import numpy as np
import mia
Loading the hologic and synthetic datasets.
In [15]:
# Load the Hologic and synthetic (phantom) tables.
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
hologic = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-01/hologic.csv",
                      index_col=0, parse_dates=True)
phantom = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-01/synthetics1-blobs.csv",
                      index_col=0, parse_dates=True)
Loading the metadata for the real and synthetic datasets.
In [16]:
# CSV files handed to the mia meta-data builders.
birads_csv = "/Volumes/Seagate/mmp_data/meta_data/BIRADS.csv"
synthetic_meta_csv = "/Volumes/Seagate/mmp_data/meta_data/synthetic_meta_data_cleaned.csv"

# Build one meta-data table per dataset from the loaded table and its CSV.
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, birads_csv)
phantom_meta = mia.analysis.create_synthetic_meta_data(phantom, synthetic_meta_csv)

# Label the phantom meta-data index 'img_name'.
phantom_meta.index.name = 'img_name'
Load the texture data generated from the real and synthetic blobs
In [17]:
# Columns removed from both texture tables right after loading.
dropped_columns = ['x', 'y', 'breast_area']

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
# The drop is chained (not inplace) so each name is bound exactly once.
hologic_texture = pd.read_csv(
    "/Volumes/Seagate/mmp_data/2015-04-01/hologic_texture.csv",
    index_col=0, parse_dates=True
).drop(dropped_columns, axis=1)
phantom_texture = pd.read_csv(
    "/Volumes/Seagate/mmp_data/2015-04-01/synthetics1_texture.csv",
    index_col=0, parse_dates=True
).drop(dropped_columns, axis=1)
Group blobs by radius and image name
In [18]:
# Run the same mia scale-space grouping over both texture tables,
# real table first, phantom table second.
hologic_texture_features, phantom_texture_features = map(
    mia.analysis.group_by_scale_space,
    (hologic_texture, phantom_texture)
)
Select a random subset of the phantom cases. This is important so that each synthetic case is only represented once.
In [19]:
# Phantom meta data after the mia helper has removed duplicate index entries.
syn_feature_meta = mia.analysis.remove_duplicate_index(phantom_meta)
# tolist() makes this a positional assignment: it assumes syn_feature_meta has
# the same number of rows, in the same order, as phantom_texture_features
# -- TODO confirm. Note that this adds the column to phantom_texture_features
# in place.
phantom_texture_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
# NOTE(review): the subset is random and no seed is set in the visible cells,
# so the selected rows may differ between runs -- confirm how
# create_random_subset draws its sample before relying on reproducibility.
phantom_texture_features_subset = mia.analysis.create_random_subset(phantom_texture_features, 'phantom_name')
In [20]:
# Stack the real-image features on top of the selected phantom features.
feature_tables = [hologic_texture_features, phantom_texture_features_subset]
features = pd.concat(feature_tables)

# Sanity check: the combined table is expected to hold exactly 366 rows.
assert len(features) == 366
In [21]:
# Real images: BIRADS column of the de-duplicated Hologic meta data.
hologic_meta_unique = hologic_meta.drop_duplicates()
hologic_labels = hologic_meta_unique['BIRADS']

# Phantoms: the 'VBD.1' column of the phantom meta data.
phantom_labels = phantom_meta['VBD.1']

# One label series for both datasets, indexed by image name.
class_labels = pd.concat([hologic_labels, phantom_labels])
class_labels.index.name = "img_name"

# Drop duplicate index entries, then take item 0 of the helper's result
# (presumably the single column of labels -- TODO confirm).
deduplicated_labels = mia.analysis.remove_duplicate_index(class_labels)
labels = deduplicated_labels[0]
In [162]:
# All features are used as-is for the embedding.
selected_features = features

# Keyword arguments forwarded to mia's tSNE helper.
tsne_options = dict(n_components=2, learning_rate=300, perplexity=30, verbose=1)
mapping = mia.analysis.tSNE(selected_features, **tsne_options)
In [163]:
def plot_mapping(m):
    """Scatter-plot the rows of a mapping for the real and phantom images.

    Relies on the notebook-level names ``hologic_texture_features``,
    ``phantom_texture_features_subset`` and ``labels``.

    Parameters
    ----------
    m : object supporting ``.loc`` lookups (e.g. a DataFrame)
        Mapping whose rows are selected by the index of the two feature tables.

    Returns
    -------
    None
    """
    # Split the mapping into its real and phantom rows.
    real_points = m.loc[hologic_texture_features.index]
    synthetic_points = m.loc[phantom_texture_features_subset.index]

    # Look up the label of every selected row.
    real_classes = labels[real_points.index]
    synthetic_classes = labels[synthetic_points.index]

    # The object returned by the first call is passed as `ax` to the second,
    # which draws the phantom rows with larger '^' markers.
    axes = mia.plotting.plot_scatter_2d(real_points, labels=real_classes, s=10)
    mia.plotting.plot_scatter_2d(synthetic_points, labels=synthetic_classes,
                                 ax=axes, marker='^', s=50)
# Plot the mapping returned by mia.analysis.tSNE in the previous cell.
plot_mapping(mapping)
In [164]:
# Rebind `mapping` to the mapping stored on disk and plot that instead.
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
mapping = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-01/both_texture_mapping.csv",
                      index_col=0, parse_dates=True)
plot_mapping(mapping)
In [165]:
# Columns of the real-image feature table, used to select from both tables.
index = hologic_texture_features.columns[:]

# Summary statistics per dataset over those columns.
phantom_summary = phantom_texture_features_subset[index].describe()
hologic_summary = hologic_texture_features[index].describe()

# Displayed result: phantom statistics minus real-image statistics.
phantom_summary - hologic_summary
Out[165]:
In [172]:
# Normalise a copy of the combined features and restore the column labels.
f = mia.analysis.normalize_data_frame(features.copy())
f.columns = features.columns

# Reorder the columns: every 4th column starting at offsets 0, 1, 2, 3
# (presumably this groups the columns by texture feature -- TODO confirm).
cols = [c for offset in range(4) for c in features.columns[offset::4]]

# Keep positions 30..39 of the reordered list. .copy() makes an independent
# frame so that adding the 'class' column does not write into a slice.
f = f[cols[10*3:4*10]].copy()

# 0.0 for every row, then 1.0 for the selected phantom rows. A single
# .loc[rows, column] assignment replaces the chained f['class'].loc[...] = ...
# form, which is not guaranteed to write through to `f`.
f['class'] = 0.0
f.loc[phantom_texture_features_subset.index, 'class'] = 1.0

# pd.tools.plotting no longer exists in current pandas (the public location
# is pd.plotting); fall back to the old module on releases that predate it.
plotting = getattr(pd, 'plotting', None) or pd.tools.plotting
plotting.parallel_coordinates(f, 'class')
Out[172]:
In [186]:
# First four columns of the real-image features. .copy() makes an independent
# frame so the 'class' assignment below does not write into a slice of
# hologic_texture_features (avoids SettingWithCopyWarning).
f = hologic_texture_features[hologic_texture_features.columns[:4]].copy()

# BIRADS of the de-duplicated Hologic meta data, aligned on the index.
f['class'] = hologic_meta.drop_duplicates().BIRADS

# Scatter matrix with 'class' passed as the class column. The bare `f`
# expression that used to sit mid-cell had no effect and was removed.
mia.plotting.plot_scattermatrix(f, 'class')
In [191]:
# Threshold on mapping column '0' that separates the two groups.
split_x = -4

# `mapping` holds rows for both real and phantom images (plot_mapping selects
# both from it), so pick the real-image rows explicitly, as plot_mapping does,
# instead of relying on pandas to reindex a longer boolean mask.
hologic_x = mapping.loc[hologic_texture_features.index, '0']

# Both comparisons are kept (rather than negating one mask) so rows with a
# missing coordinate stay out of both groups.
left = hologic_texture_features[hologic_x < split_x]
right = hologic_texture_features[hologic_x >= split_x]

# Displayed result: difference of the summary statistics of the two groups.
left.describe() - right.describe()
Out[191]: