In [1]:
%matplotlib qt
import pandas as pd
import numpy as np
import mia
Loading the hologic and synthetic datasets.
In [2]:
# Load the blob detections for the real (Hologic) and synthetic (phantom) images.
# `DataFrame.from_csv` is deprecated and was removed in pandas 1.0;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
hologic = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-01/hologic.csv", index_col=0, parse_dates=True)
phantom = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-14/synthetics1-blobs.csv", index_col=0, parse_dates=True)
Loading the metadata for the real and synthetic datasets.
In [3]:
# Build the metadata tables for both datasets from their metadata CSV files.
birads_csv = "/Volumes/Seagate/mmp_data/meta_data/BIRADS.csv"
synthetic_meta_csv = "/Volumes/Seagate/mmp_data/meta_data/synthetic_meta_data_cleaned.csv"

hologic_meta = mia.analysis.create_hologic_meta_data(hologic, birads_csv)
phantom_meta = mia.analysis.create_synthetic_meta_data(phantom, synthetic_meta_csv)

# Give the phantom metadata index an explicit name.
phantom_meta.index.name = 'img_name'
Load the intensity data generated from the real and synthetic blobs.
In [4]:
# Columns dropped from both intensity tables before the features are grouped.
non_feature_columns = ['x', 'y', 'breast_area']

# `DataFrame.from_csv` is deprecated and was removed in pandas 1.0;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
# The drop is chained (no `inplace=True`) so each name is bound once to its
# final value.
hologic_intensity = pd.read_csv(
    "/Volumes/Seagate/mmp_data/2015-04-01/hologic_intensity.csv",
    index_col=0, parse_dates=True
).drop(non_feature_columns, axis=1)
phantom_intensity = pd.read_csv(
    "/Volumes/Seagate/mmp_data/2015-04-14/synthetics1_intensity.csv",
    index_col=0, parse_dates=True
).drop(non_feature_columns, axis=1)
Group blobs by radius and image name
In [5]:
# Run both intensity tables through mia.analysis.group_by_scale_space, real
# data first and phantom data second, then preview the real-image result.
hologic_intensity_features, phantom_intensity_features = [
    mia.analysis.group_by_scale_space(blob_intensity)
    for blob_intensity in (hologic_intensity, phantom_intensity)
]
hologic_intensity_features.head()
Out[5]:
Select a random subset of the phantom cases. This is important so that each synthetic case is only represented once.
In [6]:
# Presumably reduces the phantom metadata to one row per index entry
# (behaviour is defined in mia.analysis.remove_duplicate_index - TODO confirm).
syn_feature_meta = mia.analysis.remove_duplicate_index(phantom_meta)
# NOTE(review): `.tolist()` discards the index, so the names are assigned by
# position. This relies on syn_feature_meta and phantom_intensity_features
# having the same number of rows in the same order - confirm.
# This also adds a column to phantom_intensity_features in place.
phantom_intensity_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
# NOTE(review): presumably picks one random row per 'phantom_name'; if the
# helper is unseeded, the subset (and everything derived from it) changes
# between runs - verify in mia.analysis.create_random_subset.
phantom_intensity_features_subset = mia.analysis.create_random_subset(phantom_intensity_features, 'phantom_name')
# Display the selected subset.
phantom_intensity_features_subset
Out[6]:
In [16]:
from IPython.display import display

# Stack the real-image features on top of the sub-sampled phantom features.
features = pd.concat([hologic_intensity_features, phantom_intensity_features_subset], axis=0)

# Show every column of the phantom subset.
# NOTE: this changes the pandas display option for the rest of the session.
pd.options.display.max_columns = None
display(phantom_intensity_features_subset)
# TODO(review): re-enable once the expected number of rows is confirmed.
# assert features.shape[0] == 366
In [8]:
# Class labels: the BIRADS column for the real images (after dropping
# duplicated metadata rows) and the 'VBD.1' column for the phantoms.
hologic_labels = hologic_meta.drop_duplicates()['BIRADS']
phantom_labels = phantom_meta['VBD.1']

# Combine both label sets under a common index name.
class_labels = pd.concat((hologic_labels, phantom_labels))
class_labels.index.name = "img_name"

# Keep item 0 of whatever remove_duplicate_index returns for the labels.
deduplicated_labels = mia.analysis.remove_duplicate_index(class_labels)
labels = deduplicated_labels[0]
In [15]:
# Preview the last rows of the combined table; the phantom subset was
# concatenated after the Hologic rows, so these are the trailing entries.
features.tail()
Out[15]:
In [9]:
# Keep every feature column except those whose name contains 'count',
# 'skew' or 'kurtosis'. Filtering once avoids an in-place `drop` on a slice
# of `features` (SettingWithCopyWarning). It also avoids dropping labels
# computed from `features.columns`, which would fail if a column had
# already been removed by the 'count' filter.
excluded_terms = ('count', 'skew', 'kurtosis')
selected_features = features[[c for c in features.columns
                              if not any(term in c for term in excluded_terms)]]

# Embed the selected features in two dimensions.
# NOTE(review): t-SNE is normally stochastic; check whether mia.analysis.tSNE
# accepts a seed if a reproducible embedding is required.
mapping = mia.analysis.tSNE(selected_features, n_components=2, learning_rate=300, perplexity=30, verbose=1)
In [14]:
def plot_mapping(m):
    """Scatter-plot a 2-D mapping with real and synthetic images on one axes.

    Rows of ``m`` are selected with the indexes of the notebook-level frames
    ``hologic_intensity_features`` and ``phantom_intensity_features_subset``.
    The matching entries of the notebook-level ``labels`` are passed to the
    plotting helper as point labels.

    Parameters
    ----------
    m : pandas.DataFrame
        Mapping indexed by the same labels as the feature tables.

    Returns
    -------
    The axes object returned by ``mia.plotting.plot_scatter_2d`` for the
    second (phantom) layer.
    """
    hologic_map = m.loc[hologic_intensity_features.index]
    phantom_map = m.loc[phantom_intensity_features_subset.index]

    hol_labels = labels[hologic_map.index]
    syn_labels = labels[phantom_map.index]

    # Real images: small default markers. Phantoms: larger triangles drawn
    # on the same axes so both populations can be compared directly.
    ax = mia.plotting.plot_scatter_2d(hologic_map, labels=hol_labels, s=10)
    ax = mia.plotting.plot_scatter_2d(phantom_map, labels=syn_labels, ax=ax, marker='^', s=50)
    return ax


# Trailing semicolon keeps the returned axes repr out of the cell output.
plot_mapping(mapping);
In [42]:
# Parallel-coordinates view of the selected features, one line per image,
# grouped by class label.
# NOTE(review): presumably reorders the feature columns by scale; the meaning
# of the second argument (9) is defined by mia.analysis.sort_by_scale_space -
# TODO confirm.
ordered_features = mia.analysis.sort_by_scale_space(selected_features, 9)
# ordered_features['class']
ordered_features.index.name = 'img_name'
# Attach the class label of each image (aligned on the index if `labels` is
# a Series).
ordered_features['class'] = labels
# NOTE(review): DataFrame.sort only exists in old pandas releases; newer ones
# provide sort_values instead - confirm the pandas version in use.
ordered_features.sort('class', inplace=True)
# Disabled experiments that overrode the class column with a split on the
# mapping or with a phantom-only marker:
# ordered_features['class'].loc[mapping[1] < 0] = np.ones(mapping[mapping[1] < 0].shape[0])
# ordered_features['class'].loc[phantom_intensity_features_subset.index] = np.ones(phantom_intensity_features_subset.shape[0]) + 1
# NOTE(review): newer pandas exposes this function as
# pd.plotting.parallel_coordinates.
pd.tools.plotting.parallel_coordinates(ordered_features, 'class')
# left = ordered_features[mapping[0] < 0]
# right = ordered_features[mapping[0] >= 0]
Out[42]:
In [13]:
# Reload a previously saved mapping from disk and plot it. This rebinds
# `mapping`, replacing any embedding computed earlier in the session.
# `DataFrame.from_csv` is deprecated and was removed in pandas 1.0;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults.
mapping = pd.read_csv("/Volumes/Seagate/mmp_data/2015-04-01/both_intensity_mapping.csv", index_col=0, parse_dates=True)
plot_mapping(mapping)
In [14]:
# Show the mapped coordinates of the phantom images selected for the subset.
mapping.loc[phantom_intensity_features_subset.index]
Out[14]:
In [15]:
# Normalise a copy of the combined features; the original column names are
# re-applied afterwards.
f = mia.analysis.normalize_data_frame(features.copy())
f.columns = features.columns

# Regroup the columns by taking every `stride`-th column for each offset,
# then keep only the group at position `group`.
# NOTE(review): 10 and 9 are carried over unchanged from the original cell;
# confirm what they correspond to in the feature layout.
stride = 10
group = 9
cols = [c for offset in range(stride) for c in features.columns[offset::stride]]
f = f[cols]
# `.copy()` makes `f` an independent frame so the assignments below write to
# it directly instead of to a slice (avoids SettingWithCopyWarning).
f = f[f.columns[group * stride:(group + 1) * stride]].copy()

# Class marker: 0 for real images, 1 for the phantom subset. A single `.loc`
# call replaces the chained `f['class'].loc[...] = ...`, which may write to
# a temporary object rather than to `f`.
f['class'] = np.zeros(f.shape[0])
f.loc[phantom_intensity_features_subset.index, 'class'] = 1.0

# NOTE(review): newer pandas exposes this function as
# pd.plotting.parallel_coordinates.
pd.tools.plotting.parallel_coordinates(f, 'class')
Out[15]:
In [16]:
# Element-wise difference of the summary statistics (phantom minus Hologic).
# The 'count' row of the result is a difference in non-null counts, not a
# feature statistic.
phantom_intensity_features.describe() - hologic_intensity_features.describe()
Out[16]: