In [41]:
%matplotlib qt
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import mia


Warning: Cannot change to a different GUI toolkit: qt. Using osx instead.

Loading and Preprocessing

Loading the hologic and synthetic datasets.


In [3]:
hologic = pd.DataFrame.from_csv("real_texture.csv")
hologic.drop(hologic.columns[:2], axis=1, inplace=True)
hologic.drop('breast_area', axis=1, inplace=True)

phantom = pd.DataFrame.from_csv("synthetic_texture.csv")
phantom.drop(phantom.columns[:2], axis=1, inplace=True)
phantom.drop('breast_area', axis=1, inplace=True)

Loading the meta data for the real and synthetic datasets.


In [4]:
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, "meta_data/real_meta.csv")
phantom_meta = mia.analysis.create_synthetic_meta_data(phantom, "meta_data/synthetic_meta.csv")
phantom_meta.index.name = 'img_name'

Prepare the BI-RADS/VBD labels for both datasets.


In [5]:
hologic_labels = hologic_meta.drop_duplicates().BIRADS
phantom_labels = phantom_meta['VBD.1']

class_labels = pd.concat([hologic_labels, phantom_labels])
class_labels.index.name = "img_name"
labels = mia.analysis.remove_duplicate_index(class_labels)[0]

Creating Features

Create blob features from distribution of blobs


In [6]:
hologic_texture_features = mia.analysis.group_by_scale_space(hologic)
phantom_texture_features = mia.analysis.group_by_scale_space(phantom)

Take a random subset of the phantom mammograms. This is important so that each case is not over represented.


In [7]:
syn_feature_meta = mia.analysis.remove_duplicate_index(phantom_meta)
phantom_texture_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
phantom_texture_features_subset = mia.analysis.create_random_subset(phantom_texture_features, 'phantom_name')

# hologic_texture_features['patient_id'] = hologic_meta['patient_id'].drop_duplicates()
# hologic_texture_features_subset = mia.analysis.create_random_subset(hologic_texture_features, 'patient_id')

Combine the features from both datasets.


In [8]:
features = pd.concat([hologic_texture_features, phantom_texture_features_subset])
assert features.shape[0] == 366
features.head()


Out[8]:
contrast dissimilarity homogeneity energy contrast_1 dissimilarity_1 homogeneity_1 energy_1 contrast_2 dissimilarity_2 ... homogeneity_7 energy_7 contrast_8 dissimilarity_8 homogeneity_8 energy_8 contrast_9 dissimilarity_9 homogeneity_9 energy_9
p214-010-60001-cl.png 180.015284 10.623496 0.092774 0.068239 281.402566 10.773226 0.103179 0.056349 166.662740 10.250118 ... 0.099589 0.016556 163.937053 10.084599 0.099519 0.013505 157.454233 9.781551 0.105151 0.019975
p214-010-60001-cr.png 241.273806 10.449699 0.101719 0.073124 257.052029 10.725951 0.098164 0.051589 236.447806 10.048496 ... 0.104395 0.015297 166.288393 9.918993 0.107002 0.022819 157.454233 9.781551 0.105151 0.019975
p214-010-60001-ml.png 217.490041 11.182093 0.089156 0.068559 255.275936 11.383161 0.091460 0.052581 223.895510 10.859934 ... 0.094498 0.014957 171.293827 10.346745 0.094648 0.014245 157.454233 9.781551 0.105151 0.019975
p214-010-60001-mr.png 288.353799 11.644971 0.093407 0.073823 237.048490 11.298087 0.089295 0.050686 173.248493 10.351740 ... 0.099926 0.015496 166.981656 10.147627 0.096699 0.013841 157.454233 9.781551 0.105151 0.019975
p214-010-60005-cl.png 160.227140 10.034384 0.099470 0.069002 157.603883 9.991076 0.096144 0.051515 148.042716 9.647045 ... 0.106255 0.017021 166.288393 9.918993 0.107002 0.022819 127.449202 8.913421 0.110486 0.016995

5 rows × 40 columns

Filter some features, such as the min, to remove noise.


In [9]:
selected_features = features.copy()

Compare Real and Synthetic Features

Compare the distributions of features detected from the real mammograms and the phantoms using the Kolmogorov-Smirnov two sample test.


In [80]:
ks_stats = [list(stats.ks_2samp(hologic_texture_features[col], 
                                phantom_texture_features[col]))
                                for col in hologic_texture_features.columns]

ks_test = pd.DataFrame(ks_stats, columns=['KS', 'p-value'], index=hologic_texture_features.columns)
ks_test.to_latex("tables/texture_features_ks.tex")
ks_test


Out[80]:
KS p-value
contrast 0.381944 5.383186e-09
dissimilarity 1.000000 3.587622e-59
homogeneity 1.000000 3.587622e-59
energy 1.000000 3.587622e-59
contrast_1 0.586111 1.318746e-20
dissimilarity_1 1.000000 3.587622e-59
homogeneity_1 1.000000 3.587622e-59
energy_1 1.000000 3.587622e-59
contrast_2 0.863889 2.874007e-44
dissimilarity_2 1.000000 3.587622e-59
homogeneity_2 1.000000 3.587622e-59
energy_2 1.000000 3.587622e-59
contrast_3 0.923611 1.538596e-50
dissimilarity_3 1.000000 3.587622e-59
homogeneity_3 1.000000 3.587622e-59
energy_3 1.000000 3.587622e-59
contrast_4 0.845833 1.870608e-42
dissimilarity_4 1.000000 3.587622e-59
homogeneity_4 1.000000 3.587622e-59
energy_4 1.000000 3.587622e-59
contrast_5 0.979167 9.485680e-57
dissimilarity_5 1.000000 3.587622e-59
homogeneity_5 1.000000 3.587622e-59
energy_5 1.000000 3.587622e-59
contrast_6 1.000000 3.587622e-59
dissimilarity_6 1.000000 3.587622e-59
homogeneity_6 1.000000 3.587622e-59
energy_6 0.994444 1.605940e-58
contrast_7 0.969444 1.230285e-55
dissimilarity_7 1.000000 3.587622e-59
homogeneity_7 1.000000 3.587622e-59
energy_7 1.000000 3.587622e-59
contrast_8 0.986111 1.497318e-57
dissimilarity_8 1.000000 3.587622e-59
homogeneity_8 1.000000 3.587622e-59
energy_8 0.997222 7.598385e-59
contrast_9 1.000000 3.587622e-59
dissimilarity_9 1.000000 3.587622e-59
homogeneity_9 1.000000 3.587622e-59
energy_9 1.000000 3.587622e-59

Dimensionality Reduction

t-SNE

Running t-SNE to obtain a two dimensional representation.


In [11]:
kwargs = {
    'learning_rate': 300,
    'perplexity': 30,
    'verbose': 1
}

In [12]:
SNE_mapping_2d, error = mia.analysis.tSNE(selected_features, n_components=2, **kwargs)


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 0.834920
[t-SNE] Error after 83 iterations with early exaggeration: 12.847253
[t-SNE] Error after 296 iterations: 0.467811

In [13]:
mia.plotting.plot_mapping_2d(SNE_mapping_2d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)
plt.savefig('figures/mappings/texture_SNE_mapping_2d.png', dpi=300)


Running t-SNE to obtain a 3 dimensional mapping


In [14]:
SNE_mapping_3d, error = mia.analysis.tSNE(selected_features, n_components=3, **kwargs)


[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 366 / 366
[t-SNE] Mean sigma: 0.834920
[t-SNE] Error after 100 iterations with early exaggeration: 14.777595
[t-SNE] Error after 271 iterations: 0.948866

In [42]:
mia.plotting.plot_mapping_3d(SNE_mapping_3d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)


Out[42]:
<matplotlib.axes._subplots.Axes3DSubplot at 0x110926f90>

Isomap

Running Isomap to obtain a 2 dimensional mapping


In [16]:
iso_kwargs = {
    'n_neighbors': 10,
}

In [17]:
iso_mapping_2d, error = mia.analysis.isomap(selected_features, n_components=2, **iso_kwargs)

In [18]:
mia.plotting.plot_mapping_2d(iso_mapping_2d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)
plt.savefig('figures/mappings/texture_iso_mapping_2d.png', dpi=300)



In [19]:
iso_mapping_3d, error = mia.analysis.isomap(selected_features, n_components=3, **iso_kwargs)

In [43]:
mia.plotting.plot_mapping_3d(iso_mapping_3d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)


/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/mpl_toolkits/mplot3d/axes3d.py:1094: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  if self.button_pressed in self._rotate_btn:
Out[43]:
<matplotlib.axes._subplots.Axes3DSubplot at 0x1110e5e10>

Locally Linear Embedding

Running locally linear embedding to obtain 2d mapping


In [21]:
lle_kwargs = {
    'n_neighbors': 10,
}

In [22]:
lle_mapping_2d, error = mia.analysis.lle(selected_features, n_components=2, **lle_kwargs)

In [23]:
mia.plotting.plot_mapping_2d(lle_mapping_2d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)
plt.savefig('figures/mappings/texture_lle_mapping_2d.png', dpi=300)



In [24]:
lle_mapping_3d, error = mia.analysis.lle(selected_features, n_components=3, **lle_kwargs)

In [44]:
mia.plotting.plot_mapping_3d(lle_mapping_3d, hologic_texture_features.index, phantom_texture_features_subset.index, labels)


Out[44]:
<matplotlib.axes._subplots.Axes3DSubplot at 0x1105098d0>

Quality Assessment of Dimensionality Reduction

Assess the quality of the DR against measurements from the co-ranking matrices. First create co-ranking matrices for each of the dimensionality reduction mappings


In [26]:
max_k = 10

In [27]:
SNE_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features, SNE_mapping_2d)
iso_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features, iso_mapping_2d)
lle_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features, lle_mapping_2d)

SNE_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features, SNE_mapping_3d)
iso_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features, iso_mapping_3d)
lle_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features, lle_mapping_3d)

2D Mappings


In [28]:
SNE_trustworthiness_2d = [mia.coranking.trustworthiness(SNE_mapping_2d_cm, k) for k in range(1, max_k)]
iso_trustworthiness_2d = [mia.coranking.trustworthiness(iso_mapping_2d_cm, k) for k in range(1, max_k)]
lle_trustworthiness_2d = [mia.coranking.trustworthiness(lle_mapping_2d_cm, k) for k in range(1, max_k)]

In [29]:
trustworthiness_df = pd.DataFrame([SNE_trustworthiness_2d,
                                   iso_trustworthiness_2d,
                                   lle_trustworthiness_2d], 
                                   index=['SNE', 'Isomap', 'LLE']).T
trustworthiness_df.plot()
plt.savefig('figures/quality_measures/texture_trustworthiness_2d.png', dpi=300)



In [30]:
SNE_continuity_2d = [mia.coranking.continuity(SNE_mapping_2d_cm, k) for k in range(1, max_k)]
iso_continuity_2d = [mia.coranking.continuity(iso_mapping_2d_cm, k) for k in range(1, max_k)]
lle_continuity_2d = [mia.coranking.continuity(lle_mapping_2d_cm, k) for k in range(1, max_k)]

In [31]:
continuity_df = pd.DataFrame([SNE_continuity_2d,
                              iso_continuity_2d,
                              lle_continuity_2d], 
                              index=['SNE', 'Isomap', 'LLE']).T
continuity_df.plot()
plt.savefig('figures/quality_measures/texture_continuity_2d.png', dpi=300)



In [32]:
SNE_lcmc_2d = [mia.coranking.LCMC(SNE_mapping_2d_cm, k) for k in range(2, max_k)]
iso_lcmc_2d = [mia.coranking.LCMC(iso_mapping_2d_cm, k) for k in range(2, max_k)]
lle_lcmc_2d = [mia.coranking.LCMC(lle_mapping_2d_cm, k) for k in range(2, max_k)]

In [33]:
lcmc_df = pd.DataFrame([SNE_lcmc_2d,
                        iso_lcmc_2d,
                        lle_lcmc_2d], 
                        index=['SNE', 'Isomap', 'LLE']).T
lcmc_df.plot()
plt.savefig('figures/quality_measures/texture_lcmc_2d.png', dpi=300)


3D Mappings


In [34]:
SNE_trustworthiness_3d = [mia.coranking.trustworthiness(SNE_mapping_3d_cm, k) for k in range(1, max_k)]
iso_trustworthiness_3d = [mia.coranking.trustworthiness(iso_mapping_3d_cm, k) for k in range(1, max_k)]
lle_trustworthiness_3d = [mia.coranking.trustworthiness(lle_mapping_3d_cm, k) for k in range(1, max_k)]

In [35]:
trustworthiness3d_df = pd.DataFrame([SNE_trustworthiness_3d,
                                   iso_trustworthiness_3d,
                                   lle_trustworthiness_3d], 
                                   index=['SNE', 'Isomap', 'LLE']).T
trustworthiness3d_df.plot()
plt.savefig('figures/quality_measures/texture_trustworthiness_3d.png', dpi=300)



In [36]:
SNE_continuity_3d = [mia.coranking.continuity(SNE_mapping_3d_cm, k) for k in range(1, max_k)]
iso_continuity_3d = [mia.coranking.continuity(iso_mapping_3d_cm, k) for k in range(1, max_k)]
lle_continuity_3d = [mia.coranking.continuity(lle_mapping_3d_cm, k) for k in range(1, max_k)]

In [37]:
continuity3d_df = pd.DataFrame([SNE_continuity_3d,
                              iso_continuity_3d,
                              lle_continuity_3d], 
                              index=['SNE', 'Isomap', 'LLE']).T
continuity3d_df.plot()
plt.savefig('figures/quality_measures/texture_continuity_3d.png', dpi=300)



In [38]:
SNE_lcmc_3d = [mia.coranking.LCMC(SNE_mapping_3d_cm, k) for k in range(2, max_k)]
iso_lcmc_3d = [mia.coranking.LCMC(iso_mapping_3d_cm, k) for k in range(2, max_k)]
lle_lcmc_3d = [mia.coranking.LCMC(lle_mapping_3d_cm, k) for k in range(2, max_k)]

In [39]:
lcmc3d_df = pd.DataFrame([SNE_lcmc_3d,
                        iso_lcmc_3d,
                        lle_lcmc_3d], 
                        index=['SNE', 'Isomap', 'LLE']).T
lcmc3d_df.plot()
plt.savefig('figures/quality_measures/texture_lcmc_3d.png', dpi=300)