In [89]:
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import mia
Loading the hologic and synthetic datasets.
In [42]:
hologic = pd.DataFrame.from_csv("real_texture_lines.csv")
# hologic.drop(hologic.columns[:2], axis=1, inplace=True)
hologic.drop('breast_area', axis=1, inplace=True)
phantom = pd.DataFrame.from_csv("synthetic_texture_lines.csv")
# phantom.drop(phantom.columns, axis=1, inplace=True)
phantom.drop('breast_area', axis=1, inplace=True)
Loading the meta data for the real and synthetic datasets.
In [43]:
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, "meta_data/real_meta.csv")
phantom_meta = mia.analysis.create_synthetic_meta_data(phantom,
"meta_data/synthetic_meta.csv")
phantom_meta.index.name = 'img_name'
Prepare the BI-RADS/VBD labels for both datasets.
In [44]:
hologic_labels = hologic_meta.drop_duplicates().BIRADS
phantom_labels = phantom_meta['VBD.1']
class_labels = pd.concat([hologic_labels, phantom_labels])
class_labels.index.name = "img_name"
labels = mia.analysis.remove_duplicate_index(class_labels)[0]
Create blob features from distribution of blobs
In [45]:
hologic_texture_features = hologic[hologic.columns[5:]]
hologic_texture_features = hologic_texture_features.groupby(hologic.index).agg(np.mean)
phantom_texture_features = phantom[phantom.columns[5:]]
phantom_texture_features = phantom_texture_features.groupby(phantom.index).agg(np.mean)
Take a random subset of the real mammograms. This is important so that each patient is not over represented.
In [46]:
hologic_texture_features['patient_id'] = hologic_meta.drop_duplicates()['patient_id']
hologic_texture_features_subset = mia.analysis.create_random_subset(hologic_texture_features,
'patient_id')
Take a random subset of the phantom mammograms. This is important so that each case is not over represented.
In [47]:
syn_feature_meta = mia.analysis.remove_duplicate_index(phantom_meta)
phantom_texture_features['phantom_name'] = syn_feature_meta.phantom_name.tolist()
phantom_texture_features_subset = mia.analysis.create_random_subset(phantom_texture_features,
'phantom_name')
Combine the features from both datasets.
In [48]:
features = pd.concat([hologic_texture_features_subset, phantom_texture_features_subset])
assert features.shape[0] == 96
features.head()
Out[48]:
Filter some features, such as the min, to remove noise.
In [49]:
selected_features = features.copy()
Compare the distributions of features detected from the real mammograms and the phantoms using the Kolmogorov-Smirnov two sample test.
In [50]:
ks_stats = [list(stats.ks_2samp(hologic[col],
phantom[col]))
for col in hologic.columns]
ks_test = pd.DataFrame(ks_stats, columns=['KS', 'p-value'], index=hologic.columns)
ks_test.to_latex("tables/texture_features_ks_lines.tex")
ks_test
Out[50]:
In [51]:
real_index = hologic_texture_features_subset.index
phantom_index = phantom_texture_features_subset.index
In [52]:
kwargs = {
'learning_rate': 200,
'perplexity': 20,
'verbose': 1
}
In [53]:
SNE_mapping_2d = mia.analysis.tSNE(selected_features, n_components=2, **kwargs)
In [54]:
mia.plotting.plot_mapping_2d(SNE_mapping_2d, real_index, phantom_index, labels)
plt.savefig('figures/mappings/lines_texture_SNE_mapping_2d.png', dpi=300)
Running t-SNE to obtain a 3 dimensional mapping
In [55]:
SNE_mapping_3d = mia.analysis.tSNE(selected_features, n_components=3, **kwargs)
In [85]:
mia.plotting.plot_mapping_3d(SNE_mapping_3d, real_index, phantom_index, labels)
Out[85]:
In [57]:
iso_kwargs = {
'n_neighbors': 4,
}
In [58]:
iso_mapping_2d = mia.analysis.isomap(selected_features, n_components=2, **iso_kwargs)
In [59]:
mia.plotting.plot_mapping_2d(iso_mapping_2d, real_index, phantom_index, labels)
plt.savefig('figures/mappings/lines_texture_iso_mapping_2d.png', dpi=300)
In [60]:
iso_mapping_3d = mia.analysis.isomap(selected_features, n_components=3, **iso_kwargs)
In [87]:
mia.plotting.plot_mapping_3d(iso_mapping_3d, real_index, phantom_index, labels)
Out[87]:
In [62]:
lle_kwargs = {
'n_neighbors': 4,
}
In [63]:
lle_mapping_2d = mia.analysis.lle(selected_features, n_components=2, **lle_kwargs)
In [64]:
mia.plotting.plot_mapping_2d(lle_mapping_2d, real_index, phantom_index, labels)
plt.savefig('figures/mappings/lines_texture_lle_mapping_2d.png', dpi=300)
In [65]:
lle_mapping_3d = mia.analysis.lle(selected_features, n_components=3, **lle_kwargs)
In [88]:
mia.plotting.plot_mapping_3d(lle_mapping_3d, real_index, phantom_index, labels)
Out[88]:
Assess the quality of the DR against measurements from the co-ranking matrices. First create co-ranking matrices for each of the dimensionality reduction mappings
In [67]:
max_k = 50
In [68]:
SNE_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features,
SNE_mapping_2d)
iso_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features,
iso_mapping_2d)
lle_mapping_2d_cm = mia.coranking.coranking_matrix(selected_features,
lle_mapping_2d)
SNE_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features,
SNE_mapping_3d)
iso_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features,
iso_mapping_3d)
lle_mapping_3d_cm = mia.coranking.coranking_matrix(selected_features,
lle_mapping_3d)
In [69]:
SNE_trustworthiness_2d = [mia.coranking.trustworthiness(SNE_mapping_2d_cm, k)
for k in range(1, max_k)]
iso_trustworthiness_2d = [mia.coranking.trustworthiness(iso_mapping_2d_cm, k)
for k in range(1, max_k)]
lle_trustworthiness_2d = [mia.coranking.trustworthiness(lle_mapping_2d_cm, k)
for k in range(1, max_k)]
In [70]:
trustworthiness_df = pd.DataFrame([SNE_trustworthiness_2d,
iso_trustworthiness_2d,
lle_trustworthiness_2d],
index=['SNE', 'Isomap', 'LLE']).T
trustworthiness_df.plot()
plt.savefig('figures/quality_measures/lines_texture_trustworthiness_2d.png', dpi=300)
In [71]:
SNE_continuity_2d = [mia.coranking.continuity(SNE_mapping_2d_cm, k)
for k in range(1, max_k)]
iso_continuity_2d = [mia.coranking.continuity(iso_mapping_2d_cm, k)
for k in range(1, max_k)]
lle_continuity_2d = [mia.coranking.continuity(lle_mapping_2d_cm, k)
for k in range(1, max_k)]
In [72]:
continuity_df = pd.DataFrame([SNE_continuity_2d,
iso_continuity_2d,
lle_continuity_2d],
index=['SNE', 'Isomap', 'LLE']).T
continuity_df.plot()
plt.savefig('figures/quality_measures/lines_texture_continuity_2d.png', dpi=300)
In [73]:
SNE_lcmc_2d = [mia.coranking.LCMC(SNE_mapping_2d_cm, k)
for k in range(2, max_k)]
iso_lcmc_2d = [mia.coranking.LCMC(iso_mapping_2d_cm, k)
for k in range(2, max_k)]
lle_lcmc_2d = [mia.coranking.LCMC(lle_mapping_2d_cm, k)
for k in range(2, max_k)]
In [74]:
lcmc_df = pd.DataFrame([SNE_lcmc_2d,
iso_lcmc_2d,
lle_lcmc_2d],
index=['SNE', 'Isomap', 'LLE']).T
lcmc_df.plot()
plt.savefig('figures/quality_measures/lines_texture_lcmc_2d.png', dpi=300)
In [75]:
SNE_trustworthiness_3d = [mia.coranking.trustworthiness(SNE_mapping_3d_cm, k)
for k in range(1, max_k)]
iso_trustworthiness_3d = [mia.coranking.trustworthiness(iso_mapping_3d_cm, k)
for k in range(1, max_k)]
lle_trustworthiness_3d = [mia.coranking.trustworthiness(lle_mapping_3d_cm, k)
for k in range(1, max_k)]
In [76]:
trustworthiness3d_df = pd.DataFrame([SNE_trustworthiness_3d,
iso_trustworthiness_3d,
lle_trustworthiness_3d],
index=['SNE', 'Isomap', 'LLE']).T
trustworthiness3d_df.plot()
plt.savefig('figures/quality_measures/lines_texture_trustworthiness_3d.png', dpi=300)
In [77]:
SNE_continuity_3d = [mia.coranking.continuity(SNE_mapping_3d_cm, k)
for k in range(1, max_k)]
iso_continuity_3d = [mia.coranking.continuity(iso_mapping_3d_cm, k)
for k in range(1, max_k)]
lle_continuity_3d = [mia.coranking.continuity(lle_mapping_3d_cm, k)
for k in range(1, max_k)]
In [78]:
continuity3d_df = pd.DataFrame([SNE_continuity_3d,
iso_continuity_3d,
lle_continuity_3d],
index=['SNE', 'Isomap', 'LLE']).T
continuity3d_df.plot()
plt.savefig('figures/quality_measures/lines_texture_continuity_3d.png', dpi=300)
In [79]:
SNE_lcmc_3d = [mia.coranking.LCMC(SNE_mapping_3d_cm, k)
for k in range(2, max_k)]
iso_lcmc_3d = [mia.coranking.LCMC(iso_mapping_3d_cm, k)
for k in range(2, max_k)]
lle_lcmc_3d = [mia.coranking.LCMC(lle_mapping_3d_cm, k)
for k in range(2, max_k)]
In [80]:
lcmc3d_df = pd.DataFrame([SNE_lcmc_3d,
iso_lcmc_3d,
lle_lcmc_3d],
index=['SNE', 'Isomap', 'LLE']).T
lcmc3d_df.plot()
plt.savefig('figures/quality_measures/lines_texture_lcmc_3d.png', dpi=300)