In [97]:

    
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import mia









    



Warning: Cannot change to a different GUI toolkit: qt. Using osx instead.

Loading and Preprocessing

Loading the hologic and synthetic datasets.



In [56]:

    
def _load_texture_features(csv_path):
    """Read a texture CSV and drop the columns that are not used as features.

    The first CSV column becomes the index. The first two remaining columns
    and the 'breast_area' column are then removed.
    """
    # pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
    # read_csv with index_col=0 is the documented replacement.
    # NOTE(review): from_csv also passed parse_dates=True for the index. That
    # is omitted here on the assumption that the index holds image names
    # (it is later named 'img_name'), not dates -- confirm.
    raw = pd.read_csv(csv_path, index_col=0)
    # Non-mutating drops, so re-running the cell always starts from the file.
    return raw.drop(raw.columns[:2], axis=1).drop('breast_area', axis=1)


hologic = _load_texture_features("real_texture.csv")
phantom = _load_texture_features("synthetic_texture.csv")

Loading the meta data for the real and synthetic datasets.



In [57]:

    
# Build one metadata frame per dataset from its texture frame and metadata CSV.
hologic_meta = mia.analysis.create_hologic_meta_data(
    hologic,
    "meta_data/real_meta.csv",
)
phantom_meta = mia.analysis.create_synthetic_meta_data(
    phantom,
    "meta_data/synthetic_meta.csv",
)
# Name the phantom metadata index explicitly.
phantom_meta.index.name = 'img_name'

Prepare the BI-RADS/VBD labels for both datasets.



In [58]:

    
# Class labels: the BIRADS column for the real images (after dropping
# duplicated metadata rows) and the 'VBD.1' column for the phantoms.
hologic_labels = hologic_meta.drop_duplicates()['BIRADS']
phantom_labels = phantom_meta['VBD.1']

# Stack both label series and name the shared index.
class_labels = pd.concat([hologic_labels, phantom_labels])
class_labels.index.name = "img_name"
# Keep element [0] of what remove_duplicate_index returns.
# NOTE(review): the meaning of [0] depends on mia.analysis -- confirm.
labels = mia.analysis.remove_duplicate_index(class_labels)[0]

Creating Features

Create texture features by grouping the texture descriptors by scale space.



In [59]:

    
# Apply the same scale-space grouping to the real and the phantom frame.
hologic_texture_features, phantom_texture_features = (
    mia.analysis.group_by_scale_space(dataset)
    for dataset in (hologic, phantom)
)

Take a random subset of the real mammograms. This is important so that no patient is over-represented.



In [60]:

    
# Attach the patient id to every feature row (aligned on the index), then
# draw the random subset keyed on that column.
# NOTE(review): no random seed is set in this notebook, so the subset is
# presumably different on every run -- confirm how create_random_subset samples.
hologic_texture_features['patient_id'] = hologic_meta.drop_duplicates().patient_id
hologic_texture_features_subset = mia.analysis.create_random_subset(
    hologic_texture_features,
    'patient_id',
)

Take a random subset of the phantom mammograms. This is important so that no phantom case is over-represented.



In [61]:

    
# De-duplicate the phantom metadata, then copy its phantom names onto the
# feature frame.
syn_feature_meta = mia.analysis.remove_duplicate_index(phantom_meta)
# NOTE(review): tolist() makes this a positional assignment, so it relies on
# both frames having the same row order and length -- confirm.
phantom_texture_features['phantom_name'] = syn_feature_meta['phantom_name'].tolist()
phantom_texture_features_subset = mia.analysis.create_random_subset(
    phantom_texture_features,
    'phantom_name',
)

Combine the features from both datasets.



In [62]:

    
# Stack the real and phantom subsets row-wise into one feature frame.
features = pd.concat(
    [hologic_texture_features_subset, phantom_texture_features_subset]
)
# Sanity check: the combined frame is expected to hold exactly 96 rows.
assert len(features) == 96
features.head(5)









    Out[62]:






  
    
      
      contrast
      dissimilarity
      homogeneity
      energy
      contrast_1
      dissimilarity_1
      homogeneity_1
      energy_1
      contrast_2
      dissimilarity_2
      ...
      homogeneity_7
      energy_7
      contrast_8
      dissimilarity_8
      homogeneity_8
      energy_8
      contrast_9
      dissimilarity_9
      homogeneity_9
      energy_9
    
  
  
    
      p214-010-60001-ml.png
       217.490041
       11.182093
       0.089156
       0.068559
       255.275936
       11.383161
       0.091460
       0.052581
       223.895510
       10.859934
      ...
       0.094498
       0.014957
       171.293827
       10.346745
       0.094648
       0.014245
       157.454233
        9.781551
       0.105151
       0.019975
    
    
      p214-010-60005-cr.png
       153.433967
        9.793844
       0.097550
       0.069263
       258.457447
        9.835271
       0.120798
       0.066882
       148.101614
        9.619357
      ...
       0.133482
       0.019024
       206.110004
        7.723766
       0.155662
       0.028688
       132.380141
        9.052372
       0.108430
       0.015630
    
    
      p214-010-60008-cl.png
       278.832070
       12.986904
       0.077585
       0.069229
       277.870310
       12.970713
       0.079270
       0.052845
       277.135918
       12.925121
      ...
       0.077955
       0.019611
       262.540916
       12.908503
       0.077737
       0.018621
       257.790604
       12.788839
       0.078467
       0.019016
    
    
      p214-010-60012-cl.png
       228.203678
       11.830961
       0.083644
       0.068756
       236.048146
       11.893532
       0.084507
       0.050689
       205.001806
       11.381168
      ...
       0.108649
       0.025046
       188.258062
       10.873996
       0.089765
       0.014025
       157.454233
        9.781551
       0.105151
       0.019975
    
    
      p214-010-60013-ml.png
       233.480009
       12.147959
       0.081455
       0.067923
       243.073751
       12.150686
       0.083336
       0.051553
       221.442525
       11.778895
      ...
       0.082859
       0.014600
       230.437848
       12.089526
       0.081013
       0.015532
       203.846013
       11.353827
       0.086053
       0.013723
    
  

5 rows × 40 columns

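Select the features used in the rest of the analysis. No features are actually filtered out in this notebook: `selected_features` is simply a copy of all the combined features.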



In [63]:

    
# Work on an independent (deep) copy so later steps cannot alter `features`.
selected_features = features.copy(deep=True)

Compare Real and Synthetic Features

Compare the distributions of features detected from the real mammograms and the phantoms using the Kolmogorov-Smirnov two sample test.



In [64]:

    
# Two-sample Kolmogorov-Smirnov test, one per feature column.
# NOTE(review): the column names come from the random subset, but the samples
# are taken from the full (non-subset) feature frames -- confirm this is
# intended.
ks_stats = [
    list(
        stats.ks_2samp(
            hologic_texture_features[feature],
            phantom_texture_features[feature],
        )
    )
    for feature in hologic_texture_features_subset.columns
]

# One row per feature, holding the KS statistic and its p-value.
ks_test = pd.DataFrame(
    ks_stats,
    index=hologic_texture_features_subset.columns,
    columns=['KS', 'p-value'],
)
ks_test.to_latex("tables/texture_features_ks.tex")
ks_test









    Out[64]:






  
    
      
      KS
      p-value
    
  
  
    
      contrast
       0.381944
       5.383186e-09
    
    
      dissimilarity
       1.000000
       3.587622e-59
    
    
      homogeneity
       1.000000
       3.587622e-59
    
    
      energy
       1.000000
       3.587622e-59
    
    
      contrast_1
       0.586111
       1.318746e-20
    
    
      dissimilarity_1
       1.000000
       3.587622e-59
    
    
      homogeneity_1
       1.000000
       3.587622e-59
    
    
      energy_1
       1.000000
       3.587622e-59
    
    
      contrast_2
       0.863889
       2.874007e-44
    
    
      dissimilarity_2
       1.000000
       3.587622e-59
    
    
      homogeneity_2
       1.000000
       3.587622e-59
    
    
      energy_2
       1.000000
       3.587622e-59
    
    
      contrast_3
       0.923611
       1.538596e-50
    
    
      dissimilarity_3
       1.000000
       3.587622e-59
    
    
      homogeneity_3
       1.000000
       3.587622e-59
    
    
      energy_3
       1.000000
       3.587622e-59
    
    
      contrast_4
       0.845833
       1.870608e-42
    
    
      dissimilarity_4
       1.000000
       3.587622e-59
    
    
      homogeneity_4
       1.000000
       3.587622e-59
    
    
      energy_4
       1.000000
       3.587622e-59
    
    
      contrast_5
       0.979167
       9.485680e-57
    
    
      dissimilarity_5
       1.000000
       3.587622e-59
    
    
      homogeneity_5
       1.000000
       3.587622e-59
    
    
      energy_5
       1.000000
       3.587622e-59
    
    
      contrast_6
       1.000000
       3.587622e-59
    
    
      dissimilarity_6
       1.000000
       3.587622e-59
    
    
      homogeneity_6
       1.000000
       3.587622e-59
    
    
      energy_6
       0.994444
       1.605940e-58
    
    
      contrast_7
       0.969444
       1.230285e-55
    
    
      dissimilarity_7
       1.000000
       3.587622e-59
    
    
      homogeneity_7
       1.000000
       3.587622e-59
    
    
      energy_7
       1.000000
       3.587622e-59
    
    
      contrast_8
       0.986111
       1.497318e-57
    
    
      dissimilarity_8
       1.000000
       3.587622e-59
    
    
      homogeneity_8
       1.000000
       3.587622e-59
    
    
      energy_8
       0.997222
       7.598385e-59
    
    
      contrast_9
       1.000000
       3.587622e-59
    
    
      dissimilarity_9
       1.000000
       3.587622e-59
    
    
      homogeneity_9
       1.000000
       3.587622e-59
    
    
      energy_9
       1.000000
       3.587622e-59

Dimensionality Reduction

t-SNE

Running t-SNE to obtain a two dimensional representation.



In [65]:

    
# Row indices of the real and phantom subsets, passed to the plotting helpers.
real_index, phantom_index = (
    hologic_texture_features_subset.index,
    phantom_texture_features_subset.index,
)



In [66]:

    
# Keyword arguments shared by the 2-D and 3-D t-SNE runs below.
# NOTE(review): no random seed is included, so the embeddings are presumably
# not reproducible between runs -- confirm whether mia.analysis.tSNE accepts one.
kwargs = dict(learning_rate=200, perplexity=20, verbose=1)



In [67]:

    
# Two-component t-SNE mapping of the selected features.
SNE_mapping_2d = mia.analysis.tSNE(
    selected_features,
    n_components=2,
    **kwargs
)









    



[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 96 / 96
[t-SNE] Mean sigma: 1.192292
[t-SNE] Error after 83 iterations with early exaggeration: 11.373997
[t-SNE] Error after 141 iterations: 0.460456



In [68]:

    
# Plot the 2-D t-SNE mapping, passing the real/phantom row indices and the
# class labels, then write the current figure to disk.
mia.plotting.plot_mapping_2d(
    SNE_mapping_2d,
    real_index,
    phantom_index,
    labels,
)
plt.savefig(
    'figures/mappings/texture_SNE_mapping_2d.png',
    dpi=300,
)

Running t-SNE to obtain a 3 dimensional mapping



In [69]:

    
# Three-component t-SNE mapping, using the same keyword arguments as the 2-D run.
SNE_mapping_3d = mia.analysis.tSNE(
    selected_features,
    n_components=3,
    **kwargs
)









    



[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 96 / 96
[t-SNE] Mean sigma: 1.192292
[t-SNE] Error after 100 iterations with early exaggeration: 16.345359
[t-SNE] Error after 301 iterations: 2.602024



In [98]:

    
# Plot the 3-D t-SNE mapping; the call's return value is the cell output.
mia.plotting.plot_mapping_3d(
    SNE_mapping_3d,
    real_index,
    phantom_index,
    labels,
)









    Out[98]:





<matplotlib.axes._subplots.Axes3DSubplot at 0x10d6cc350>

Isomap

Running Isomap to obtain a 2 dimensional mapping



In [71]:

    
# Keyword arguments shared by the 2-D and 3-D Isomap runs below.
iso_kwargs = dict(n_neighbors=4)



In [72]:

    
# Two-component Isomap mapping of the selected features.
iso_mapping_2d = mia.analysis.isomap(
    selected_features,
    n_components=2,
    **iso_kwargs
)



In [73]:

    
# Plot the 2-D Isomap mapping, then write the current figure to disk.
mia.plotting.plot_mapping_2d(
    iso_mapping_2d,
    real_index,
    phantom_index,
    labels,
)
plt.savefig(
    'figures/mappings/texture_iso_mapping_2d.png',
    dpi=300,
)



In [74]:

    
# Three-component Isomap mapping, using the same keyword arguments as the 2-D run.
iso_mapping_3d = mia.analysis.isomap(
    selected_features,
    n_components=3,
    **iso_kwargs
)



In [101]:

    
# Plot the 3-D Isomap mapping; the call's return value is the cell output.
mia.plotting.plot_mapping_3d(
    iso_mapping_3d,
    real_index,
    phantom_index,
    labels,
)









    Out[101]:





<matplotlib.axes._subplots.Axes3DSubplot at 0x10ee15750>

Locally Linear Embedding

Running locally linear embedding to obtain 2d mapping



In [76]:

    
# Keyword arguments shared by the 2-D and 3-D locally linear embedding runs below.
lle_kwargs = dict(n_neighbors=5)



In [77]:

    
# Two-component locally linear embedding of the selected features.
lle_mapping_2d = mia.analysis.lle(
    selected_features,
    n_components=2,
    **lle_kwargs
)



In [78]:

    
# Plot the 2-D LLE mapping, then write the current figure to disk.
mia.plotting.plot_mapping_2d(
    lle_mapping_2d,
    real_index,
    phantom_index,
    labels,
)
plt.savefig(
    'figures/mappings/texture_lle_mapping_2d.png',
    dpi=300,
)



In [79]:

    
# Three-component locally linear embedding, using the same keyword arguments
# as the 2-D run.
lle_mapping_3d = mia.analysis.lle(
    selected_features,
    n_components=3,
    **lle_kwargs
)



In [100]:

    
# Plot the 3-D LLE mapping; the call's return value is the cell output.
mia.plotting.plot_mapping_3d(
    lle_mapping_3d,
    real_index,
    phantom_index,
    labels,
)









    Out[100]:





<matplotlib.axes._subplots.Axes3DSubplot at 0x10c0ef1d0>

Quality Assessment of Dimensionality Reduction

Assess the quality of the DR against measurements from the co-ranking matrices. First create co-ranking matrices for each of the dimensionality reduction mappings



In [81]:

    
# Exclusive upper bound for the k values at which the quality measures below
# are evaluated: they iterate over range(1, max_k) or range(2, max_k).
max_k = 10



In [82]:

    
# Co-ranking matrix of every mapping against the selected features,
# first for the 2-D mappings and then for the 3-D ones.
SNE_mapping_2d_cm, iso_mapping_2d_cm, lle_mapping_2d_cm = (
    mia.coranking.coranking_matrix(selected_features, mapping_2d)
    for mapping_2d in (SNE_mapping_2d, iso_mapping_2d, lle_mapping_2d)
)

SNE_mapping_3d_cm, iso_mapping_3d_cm, lle_mapping_3d_cm = (
    mia.coranking.coranking_matrix(selected_features, mapping_3d)
    for mapping_3d in (SNE_mapping_3d, iso_mapping_3d, lle_mapping_3d)
)

2D Mappings



In [83]:

    
# Trustworthiness of each 2-D mapping for k = 1 .. max_k - 1.
SNE_trustworthiness_2d, iso_trustworthiness_2d, lle_trustworthiness_2d = (
    [mia.coranking.trustworthiness(cm, k) for k in range(1, max_k)]
    for cm in (SNE_mapping_2d_cm, iso_mapping_2d_cm, lle_mapping_2d_cm)
)



In [84]:

    
# One column per method, one row per evaluated k.
trustworthiness_df = pd.DataFrame([SNE_trustworthiness_2d,
                                   iso_trustworthiness_2d,
                                   lle_trustworthiness_2d],
                                  index=['SNE', 'Isomap', 'LLE']).T
# The lists were computed for k in range(1, max_k). Index the rows by that k
# so the x-axis shows k itself rather than a 0-based row position.
trustworthiness_df.index = pd.Index(list(range(1, max_k)), name='k')
ax = trustworthiness_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('trustworthiness')
plt.savefig('figures/quality_measures/texture_trustworthiness_2d.png', dpi=300)



In [85]:

    
# Continuity of each 2-D mapping for k = 1 .. max_k - 1.
SNE_continuity_2d, iso_continuity_2d, lle_continuity_2d = (
    [mia.coranking.continuity(cm, k) for k in range(1, max_k)]
    for cm in (SNE_mapping_2d_cm, iso_mapping_2d_cm, lle_mapping_2d_cm)
)



In [86]:

    
# One column per method, one row per evaluated k.
continuity_df = pd.DataFrame([SNE_continuity_2d,
                              iso_continuity_2d,
                              lle_continuity_2d],
                             index=['SNE', 'Isomap', 'LLE']).T
# The lists were computed for k in range(1, max_k). Index the rows by that k
# so the x-axis shows k itself rather than a 0-based row position.
continuity_df.index = pd.Index(list(range(1, max_k)), name='k')
ax = continuity_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('continuity')
plt.savefig('figures/quality_measures/texture_continuity_2d.png', dpi=300)



In [87]:

    
# LCMC of each 2-D mapping for k = 2 .. max_k - 1.
SNE_lcmc_2d, iso_lcmc_2d, lle_lcmc_2d = (
    [mia.coranking.LCMC(cm, k) for k in range(2, max_k)]
    for cm in (SNE_mapping_2d_cm, iso_mapping_2d_cm, lle_mapping_2d_cm)
)



In [88]:

    
# One column per method, one row per evaluated k.
lcmc_df = pd.DataFrame([SNE_lcmc_2d,
                        iso_lcmc_2d,
                        lle_lcmc_2d],
                       index=['SNE', 'Isomap', 'LLE']).T
# The LCMC lists were computed for k in range(2, max_k), so they start at
# k = 2. Index the rows by that k so the x-axis is not shifted by two.
lcmc_df.index = pd.Index(list(range(2, max_k)), name='k')
ax = lcmc_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('LCMC')
plt.savefig('figures/quality_measures/texture_lcmc_2d.png', dpi=300)

3D Mappings



In [89]:

    
# Trustworthiness of each 3-D mapping for k = 1 .. max_k - 1.
SNE_trustworthiness_3d, iso_trustworthiness_3d, lle_trustworthiness_3d = (
    [mia.coranking.trustworthiness(cm, k) for k in range(1, max_k)]
    for cm in (SNE_mapping_3d_cm, iso_mapping_3d_cm, lle_mapping_3d_cm)
)



In [90]:

    
# One column per method, one row per evaluated k.
trustworthiness3d_df = pd.DataFrame([SNE_trustworthiness_3d,
                                     iso_trustworthiness_3d,
                                     lle_trustworthiness_3d],
                                    index=['SNE', 'Isomap', 'LLE']).T
# The lists were computed for k in range(1, max_k). Index the rows by that k
# so the x-axis shows k itself rather than a 0-based row position.
trustworthiness3d_df.index = pd.Index(list(range(1, max_k)), name='k')
ax = trustworthiness3d_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('trustworthiness')
plt.savefig('figures/quality_measures/texture_trustworthiness_3d.png', dpi=300)



In [91]:

    
# Continuity of each 3-D mapping for k = 1 .. max_k - 1.
SNE_continuity_3d, iso_continuity_3d, lle_continuity_3d = (
    [mia.coranking.continuity(cm, k) for k in range(1, max_k)]
    for cm in (SNE_mapping_3d_cm, iso_mapping_3d_cm, lle_mapping_3d_cm)
)



In [92]:

    
# One column per method, one row per evaluated k.
continuity3d_df = pd.DataFrame([SNE_continuity_3d,
                                iso_continuity_3d,
                                lle_continuity_3d],
                               index=['SNE', 'Isomap', 'LLE']).T
# The lists were computed for k in range(1, max_k). Index the rows by that k
# so the x-axis shows k itself rather than a 0-based row position.
continuity3d_df.index = pd.Index(list(range(1, max_k)), name='k')
ax = continuity3d_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('continuity')
plt.savefig('figures/quality_measures/texture_continuity_3d.png', dpi=300)



In [93]:

    
# LCMC of each 3-D mapping for k = 2 .. max_k - 1.
SNE_lcmc_3d, iso_lcmc_3d, lle_lcmc_3d = (
    [mia.coranking.LCMC(cm, k) for k in range(2, max_k)]
    for cm in (SNE_mapping_3d_cm, iso_mapping_3d_cm, lle_mapping_3d_cm)
)



In [94]:

    
# One column per method, one row per evaluated k.
lcmc3d_df = pd.DataFrame([SNE_lcmc_3d,
                          iso_lcmc_3d,
                          lle_lcmc_3d],
                         index=['SNE', 'Isomap', 'LLE']).T
# The LCMC lists were computed for k in range(2, max_k), so they start at
# k = 2. Index the rows by that k so the x-axis is not shifted by two.
lcmc3d_df.index = pd.Index(list(range(2, max_k)), name='k')
ax = lcmc3d_df.plot()
# Label the axes so the saved figure can be read on its own.
ax.set_xlabel('k')
ax.set_ylabel('LCMC')
plt.savefig('figures/quality_measures/texture_lcmc_3d.png', dpi=300)

	contrast	dissimilarity	homogeneity	energy	contrast_1	dissimilarity_1	homogeneity_1	energy_1	contrast_2	dissimilarity_2	...	homogeneity_7	energy_7	contrast_8	dissimilarity_8	homogeneity_8	energy_8	contrast_9	dissimilarity_9	homogeneity_9	energy_9
p214-010-60001-ml.png	217.490041	11.182093	0.089156	0.068559	255.275936	11.383161	0.091460	0.052581	223.895510	10.859934	...	0.094498	0.014957	171.293827	10.346745	0.094648	0.014245	157.454233	9.781551	0.105151	0.019975
p214-010-60005-cr.png	153.433967	9.793844	0.097550	0.069263	258.457447	9.835271	0.120798	0.066882	148.101614	9.619357	...	0.133482	0.019024	206.110004	7.723766	0.155662	0.028688	132.380141	9.052372	0.108430	0.015630
p214-010-60008-cl.png	278.832070	12.986904	0.077585	0.069229	277.870310	12.970713	0.079270	0.052845	277.135918	12.925121	...	0.077955	0.019611	262.540916	12.908503	0.077737	0.018621	257.790604	12.788839	0.078467	0.019016
p214-010-60012-cl.png	228.203678	11.830961	0.083644	0.068756	236.048146	11.893532	0.084507	0.050689	205.001806	11.381168	...	0.108649	0.025046	188.258062	10.873996	0.089765	0.014025	157.454233	9.781551	0.105151	0.019975
p214-010-60013-ml.png	233.480009	12.147959	0.081455	0.067923	243.073751	12.150686	0.083336	0.051553	221.442525	11.778895	...	0.082859	0.014600	230.437848	12.089526	0.081013	0.015532	203.846013	11.353827	0.086053	0.013723

	KS	p-value
contrast	0.381944	5.383186e-09
dissimilarity	1.000000	3.587622e-59
homogeneity	1.000000	3.587622e-59
energy	1.000000	3.587622e-59
contrast_1	0.586111	1.318746e-20
dissimilarity_1	1.000000	3.587622e-59
homogeneity_1	1.000000	3.587622e-59
energy_1	1.000000	3.587622e-59
contrast_2	0.863889	2.874007e-44
dissimilarity_2	1.000000	3.587622e-59
homogeneity_2	1.000000	3.587622e-59
energy_2	1.000000	3.587622e-59
contrast_3	0.923611	1.538596e-50
dissimilarity_3	1.000000	3.587622e-59
homogeneity_3	1.000000	3.587622e-59
energy_3	1.000000	3.587622e-59
contrast_4	0.845833	1.870608e-42
dissimilarity_4	1.000000	3.587622e-59
homogeneity_4	1.000000	3.587622e-59
energy_4	1.000000	3.587622e-59
contrast_5	0.979167	9.485680e-57
dissimilarity_5	1.000000	3.587622e-59
homogeneity_5	1.000000	3.587622e-59
energy_5	1.000000	3.587622e-59
contrast_6	1.000000	3.587622e-59
dissimilarity_6	1.000000	3.587622e-59
homogeneity_6	1.000000	3.587622e-59
energy_6	0.994444	1.605940e-58
contrast_7	0.969444	1.230285e-55
dissimilarity_7	1.000000	3.587622e-59
homogeneity_7	1.000000	3.587622e-59
energy_7	1.000000	3.587622e-59
contrast_8	0.986111	1.497318e-57
dissimilarity_8	1.000000	3.587622e-59
homogeneity_8	1.000000	3.587622e-59
energy_8	0.997222	7.598385e-59
contrast_9	1.000000	3.587622e-59
dissimilarity_9	1.000000	3.587622e-59
homogeneity_9	1.000000	3.587622e-59
energy_9	1.000000	3.587622e-59