In [107]:
%matplotlib inline
import pandas as pd
import numpy as np
import mia
import re
In [46]:
# Load the real-image intensity features; the first CSV column becomes the index.
# DataFrame.from_csv was deprecated and later removed from pandas; read_csv with
# index_col=0 and parse_dates=True reproduces from_csv's defaults.
hologic = pd.read_csv('../2015-03-28-real-intensity.csv', index_col=0, parse_dates=True)
hologic.head()
Out[46]:
In [3]:
# Build the meta data (including the BIRADS class) for the real images.
# The original passed `hologic_cluster`, a name this notebook never defines, so the
# cell raised NameError on a fresh kernel. `hologic` is the only real-image frame the
# notebook loads.
# NOTE(review): assumes the helper accepts the intensity frame the same way it
# accepted the old `hologic_cluster` frame - confirm against mia.analysis.
hologic_meta = mia.analysis.create_hologic_meta_data(hologic, '../data/BIRADS.csv')
hologic_meta.head()
Out[3]:
In [94]:
# Keep only the kurtosis / skew / mean columns, in the order they appear in `hologic`.
# Built as a list (not `filter(...)`) because the result is reused by several later
# cells; on Python 3 a filter object is a one-shot iterator and cannot index a frame.
selected_columns = [col for col in hologic.columns if col in ['kurtosis', 'skew', 'mean']]
In [77]:
# Run mia's tSNE helper on the real-image features (verbose output, learning rate 300).
# Later cells plot columns 0 and 1 of the result and save it to CSV.
mapping = mia.analysis.tSNE(hologic, verbose=2, learning_rate=300)
In [79]:
# 2D scatter of mapping columns 0 and 1, with the BIRADS labels passed as the third argument.
mia.plotting.plot_scatter_2d(mapping, [0,1], hologic_meta.BIRADS)
Out[79]:
In [95]:
# 3D scatter of the selected feature columns of `hologic`, with the BIRADS labels passed alongside.
mia.plotting.plot_scatter_3d(hologic, selected_columns, hologic_meta.BIRADS)
In [44]:
# Save the tSNE mapping of the real images to CSV (path is relative to the notebook's working directory).
mapping.to_csv('../2015-03-28-real-intensity-mapping.csv')
Scatter matrix of the properties
In [73]:
# Selected feature columns plus the BIRADS label, built as a fresh frame so `hologic`
# itself is left untouched, then handed to mia's scatter-matrix plot keyed on 'BIRADS'.
h = hologic[selected_columns].assign(BIRADS=hologic_meta.BIRADS)
mia.plotting.plot_scattermatrix(h, 'BIRADS')
In [72]:
# Attach the BIRADS labels to the mapping as a 'class' column, then pass the mapping
# (columns 0 and 1) to mia's JSON dump helper with target '../mapping_viz/data.json'.
# NOTE: this mutates `mapping` in place, so any cell that uses `mapping` after this
# one has run sees the extra 'class' column.
mapping['class'] = hologic_meta.BIRADS
mia.io_tools.dump_mapping_to_json(mapping, [0,1], '../mapping_viz/data.json')
In [104]:
# Split the real-image features into the BIRADS 1 and BIRADS 4 rows and show the
# difference of their summary statistics (class 1 minus class 4).
class_1 = hologic[hologic_meta.BIRADS.eq(1)]
class_4 = hologic[hologic_meta.BIRADS.eq(4)]
class_1.describe().sub(class_4.describe())
Out[104]:
In [93]:
# Radviz plot of the normalized selected features, keyed on the BIRADS class.
hol_norm = mia.analysis.normalize_data_frame(hologic)
# Re-label the normalized frame with hologic's column names.
hol_norm.columns = hologic.columns
# .copy() so the BIRADS column below is written to an independent frame rather than
# to a slice of the normalized data (avoids SettingWithCopyWarning).
hol_norm = hol_norm[selected_columns].copy()
hol_norm['BIRADS'] = hologic_meta.BIRADS
# pd.tools.plotting was deprecated in pandas 0.20 and later removed; prefer
# pd.plotting and fall back to the old location only on very old installations.
(pd.plotting if hasattr(pd, 'plotting') else pd.tools.plotting).radviz(hol_norm, 'BIRADS')
Out[93]:
In [112]:
# Load the phantom (synthetic) intensity features; the first CSV column becomes the index.
# DataFrame.from_csv was deprecated and later removed from pandas; read_csv with
# index_col=0 and parse_dates=True reproduces from_csv's defaults.
phantoms = pd.read_csv('../2015-03-28-phantom-intensity.csv', index_col=0, parse_dates=True)
phantoms.head()
Out[112]:
In [113]:
# Build the meta data for the phantom images.
# NOTE(review): the meta-data path is an absolute path on an external volume; the
# cell only runs on a machine where that volume is mounted.
phantom_meta = mia.analysis.create_synthetic_meta_data(phantoms, '/Volumes/Seagate/2015-03-26/synthetic_meta_data_cleaned.csv')
# Replace the non-specific BIRADS labels with a single class each
# ('3 or 4' -> 4, '2 (<lowercase word>)' -> 2) and store the column as float.
# The cleaned column is assigned back explicitly: an inplace replace on
# `phantom_meta.BIRADS` operates on an intermediate Series and is not guaranteed
# to update the frame (it does not under pandas copy-on-write).
phantom_meta['BIRADS'] = (
    phantom_meta['BIRADS']
    .replace('3 or 4', 4)
    .replace(re.compile(r'2 \([a-z]+\)'), 2)
    .astype(float)
)
phantom_meta.head()
Out[113]:
In [114]:
import random
# Group the phantom feature rows by phantom_meta.phantom_name.
group = phantoms.groupby(phantom_meta.phantom_name)
def select_random(x):
    """Return one randomly chosen row of ``x`` as a single-row selection.

    Parameters
    ----------
    x : pandas DataFrame (or Series)
        Object to draw from; must have at least one row.

    Returns
    -------
    The rows of ``x`` whose index label equals the randomly drawn label.

    Raises
    ------
    ValueError
        If ``x`` is empty (raised by ``random.sample``).
    """
    # random.sample needs a real sequence: a pandas Index is rejected on
    # Python 3.11+, so materialize the labels as a list first.
    chosen_label = random.sample(list(x.index), 1)
    # .loc replaces the removed .ix indexer; the lookup is label-based because
    # the label was drawn from x.index itself.
    return x.loc[chosen_label]
# Draw one random row per phantom group, then drop index level 0 of the result so the
# rows are labelled by the remaining index level only.
random_synthetic_features = group.apply(select_random).reset_index(level=0, drop=True)
random_synthetic_features
Out[114]:
In [115]:
# Stack the real-image features on top of the randomly selected synthetic features,
# and build the matching BIRADS label vector in the same order.
# The original concatenated `hologic_cluster`, a name this notebook never defines
# (NameError on a fresh kernel); `hologic` is the real-image frame the notebook loads.
# NOTE(review): confirm `hologic` is the intended real-image feature set here.
features = pd.concat([hologic, random_synthetic_features])
class_labels = pd.concat([hologic_meta.BIRADS, phantom_meta.loc[random_synthetic_features.index].BIRADS])
class_labels.shape
Out[115]:
In [122]:
# Run mia's tSNE helper on the combined real + synthetic features
# (verbose output, learning rate 300).
joint_mapping = mia.analysis.tSNE(features, verbose=2, learning_rate=300)
In [123]:
%matplotlib qt
joint_mapping['BIRADS'] = class_labels
hol_map = joint_mapping[:-6]
hol_map.shape
syn_map = joint_mapping[-6:]
syn_map.head()
ax = mia.plotting.plot_scatter_2d(hol_map, [0,1], 'BIRADS')
ax = mia.plotting.plot_scatter_2d(syn_map, [0,1], 'BIRADS', ax=ax, marker='^', s=50)
In [129]:
# Summary statistics for every column of the real-image feature frame.
hologic.describe()
Out[129]: