Li Li, Qiang Zheng, Qiang Zou, Shivani Rajput, Anota Ijaduola, Zhiming Wu, Xiaoping Wang, Huibo Cao, Suhas Somnath, Stephen Jesse, Miaofang Chi, Zheng Gai, David Parker, and Athena Sefat
Notebook written by: Suhas Somnath
2/9/2016
In [ ]:
# Ensure python 3 compatibility:
from __future__ import division, print_function, absolute_import
from os import path
import numpy as np
import h5py
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.signal import medfilt
from scipy.ndimage import gaussian_filter
from sklearn.utils.extmath import randomized_svd
from sklearn.cluster import KMeans
import pycroscopy as px
from pycroscopy.io.translators.omicron_asc import AscTranslator
# set up notebook to show plots within the notebook
%matplotlib inline
In [ ]:
#%% Load file
file_path = r"E:\Shivani\STS_Au-Ba122 crystal\I(V) TraceUp Tue Sep 20 09_17_08 2016 [14-1] STM_Spectroscopy STM.asc"
folder_path, file_name = path.split(file_path)
file_name = file_name[:-4] + '_'  # strip the '.asc' extension; saved figures will share this prefix
In [ ]:
tran = AscTranslator()
h5_path = tran.translate(file_path)
hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]
In [ ]:
x_label = 'Bias (V)'
y_label = 'Current (a.u.)'
In [ ]:
def print_hdf_tree(h5_obj):
    print(h5_obj.name)
    if isinstance(h5_obj, h5py.Group):
        for child in h5_obj:
            print_hdf_tree(h5_obj[child])
In [ ]:
print('Datasets and datagroups within the file:\n------------------------------------')
print_hdf_tree(h5_main.file)
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('\nThe ancillary datasets:\n------------------------------------')
print(hdf.file['/Measurement_000/Channel_000/Position_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Position_Values'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Values'])
print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in hdf.file['/Measurement_000'].attrs:
    print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))
In [ ]:
# Read some basic parameters
h5_meas_grp = hdf.file['/Measurement_000']
num_rows = int(h5_meas_grp.attrs['y-pixels'])
num_cols = int(h5_meas_grp.attrs['x-pixels'])
num_pos = num_rows * num_cols
spectra_length = int(h5_meas_grp.attrs['z-points'])
volt_vec = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[-1][0]
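A quick sanity check (a minimal sketch using only the variables read above) confirms that these attributes agree with the shape of the main dataset:
In [ ]:
# Sanity check (sketch): the main dataset should be (num_rows * num_cols) x spectra_length
print('Main dataset shape: {}'.format(h5_main.shape))
print('Expected shape: ({}, {})'.format(num_pos, spectra_length))
print('Bias vector: {} points spanning {:.2f} V to {:.2f} V'.format(len(volt_vec), volt_vec[0], volt_vec[-1]))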
In [ ]:
raw_data_3d = np.reshape(h5_main[()], (num_rows, num_cols, spectra_length))
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('slice of data - use this for cropping the data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Response at a single pixel')
fig.tight_layout()
In [ ]:
#%% User supplied information for cropping data:
start_row = 30 # 0 is the first row
end_row = num_rows # num_rows is the last row
start_col = 0
end_col = num_cols # num_cols is the last column
volt_chop = 0.3  # retain only biases within +/- volt_chop (V)
In [ ]:
# now crop the data according to the specifications above:
volt_index = np.where(np.logical_and(volt_vec >= -volt_chop, volt_vec <= volt_chop))[0]
volt_vec = volt_vec[volt_index]
raw_data_3d = raw_data_3d[start_row:end_row, start_col:end_col, volt_index]
num_rows, num_cols, spectra_length = raw_data_3d.shape
raw_data_2d = raw_data_3d.reshape(-1, spectra_length)
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('Cropped data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Cropped response at a single pixel')
fig.tight_layout()
In [ ]:
#%% Do k-means to identify the principal response types and where (spatially) they occur in the data
num_clusters = 16 # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(raw_data_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (raw_data_3d.shape[0], raw_data_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_V_' + str(num_clusters) + '_clusters.png'),
format='png', dpi=300)
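The number of clusters is a free parameter; a quick inertia ("elbow") scan, sketched below, is one way to guide the choice. This is an optional check and not part of the original workflow.
In [ ]:
# Optional sketch: k-means inertia for a few candidate cluster counts
candidate_k = [2, 4, 8, 16, 32]
inertias = [KMeans(k).fit(raw_data_2d).inertia_ for k in candidate_k]
fig_elbow, ax_elbow = plt.subplots()
ax_elbow.plot(candidate_k, inertias, 'o-')
ax_elbow.set_xlabel('Number of clusters')
ax_elbow.set_ylabel('K-means inertia')
ax_elbow.set_title('Elbow scan - look for the knee in the curve')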
In [ ]:
# Remove bad pixels using a simple current threshold; this works better than identifying outliers with k-means
current_threshold = 0.2
bad_pixels = np.where(np.max(np.abs(raw_data_2d), axis=1) > current_threshold)[0]
# find the pixels that are good:
good_pixels = [item for item in np.arange(raw_data_2d.shape[0]) if item not in bad_pixels]
# find the average of all the good pixels
good_resp = np.mean(raw_data_2d[good_pixels], axis=0)
fig, axis = plt.subplots(ncols=2, figsize=(10, 5))
axis[0].set_title('Bad pixels')
px.plot_utils.plot_line_family(axis[0], volt_vec, raw_data_2d[bad_pixels], label_prefix='Cluster')
axis[1].set_title('Mean of all other pixels')
axis[1].plot(volt_vec, good_resp)
fig.savefig(path.join(folder_path, file_name + '_outliers_removal.png'), format='png', dpi=300)
# now replace bad pixels by the mean of the good pixels
outlier_free_2d = np.copy(raw_data_2d)
outlier_free_2d[bad_pixels] = good_resp
outlier_free_3d = np.reshape(outlier_free_2d, raw_data_3d.shape)
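It can also help to see where the thresholded pixels sit spatially. The sketch below maps the per-pixel maximum |current| and flags everything above current_threshold:
In [ ]:
# Sketch: spatial map of the thresholded (bad) pixels
max_current_map = np.max(np.abs(raw_data_2d), axis=1).reshape(num_rows, num_cols)
bad_pixel_mask = max_current_map > current_threshold
fig_bad, axes_bad = plt.subplots(ncols=2, figsize=(8, 4))
axes_bad[0].imshow(max_current_map)
axes_bad[0].set_title('Max |current| per pixel')
axes_bad[1].imshow(bad_pixel_mask, cmap='gray')
axes_bad[1].set_title('{} pixels above threshold'.format(int(bad_pixel_mask.sum())))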
In [ ]:
estimators = KMeans(num_clusters)
results = estimators.fit(outlier_free_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (outlier_free_3d.shape[0], outlier_free_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'After_removing_outliers_K_means_V_' + str(num_clusters) + '_clusters.png'),
format='png', dpi=300)
In [ ]:
num_comps = 256
U, S, V = randomized_svd(outlier_free_2d, num_comps, n_iter=3)
fig_V, axes_V = px.plot_utils.plot_loops(volt_vec, V, title='SVD Eigenvectors',
evenly_spaced=False, plots_on_side=4, use_rainbow_plots=False,
x_label=x_label, y_label=y_label, subtitles='Eigenvector')
fig_V.savefig(path.join(folder_path, file_name + 'SVD_Eigenvectors.png'), format='png', dpi=200)
loadings = np.reshape(U, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
fig_U, axes_U = px.plot_utils.plot_map_stack(loadings, num_comps=16, cmap=px.plot_utils.cmap_jet_white_center())
fig_U.savefig(path.join(folder_path, file_name + 'SVD_Loadings.png'), format='png', dpi=150)
fig_S, axes_S = px.plot_utils.plotScree(S)
fig_S.savefig(path.join(folder_path, file_name + 'SVD_S.png'), format='png', dpi=300)
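The scree plot shows the individual singular values; the cumulative energy fraction, sketched below, gives a complementary view of how many of the computed components carry most of the signal (note it is relative to the num_comps components returned by randomized_svd, not the full dataset):
In [ ]:
# Sketch: cumulative energy fraction of the computed SVD components
energy = S ** 2
cum_frac = np.cumsum(energy) / np.sum(energy)
fig_cum, ax_cum = plt.subplots()
ax_cum.plot(np.arange(1, len(S) + 1), cum_frac, 'o-')
ax_cum.set_xlabel('Number of components retained')
ax_cum.set_ylabel('Cumulative energy fraction')
ax_cum.set_title('Energy captured vs. retained components')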
In [ ]:
#%% SVD filtering - reconstruct the data from the leading SVD components to remove the noise carried by the discarded, higher-order components
# Note that this method of filtering does NOT downsample the data
filter_comps = 32
filtered_2d = np.dot(np.dot(U[:, :filter_comps], np.diag(S[:filter_comps])), V[:filter_comps, :])
filtered_3d = np.reshape(filtered_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
# Compare the raw and filtered data
fig_filt, axes = px.plot_utils.plot_loops(volt_vec, [outlier_free_2d, filtered_2d], line_colors=['k', 'r'],
                                          dataset_names=['Raw', 'SVD Filtered'], x_label=x_label,
                                          y_label='Current (a.u.)', title='SVD filtering')
fig_filt.savefig(path.join(folder_path, file_name + '_SVD_Filtered_data_I_' + str(filter_comps) + '_comps.png'),
format='png', dpi=300)
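To judge whether filter_comps retains enough of the signal, the sketch below prints the root-mean-square reconstruction error for a few component counts (an optional check, relative to the outlier-free data):
In [ ]:
# Sketch: RMS reconstruction error as a function of retained SVD components
for n_keep in [4, 8, 16, 32, 64]:
    recon = np.dot(U[:, :n_keep] * S[:n_keep], V[:n_keep, :])
    rmse = np.sqrt(np.mean((outlier_free_2d - recon) ** 2))
    print('{:3d} components: RMSE = {:.3e}'.format(n_keep, rmse))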
In [ ]:
# Try a Gaussian filter on the stack:
gaus_sigma = 0.35
gaus_cycles = 1
source_dset = filtered_3d.copy()
for cyc_ind in range(gaus_cycles):
    sink_dset = np.zeros(raw_data_3d.shape)
    for volt_ind in range(spectra_length):
        volt_slice = source_dset[:, :, volt_ind]
        sink_dset[:, :, volt_ind] = gaussian_filter(volt_slice, gaus_sigma)
    source_dset = sink_dset.copy()
gaus_filt_3d = sink_dset
# See some example loops:
gaus_filt_2d = np.reshape(gaus_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [filtered_2d, gaus_filt_2d], line_colors=['k', 'r'],
                                         dataset_names=['SVD filtered', 'Gaussian filtered'], x_label=x_label, y_label=y_label,
title='Gaussian Filter')
fig.savefig(path.join(folder_path, file_name + 'Gaussian_Filter_sigma_' + str(gaus_sigma) + '_cycles_' +
str(gaus_cycles) + '.png'), format='png', dpi=300)
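Since the Gaussian filter is separable, the per-slice loop above can also be written as a single call with a per-axis sigma, where a sigma of zero along the spectral axis leaves each I(V) curve untouched. A sketch of the equivalent (the result should agree with the loop up to numerical precision):
In [ ]:
# Sketch: spatial-only Gaussian smoothing of the whole stack in one call per cycle
gaus_filt_3d_alt = filtered_3d.copy()
for _ in range(gaus_cycles):
    # sigma = 0 along the spectral (last) axis means no mixing between bias points
    gaus_filt_3d_alt = gaussian_filter(gaus_filt_3d_alt, sigma=(gaus_sigma, gaus_sigma, 0))
print('Max difference from the per-slice loop: {:.2e}'.format(np.max(np.abs(gaus_filt_3d_alt - gaus_filt_3d))))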
In [ ]:
#%% Offset the data in the current axis by setting the current at 0V to 0
mid_pt = int(volt_vec.size * 0.5)
offsets = np.mean(gaus_filt_2d[:, mid_pt - 1:mid_pt + 1], axis=1)
offseted_data_2d = gaus_filt_2d - offsets[:, np.newaxis]
offseted_data_3d = np.reshape(offseted_data_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
#%% Plotting an example IV to check correction
row_ind = 15
col_ind = 12
fig, ax = plt.subplots()
ax.plot(volt_vec, filtered_3d[row_ind, col_ind], 'bo-', label='SVD filtered')
ax.plot(volt_vec, offseted_data_3d[row_ind, col_ind], 'r*-', label='Offset corrected')
ax.plot([volt_vec[0], volt_vec[-1]], [0, 0], 'k')
ax.plot([0, 0], [np.min(filtered_3d[row_ind, col_ind]), np.max(filtered_3d[row_ind, col_ind])], 'k')
# ax.set_xlim([volt_vec[mid_pt-5], volt_vec[mid_pt + 5]])
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
extents = 5
ax.set_xlim([volt_vec[mid_pt - extents], volt_vec[mid_pt + extents]])
chosen_sections_1 = filtered_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
chosen_sections_2 = offseted_data_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
ax.set_ylim([np.min(np.hstack([chosen_sections_1, chosen_sections_2])),
np.max(np.hstack([chosen_sections_1, chosen_sections_2]))])
ax.set_title('Offsetting the current')
ax.legend(loc='best')
fig.savefig(path.join(folder_path, file_name + '_offsetting_current.png'), format='png', dpi=300)
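A quick verification (sketch) that the offset correction behaved as intended: after subtraction, the current averaged over the two bias points straddling 0 V should be essentially zero for every pixel.
In [ ]:
# Sketch: residual current at 0 V after offsetting should be ~0 everywhere
residual_at_zero = np.mean(offseted_data_2d[:, mid_pt - 1:mid_pt + 1], axis=1)
print('Largest residual current at 0 V: {:.2e}'.format(np.max(np.abs(residual_at_zero))))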
In [ ]:
#%% Calculating dI/dV now:
dIdV_3d = np.diff(offseted_data_3d, axis=2)
# Append a zero along the spectral axis since np.diff drops one point
dIdV_3d = np.dstack((dIdV_3d, np.zeros(shape=(offseted_data_3d.shape[0], offseted_data_3d.shape[1]))))
dIdV_2d = np.reshape(dIdV_3d, offseted_data_2d.shape)
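Note that np.diff gives an un-normalized finite difference and drops one spectral point (hence the zero padded above). If dI/dV in physical units (current per volt) is preferred, np.gradient keeps the original length and can be normalized by the bias step, as sketched below (not used downstream here):
In [ ]:
# Sketch: length-preserving dI/dV normalized by the bias step
dIdV_3d_grad = np.gradient(offseted_data_3d, axis=2) / np.gradient(volt_vec)
print('diff-based shape: {}, gradient-based shape: {}'.format(dIdV_3d.shape, dIdV_3d_grad.shape))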
In [ ]:
# Apply some median filters:
med_filt_size = 3
dIdV_filt_3d = medfilt(dIdV_3d, [1, 1, med_filt_size])
dIdV_filt_2d = np.reshape(dIdV_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [dIdV_2d, dIdV_filt_2d], line_colors=['k', 'r'],
dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
title='Median Filter')
fig.savefig(path.join(folder_path, file_name + 'Median_Filter_size_' + str(med_filt_size) + '.png'), format='png', dpi=300)
In [ ]:
num_clusters = 8  # change the number of clusters here; a small number (~8-16) is easy to interpret and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(dIdV_filt_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (dIdV_filt_3d.shape[0], dIdV_filt_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_filtered_dIdV_' +
str(num_clusters) + '_clusters.png'), format='png', dpi=300)
In [ ]:
# Average over a sliding box_size x box_size window (stride 1; the output shrinks by box_size - 1 in each spatial dimension)
box_size = 3
down_samp_didv_3d = np.zeros((dIdV_filt_3d.shape[0] - box_size + 1,
                              dIdV_filt_3d.shape[1] - box_size + 1,
                              dIdV_filt_3d.shape[2]))
down_samp_iv_3d = np.zeros(down_samp_didv_3d.shape)
for row_ind in range(down_samp_didv_3d.shape[0]):
    row_start = row_ind
    row_end = row_ind + box_size
    for col_ind in range(down_samp_didv_3d.shape[1]):
        col_start = col_ind
        col_end = col_ind + box_size
        # average the dI/dV curves within the box
        box_didv_data = np.reshape(dIdV_filt_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_di_dv = np.mean(box_didv_data, axis=0)
        # average the I(V) curves within the same box and re-zero them at 0 V
        box_iv_data = np.reshape(offseted_data_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_iv = np.mean(box_iv_data, axis=0)
        offset_iv = mean_iv - np.mean(mean_iv[mid_pt - 1:mid_pt + 1])
        # shifted_di_dv = mean_di_dv - np.min(mean_di_dv[int(0.5*spectra_length) - 2: int(0.5*spectra_length) + 2])
        # normalized conductance (dI/dV)/(I/V); computed for reference but not stored
        ldos = volt_vec * mean_di_dv / offset_iv
        down_samp_iv_3d[row_ind, col_ind] = offset_iv
        down_samp_didv_3d[row_ind, col_ind] = mean_di_dv
down_samp_didv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))
down_samp_iv_2d = np.reshape(down_samp_iv_3d, (-1, spectra_length))
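The sliding box average above can also be computed in one shot with scipy.ndimage.uniform_filter and then cropped to the fully overlapping (valid) region; the sketch below should reproduce the loop result up to floating-point round-off (it assumes an odd box_size, as used here):
In [ ]:
from scipy.ndimage import uniform_filter
# Sketch: centered box_size x box_size mean per bias point, spectral axis untouched
half = box_size // 2
smooth_didv = uniform_filter(dIdV_filt_3d, size=(box_size, box_size, 1))
# keep only windows that fit entirely inside the image to match the explicit loop
down_samp_didv_3d_alt = smooth_didv[half:dIdV_filt_3d.shape[0] - half, half:dIdV_filt_3d.shape[1] - half]
print('Max difference from the loop result: {:.2e}'.format(np.max(np.abs(down_samp_didv_3d_alt - down_samp_didv_3d))))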
In [ ]:
num_clusters = 8  # change the number of clusters here; a small number (~8-16) is easy to interpret and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(down_samp_didv_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (down_samp_didv_3d.shape[0], down_samp_didv_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_downsampled_dIdV_' + str(box_size) + '_box_' +
str(num_clusters) + '_clusters.png'), format='png', dpi=300)
In [ ]:
from scipy.interpolate import interp1d
# Interpolate the last cluster centroid onto a finer bias grid for a smoother curve
x = volt_vec
y = centroids[-1]
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(volt_vec[0], volt_vec[-1], num=250, endpoint=True)
plt.plot(x, y, 'k', xnew, f2(xnew), 'r')
plt.xlabel(x_label)
plt.ylabel('dI/dV (a.u.)')
In [ ]: