Improving superconductivity in BaFe2As2-based crystals by cobalt clustering and electronic uniformity

Li Li, Qiang Zheng, Qiang Zou, Shivani Rajput, Anota Ijaduola, Zhiming Wu, Xiaoping Wang, Huibo Cao, Suhas Somnath, Stephen Jesse, Miaofang Chi, Zheng Gai, David Parker, and Athena Sefat

Scientific Reports

Notebook written by: Suhas Somnath

2/9/2016


In [ ]:
# Ensure python 3 compatibility:
from __future__ import division, print_function, absolute_import

from os import path
import numpy as np
import h5py

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable

from scipy.signal import medfilt
from scipy.ndimage.filters import gaussian_filter
from sklearn.utils.extmath import randomized_svd
from sklearn.cluster import KMeans

import pycroscopy as px
from pycroscopy.io.translators.omicron_asc import AscTranslator

# set up notebook to show plots within the notebook
% matplotlib inline

In [ ]:
#%% Load file
file_path = r"E:\Shivani\STS_Au-Ba122 crystal\I(V) TraceUp Tue Sep 20 09_17_08 2016 [14-1]  STM_Spectroscopy STM.asc"
folder_path, file_name = path.split(file_path)
file_name = file_name[:-4] + '_'

In [ ]:
tran = AscTranslator()
h5_path = tran.translate(file_path)

hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]

In [ ]:
x_label = 'Bias (V)'
y_label = 'Current (a.u.)'

In [ ]:
def print_hdf_tree(h5_obj):
    print(h5_obj.name)
    if isinstance(h5_obj, h5py.Group):
        for child in h5_obj:
            print_hdf_tree(h5_obj[child])

In [ ]:
print('Datasets and datagroups within the file:\n------------------------------------')
print_hdf_tree(h5_main.file)
 
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('\nThe ancillary datasets:\n------------------------------------')
print(hdf.file['/Measurement_000/Channel_000/Position_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Position_Values'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Values'])

print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in hdf.file['/Measurement_000'].attrs:
    print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))

In [ ]:
# Read some basic parameters
h5_meas_grp = hdf.file['/Measurement_000']
num_rows = int(h5_meas_grp.attrs['y-pixels'])
num_cols = int(h5_meas_grp.attrs['x-pixels'])
num_pos = num_rows * num_cols
spectra_length = int(h5_meas_grp.attrs['z-points'])
volt_vec = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[-1][0]

In [ ]:
raw_data_3d = np.reshape(h5_main[()], (num_rows, num_cols, spectra_length))
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('slice of data - use this for cropping the data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Response at a single pixel')
fig.tight_layout()

In [ ]:
#%% User supplied information for cropping data:
start_row = 30  # 0 is the first row
end_row = num_rows  # num_rows is the last row
start_col = 0
end_col = num_cols  # num_cols is the last column
volt_chop = 0.3

In [ ]:
# now crop the data according the specifications above:
volt_index = np.where(np.logical_and(volt_vec >= -volt_chop, volt_vec <= volt_chop))[0]
volt_vec = volt_vec[volt_index]

raw_data_3d = raw_data_3d[start_row:end_row, start_col:end_col, volt_index]
num_rows, num_cols, spectra_length = raw_data_3d.shape
raw_data_2d = raw_data_3d.reshape(-1, spectra_length)

fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('Cropped data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Cropped response at a single pixel')
fig.tight_layout()

In [ ]:
#%% Do k-means to identify the principal response types and where (spatially) the occur in the data
num_clusters = 16  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(raw_data_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (raw_data_3d.shape[0], raw_data_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_V_' + str(num_clusters) + '_clusters.png'),
                format='png', dpi=300)

In [ ]:
# throw out bad points using a threshold. Works better than finding outliers using K-means
current_threshold = 0.2
bad_pixels = np.where(np.max(np.abs(raw_data_2d), axis=1) > current_threshold)[0]

# find the pixels that are good:
good_pixels = [item for item in np.arange(raw_data_2d.shape[0]) if item not in bad_pixels]

# find the average of all the good pixels
good_resp = np.mean(raw_data_2d[good_pixels], axis=0)

fig, axis = plt.subplots(ncols=2, figsize=(10, 5))
axis[0].set_title('Bad pixels')
px.plot_utils.plot_line_family(axis[0], volt_vec, raw_data_2d[bad_pixels], label_prefix='Cluster')
axis[1].set_title('Mean of all other pixels')
axis[1].plot(volt_vec, good_resp)
fig.savefig(path.join(folder_path, file_name + '_outliers_removal.png'), format='png', dpi=300)

# now replace bad pixels by the mean of the good pixels
outlier_free_2d = np.copy(raw_data_2d)
outlier_free_2d[bad_pixels] = good_resp
outlier_free_3d = np.reshape(outlier_free_2d, raw_data_3d.shape)

In [ ]:
estimators = KMeans(num_clusters)
results = estimators.fit(outlier_free_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (outlier_free_3d.shape[0], outlier_free_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'After_removing_outliers_K_means_V_' + str(num_clusters) + '_clusters.png'),
                format='png', dpi=300)

In [ ]:
num_comps = 256
U, S, V = randomized_svd(outlier_free_2d, num_comps, n_iter=3)

fig_V, axes_V = px.plot_utils.plot_loops(volt_vec, V, title='SVD Eigenvectors',
                                         evenly_spaced=False, plots_on_side=4, use_rainbow_plots=False,
                                         x_label=x_label, y_label=y_label, subtitles='Eigenvector')
fig_V.savefig(path.join(folder_path, file_name + 'SVD_Eigenvectors.png'), format='png', dpi=200)

loadings = np.reshape(U, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
fig_U, axes_U = px.plot_utils.plot_map_stack(loadings, num_comps=16, cmap=px.plot_utils.cmap_jet_white_center())
fig_U.savefig(path.join(folder_path, file_name + 'SVD_Eigenvalues.png'), format='png', dpi=150)

fig_S, axes_S = px.plot_utils.plotScree(S)
fig_S.savefig(path.join(folder_path, file_name + 'SVD_S.png'), format='png', dpi=300)

In [ ]:
#%% SVD filtering - Reconstructing the original data with a subset of the SVD components to remove noise present in the senior components
# Note that this method of filtering does NOT downsample the data
filter_comps = 32
filtered_2d = np.dot(np.dot(U[:, :filter_comps], np.eye(filter_comps)*S[:filter_comps]), V[:filter_comps, :])
filtered_3d = np.reshape(filtered_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))

# Compare the raw and filtered data
fig_filt, axes = px.plot_utils.plot_loops(volt_vec, [outlier_free_2d, filtered_2d], line_colors=['k', 'r'],
                                          dataset_names=['Raw','SVD Filtered']
                                          y_label='Current (a.u.)', title='SVD filtering')
fig_filt.savefig(path.join(folder_path, file_name + '_SVD_Filtered_data_I_' + str(filter_comps) + '_comps.png'),
                 format='png', dpi=300)

In [ ]:
# Try a Gaussian filter on the stack:
gaus_sigma = 0.35
gaus_cycles = 1
source_dset = filtered_3d.copy()
for cyc_ind in range(gaus_cycles):
    sink_dset = np.zeros(raw_data_3d.shape)
    for volt_ind in range(spectra_length):
        volt_slice = source_dset[:, :, volt_ind]
        sink_dset[:, :, volt_ind] = gaussian_filter(volt_slice, gaus_sigma)
    source_dset = sink_dset.copy()
gaus_filt_3d = sink_dset
# See some example loops:
gaus_filt_2d = np.reshape(gaus_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [filtered_2d, gaus_filt_2d], line_colors=['k', 'r'],
                                         dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
                                         title='Gaussian Filter')
fig.savefig(path.join(folder_path, file_name + 'Gaussian_Filter_sigma_' + str(gaus_sigma) + '_cycles_' +
                      str(gaus_cycles) + '.png'), format='png', dpi=300)

In [ ]:
#%% Offset the data in the current axis by setting the current at 0V to 0

mid_pt = int(volt_vec.size * 0.5)
offsets = np.mean(gaus_filt_2d[:, mid_pt-1:mid_pt+1], axis=1)
offseted_data_2d = gaus_filt_2d - np.repeat(np.atleast_2d(offsets).transpose(), spectra_length, axis=1)
offseted_data_3d = np.reshape(offseted_data_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))

#%% Plotting an example IV to check correction
row_ind = 15
col_ind = 12

fig, ax = plt.subplots()
ax.plot(volt_vec, filtered_3d[row_ind, col_ind], 'bo-', label='Raw')
ax.plot(volt_vec, offseted_data_3d[row_ind, col_ind], 'r*-', label='Offsetted')
ax.plot([volt_vec[0], volt_vec[-1]], [0, 0], 'k')
ax.plot([0, 0], [np.min(filtered_3d[row_ind, col_ind]), np.max(filtered_3d[row_ind, col_ind])], 'k')
# ax.set_xlim([volt_vec[mid_pt-5], volt_vec[mid_pt + 5]])
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
extents = 5
ax.set_xlim([volt_vec[mid_pt - extents], volt_vec[mid_pt + extents]])
chosen_sections_1 = filtered_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
chosen_sections_2 = offseted_data_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
ax.set_ylim([np.min(np.hstack([chosen_sections_1, chosen_sections_2])),
             np.max(np.hstack([chosen_sections_1, chosen_sections_2]))])
ax.set_title('Ofsetting the current')
ax.legend(loc='best')
fig.savefig(path.join(folder_path, file_name + '_offsetting_current.png'), format='png', dpi=300)

In [ ]:
#%% Calculating dI/dV now:
dIdV_3d = np.diff(offseted_data_3d, axis=2)
# Adding 0 to spectral axis since we lose one index due to differential
dIdV_3d = np.dstack((dIdV_3d, np.zeros(shape=(offseted_data_3d.shape[0], offseted_data_3d.shape[1]))))
dIdV_2d = np.reshape(dIdV_3d, offseted_data_2d.shape)

In [ ]:
# Apply some median filters:
med_filt_size = 3
dIdV_filt_3d = medfilt(dIdV_3d, [1, 1, med_filt_size])
dIdV_filt_2d = np.reshape(dIdV_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [dIdV_2d, dIdV_filt_2d], line_colors=['k', 'r'],
                                         dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
                                         title='Median Filter')
fig.savefig(path.join(folder_path, file_name + 'Median_Filter_size_' + str(med_filt_size) + '.png'), format='png', dpi=300)

In [ ]:
num_clusters = 8  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(dIdV_filt_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (dIdV_filt_3d.shape[0], dIdV_filt_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_filtered_dIdV_' +
                          str(num_clusters) + '_clusters.png'), format='png', dpi=300)

In [ ]:
# Downsample and average
box_size = 3
down_samp_didv_3d = np.zeros((dIdV_filt_3d.shape[0]-box_size + 1,
                              dIdV_filt_3d.shape[1] - box_size + 1,
                              dIdV_filt_3d.shape[2]))
down_samp_iv_3d = np.zeros(down_samp_didv_3d.shape)
for row_ind in range(down_samp_didv_3d.shape[0]):
    row_start = row_ind
    row_end = row_ind + box_size
    for col_ind in range(down_samp_didv_3d.shape[1]):
        col_start = col_ind
        col_end = col_ind + box_size

        box_didv_data = np.reshape(dIdV_filt_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_di_dv = np.mean(box_didv_data, axis=0)
        box_iv_data = np.reshape(offseted_data_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_iv = np.mean(box_iv_data, axis=0)
        offset_iv = mean_iv - np.mean(mean_iv[mid_pt - 1:mid_pt + 1])
        #shifted_di_dv = mean_di_dv - np.min(mean_di_dv[int(0.5*spectra_length) - 2: int(0.5*spectra_length) + 2])
        ldos = volt_vec * mean_di_dv / offset_iv
        down_samp_iv_3d[row_ind, col_ind] = offset_iv
        down_samp_didv_3d[row_ind, col_ind] = mean_di_dv

down_samp_didv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))
down_samp_iv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))

In [ ]:
num_clusters = 8  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(down_samp_didv_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (down_samp_didv_3d.shape[0], down_samp_didv_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_downsampled_dIdV_' + str(box_size) + '_box_' +
                          str(num_clusters) + '_clusters.png'), format='png', dpi=300)

In [ ]:
y = centroids[-1]
x = volt_vec
from scipy.interpolate import interp1d
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(volt_vec[0], volt_vec[-1], num=250, endpoint=True)
plt.plot(x, y, 'k', xnew, f2(xnew), 'r')

In [ ]: