In [31]:
import numpy as np
from pymks.bases import PrimitiveBasis
In [32]:
?PrimitiveBasis
In [33]:
data_cat = np.array([[1, 2, 0]])
Use the primitive basis, or microstructure function, to discretize the data. This is the most straightforward basis to understand.
$ m_k[h; s] = \delta[h - \phi_k[s]] $
$ \delta [x] = \max \left( 1 - \left|\frac{x}{\Delta h}\right|, 0 \right) $
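As a quick illustration, the hat function above can be reproduced directly in NumPy. This is a minimal sketch, assuming `n_states` equally spaced states on the domain (as `PrimitiveBasis` uses); `primitive_discretize` is a hypothetical helper, not part of PyMKS.
import numpy as np

def primitive_discretize(x, n_states, domain):
    # Place the states phi_k uniformly across the domain.
    states = np.linspace(domain[0], domain[1], n_states)
    dh = states[1] - states[0]  # spacing Delta h
    # Hat weight for each state: max(1 - |x - phi_k| / dh, 0)
    return np.maximum(1 - np.abs(x[..., None] - states) / dh, 0)

print(primitive_discretize(np.array([[1, 2, 0]]), 3, [0, 2]))
# Each categorical value maps to a one-hot vector over the 3 states.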
In [34]:
basis = PrimitiveBasis(n_states=3, domain=[0, 2])
data_disc = basis.discretize(data_cat)
Discretizing the data adds a new index. The last index has the same length as `n_states`.
In [35]:
print(data_cat.shape)
print(data_disc.shape)
The categorical data now has a digital (one-hot) representation.
In [36]:
print(data_cat[0, 0])
print(data_disc[0, 0, :])
Try this with continuous data.
In [37]:
data_con = np.array([[0.1, 0.5, 0.3]])
In [38]:
basis = PrimitiveBasis(n_states=5, domain=[0, 1])
data_disc = basis.discretize(data_con)
In [39]:
print(data_con.shape)
print(data_disc.shape)
The continuous data varies between 0 and 1, so each value is split between the two nearest states. This is a first-order discretization of the $\delta$ function.
In [40]:
print(data_con[0, 0])
print(data_disc[0, 0])
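Hand-computing the first value confirms the interpolation: with 5 states on [0, 1] the spacing is 0.25, so 0.1 falls between the states at 0 and 0.25 with weights 0.6 and 0.4.
import numpy as np
states = np.linspace(0, 1, 5)  # [0, 0.25, 0.5, 0.75, 1]
weights = np.maximum(1 - np.abs(0.1 - states) / 0.25, 0)
print(weights)        # [0.6 0.4 0.  0.  0. ]
print(weights.sum())  # the weights always sum to 1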
Use a Legendre basis to discretize the data. Legendre polynomials are well suited to this discretization because they are orthogonal with respect to the $L^2$ inner product on $[-1, 1]$.
In [41]:
data_leg = np.array([[-0.5, 0.1, 0.9]])
In [42]:
from pymks.bases import LegendreBasis
basis = LegendreBasis(n_states=4, domain=[-1, 1])
In [43]:
data_disc = basis.discretize(data_leg)
print(data_disc.shape)
In [44]:
print(data_disc)
We can see how these are constructed using SciPy's `eval_legendre`.
In [45]:
from scipy.special import eval_legendre
In [46]:
eval_legendre(np.arange(4), -0.5) * (np.arange(4) + 0.5)
Out[46]:
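The `np.arange(4) + 0.5` factor is the standard Legendre normalization: $\int_{-1}^{1} P_n(x)^2 \, dx = 2/(2n+1)$, so scaling by $n + 1/2$ normalizes each term. A quick numerical check of the orthogonality:
import numpy as np
from scipy.special import eval_legendre
x = np.linspace(-1, 1, 10001)
P2, P3 = eval_legendre(2, x), eval_legendre(3, x)
print(np.trapz(P2 * P3, x))  # ~0: distinct orders are orthogonal
print(np.trapz(P2 * P2, x))  # ~0.4 = 2 / (2 * 2 + 1)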
In [47]:
from pymks.datasets import make_microstructure
In [48]:
?make_microstructure
Use a Fourier Gaussian filter to create artificial microstructures. The Fourier Gaussian filter comes from `scipy.ndimage`.
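The idea can be sketched in a few lines: smooth white noise with a Gaussian applied in Fourier space, then threshold it into phases. This is a rough sketch of the approach, not PyMKS's exact implementation; the `sigma` plays the role of the grain size.
import numpy as np
from scipy.ndimage import fourier_gaussian

np.random.seed(0)
noise = np.random.random((101, 101))
# Low-pass filter the noise in Fourier space, then threshold at the
# median to get a two-phase structure with roughly equal volume fractions.
smooth = np.fft.ifftn(fourier_gaussian(np.fft.fftn(noise), sigma=20)).real
micro = (smooth > np.median(smooth)).astype(int)
print(micro.shape, np.unique(micro))  # (101, 101) [0 1]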
In [49]:
data = make_microstructure(grain_size=(20, 20))
By default, `make_microstructure` generates 10 microstructures.
In [50]:
print(data.shape)
View the results using Matplotlib's `imshow`.
In [51]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'Paired'
plt.imshow(data[0]);
Construct a different kind of microstructure with a fiber-like (elongated) grain structure.
In [52]:
data_fiber = make_microstructure(grain_size=(95, 15))
View the results using `imshow`.
In [53]:
plt.imshow(data_fiber[0])
plt.show()
plt.imshow(data_fiber[1])
plt.show()
In [54]:
np.random.seed(0)
data_blob = make_microstructure(n_samples=50, grain_size=(30, 30))
data_fiber_v = make_microstructure(n_samples=50, grain_size=(100, 10))
data_fiber_h = make_microstructure(n_samples=50, grain_size=(10, 100))
In [55]:
plt.imshow(data_blob[2]);
In [56]:
plt.imshow(data_fiber_v[1]);
In [57]:
plt.imshow(data_fiber_h[0]);
In [58]:
data_fiber_v.shape
Out[58]:
Concatenate our data into a single array.
In [59]:
data = np.concatenate([data_blob, data_fiber_v, data_fiber_h], axis=0)
In [60]:
data.shape
Out[60]:
View all the data using Matplotlib subplots.
In [61]:
fig = plt.figure(figsize=(12, 12))
for i in range(12):
    for j in range(12):
        count = i * 12 + j
        ax = fig.add_subplot(12, 12, count + 1)
        ax.set_aspect('equal')
        img = plt.imshow(data[count])
        plt.axis('off')
        img.set_cmap('Paired')
In [62]:
from pymks.stats import correlate
basis = PrimitiveBasis(n_states=2, domain=[0, 1])
data_corr = correlate(data, basis=basis)
In [63]:
data_corr.shape
Out[63]:
The two-point statistics reflect the shapes of the structures. For two phases the correlations are not independent: at every vector the two autocorrelations plus the cross-correlation and its spatial reflection sum to 1, as the cell after the plots checks.
In [64]:
fig, axs = plt.subplots(1, 3)
for i, ax in enumerate(axs):
    im = ax.imshow(data_corr[0, :, :, i], vmin=0.0, vmax=1.0, cmap="viridis")
fig.set_size_inches(15, 5)
fig.colorbar(im, ax=axs.ravel().tolist());
In [65]:
print(data_corr[0, :, :, 0] + data_corr[0, :, :, 1] + data_corr[0, :, :, 2] + data_corr[0, ::-1, ::-1, 2])
In [66]:
fig, axs = plt.subplots(1, 3)
for i, ax in enumerate(axs):
    im = ax.imshow(data_corr[50, :, :, i], vmin=0.0, vmax=1.0, cmap="viridis")
fig.set_size_inches(15, 5)
fig.colorbar(im, ax=axs.ravel().tolist());
In [67]:
fig, axs = plt.subplots(1, 3)
for i, ax in enumerate(axs):
    im = ax.imshow(data_corr[-1, :, :, i], vmin=0.0, vmax=1.0, cmap="viridis")
fig.set_size_inches(15, 5)
fig.colorbar(im, ax=axs.ravel().tolist());
In [68]:
data_corr.shape
Out[68]:
In [69]:
data_reshape = np.reshape(data_corr, (data_corr.shape[0], data_corr[0].size))
In [70]:
print(data_reshape.shape)
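Each sample's three 101 × 101 correlation maps are flattened into a single feature row, giving 101 × 101 × 3 = 30603 features per sample:
# 150 samples, each with 101 * 101 * 3 = 30603 correlation features.
assert data_reshape.shape == (data_corr.shape[0], np.prod(data_corr.shape[1:]))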
In [71]:
data_mean = data_reshape - np.mean(data_reshape, axis=1)[:, None]
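Note that `axis=1` removes each sample's own mean rather than centering each feature column (scikit-learn's PCA will also center the features when it fits). A quick check that every row now averages to zero:
print(np.allclose(data_mean.mean(axis=1), 0))  # True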
In [72]:
from sklearn.decomposition import PCA
pca_model = PCA(n_components=5)
In [73]:
data_pca = pca_model.fit_transform(data_mean)
In [74]:
data_pca.shape
Out[74]:
In [75]:
# NBVAL_IGNORE_OUTPUT
colors = ['r'] * 50 + ['b'] * 50 + ['g'] * 50
fig, axss = plt.subplots(5, 5)
for i, axs in enumerate(axss):
    for j, ax in enumerate(axs):
        ax.scatter(data_pca[:, i], data_pca[:, j], color=colors)
fig.set_size_inches(12, 12)
fig.show()
In [76]:
classification = np.concatenate([np.zeros(50), np.ones(50), np.ones(50) + 1]).astype(int)
In [77]:
# NBVAL_SKIP
from bqplot import pyplot as bq_plt
bq_plt.figure(title="PCA2 v PCA3")
bq_plt.scatter(data_pca[:, 2], data_pca[:, 3], color=classification)
bq_plt.show()
In [78]:
# NBVAL_IGNORE_OUTPUT
%timeit correlate(data, basis=basis)
In [351]:
print(data.shape)
In [352]:
data_corr.shape
Out[352]:
In [353]:
import dask.array as da
data_da = da.from_array(data, chunks=(5, 101, 101))
print(data_da.shape)
print(data_da.chunks)
In [354]:
# NBVAL_IGNORE_OUTPUT
import dask.threaded
import dask.multiprocessing
def correlate_(data_):
    basis = PrimitiveBasis(n_states=2, domain=[0, 1])
    return correlate(data_, basis=basis)
data_da = da.from_array(data, chunks=(5, 101, 101))
map_ = da.map_blocks(correlate_, data_da, new_axis=1)
%timeit map_.compute(num_workers=2, get=dask.multiprocessing.get)
In [355]:
# NBVAL_IGNORE_OUTPUT
%timeit map_.compute(num_workers=4, get=dask.multiprocessing.get)
In [356]:
# NBVAL_IGNORE_OUTPUT
%timeit map_.compute(get=dask.threaded.get)
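The `get=` keyword reflects the Dask API of the time; on newer Dask releases the scheduler is selected by name instead. A sketch of the modern equivalents, assuming Dask >= 0.19:
# Modern equivalents of the two compute calls above:
# map_.compute(num_workers=4, scheduler='processes')
# map_.compute(scheduler='threads')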
In [357]:
from sklearn.linear_model import LogisticRegression
In [358]:
model = LogisticRegression(C=1)
model.fit(data_pca, classification)
Out[358]:
In [359]:
classification_pred = model.predict(data_pca)
In [360]:
# NBVAL_IGNORE_OUTPUT
from sklearn.metrics import confusion_matrix
confusion_matrix(classification, classification_pred)
Out[360]:
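In scikit-learn's convention the rows of the confusion matrix are the true classes and the columns the predicted classes, so the diagonal counts correct classifications:
mat = confusion_matrix(classification, classification_pred)
print(mat.trace(), "of", len(classification), "correct")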
In [361]:
X, y = np.reshape(data, (data.shape[0], data[0].size)), classification
In [362]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
In [363]:
print(X_train.shape)
In [364]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=2)
train_index, test_index = next(skf.split(X, y))
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
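`StratifiedKFold` preserves the class proportions in each fold, so with 50 samples per class and 2 splits, each half should hold 25 of each class:
print(np.bincount(y_train))  # [25 25 25]
print(np.bincount(y_test))   # [25 25 25]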
In [365]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
def transform(X):
    X_ = np.reshape(X, (X.shape[0],) + data[0].shape)
    basis = PrimitiveBasis(n_states=2, domain=[0, 1])
    X_ = correlate(X_, basis=basis)
    X_ = np.reshape(X_, (X_.shape[0], X_[0].size))
    return X_ - np.mean(X_, axis=1)[:, None]
transformer = FunctionTransformer(transform)
model = Pipeline([('transformer', transformer),
                  ('pca', PCA(n_components=5)),
                  ('classifier', LogisticRegression())])
In [366]:
# NBVAL_IGNORE_OUTPUT
model.fit(X_train, y_train)
Out[366]:
In [367]:
y_pred = model.predict(X_test)
In [368]:
# NBVAL_IGNORE_OUTPUT
mat = confusion_matrix(y_test, y_pred)
print(mat)
In [369]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(scores)
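For a single comparable number, summarize the folds with their mean and spread:
print(scores.mean(), scores.std())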