In [1]:
import parser_macl as ps
In [2]:
# Re-import the local parser module so that edits made to it since the first
# import are picked up (``reload`` is the Python 2 builtin), then read the
# ICSD ternaries CSV through the module's own loader.
reload(ps)
dt = ps.read_data("../ICSD/icsd-ternaries.csv")
In [3]:
from sklearn.preprocessing import StandardScaler
In [3]:
# Convert the loaded data into the array form produced by the parser module.
# NOTE(review): presumably a 2-D numeric array (rows = entries), since it is
# passed straight to StandardScaler below -- confirm in parser_macl.
dt_array = ps.get_array_form(dt)
In [5]:
# Fit a StandardScaler on the full array and keep the standardized copy;
# the fitted ``scaler`` stays available for transforming other data later.
scaler = StandardScaler()
dt_scaled = scaler.fit_transform(dt_array)
In [6]:
from sklearn.cluster import KMeans
In [7]:
# K-means estimator with 10 clusters; random_state is pinned so that
# re-running the notebook reproduces the same cluster assignment.
kmeans = KMeans(
    n_clusters=10,
    random_state=42,
)
In [8]:
# Fit the clustering on the standardized data and keep one cluster label
# per row of ``dt_scaled``.
labels = kmeans.fit_predict(dt_scaled)
In [9]:
from collections import Counter

# Show how many rows fell into each cluster.
# Written as a call with a single argument so the cell prints the same text
# under Python 2 (parenthesized expression) and also runs under Python 3,
# where the bare ``print x`` statement is a SyntaxError.
print(Counter(labels))
In [10]:
# Display the "Composition" entries of the rows assigned to cluster 5.
# NOTE(review): relies on dt["Composition"] accepting a boolean mask that is
# aligned row-for-row with ``labels`` -- confirm against ps.read_data.
dt["Composition"][labels == 5]
Out[10]:
In [7]:
# Work on the first 10000 rows only, so the pairwise distance matrix built
# from this slice stays at 10000 x 10000.
# NOTE(review): this slices the *unscaled* ``dt_array`` although the
# clustering above used ``dt_scaled`` -- confirm that is intended.
dt_slice = dt_array[:10000]
In [5]:
from sklearn.metrics.pairwise import euclidean_distances
In [8]:
# Pairwise Euclidean distances between every pair of rows in ``dt_slice``
# (a square matrix with one row and one column per sample).
dist = euclidean_distances(dt_slice)
In [16]:
# Sanity check: the distance matrix should be square, one row/column per
# row of ``dt_slice``.
dist.shape
Out[16]:
In [17]:
import matplotlib.pyplot as plt
In [18]:
%matplotlib inline
plt.figure(figsize=(10,10))
plt.imshow(euclidean_distances(dist))
plt.colorbar()
Out[18]:
In [ ]: