In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from datetime import datetime
from sklearn.preprocessing import StandardScaler
%matplotlib inline
In [3]:
# Read the healthy-brain matrix from CSV. header=None keeps the first CSV row
# as data instead of consuming it as column names.
healthy = pd.read_csv(filepath_or_buffer="/data/healthy.csv", header=None)
# Print the frame's column dtypes, non-null counts and memory usage.
healthy.info()
In [12]:
In [4]:
# Render the healthy matrix as a grayscale image (one cell value per pixel).
plt.imshow(healthy, cmap="gray")
Out[4]:
In [5]:
# Read the tumor-brain matrix from CSV. header=None keeps the first CSV row
# as data instead of consuming it as column names.
tumor = pd.read_csv(filepath_or_buffer="/data/tumor.csv", header=None)
# Print the frame's column dtypes, non-null counts and memory usage.
tumor.info()
In [6]:
# Render the tumor matrix as a grayscale image (one cell value per pixel).
plt.imshow(tumor, cmap="gray")
Out[6]:
In [21]:
# Side-by-side histograms (30 bins each) of every cell value in the two
# matrices, drawn on a single 10x5 figure.
plt.figure(figsize=(10, 5))
panels = (
    (121, healthy, "Histogram: Healthy Brain"),
    (122, tumor, "Histogram: Brain with disease"),
)
for position, frame, caption in panels:
    plt.subplot(position)
    # Flatten the 2-D matrix so all values feed one histogram.
    plt.hist(frame.values.flatten(), bins=30)
    plt.title(caption)
plt.tight_layout()
In [39]:
%%time
km1 = KMeans(n_clusters=5, n_init=10, init="k-means++")
X = healthy.values.reshape(-1, 1)
y = km1.fit_predict(X)
summary = pd.DataFrame(X)
summary["cluster"] = y
summary = summary.groupby("cluster")[0].agg(["mean", "count"])
summary["pct"] = summary["count"]/len(X)
summary.drop(columns=["count"], inplace=True)
print(summary.sort_values("mean"))
plt.imshow(y.reshape(566, 646), cmap = "gray")
print("Inertia: ", km1.inertia_)
In [40]:
%%time
km2 = KMeans(n_clusters=5,n_init=10, init="k-means++")
X = tumor.values.reshape(-1, 1)
y = km2.fit_predict(X)
plt.imshow(y.reshape(tumor.shape[0], tumor.shape[1])
, cmap = "gray", interpolation="nearest")
print("Inertia: ", km2.inertia_)
summary = pd.DataFrame(X)
summary["cluster"] = y
summary = summary.groupby("cluster")[0].agg(["mean", "count"])
summary["pct"] = summary["count"]/len(X)
summary.drop(columns=["count"], inplace=True)
print(summary.sort_values("mean"))
In [ ]: