In [1]:
%load_ext autoreload
%autoreload
import sys
sys.path.append("./..")
In [2]:
%autoreload
from clustering.equal_groups import EqualGroupsKMeans
In [3]:
%matplotlib inline
# %matplotlib notebook
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
In [17]:
# Build X: one copy of the 2-D point data per time group "A".."G", stacked
# into a single frame with a fresh 0..n-1 index.

# The file contents do not depend on the time group, so read it once instead
# of once per loop iteration.
points = pd.read_csv('data/2dcluster.csv')

# `assign` returns a new frame each time, so every group gets its own copy
# carrying that group's TIMEGROUP value and `points` itself is left untouched.
all_dfs = [points.assign(TIMEGROUP=timegroup) for timegroup in "ABCDEFG"]

# ignore_index=True renumbers the rows, equivalent to concat followed by
# reset_index(drop=True).
X = pd.concat(all_dfs, ignore_index=True)
In [20]:
# Draw 500 rows of the two coordinate columns to cluster on. The fixed
# random_state makes the sample (and therefore every result derived from it)
# reproducible across kernel restarts, matching the seeded estimators below.
X_features = X[['X', 'Y']].sample(500, random_state=0)
In [ ]:
%autoreload
clf = EqualGroupsKMeans(n_clusters=5, random_state=0)
%timeit clf.fit(X_features)
In [ ]:
# Fit the 7-cluster model whose labels are attached and plotted below.
clf = EqualGroupsKMeans(n_clusters=7, random_state=0)
# Fit on the coordinate columns only, so the result is the same whether or not
# the 'labels' column has already been added to X_features by a later cell.
clf.fit(X_features[['X', 'Y']])
In [ ]:
# Attach the fitted model's cluster assignments to the sampled rows as a new
# 'labels' column. NOTE: this mutates X_features in place, so from here on the
# frame holds 'labels' alongside the 'X' and 'Y' coordinate columns.
X_features['labels'] = clf.labels_
In [ ]:
# Scatter the sampled points, coloured by the value in the 'labels' column.
ax = X_features.plot.scatter(x='X', y='Y', c='labels')
# Give the figure a title so it can be read on its own; the trailing semicolon
# suppresses the text repr that would otherwise be echoed under the plot.
ax.set_title('Cluster labels for the sampled (X, Y) points');
In [ ]:
In [ ]: