notebook.community

Edit and run



In [1]:

    
%load_ext autoreload
%autoreload 
import sys
sys.path.append("./..")



In [2]:

    
%autoreload 
from clustering.equal_groups import EqualGroupsKMeans



In [3]:

    
%matplotlib inline
# %matplotlib notebook
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np



In [17]:

    
# Build the working dataset: one copy of the CSV rows per time group
# "A".."G", each tagged with a TIMEGROUP column, stacked into a single frame.
# The file is identical for every group, so read it once instead of once per
# loop iteration.
base_df = pd.read_csv('data/2dcluster.csv')

# `assign` returns a new frame, so each group gets its own independent copy.
all_dfs = [base_df.assign(TIMEGROUP=timegroup) for timegroup in "ABCDEFG"]

# ignore_index=True yields a fresh 0..n-1 index, equivalent to the former
# concat followed by reset_index(drop=True).
X = pd.concat(all_dfs, ignore_index=True)



In [20]:

    
# Draw a 500-row random sample of the two coordinate columns
# (presumably to keep the clustering run time manageable).
# A fixed random_state makes the sample, and everything derived from it,
# reproducible across "Restart Kernel -> Run All".
X_features = X[['X', 'Y']].sample(500, random_state=0)



In [ ]:

    
%autoreload 
clf = EqualGroupsKMeans(n_clusters=5, random_state=0)
%timeit clf.fit(X_features)









    



6.88 s ± 154 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)



In [ ]:

    
# Fit the 7-cluster model whose labels are used for the plot below.
clf = EqualGroupsKMeans(n_clusters=7, random_state=0)
# Fit on the coordinate columns only: a later cell adds a 'labels' column to
# X_features in place, so fitting on the whole frame would include the
# previous labels as a feature whenever this cell is re-run.
clf.fit(X_features[['X', 'Y']])



In [ ]:

    
# Store the fitted model's per-row cluster assignment as a 'labels' column.
# NOTE(review): this mutates X_features in place, so any later fit on the
# whole frame would also see 'labels' as a feature — confirm this is intended.
X_features['labels'] = clf.labels_



In [ ]:

    
# Scatter plot of the sampled points, coloured by the 'labels' column.
X_features.plot(kind='scatter', x='X', y='Y', c='labels')



In [ ]:



In [ ]: