In [2]:
import numpy as np
import pandas as pd
import pickle
import hdbscan
In [3]:
# Load the pickled model that is later passed to hdbscan.approximate_predict.
# The `with` block guarantees the file handle is closed once unpickling
# finishes (the bare open(...) form leaves it to the garbage collector).
# NOTE: pickle.load can execute arbitrary code; only load pickle files that
# come from a trusted source.
with open('hdbscan_cluster_jac_10.pickle', 'rb') as model_file:
    hdbscan_model = pickle.load(model_file)
In [6]:
# Column labels 'S0' .. 'S51' used when building the DataFrame.
col_names = list(map('S{}'.format, range(52)))
In [10]:
# Read the saved array and wrap it directly in an integer-typed DataFrame
# labelled with col_names; passing the array inline means no separate
# reference to the raw array is left behind in the namespace.
path_df = pd.DataFrame(
    np.load('bmatrix_train_date.npy'),
    columns=col_names,
    dtype=int,
)
In [11]:
# Preview the first rows of path_df as a quick sanity check of the load.
path_df.head()
Out[11]:
In [12]:
# Add an 'idx' column holding the current index shifted up by one.
path_df = path_df.assign(idx=path_df.index + 1)
In [15]:
# Promote the 'idx' column to be the frame's index.
path_df = path_df.set_index(keys='idx')
In [16]:
# Keep only the rows whose values sum to a positive number.
# .copy() makes path_df_train an independent frame, so adding a column to it
# later does not trigger pandas' SettingWithCopyWarning on a filtered slice.
path_df_train = path_df[path_df.sum(axis=1) > 0].copy()
In [20]:
# Predict a label and a strength for every row of path_df_train using
# hdbscan_model, then display the label array.
(test_labels, strengths) = hdbscan.approximate_predict(
    clusterer=hdbscan_model,
    points_to_predict=path_df_train.values,
)
test_labels
Out[20]:
In [29]:
%%time
hdbscan_model_man_10 = pickle.load(open('hdbscan_cluster_man_10.pickle', 'rb'))
In [44]:
# Predict a label and a strength for every row of path_df_train using
# hdbscan_model_man_10, then display the label array.
# Note: `strengths` is rebound here, replacing any value it held before.
(test_labels_man_10, strengths) = hdbscan.approximate_predict(
    clusterer=hdbscan_model_man_10,
    points_to_predict=path_df_train.values,
)
test_labels_man_10
Out[44]:
In [49]:
# Show the shape of the predicted label array.
test_labels_man_10.shape
Out[49]:
In [50]:
# Show the shape of the filtered frame the predictions were made on.
path_df_train.shape
Out[50]:
In [51]:
# Store the labels in test_labels_man_10 as a new 'product' column.
# NOTE(review): if path_df_train is an uncopied boolean-filtered slice of
# another frame, pandas may emit SettingWithCopyWarning here — confirm.
path_df_train['product'] = test_labels_man_10
In [52]:
# Write the labelled frame (index included) to a CSV file in the working directory.
path_df_train.to_csv(path_or_buf="product_man_10.csv")
In [ ]: