In [1]:
import glob
import numpy as np
import pandas as pd
from grafting_classifier import GraftingClassifier
#import dask.dataframe as dd
#import dask.array as da
In [2]:
# Collect the feature CSVs under data/. glob makes no ordering guarantee, and a
# later cell picks a file by position (class_train[8]), so sort the paths to
# keep that index pointing at the same file on every run and machine.
class_train = sorted(glob.glob("data/*.csv"))
print(class_train)
In [3]:
def train_label(fname):
    """Load the label table stored next to a feature CSV.

    The label file is expected at the same path as ``fname`` with the
    trailing ``.csv`` extension swapped for ``.labels``.

    Args:
        fname: Path to a feature file; must end in ``.csv``.

    Returns:
        The label file parsed by ``pd.read_csv`` with default options.

    Raises:
        ValueError: If ``fname`` does not end in ``.csv``.
    """
    suffix = ".csv"
    if not fname.endswith(suffix):
        # Without this guard the features file itself would be read as labels.
        raise ValueError("expected a path ending in '.csv', got %r" % (fname,))
    # Swap only the trailing extension; str.replace would also rewrite any
    # ".csv" that appears earlier in the path (e.g. in a directory name).
    targetname = fname[: -len(suffix)] + ".labels"
    # NOTE(review): read_csv defaults treat the first row as a header --
    # confirm the .labels files actually carry a header row.
    return pd.read_csv(targetname)
In [6]:
# Load one dataset and its labels; missing feature values are filled with 0.
train_path = class_train[8]
train1 = pd.read_csv(train_path).fillna(0)
y = train_label(train_path)

# Partition the column positions into consecutive groups of at most ten.
n_features = train1.shape[1]
train1_cols = np.array_split(range(n_features), int(n_features / 10.0) + 1)

# Feed the classifier a growing set of columns: the first group goes through
# fit(), and every later step calls partial_fit() on all columns seen so far.
all_cols = []
mod = GraftingClassifier()
feature_names = np.array(list(train1.columns))
for idx, collist in enumerate(train1_cols):
    all_cols.extend(list(collist))
    column_list = list(feature_names[all_cols])
    if idx == 0:
        mod.fit(train1[column_list], y)
    else:
        mod.partial_fit(train1[column_list], y)
In [7]:
# Display the shape of the classifier's coef_ attribute after the fit /
# partial_fit calls in the previous cell.
mod.coef_.shape
Out[7]:
In [8]:
# Display the (rows, columns) shape of the loaded training frame.
train1.shape
Out[8]: