In [1]:
import glob

import numpy as np
import pandas as pd

from grafting_classifier import GraftingClassifier
from sklearn.linear_model import SGDClassifier
from ogfs_classifier import OGFSClassifier
from osfs_classifier import OSFSClassifier
from dpp_classifier import DPPClassifier
from dpp_classifier_mitra import DPPClassifier as DPPClassifier2
from dpp_classifier_ogfs import DPPClassifier as DPPClassifier3

from sklearn.metrics import log_loss, accuracy_score

#import dask.dataframe as dd
#import dask.array as da

In [4]:
# List every microarray training CSV together with the shape of the DataFrame
# pandas parses from it.
# glob.glob() yields matches in arbitrary, filesystem-dependent order, so the
# result is sorted to keep both `class_train` and the printed listing
# reproducible across machines and kernel restarts.
class_train = sorted(glob.glob("microarray/*_train.csv"))
for x in class_train:
    # NOTE: each file is parsed in full just to report its shape.
    print(x, pd.read_csv(x).shape)


microarray\colon_train.csv (62, 2000)
microarray\leukemia_train.csv (72, 7129)
microarray\lung_cancer_train.csv (181, 12533)
microarray\prostate_train.csv (102, 12600)

In [5]:
# List every UCI training CSV together with the shape of the DataFrame pandas
# parses from it.
# glob.glob() yields matches in arbitrary, filesystem-dependent order, so the
# result is sorted to keep both `class_train` and the printed listing
# reproducible across machines and kernel restarts.
class_train = sorted(glob.glob("uci/*_train.csv"))
for x in class_train:
    # NOTE: each file is parsed in full just to report its shape.
    print(x, pd.read_csv(x).shape)


uci\Ionosphere_train.csv (351, 34)
uci\spambase_train.csv (4601, 57)
uci\spectf_train.csv (267, 44)
uci\wdbc_train.csv (569, 30)

In [6]:
# List every NIPS feature-selection-challenge training CSV together with the
# shape of the DataFrame pandas parses from it.
# glob.glob() yields matches in arbitrary, filesystem-dependent order, so the
# result is sorted to keep both `class_train` and the printed listing
# reproducible across machines and kernel restarts.
class_train = sorted(glob.glob("NIPS/*_train.csv"))
for x in class_train:
    # NOTE: each file is parsed in full just to report its shape; the larger
    # files in this directory make this cell comparatively slow.
    print(x, pd.read_csv(x).shape)


NIPS\arcene_train.csv (100, 10000)
NIPS\dexter_train.csv (300, 20000)
NIPS\dorothea_train.csv (800, 100000)
NIPS\gisette_train.csv (6000, 5000)
NIPS\madelon_train.csv (2000, 500)