In [1]:
from pml.api import *

# Location of the CSV, relative to the notebook's working directory.
dataset_path = "../dataset_ext2.csv"

# Read the dataset through pml's `load` helper (brought in by the star import).
data = load(dataset_path)

In [2]:
# Show how many samples the loaded dataset holds (rendered as the cell output).
data.num_samples()


Out[2]:
80

In [3]:
# Show the names of the features (columns) available in the dataset.
data.feature_list()


Out[3]:
['ELEC199',
 'CHEM150',
 'PHYS125',
 'CSC115',
 'ENGR110',
 'CSC111',
 'MATH100',
 'MATH101',
 'ENGL135',
 'MATH110',
 'ENGR120',
 'PHYS122',
 'MECH141']

In [4]:
# Show how many samples carry each label value, before any labels are merged.
data.get_label_value_counts()


Out[4]:
f    30
s    29
p    21

In [5]:
# Fold the "s" and "p" labels into a single "s" label, leaving two classes.
# NOTE: the return value is discarded and the counts below change, so this
# call modifies `data` in place; re-load the dataset to get the original
# three labels back.
labels_to_merge = ["s", "p"]
data.combine_labels(labels_to_merge, "s")

# Re-count samples per label to confirm the merge.
data.get_label_value_counts()


Out[5]:
s    50
f    30

In [6]:
# First positional argument to `split`; the training partition's size in the
# recorded output matches this fraction of the dataset.
train_fraction = 0.6

# NOTE(review): random=True presumably shuffles before splitting, and no seed
# is set in the visible cells, so partition membership may differ between
# runs -- TODO confirm whether pml exposes a seed and pin it.
training, testing = data.split(train_fraction, random=True)

# Show the size of the training partition.
training.num_samples()


Out[6]:
48

In [7]:
# Show the size of the testing partition produced by the split above.
testing.num_samples()


Out[7]:
32

In [ ]: