The sole purpose of this notebook is to debug the b2ag modules (file I/O, the base classifiers, and the b2ag ensemble). Most people will not find it very useful.


In [ ]:
# program wide imports
import numpy as np
import sys
import pickle
# Add the project source directory to the module search path (presumably where
# the file_io and classifiers modules imported by later cells live). The path
# is relative to the notebook's working directory. The membership guard keeps
# this cell idempotent: re-running it no longer stacks duplicate entries.
B2AG_PATH = "../b2ag/"
if B2AG_PATH not in sys.path:
    sys.path.append(B2AG_PATH)

In [ ]:
# Debug check: exercise the parameters-file loader and show what it returns.
import file_io
# Reload so that edits made to file_io since the kernel first imported it are
# picked up without restarting the kernel.
file_io = reload(file_io)
params = file_io.load_parameter_file("../data/parameters.txt")
print(params)

In [ ]:
# Debug check: load the raw CSV table and report its dimensions.
import file_io
# Reload so that edits made to file_io since the last import are picked up.
file_io = reload(file_io)
data, labels = file_io.load_table("../data/digit-raw.csv")
# Each line is assembled with a single-space join, which reproduces the
# spacing of a multi-item print statement.
print(" ".join(["Data Samples: ", str(len(data)), " of ", str(len(labels))]))
print(" ".join(["Features: ", str(len(data[0]))]))
print(" ".join(["Classes: ", str(len(np.unique(labels)))]))

In [ ]:
# Pickle round trip: load the raw CSV table, write it out as a pickle, then
# read the pickle back through file_io.load_pickle and report the dimensions
# of what came back.
import file_io
# Reload so that edits made to file_io since the last import are picked up.
file_io = reload(file_io)
data, labels = file_io.load_table("../data/digit-raw.csv")
# A pickle is a byte stream, so the file is opened in binary mode. The context
# manager guarantees the file is flushed and closed before load_pickle reads
# it back on the next line.
with open("../data/digit-raw.pkl", "wb") as pkl_file:
    pickle.dump({"data":data,"labels":labels}, pkl_file)
data2, labels2 = file_io.load_pickle("../data/digit-raw.pkl")

print(" ".join(["Data Samples: ", str(len(data2)), " of ", str(len(labels2))]))
print(" ".join(["Features: ", str(len(data2[0]))]))
print(" ".join(["Classes: ", str(len(np.unique(labels2)))]))

In [ ]:
# test single classifier (configured from parameters.txt)
import file_io
import classifiers
# Reload both modules so that edits made since the last import are picked up.
file_io = reload(file_io)
classifiers = reload(classifiers)
data, labels = file_io.load_pickle("../data/digit-raw.pkl")
params = file_io.load_parameter_file("../data/parameters.txt")
mdl = classifiers.base_model(params)
# Train on the first n_train samples and evaluate on every remaining one.
# The hold-out slice starts at n_train itself (not n_train + 1) so that sample
# n_train is no longer silently dropped from both sets.
n_train = 1000
mdl.fit(data[:n_train,:],labels[:n_train])
# Exercise both prediction modes; probs=True presumably returns per-class
# probabilities instead of labels -- confirm against classifiers.base_model.
y = mdl.predict(data[n_train:,:])
p = mdl.predict(data[n_train:,:],probs=True)
print(mdl.predict_error(data[n_train:,:],labels[n_train:]))
print(mdl.params)

In [ ]:
# Debug check: load a .mat data file and report its dimensions.
import file_io
# Reload so that edits made to file_io since the last import are picked up.
file_io = reload(file_io)
data, labels = file_io.load_mat("../data/breastEW.mat")
# Each line is assembled with a single-space join, which reproduces the
# spacing of a multi-item print statement.
print(" ".join(["Data Samples: ", str(len(data)), " of ", str(len(labels))]))
print(" ".join(["Features: ", str(len(data[0]))]))
print(" ".join(["Classes: ", str(len(np.unique(labels)))]))

In [ ]:
# test single classifier (cart)
import file_io
import classifiers
# Reload both modules so that edits made since the last import are picked up.
file_io = reload(file_io)
classifiers = reload(classifiers)
data, labels = file_io.load_pickle("../data/digit-raw.pkl")
params = {"type":"cart"}
mdl = classifiers.base_model(params)
# Train on the first n_train samples and evaluate on every remaining one.
# The hold-out slice starts at n_train itself (not n_train + 1) so that sample
# n_train is no longer silently dropped from both sets.
n_train = 1000
mdl.fit(data[:n_train,:],labels[:n_train])
# Exercise both prediction modes; probs=True presumably returns per-class
# probabilities instead of labels -- confirm against classifiers.base_model.
y = mdl.predict(data[n_train:,:])
p = mdl.predict(data[n_train:,:],probs=True)
print(mdl.predict_error(data[n_train:,:],labels[n_train:]))
print(mdl.params)

In [ ]:
# test single classifier (knn)
import file_io
import classifiers
# Reload both modules so that edits made since the last import are picked up.
file_io = reload(file_io)
classifiers = reload(classifiers)
data, labels = file_io.load_pickle("../data/digit-raw.pkl")
params = {"type":"knn","n_neighbors":7}
mdl = classifiers.base_model(params)
# Train on the first n_train samples and evaluate on every remaining one.
# The hold-out slice starts at n_train itself (not n_train + 1) so that sample
# n_train is no longer silently dropped from both sets.
n_train = 1000
mdl.fit(data[:n_train,:],labels[:n_train])
# Exercise both prediction modes; probs=True presumably returns per-class
# probabilities instead of labels -- confirm against classifiers.base_model.
y = mdl.predict(data[n_train:,:])
p = mdl.predict(data[n_train:,:],probs=True)
print(mdl.predict_error(data[n_train:,:],labels[n_train:]))
print(mdl.params)

In [ ]:
# test b2ag: fit a small ensemble and inspect its output for one held-out sample
import file_io
import numpy as np
import classifiers
# Reload both modules so that edits made since the last import are picked up.
file_io = reload(file_io)
classifiers = reload(classifiers)
data, labels = file_io.load_pickle("../data/digit-raw.pkl")
n_models=3
params={"type":"lr"}
percent_train=1.
percent_eval=1.
ens = classifiers.b2ag(n_models=n_models,params=params,percent_train=percent_train,percent_eval=percent_eval)
# Train on the first n_train samples and predict on every remaining one.
# The hold-out slice starts at n_train itself (not n_train + 1) so that sample
# n_train is no longer silently dropped from both sets.
n_train = 1000
ens = ens.fit(data[:n_train,:],labels[:n_train])
confidence,error_bar = ens.predict(data[n_train:,:])
# Row 0 of the prediction corresponds to sample n_train, so its arg-max is
# shown next to labels[n_train] for comparison.
print(" ".join(map(str, (confidence[0], np.argmax(confidence[0]), labels[n_train]))))
print(error_bar[0])

In [ ]: