In [1]:
from pymldb import Connection
mldb = Connection("http://localhost")
# These imports are also needed later in the notebook.
import pandas as pd
import numpy as np
from IPython.display import display, HTML
%matplotlib inline
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 5
from ipywidgets import interact
In [2]:
# The "archive+" prefix together with the "#..." suffix points at a single
# CSV member inside the remote zip file.
bank_csv_url = (
    "archive+"
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"
    "#bank-additional/bank-additional-full.csv"
)

# Dataset definition: the source file is semicolon-delimited.
bank_dataset_config = {
    "type": "text.csv.tabular",
    "params": {
        "dataFileUrl": bank_csv_url,
        "delimiter": ";",
    },
}

# Create (or replace) the dataset named "bank"; the response is the cell output.
mldb.v1.datasets("bank").put(bank_dataset_config)
Out[2]:
In [3]:
# Preview the first ten rows of the "bank" dataset; the query result is the
# cell output.
preview_sql = "select * from bank limit 10"
mldb.query(preview_sql)
Out[3]:
In [15]:
# Configure and run a classifier experiment on the "bank" dataset, then
# report the area under the ROC curve of the single fold.
training_procedure = mldb.v1.procedures("bank_exp")
training_procedure.put({
    "type": "classifier.experiment",
    "params": {
        "experimentName": "bank",
        "keepArtifacts": True,
        # Training and testing read the same dataset; the fold's where-clauses
        # below keep the two row sets disjoint.
        "training_dataset": {"id": "bank"},
        "testing_dataset": {"id": "bank"},
        "dataset_folds": [{
            # Rows whose hash is divisible by 3 are held out for testing.
            "training_where": "rowHash() % 3 != 0",
            "testing_where": "rowHash() % 3 = 0",
        }],
        # The same model file is referenced by the "bank_explainer" function
        # defined later in the notebook.
        "modelFileUrlPattern": "file://models/bank.cls",
        "algorithm": "bbdt",
        # Use every column as a feature except the label column and "duration".
        "select": "* excluding (y, duration)",
        "label": "y='yes'",
        # NOTE(review): presumably this is what produces the "bank_results_0"
        # dataset queried by a later cell -- confirm against the MLDB docs.
        "outputAccuracyDataset": True,
        "mode": "boolean"
    }
})

# Start a run of the procedure with no parameter overrides.
result = training_procedure.runs.post({})

# Parenthesised single-argument print works as both a Python 2 statement and
# a Python 3 function call, and emits the same text in either.
auc = result.json()["status"]["folds"][0]["results"]["auc"]
print("\nArea under ROC curve = %0.4f\n" % auc)
In [26]:
# Fetch the details of the run started by this notebook. A hard-coded run id
# identifies one specific past run and breaks as soon as the notebook is
# re-executed or pointed at another server.
# NOTE(review): assumes the run-creation response exposes the run id under
# the "id" key -- confirm against the MLDB version in use.
run_id = result.json()["id"]
training_procedure.runs(run_id).details.get()
Out[26]:
In [16]:
# Load the rows of "bank_results_0", ordered by descending score.
accuracy = mldb.query("select * from bank_results_0 order by score desc")


# A (min, max) tuple is the interact abbreviation for an integer slider.
# A list is treated by current ipywidgets as a dropdown of its elements,
# which would only offer the first and last row.
@interact
def accuracy_plot(threshold_index=(0, len(accuracy) - 1)):
    """Plot the ROC curve and the confusion breakdown for one threshold.

    threshold_index -- positional index into ``accuracy`` selecting the row
        (and therefore the score threshold) to highlight.

    The left panel draws truePositiveRate against falsePositiveRate for all
    rows and marks the selected row with a green square. The right panel is a
    pie chart of the selected row's four confusion counts.
    """
    row = accuracy.iloc[threshold_index]
    cols = ["trueNegatives", "falsePositives", "falseNegatives", "truePositives"]

    f, (ax1, ax2) = plt.subplots(1, 2)

    # ROC curve, titled with the score of the selected row.
    accuracy.plot(
        ax=ax1, x="falsePositiveRate", y="truePositiveRate",
        legend=False, title="ROC Curve, threshold=%.4f" % row.score
    ).set_ylabel('truePositiveRate')
    ax1.plot(row.falsePositiveRate, row.truePositiveRate, 'gs')

    # Correct outcomes share one colour, errors share the other.
    ax2.pie(
        row[cols], labels=cols, autopct='%1.1f%%', startangle=90,
        colors=['lightskyblue', 'lightcoral', 'lightcoral', 'lightskyblue']
    )
    ax2.axis('equal')

    # NOTE(review): hspace controls spacing between subplot rows; with a
    # single row it likely has no visible effect -- wspace may have been meant.
    f.subplots_adjust(hspace=.75)
    plt.show()
In [17]:
# Definition of an explain function backed by the model file written by the
# "bank_exp" experiment (same modelFileUrl).
explainer_config = {
    "type": "classifier.explain",
    "params": {
        "modelFileUrl": "file://models/bank.cls",
    },
}

# Register it under the name "bank_explainer"; the response is the cell output.
explanation = mldb.v1.functions("bank_explainer")
explanation.put(explainer_config)
Out[17]:
In [21]:
# Average the per-feature explanation values returned by bank_explainer over
# the rows matching the testing where-clause, grouped by the label column y.
importance_sql = """
select avg(
@bank_explainer(
label: y='yes',
features: {*}
)[explanation]
) as *
from bank
where rowHash() % 3 = 0
group by y
"""

# Transpose the result before plotting it as horizontal bars.
importance_by_label = mldb.query(importance_sql)
importance_by_feature = importance_by_label.transpose()
importance_by_feature.plot(kind='barh', title="Feature Importance")
plt.xticks(rotation=0)
plt.show()
In [ ]: