In [1]:
import seldon.pipeline.auto_transforms as auto
import pandas as pd
import logging
# Configure root logging at INFO level with a timestamped record format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(levelname)s : %(message)s')

# Location of the wine data file (fetched over the network by pandas).
WINE_DATA_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"

# The file is read with header=None, so column names are supplied explicitly:
# the label column first, followed by the 13 measurement columns.
WINE_COLUMNS = [
    "target",
    "Alcohol",
    "Malic acid",
    "Ash",
    "Alcalinity of ash",
    "Magnesium",
    "Total phenols",
    "Flavanoids",
    "Nonflavanoid phenols",
    "Proanthocyanins",
    "Color intensity",
    "Hue",
    "OD280/OD315 of diluted wines",
    "Proline",
]

df = pd.read_csv(WINE_DATA_URL, header=None, names=WINE_COLUMNS)
In [2]:
# Display the first rows of the loaded frame as a quick sanity check.
df.head()
Out[2]:
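Shift the class labels to start at zero, then create an auto transform that scales the numeric columns automatically (the `target` column is excluded from the transform).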
In [3]:
# Shift the class labels so that they start at zero.
# Subtracting the column minimum instead of a literal 1 keeps this cell idempotent:
# re-running it leaves already zero-based labels untouched rather than shifting
# them again. Assumes the raw labels start at 1 (as in the UCI wine data), in which
# case the first run is identical to the previous `- 1`.
df["target"] = df["target"] - df["target"].min()

# Build the automatic transform, leaving the label column out of it,
# then fit it on the frame and apply it in one step.
t_auto = auto.Auto_transform(exclude=["target"])
df2 = t_auto.fit_transform(df)

# Show the first rows of the transformed frame.
df2.head()
Out[3]:
Create an XGBoost classifier and run 5-fold cross-validation on the transformed data.
In [4]:
# Import the seldon xgb module under an alias so that the name `xgb` is not
# first a module and then a classifier instance within the same cell.
from seldon import xgb as seldon_xgb
import seldon.pipeline.cross_validation as cf

# Number of cross-validation folds.
N_FOLDS = 5

# `xgb` stays bound to the classifier instance, exactly as before this change,
# so any later cell that refers to `xgb` keeps working.
xgb = seldon_xgb.XGBoostClassifier(target="target")

# Wrap the classifier in a k-fold cross-validator and run it on the transformed frame.
cv = cf.Seldon_KFold(xgb, N_FOLDS)
cv.fit(df2)
Out[4]:
In [5]:
# A single parenthesised argument is valid both as a Python 2 print statement and
# as a Python 3 print() call. The two spaces in the format string reproduce the
# original Python 2 output (trailing space in the literal plus the separator space).
print("Average accuracy  %s" % (cv.get_score(),))
In [ ]: