Train the Model


In [ ]:
import sys
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Column layout applied to the CSV: the label first, followed by the features.
column_names = ['label', 'feature0', 'feature1', 'feature2', 'feature3']
feature_columns = column_names[1:]

# Column names are supplied explicitly via names= rather than read from the file.
training_data = pd.read_csv(
    'data/model_training.csv',
    sep=',',
    names=column_names,
)

# Separate the target column from the model inputs.
labels_df = training_data['label']
features_df = training_data[feature_columns]

In [ ]:
from sklearn2pmml import PMMLPipeline

# Decision tree limited to depth 3 with at least 5 samples per leaf;
# random_state is pinned so repeated runs use the same seed.
tree_classifier = DecisionTreeClassifier(
    criterion="gini",
    max_depth=3,
    min_samples_leaf=5,
    random_state=100,
)

# Single-step pipeline; the step is registered under the name "classifier".
pipeline = PMMLPipeline([("classifier", tree_classifier)])

# Train on the features/labels prepared in the previous cell.
# NOTE(review): fit() presumably returns the fitted pipeline itself, as
# scikit-learn pipelines do, so `model` and `pipeline` would be the same object.
model = pipeline.fit(features_df, labels_df)

# Show the pipeline's textual representation.
print(pipeline)

In [ ]:
from sklearn2pmml import sklearn2pmml

# Export the fitted pipeline to a PMML document on disk.
# NOTE(review): with_repr=True presumably embeds the pipeline's repr() in the
# generated PMML -- confirm against the sklearn2pmml documentation.
sklearn2pmml(pipeline, "model.pmml", with_repr=True)

# Read the generated file back; the context manager guarantees the file
# handle is closed (the bare open(...).read() left it to the garbage collector).
with open('model.pmml', 'rb') as pmml_file:
    # Kept as a bytearray so any later use of pmmlBytes sees the same type.
    pmmlBytes = bytearray(pmml_file.read())

# Display the PMML XML for inspection.
print(pmmlBytes.decode('utf-8'))

In [ ]:
%%bash 

# Long-format listing of the exported PMML file; fails if the file is missing.
ls -l model.pmml

In [ ]: