In [ ]:
import sys
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# Load the training set. Column names are supplied explicitly, so pandas does
# not consume a header row from the file (assumes the CSV has none -- confirm).
training_data = pd.read_csv(
    'data/model_training.csv',
    sep=',',
    names=['label', 'feature0', 'feature1', 'feature2', 'feature3'],
)
# Target column (a Series, despite the `_df` suffix).
labels_df = training_data['label']
# Model inputs: every column except the label, i.e. feature0..feature3.
features_df = training_data.drop(columns=['label'])
In [ ]:
from sklearn2pmml import PMMLPipeline
# Shallow, reproducible decision tree: fixed seed, depth capped at 3, and at
# least 5 samples required in each leaf.
tree_classifier = DecisionTreeClassifier(
    criterion="gini",
    random_state=100,
    max_depth=3,
    min_samples_leaf=5,
)
# Wrap the classifier in a single-step PMMLPipeline named "classifier".
pipeline = PMMLPipeline([("classifier", tree_classifier)])
# Train on the feature columns against the label column.
# NOTE(review): `model` is presumably the fitted pipeline itself (the usual
# scikit-learn fit() convention) -- confirm before relying on it.
model = pipeline.fit(features_df, labels_df)
print(pipeline)
In [ ]:
from sklearn2pmml import sklearn2pmml
# Export the fitted pipeline to a PMML document on disk.
sklearn2pmml(pipeline, "model.pmml", with_repr=True)
# Read the generated file back for inspection. The context manager guarantees
# the file handle is closed instead of being left for the garbage collector.
with open("model.pmml", "rb") as pmml_file:
    # Kept as a bytearray so any later cell using `pmmlBytes` sees the same type.
    pmmlBytes = bytearray(pmml_file.read())
# Show the PMML XML as text.
print(pmmlBytes.decode("utf-8"))
In [ ]:
%%bash
# Long-format listing of the exported PMML file; fails if model.pmml is missing.
ls -l model.pmml
In [ ]: