In [17]:
# Shell escapes: install sklearn-pandas and upgrade scikit-learn from inside the notebook.
# NOTE(review): no versions are pinned, so a re-run may pull different releases.
! pip install sklearn-pandas
! pip install -U scikit-learn
In [23]:
import json
import numpy as np
from pandas import read_csv
try:
    # train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
    # sklearn.cross_validation was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for old scikit-learn releases that predate model_selection.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
#from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.preprocessing import FunctionTransformer
In [2]:
# Remote location of the Pima Indians diabetes data file that is read as CSV below.
# NOTE(review): confirm the UCI archive still serves this path before relying on it.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
)
In [3]:
# Column names for the model inputs, in the order they are passed to read_csv.
features = [
    "preg",
    "plas",
    "pres",
    "skin",
    "test",
    "mass",
    "pedi",
    "age",
]

# Name given to the final column, which is used as the prediction target.
label = "label"
In [4]:
# Read the remote file, naming the columns explicitly:
# the feature columns first, then the target column last.
column_names = features + [label]
dataframe = read_csv(url, names=column_names)
In [5]:
# Split the loaded frame into the feature matrix and the target column.
X = dataframe.loc[:, features]
Y = dataframe.loc[:, label]
In [6]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes
# the shuffle (and therefore the split) repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)
In [9]:
# Logistic-regression classifier created with the library's default hyperparameters.
clf = LogisticRegression()
In [10]:
# Fit the classifier on the training split; as the cell's last expression,
# the value returned by fit() is what the notebook displays as output.
clf.fit(X_train, Y_train)
Out[10]:
In [11]:
# Report the classifier's score on the held-out test split.
# The parenthesised single-argument form works as both the Python 2 print
# statement and the Python 3 print function, so the cell runs on either.
print(clf.score(X_test, Y_test))
In [13]:
# Persist the fitted coefficients as a JSON nested list (coef_ is converted
# with tolist() because numpy arrays are not JSON-serialisable).
# NOTE(review): only coef_ is written; clf.intercept_ is not saved, so the
# model cannot be fully reconstructed from this file alone — confirm intended.
with open('logreg_coefs.json', 'w') as f:
    json.dump(clf.coef_.tolist(), f)