In [8]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import KFold
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.cross_validation import cross_val_score
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
In [4]:
# Pima Indians Diabetes data; source directory:
# https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/
# Column labels are supplied explicitly to read_csv through `names`
# (presumably because the raw file carries no header row -- confirm against the file).
names = [
    'num_times_pregnant',
    'glucose_level',
    'blood_pres',
    'skin_thickness',
    'insulin',
    'bmi',
    'dia_pedigree',
    'age',
    'has_diabetes',
]
data = pd.read_csv("pima-indians-diabetes.data", names=names)

# Every column except the last one is a model input; the last one is the label.
feature_columns = names[:-1]
target_column = names[-1]
input_data = data[feature_columns]
expected_output = data[target_column]

# Preview the first rows of the inputs, then of the labels.
for preview in (input_data, expected_output):
    print(preview.head())
In [ ]:
# Create Random Forest, Logistic Regression, and SVM instances:
In [6]:
# Print the mean cross-validation score for each of the 3 algorithms, using the cross_val_score function
Now actually run the 3 machine learning algorithms
In [10]:
# Create the train/test split
In [16]:
# Run the data with Random Forest
Out[16]:
In [17]:
# Run the data with Logistic Regression
Out[17]:
In [18]:
# Run the data with SVM
Out[18]: