notebook.community

Edit and run



In [8]:

    
# All notebook imports live in this one cell: general-purpose libraries first,
# then scikit-learn.
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn import svm
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20, so importing from it fails on current releases. The same helpers are
# provided by sklearn.model_selection.
# NOTE: model_selection.KFold is constructed with n_splits (and split with
# .split(X)) rather than the old KFold(n, n_folds) signature.
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split



In [4]:

    
# Pima Indians Diabetes dataset, originally published at:
# https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/
#
# Column labels are passed through `names=`; the last label is the target and
# every label before it is an input feature.
names = [
    'num_times_pregnant',
    'glucose_level',
    'blood_pres',
    'skin_thickness',
    'insulin',
    'bmi',
    'dia_pedigree',
    'age',
    'has_diabetes',
]
data = pd.read_csv("pima-indians-diabetes.data", names=names)

# Features: every column except the trailing 'has_diabetes' label.
input_data = data[names[:-1]]
# Target: the trailing 'has_diabetes' column.
expected_output = data[names[-1]]

# Preview the first rows of the features and of the target.
for preview in (input_data.head(), expected_output.head()):
    print(preview)









    



   num_times_pregnant  glucose_level  blood_pres  skin_thickness  insulin  \
0                   6            148          72              35        0   
1                   1             85          66              29        0   
2                   8            183          64               0        0   
3                   1             89          66              23       94   
4                   0            137          40              35      168   

    bmi  dia_pedigree  age  
0  33.6         0.627   50  
1  26.6         0.351   31  
2  23.3         0.672   32  
3  28.1         0.167   21  
4  43.1         2.288   33  
0    1
1    0
2    1
3    0
4    1
Name: has_diabetes, dtype: int64



In [ ]:

    
# Create one instance each of a Random Forest, a Logistic Regression and an SVM classifier:



In [6]:

    
# Print the mean cross-validation accuracy for each of the 3 algorithms, using the cross_val_score function









    



Random Forest: 0.7486842105263157
SVM:  0.651059466849
Logistic:  0.766968557758

Now actually run the three machine learning algorithms: split the data into training and test sets, fit each model, and score it on the test set.



In [10]:

    
# Create the train/test split



In [16]:

    
# Run the data with Random forest









    Out[16]:





0.74015748031496065



In [17]:

    
# Run the data with Logistic Regression









    Out[17]:





0.76771653543307083



In [18]:

    
# Run the data with SVM









    Out[18]:





0.67322834645669294