In [40]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import array
from sklearn import datasets
from sklearn import metrics
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

try:
    # cross_val_score lives in sklearn.model_selection since scikit-learn 0.18.
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older scikit-learn releases only provide the original location.
    from sklearn.cross_validation import cross_val_score

%matplotlib inline
In [4]:
import pg8000

# Connection settings can be overridden through WINE_DB_* environment variables
# so credentials do not have to be edited into the notebook. The fallbacks are
# the values this cell originally hardcoded, so running it unchanged still
# connects to the same database.
conn = pg8000.connect(
    host=os.environ.get('WINE_DB_HOST', 'training.c1erymiua9dx.us-east-1.rds.amazonaws.com'),
    port=int(os.environ.get('WINE_DB_PORT', 5432)),
    database=os.environ.get('WINE_DB_NAME', 'training'),
    user=os.environ.get('WINE_DB_USER', 'dot_student'),
    password=os.environ.get('WINE_DB_PASSWORD', 'qgis'),
)
In [5]:
# Open a cursor on the database connection; the query cells below execute through it.
cursor = conn.cursor()
In [7]:
# List the columns of the winequality table. Ordering by ordinal_position makes
# the printed names follow the table's column order (the order "select *"
# returns them in); without an ORDER BY the row order is unspecified.
cursor.execute(
    "select column_name from information_schema.columns "
    "where table_name='winequality' order by ordinal_position"
)
# Rows are stored exactly as the driver returns them (one selected column per row).
column_name = list(cursor.fetchall())
for row in column_name:
    print(row)
In [21]:
# Fetch every row of the winequality table and pack the rows into a NumPy array.
cursor.execute("select * from winequality")
# list(...) copies the fetched rows into a plain list, replacing the manual
# append loop.
data = list(cursor.fetchall())
my_array = array(data)
# Last expression of the cell: displays the type of the resulting array.
type(my_array)
Out[21]:
In [42]:
# Position of the target column; every column before it is used as a feature.
TARGET_COL = 11

x = my_array[:, :TARGET_COL]    # feature matrix: columns 0..10
y = my_array[:, TARGET_COL:]    # column 11 onward, kept two-dimensional
y2 = my_array[:, TARGET_COL]    # column 11 alone as a one-dimensional vector; this is the target to use
In [38]:
# Fix the seed so the fitted tree and any scores derived from it are
# reproducible from run to run; all other hyperparameters keep their defaults.
dt = DecisionTreeClassifier(random_state=0)
In [39]:
# Fit on the one-dimensional label vector y2 -- the same target the
# cross-validation cell uses -- rather than the two-dimensional slice y.
dt = dt.fit(x, y2)
In [46]:
# 10-fold cross-validation of the classifier on the feature matrix and the
# one-dimensional labels; the returned per-fold scores are kept in `scores`.
scores = cross_val_score(estimator=dt, X=x, y=y2, cv=10)
In [47]:
# Display the scores returned by cross_val_score in the previous cell.
scores
Out[47]:
In [ ]: