In [8]:
import pg8000 as pg
import numpy as np
In [4]:
# Connection settings for the PostgreSQL database holding the wine-quality data.
# Each value can be overridden through an environment variable so credentials
# do not have to be edited into the notebook; the fallbacks are the values the
# notebook originally hard-coded.
# NOTE(review): the fallback user/password are presumably shared training
# credentials -- confirm they are safe to keep in a committed notebook.
import os

host = os.environ.get('WINE_DB_HOST', 'training.c1erymiua9dx.us-east-1.rds.amazonaws.com')
database = os.environ.get('WINE_DB_NAME', 'training')
port = int(os.environ.get('WINE_DB_PORT', 5432))  # environment values arrive as strings
user = os.environ.get('WINE_DB_USER', 'dot_student')
password = os.environ.get('WINE_DB_PASSWORD', 'qgis')
table = os.environ.get('WINE_DB_TABLE', 'winequality')
In [44]:
# Open the database connection from the settings defined above and create the
# cursor that the later query cells use. `table` is deliberately not passed to
# connect().
conn = pg.connect(
    host=host,
    port=port,
    database=database,
    user=user,
    password=password,
)
cursor = conn.cursor()
In [53]:
# Roll back the current transaction on the open connection.
# NOTE(review): presumably here to recover after a failed query in an earlier
# run of the notebook -- confirm whether it is still needed.
conn.rollback()
In [54]:
# Fetch every row of the winequality table and hold the result set as a NumPy
# array (one array row per table row).
# To list the column names instead, the query would be:
#   select column_name from information_schema.columns where table_name='winequality'
cursor.execute("select * from winequality")
rows = cursor.fetchall()
wines = np.array(rows)
In [56]:
# Number of values in the first fetched row, i.e. how many columns came back.
len(wines[0])
Out[56]:
In [62]:
# Split the table: the first 11 columns become the feature matrix `x`,
# column index 11 becomes the target vector `y`.
x, y = wines[:, :11], wines[:, 11]
In [58]:
from sklearn.tree import DecisionTreeClassifier
In [63]:
# Fit a decision tree on the feature matrix `x` and target `y`.
# A fixed random_state makes the fitted tree (and therefore the cross-validation
# scores and feature importances computed from it) reproducible across runs.
dt = DecisionTreeClassifier(random_state=0)
dt = dt.fit(x, y)
In [60]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `cross_val_score` now lives in `sklearn.model_selection`. Fall back to
# the old location only for very old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
In [64]:
# Evaluate the classifier with 10-fold cross-validation on (x, y).
# No `scoring` argument is given, so the estimator's default scorer is used.
scores=cross_val_score(dt,x,y,cv=10)
In [65]:
# Average of the cross-validation scores.
scores.mean()
Out[65]:
In [34]:
# Column names of the last executed query: the first item of each entry in
# cursor.description is the column name.
field_names = [column[0] for column in cursor.description]
num_fields = len(field_names)
In [83]:
# Names of all columns except the last one, as text.
# Column names are decoded only when they are bytes, so this also works when
# the driver already returns them as str.
[name.decode("utf-8") if isinstance(name, bytes) else name for name in field_names[:-1]]
Out[83]:
In [84]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(dt.feature_importances_, 'o')
plt.xticks(range(wines.data.shape[1]), [name.decode("utf-8") for name in field_names[:-1]], rotation=90)
plt.ylim(0,1)
Out[84]:
In [ ]: