In [1]:
# Load the `sql` IPython extension, which provides the %sql magic used below.
%load_ext sql
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
import logging
import os
import sys
from configparser import ConfigParser
from urllib.parse import quote_plus

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [3]:
# Send log records at INFO level and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Database credentials are read from ~/.pgcred rather than stored in the notebook.
config = ConfigParser()
configfn = os.path.join(os.path.expanduser('~'), '.pgcred')

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list here so a missing credentials
# file fails with a clear message instead of a later KeyError on config['nbadb'].
files_read = config.read(configfn)
if not files_read:
    raise FileNotFoundError(
        "credentials file not found or unreadable: %s" % configfn)
# Keep the list of parsed files as the cell's displayed result.
files_read
Out[3]:
In [4]:
# Pull the connection settings out of the [nbadb] section of the parsed file.
nbadb_section = config['nbadb']
user = nbadb_section['username']
password = nbadb_section['password']
db = nbadb_section['database']
In [5]:
connection_string = "postgresql://{user}:{password}@localhost/{db}".format(user=user, password=password, db=db)
%sql $connection_string
Out[5]:
In [25]:
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
In [55]:
# Fetch every row of the tmpmodel table via the %sql magic and keep the result set.
result = %sql SELECT * FROM tmpmodel
In [56]:
# Convert the query result into a DataFrame and preview its first 10 rows.
df = result.DataFrame()
df.head(10)
Out[56]:
In [57]:
# Drop every row that has at least one missing value.
df = df.dropna(how='any')
In [58]:
# Split into 75% train / 25% test.
# Positional .iloc replaces the .ix indexer, which was deprecated in
# pandas 0.20 and removed in pandas 1.0. A fixed random_state makes the split
# (and therefore every downstream score) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],   # features: every column except the last
    df.iloc[:, -1],    # target: the last column
    train_size=0.75,
    test_size=0.25,
    random_state=0,
)
In [59]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transform to both splits. Standardizing each split
# with its own statistics (two independent preprocessing.scale calls) puts
# train and test on different scales, so the model would be evaluated on
# inputs transformed differently from the ones it was fitted on.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scale = scaler.transform(X_train)
X_test_scale = scaler.transform(X_test)
In [60]:
# Fit a TPOTClassifier (5 generations, population of 20) on the scaled training
# data, then print its score on the scaled held-out split.
tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    verbosity=2,
)
tpot.fit(X_train_scale, y_train)
holdout_score = tpot.score(X_test_scale, y_test)
print(holdout_score)
In [61]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra-trees ensemble of 250 estimators with a fixed seed.
forest = ExtraTreesClassifier(
    random_state=0,
    n_estimators=250,
)
In [62]:
# Fit the forest on the scaled training features and their labels.
forest.fit(X_train_scale, y_train)
Out[62]:
In [63]:
# Importance of each feature as reported by the fitted ensemble.
importances = forest.feature_importances_
# Standard deviation of each feature's importance across the individual estimators.
per_tree_importances = [est.feature_importances_ for est in forest.estimators_]
std = np.std(per_tree_importances, axis=0)
# Feature positions ordered from highest to lowest importance.
indices = np.argsort(importances)[::-1]
In [66]:
# Text summary: one line per feature, in the order given by `indices`.
n_features = X_train_scale.shape[1]
print("Feature ranking:")
for rank in range(n_features):
    feature_idx = indices[rank]
    print("%d. feature %d (%f)" % (rank + 1, feature_idx, importances[feature_idx]))

# Bar chart of the same ranking, with `std` drawn as error bars and the
# feature positions used as tick labels.
bar_positions = range(n_features)
plt.figure()
plt.title("Feature importances")
plt.bar(bar_positions, importances[indices],
        color="r", yerr=std[indices], align="center")
plt.xticks(bar_positions, indices)
plt.xlim([-1, n_features])
plt.show()
In [68]:
# Show the column labels of df — presumably so the numeric feature indices
# printed above can be matched back to column names (TODO confirm intent).
df.columns
Out[68]:
In [ ]: