In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale
%matplotlib inline
In [2]:
# Load the digits dataset bundle; later cells read its `.data` (features)
# and `.target` (labels) attributes.
digits = load_digits()
In [16]:
# Rescale the feature matrix with sklearn's `scale` (default arguments).
# `data` is the array the classifier is later fit and scored on.
data = scale(digits.data)
# Bare expression so the notebook renders the scaled array as cell output.
data
Out[16]:
In [17]:
# Inspect the dimensions of the scaled feature array.
data.shape
Out[17]:
In [18]:
# Target vector and the number of distinct classes it contains.
labels = digits.target
n_digits = np.unique(labels).size

# Show the class count followed by the label array itself.
for summary_item in (n_digits, labels):
    print(summary_item)
In [58]:
# Small, shallow random forest that uses entropy as the split criterion.
# random_state is pinned so the fitted model, its score and the resulting
# feature-importance ranking are identical on every Restart & Run All;
# without it each run bootstraps and samples features differently.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    criterion='entropy',
    random_state=42,
)
In [59]:
# Train the forest on the scaled features and their target labels.
# The same `data` / `labels` arrays are reused for scoring afterwards.
clf.fit(data, labels)
Out[59]:
In [60]:
# NOTE(review): the score is computed on the very same `data` / `labels`
# that were passed to `fit`, so it measures fit to the training set rather
# than performance on unseen samples — consider a held-out split.
scores = clf.score(data, labels)
print(scores)
In [61]:
# Per-feature importance scores taken from the fitted forest.
importances = clf.feature_importances_

# Feature positions ordered from least to most important.
indexes = importances.argsort()
print(indexes)
In [62]:
# Build one tick label per feature, in the same order as `indexes`.
#
# `indexes` holds feature (column) positions, so it must not be used to
# index `labels`: that is the per-sample target array, and indexing it with
# a feature position yields the digit class of an unrelated sample rather
# than the name of the feature.
#
# Each column of digits.data is one pixel of the flattened image, so the
# flat feature index is mapped back to its (row, col) position in the image
# grid and rendered as "pixel_<row>_<col>".
pixel_rows, pixel_cols = np.unravel_index(indexes, digits.images.shape[1:])
ind = ['pixel_{}_{}'.format(r, c) for r, c in zip(pixel_rows, pixel_cols)]
In [63]:
# Horizontal bar chart of feature importances, least important at the bottom.
ax = plt.figure(1).gca()
bar_positions = range(len(indexes))

ax.barh(bar_positions, importances[indexes], color='b', align='center')

# One tick per bar, labelled with the entries of `ind`.
ax.set_yticks(bar_positions)
ax.set_yticklabels(ind)

ax.set_title('Importância dos Atributos')
ax.set_xlabel('Importância Relativa')
plt.show()
In [ ]: