In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale
%matplotlib inline
In [2]:
# Load scikit-learn's bundled digits dataset; later cells read its
# `.data` (feature matrix) and `.target` (class labels) attributes.
digits = load_digits()
In [3]:
# Run the raw feature matrix through sklearn.preprocessing.scale (default
# settings) and echo the scaled array as the cell output.
data = scale(digits["data"])
data
Out[3]:
In [4]:
# Dimensions of the scaled feature matrix, shown as the cell output.
np.shape(data)
Out[4]:
In [5]:
# Keep the target vector under a short name and count its distinct classes.
labels = digits.target
n_digits = np.unique(labels).size

# Class count on the first line, the full label vector on the next.
print(n_digits, labels, sep='\n')
In [6]:
# Small, shallow random forest that splits on entropy.
# random_state is pinned so the fitted trees, and therefore the score and the
# feature-importance ranking computed from them, are identical on every
# Restart & Run All instead of changing from run to run.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    criterion='entropy',
    random_state=42,
)
In [7]:
# Train the forest on the scaled features; the fitted estimator is the cell output.
clf.fit(X=data, y=labels)
Out[7]:
In [8]:
# Mean accuracy reported by `clf.score`.
# NOTE: `data` / `labels` are the same arrays passed to `clf.fit`, so this is
# accuracy on the fitting data, not on held-out samples.
scores = clf.score(X=data, y=labels)
print(scores)
In [9]:
# Per-feature importances from the fitted forest.
importances = clf.feature_importances_

# Feature positions ordered from the smallest importance value to the largest.
indexes = importances.argsort()
print(indexes)
In [10]:
# Build one tick label per feature, in the same order as `indexes`.
#
# The entries of `indexes` are positions of *features* (columns of the data
# matrix), whereas `labels` is the per-*sample* target vector. Indexing
# `labels` with a feature position returns the digit class of an unrelated
# sample, so it must not be used to name the bars. Instead, name each feature
# by its (row, column) position in the image grid that the flattened feature
# vector came from.
n_cols = digits.images.shape[2]  # width of one image; features are row-major
ind = ['pixel_{}_{}'.format(*divmod(int(index), n_cols)) for index in indexes]
In [11]:
# Horizontal bar chart of the importances: one bar per entry of `indexes`,
# drawn in that order and labelled with the matching entry of `ind`.
fig = plt.figure(1)
ax = fig.gca()

bar_positions = range(len(indexes))
ax.barh(bar_positions, importances[indexes], color='b', align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(ind)

ax.set_title('Importância dos Atributos')
ax.set_xlabel('Importância Relativa')
plt.show()
In [ ]: