In [32]:
from sklearn import datasets, ensemble
from sklearn.neighbors import KNeighborsClassifier

import numpy as np
import pandas as pd
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [19]:
digits = datasets.load_digits()

In [20]:
digits.data.shape


Out[20]:
(1797L, 64L)

In [21]:
# split the data: first 75% for training, the remaining 25% for testing
X_train = digits.data[:1348]
X_test = digits.data[1348:]
y_train = digits.target[:1348]
y_test = digits.target[1348:]

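The same hold-out split could also be expressed with train_test_split and shuffle=False; a minimal sketch (not part of the original notebook; the 75/25 rounding may differ from the slicing above by one sample):

In [ ]:
from sklearn.model_selection import train_test_split

# hold out the last ~25% of the data without shuffling (hypothetical alternative to manual slicing)
X_tr, X_te, y_tr, y_te = train_test_split(
    digits.data, digits.target, test_size=0.25, shuffle=False)
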
In [23]:
# 1-NN classifier: each test sample gets the label of its single nearest training sample
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [30]:
# error rate of 1-NN: fraction of misclassified test samples
err = 0.
for i, j in zip(y_pred, y_test):
    if i != j:
        err += 1.
ans1 = err / len(y_test)
print(ans1)


0.0378619153675

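The same error rate could be computed without an explicit loop; a minimal sketch using sklearn.metrics.accuracy_score (an alternative check, not the original code):

In [ ]:
from sklearn.metrics import accuracy_score

# error rate = 1 - accuracy; should match the value printed above
print(1 - accuracy_score(y_test, y_pred))
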
In [33]:
with open('1nn_vs_RandFor_1.txt', 'w') as file_out:
    file_out.write(str(ans1))

In [34]:
# random forest with 1000 trees, trained on the same split
RFC = ensemble.RandomForestClassifier(n_estimators=1000)
RFC.fit(X_train, y_train)
y_pred = RFC.predict(X_test)

In [36]:
# error rate of the random forest on the same test set
err_RFC = 0.
for i, j in zip(y_pred, y_test):
    if i != j:
        err_RFC += 1.
ans2 = err_RFC / len(y_test)
print(ans2)


0.0668151447661

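A single hold-out split can be noisy, so a cross-validated comparison is one way to make the 1-NN vs. random forest result more robust; a minimal sketch using model_selection.cross_val_score (an addition, not part of the original assignment):

In [ ]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import ensemble

# 5-fold cross-validated error rate for both classifiers on the full dataset
for name, clf in [('1-NN', KNeighborsClassifier(n_neighbors=1)),
                  ('random forest', ensemble.RandomForestClassifier(n_estimators=1000))]:
    scores = cross_val_score(clf, digits.data, digits.target, cv=5)
    print('%s error: %.4f' % (name, 1 - scores.mean()))
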
In [37]:
with open('1nn_vs_RandFor_2.txt', 'w') as file_out:
    file_out.write(str(ans2))

In [ ]: