SCIKIT LEARN - Klassifikation - MNIST Datensammlung

Erkennen handgeschriebener Ziffern (0 bis 9)

Aufgabe: Laden Sie die notwendigen Basisbibliotheken

Lösung:


In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import scipy as sc
import pandas as pd
%matplotlib inline

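Aufgabe: Laden Sie den MNIST-Datensatz mit Hilfe der Funktion fetch_mldata
aus dem Modul sklearn.datasets (scikit-learn).
Hinweis: fetch_mldata wurde in scikit-learn 0.22 entfernt; in neueren Versionen
steht stattdessen fetch_openml zur Verfügung.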

Lösung:


In [ ]:
from sklearn.datasets import fetch_mldata

In [ ]:
mnist = fetch_mldata('MNIST original')

In [ ]:
# Show which kind of object the dataset loader returned.
type(mnist)

In [ ]:
# Display the loaded dataset object itself.
mnist

In [ ]:


In [ ]:
# Pull the feature matrix and the label vector out of the dataset object.
X = mnist["data"]
y = mnist["target"]

In [ ]:
# Container type of the feature matrix.
type(X)

In [ ]:
# Dimensions of the feature matrix.
X.shape

In [ ]:


In [ ]:
# Dimensions of the label vector.
y.shape

In [ ]:


In [ ]:
ausw_zahl = X[30000]

In [ ]:
ausw_zahl_bild = ausw_zahl.reshape(28, 28)

In [ ]:
plt.imshow(ausw_zahl_bild, cmap = plt.cm.binary, interpolation="nearest")
plt.axis("off")
plt.show()

In [ ]:
y[30000]

In [ ]:
split = 60000

In [ ]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:split], y[60000:]

In [ ]:
# Print the training feature matrix.
print(X_train)

In [ ]:
# Dimensions of the training feature matrix.
X_train.shape

In [ ]:
# Print the training labels.
print(y_train)

In [ ]:
# Dimensions of the training label vector.
y_train.shape

In [ ]:
zufall_index = np.random.permutation(60000)

In [ ]:
X_train, y_train = X_train[zufall_index], y_train[zufall_index]

In [ ]:
# Boolean targets for two one-vs-rest problems: True where the label equals
# the digit in the variable name, False otherwise.
y_train_5, y_test_5 = y_train == 5, y_test == 5
y_train_6, y_test_6 = y_train == 6, y_test == 6

In [ ]:
from sklearn.linear_model import SGDClassifier

In [ ]:
# Create the SGD-based classifier; random_state is fixed to 42.
sgd_clf = SGDClassifier(random_state=42)

In [ ]:
# Train on the boolean "label == 5" targets.
sgd_clf.fit(X_train,y_train_5)

In [ ]:
# Classify the single sample selected earlier; it is wrapped in a list
# because predict is given a collection of samples.
sgd_clf.predict([ausw_zahl])

In [ ]:


In [ ]:
from sklearn.model_selection import cross_val_score

In [ ]:


In [ ]:
# 3-fold cross-validated accuracy for the "label == 5" targets on the training set.
cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring="accuracy")

In [ ]:
# 5-fold cross-validated accuracy for the "label == 6" targets on the training set.
cross_val_score(sgd_clf, X_train, y_train_6, cv=5, scoring="accuracy")

In [ ]:


In [ ]:
from sklearn.model_selection import cross_val_predict

In [ ]:
# Cross-validated predictions (3 folds) for the "label == 5" targets: one
# prediction per training sample.
y_train_pred_5 = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)

In [ ]:
from sklearn.metrics import confusion_matrix

In [ ]:
# Confusion matrix of true "label == 5" targets (first argument) against the
# cross-validated predictions (second argument).
confusion_matrix(y_train_5,y_train_pred_5)

In [ ]:
# Cross-validated predictions (5 folds) for the "label == 6" targets.
y_train_pred_6 = cross_val_predict(sgd_clf, X_train, y_train_6, cv=5)

In [ ]:
# Confusion matrix of true "label == 6" targets against the cross-validated
# predictions.
confusion_matrix(y_train_6,y_train_pred_6)

In [ ]:
from sklearn.metrics import precision_score, recall_score

In [ ]:
# Precision of the cross-validated "label == 6" predictions.
precision_score(y_train_6,y_train_pred_6)

In [ ]:
# Recall of the cross-validated "label == 6" predictions.
recall_score(y_train_6,y_train_pred_6)

In [ ]:
from sklearn.metrics import f1_score

In [ ]:
# F1 score of the cross-validated "label == 6" predictions.
f1_score(y_train_6,y_train_pred_6)

In [ ]:
# F1 score of the cross-validated "label == 5" predictions.
f1_score(y_train_5,y_train_pred_5)

In [ ]: