In [1]:
#%matplotlib inline
import numpy as np
import pandas as pd

In [2]:
train = pd.read_csv('./input/digit_train.csv')
test = pd.read_csv('./input/digit_test.csv')

In [3]:
train = train.head(1000)
y_train = train.pop('label')
X_train = train
#print(train.describe())

#print(X_train.head(2))
#print(y_train)
#print(test.head(2))
#print(X_train.count)
#print(y_train)

In [4]:
from sklearn.svm import SVC

clf = SVC()
clf.fit(X_train, y_train)


Out[4]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [5]:
y_pred= clf.predict(test)
#print(y_pred)

#from sklearn.metrics import accuracy_score
#accuracy_score(, y_pred)

submission = pd.DataFrame(y_pred) 
#print (submission.head(4))
submission.to_csv('submission_digit_recognizer.csv')

In [6]:
from sklearn import decomposition
pca = decomposition.PCA(n_components=3)
pca.fit(train)
new_train = pca.transform(train)
print(new_train)


[[ -629.40940428   730.39246625  -199.31178016]
 [ 1735.73027662   322.27427652   768.68645027]
 [ -841.00935264   302.77410709   -66.69483845]
 ..., 
 [ -540.15548292  -802.48374617   301.00757383]
 [  290.9273804    253.81066619  -694.90507688]
 [ -437.23539858  -244.06691763   533.84204256]]

In [ ]: