In [22]:
import matplotlib.pyplot as plt
import numpy
from pandas import read_csv, DataFrame
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.decomposition import PCA
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
In [28]:
# Load the training and test tables from the working directory.
train = read_csv('train.csv')
test = read_csv('test.csv')
# Separate the target column from the feature columns.
train_y = train['label']
# `axis` is passed by keyword: the positional form drop('label', 1) is
# deprecated and rejected outright by pandas 2.x.
train_X = train.drop('label', axis=1)
train.head()
Out[28]:
In [13]:
%matplotlib inline
pca = PCA()
pca.fit(train_X)
plt.figure(1, figsize=(4, 3))
plt.clf()
plt.axes([.2, .2, .7, .7])
plt.plot(pca.explained_variance_, linewidth=2)
Out[13]:
In [30]:
# Project the training features with the fitted PCA and keep only the first
# 60 columns of the transformed matrix.
train_X_pca = pca.transform(train_X)[:, :60]

# Cross-validation splitter built from the labels: 10 shuffled iterations,
# each holding out 20% of the samples; the fixed random_state keeps the
# splits reproducible between runs.
train_shuf = StratifiedShuffleSplit(
    train_y,
    n_iter=10,
    test_size=0.2,
    random_state=123,
)
In [ ]:
# Search space for the SVC. Only the RBF kernel is active; the other axes that
# were sketched for a wider search are kept here for reference:
#   'C': 10. ** numpy.arange(-5, 5, 1)
#   'degree': [2, 3]
#   'gamma': numpy.arange(1, 5, 1)
parameters = dict(kernel=['rbf'])
clf = SVC()
# Run the search on the PCA-reduced features using the prepared CV splitter.
gs = GridSearchCV(
    estimator=clf,
    param_grid=parameters,
    n_jobs=1,
    cv=train_shuf,
).fit(train_X_pca, train_y)
# Report the best cross-validation score, then the estimator that achieved it.
for summary in (gs.best_score_, gs.best_estimator_):
    print(summary)
In [ ]:
# Final model: take the best estimator found by the grid search, fit it, and
# write the predictions for the test table to out.csv.
clf = gs.best_estimator_
print(clf)
# The grid search evaluated the estimator on the PCA-reduced features, so the
# final fit and the test predictions use that same representation; fitting on
# the raw columns would produce a model that gs.best_score_ does not describe.
# NOTE(review): assumes `test` has the same feature columns as `train_X` - confirm.
n_components = train_X_pca.shape[1]
test_X_pca = pca.transform(test)[:, :n_components]
clf.fit(train_X_pca, train_y)
# predict() returns a NumPy array, which has no savetxt() method; keep the
# predictions in `pred` and write them with numpy.savetxt instead.
pred = clf.predict(test_X_pca)
# One row per test sample: a 1-based ImageId followed by the predicted label.
# The module is imported as `numpy` in this notebook (there is no `np` alias).
image_ids = numpy.arange(1, len(test) + 1)
numpy.savetxt('out.csv', numpy.c_[image_ids, pred], delimiter=',',
              header='ImageId,Label', comments='', fmt='%d')