In [1]:
from sklearn.datasets import fetch_20newsgroups
import numpy as np

In [2]:
# Load the complete 20 Newsgroups corpus (subset='all'); the first call downloads the dataset.
news = fetch_20newsgroups(
    subset='all',
)


Downloading 20news dataset. This may take a few minutes.
Downloading dataset from https://ndownloader.figshare.com/files/5975967 (14 MB)

In [3]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module is deprecated and removed in 0.20.
from sklearn.model_selection import train_test_split


/Users/jinze/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [4]:
# Use only the first 3000 documents, holding out 25% of them as a test split.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    news.data[:3000],
    news.target[:3000],
    test_size=0.25,
    random_state=33,
)

In [5]:
from sklearn.svm import SVC

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

In [7]:
# Two-step text classifier: TF-IDF word features (English stop words removed)
# followed by a support vector classifier with its default settings.
clf = Pipeline(steps=[
    ('vect', TfidfVectorizer(stop_words='english', analyzer='word')),
    ('svc', SVC()),
])

In [12]:
# Hyper-parameter grid for the 'svc' pipeline step (the `svc__` prefix routes
# each value to that step): 4 log-spaced gammas x 3 log-spaced C values = 12 candidates.
parameters = {
    'svc__gamma': np.logspace(-2, 1, 4),  # 0.01, 0.1, 1, 10
    'svc__C': np.logspace(-1, 1, 3),      # 0.1, 1, 10
}

In [9]:
# GridSearchCV lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.grid_search module is deprecated and removed in 0.20.
from sklearn.model_selection import GridSearchCV

In [13]:
# Exhaustive search over `parameters` for the `clf` pipeline.
gs = GridSearchCV(
    clf,
    parameters,
    cv=3,        # 3-fold cross-validation per candidate
    refit=True,  # refit on the full training data with the best parameters found
    n_jobs=-1,   # run the fits in parallel
    verbose=2,   # log every individual fit
)

In [15]:
# Run the grid search on the training split; the %time line magic reports how long the fit takes.
# The fit result is bound to `_`, and the final expression displays the best
# parameter combination together with its cross-validated score.
%time _=gs.fit(X_train,y_train)
gs.best_params_,gs.best_score_


Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] svc__gamma=0.01, svc__C=0.1 .....................................
[CV] svc__gamma=0.01, svc__C=0.1 .....................................
[CV] svc__gamma=0.01, svc__C=0.1 .....................................
[CV] svc__gamma=0.1, svc__C=0.1 ......................................
[CV] ............................ svc__gamma=0.01, svc__C=0.1 -   6.7s
[CV] svc__gamma=0.1, svc__C=0.1 ......................................
[CV] ............................. svc__gamma=0.1, svc__C=0.1 -   6.7s
[CV] ............................ svc__gamma=0.01, svc__C=0.1 -   6.9s
[CV] svc__gamma=0.1, svc__C=0.1 ......................................
[CV] ............................ svc__gamma=0.01, svc__C=0.1 -   6.8s
[CV] svc__gamma=1.0, svc__C=0.1 ......................................
[CV] svc__gamma=1.0, svc__C=0.1 ......................................
[CV] ............................. svc__gamma=0.1, svc__C=0.1 -   6.9s
[CV] svc__gamma=1.0, svc__C=0.1 ......................................
[CV] ............................. svc__gamma=1.0, svc__C=0.1 -   6.9s
[CV] svc__gamma=10.0, svc__C=0.1 .....................................
[CV] ............................. svc__gamma=0.1, svc__C=0.1 -   7.1s
[CV] svc__gamma=10.0, svc__C=0.1 .....................................
[CV] ............................. svc__gamma=1.0, svc__C=0.1 -   7.1s
[CV] svc__gamma=10.0, svc__C=0.1 .....................................
[CV] ............................. svc__gamma=1.0, svc__C=0.1 -   6.8s
[CV] svc__gamma=0.01, svc__C=1.0 .....................................
[CV] ............................ svc__gamma=10.0, svc__C=0.1 -   6.7s
[CV] svc__gamma=0.01, svc__C=1.0 .....................................
[CV] ............................ svc__gamma=10.0, svc__C=0.1 -   6.8s
[CV] svc__gamma=0.01, svc__C=1.0 .....................................
[CV] ............................ svc__gamma=10.0, svc__C=0.1 -   6.9s
[CV] svc__gamma=0.1, svc__C=1.0 ......................................
[CV] ............................ svc__gamma=0.01, svc__C=1.0 -   6.8s
[CV] svc__gamma=0.1, svc__C=1.0 ......................................
[CV] ............................ svc__gamma=0.01, svc__C=1.0 -   6.9s
[CV] svc__gamma=0.1, svc__C=1.0 ......................................
[CV] ............................. svc__gamma=0.1, svc__C=1.0 -   6.8s
[CV] svc__gamma=1.0, svc__C=1.0 ......................................
[CV] ............................ svc__gamma=0.01, svc__C=1.0 -   7.0s
[CV] svc__gamma=1.0, svc__C=1.0 ......................................
[CV] ............................. svc__gamma=0.1, svc__C=1.0 -   6.8s
[CV] svc__gamma=1.0, svc__C=1.0 ......................................
[CV] ............................. svc__gamma=0.1, svc__C=1.0 -   6.9s
[CV] svc__gamma=10.0, svc__C=1.0 .....................................
[CV] ............................. svc__gamma=1.0, svc__C=1.0 -   6.9s
[CV] svc__gamma=10.0, svc__C=1.0 .....................................
[CV] ............................. svc__gamma=1.0, svc__C=1.0 -   6.9s
[CV] svc__gamma=10.0, svc__C=1.0 .....................................
[CV] ............................. svc__gamma=1.0, svc__C=1.0 -   7.0s
[CV] svc__gamma=0.01, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=10.0, svc__C=1.0 -   6.9s
[CV] svc__gamma=0.01, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=10.0, svc__C=1.0 -   7.0s
[CV] svc__gamma=0.01, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=10.0, svc__C=1.0 -   7.0s
[CV] svc__gamma=0.1, svc__C=10.0 .....................................
[CV] ........................... svc__gamma=0.01, svc__C=10.0 -   7.0s
[CV] svc__gamma=0.1, svc__C=10.0 .....................................
[CV] ........................... svc__gamma=0.01, svc__C=10.0 -   7.1s
[CV] svc__gamma=0.1, svc__C=10.0 .....................................
[CV] ............................ svc__gamma=0.1, svc__C=10.0 -   7.0s
[CV] svc__gamma=1.0, svc__C=10.0 .....................................
[CV] ........................... svc__gamma=0.01, svc__C=10.0 -   7.2s
[CV] svc__gamma=1.0, svc__C=10.0 .....................................
[CV] ............................ svc__gamma=0.1, svc__C=10.0 -   7.1s
[CV] svc__gamma=1.0, svc__C=10.0 .....................................
[CV] ............................ svc__gamma=0.1, svc__C=10.0 -   7.2s
[CV] svc__gamma=10.0, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=1.0, svc__C=10.0 -   7.2s
[CV] svc__gamma=10.0, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=1.0, svc__C=10.0 -   7.3s
[CV] svc__gamma=10.0, svc__C=10.0 ....................................
[CV] ............................ svc__gamma=1.0, svc__C=10.0 -   7.1s
[CV] ........................... svc__gamma=10.0, svc__C=10.0 -   6.9s
[CV] ........................... svc__gamma=10.0, svc__C=10.0 -   6.9s
[CV] ........................... svc__gamma=10.0, svc__C=10.0 -   6.9s
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:  1.1min finished
CPU times: user 8.67 s, sys: 290 ms, total: 8.96 s
Wall time: 1min 10s
Out[15]:
({'svc__C': 10.0, 'svc__gamma': 0.1}, 0.7906666666666666)

In [16]:
# Score the fitted grid search on the held-out test split.
# The parenthesised call form is valid on both Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(gs.score(X_test, y_test))


0.8226666666666667