Importing Libraries


In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.io
import re
from math import *
from sklearn import svm

In [5]:
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Initializing the PorterStemmer
ps = PorterStemmer()

# Downloading the punkt model
#nltk.download('punkt')

In [6]:
sns.set_style('whitegrid')
%matplotlib inline

Functions


In [7]:
def readFile(fileText):
    try:
        # Read The text file
        file = open(fileText, 'r')
        fileContent = file.read()
        
        # Closing stream after reading it
        file.close()
        
        # Returing file Content
        return { "status": True, "content": fileContent, "msg": '' }
    
    except FileNotFoundError as e:
        # File can't be found
        print(e)
        
        # Returning empty string
        return { "status": False, "content": " ", "msg": e } 
    
def getVocabList():

    # Reading VocabList
    file = readFile('vocab.txt')

    if(file["status"]):
        
        # Getting content of the file
        fileContent = file["content"]
        
        
        # Replacing Numbers with ' '
        numberPattern = "(\d+)"
        fileContent = re.sub(numberPattern, ' ', fileContent)

        
        # Remove any non alphanumeric characters
        nonWordPattern = '[^a-zA-Z0-9]'
        fileContent = re.sub( nonWordPattern, ' ', fileContent)

        
        # Replace multiple spaces with single space
        spacePattern = "[ ]+"
        fileContent = re.sub( spacePattern ,' ', fileContent)

        # Tokenize words
        try:
            
            # Tokenize all of the words
            words = word_tokenize(fileContent)
            return words

        # Error Occured  
        except:
            print("Some Error Occured in Stemming Process")
            return ['']
    else:
        
        # reading file has some problems
        print("We have some problems in Reading File")
        print(file["msg"])
    

    
def processEmail(fileName):

    # Read The text file
    file = readFile(fileName)
    
    if(file["status"]):
        
        # Getting content of the file
        fileContent = file["content"]
        
        # Convert string to lowercase
        fileContent = fileContent.lower()

        # Strip HTML
        htmlPattern = "<[^>]*>"
        fileContent = re.sub(htmlPattern,' ', fileContent)

        # Normalize URLs
        urlPattern = "(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
        fileContent = re.sub(urlPattern,'httpaddr', fileContent)

        # Normalize Numbers
        numberPattern = "(\d+)"
        fileContent = re.sub(numberPattern, 'number', fileContent)

        # Normalize Email Address
        emailPattern = r'[\w\.-]+@[\w\.-]+'
        fileContent = re.sub(emailPattern, 'emailaddr', fileContent)

        # Normalize Dollars
        dollarPattern = '[$]+'
        fileContent = re.sub(dollarPattern, 'dollar', fileContent)

        # Remove any non alphanumeric characters
        nonWordPattern = '[^a-zA-Z0-9]'
        fileContent = re.sub( nonWordPattern, ' ', fileContent)

        # Replace multiple spaces with single space
        spacePattern = "[ ]+"
        fileContent = re.sub( spacePattern ,' ', fileContent)

        # Words Stemming
        try:
            # Tokenize all of the words
            words = word_tokenize(fileContent)

            # Word Stemming
            words = [ps.stem(x) for x in words]

        except:
            print("Some Error Occured in Stemming Process")
        
        # Initialzing word_indices
        word_indices = []
        
        for w in words:  
            # Constructing Word_indices
            try:
                idx = vocab.index(w)
                word_indices.append(idx)
                
            except ValueError as e:
                # Words doesn't exist in Vobabulary
                continue
        
        return word_indices
    else:
       
        # reading file has some problems
        print("We have some problems in Reading File")
        print(file["msg"])
        
def emailFeatures(word_indices):
    # Total number of words in the dictionary
    n = 1900
    
    # creating feature vector
    matrix = np.zeros((n,1))
    
    # Mapping word_indices to feature vector
    matrix[word_indices] = 1
    
    return matrix

def findBestModel(X,y, Xval, yval):
    # Initializing the Possible values for both C and Sigma
    pValues = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]);
    
    # Creating matrix for holding the error of each model
    error = np.zeros((len(pValues) ** 2,1))
    
    # Computing model error for each permutation of the sigma and C
    for i in range(len(pValues)):
        for j in range(len(pValues)):
            # Initializing The Model
            model = svm.SVC(C=pValues[i] ,kernel= 'rbf' ,gamma= 2 * ( pValues[j] ** 2 ))
            
            # Fitting Data to The Model
            model.fit(X,y)
            
            # Computing error of the Model on the Cross Validation Dataset
            error[ i * len(pValues) + j ] = 1 - model.score(Xval, yval)
            
    # Getting the minimum value index in error matrix
    idx = np.argmin(error)
    
    # Finding C, sigma for model with minimum error
    i = np.floor(idx / len(pValues))
    j = idx - i * len(pValues)
    
    C = pValues[int(i)]
    sigma = pValues[int(j)]
    
    return { "C": C,
           "sigma": sigma }

Spam Classifier

Load Data


In [8]:
mat = scipy.io.loadmat('spamTrain.mat')
X = mat["X"][0:3400]
y = mat["y"].T[0][0:3400]

Xval = mat["X"][3400:4000]
yval = mat["y"].T[0][3400:4000]

Train The SVM


In [24]:
findBestModel(X,y,Xval,yval)


Out[24]:
{'C': 10.0, 'sigma': 0.029999999999999999}

In [9]:
# Initializing The Model
model = svm.SVC(C=10 ,kernel= 'rbf' ,gamma= 2 * ( 0.3 ** 2 ))

# Fitting Data to The Model
model.fit(X,y)


Out[9]:
SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.18, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [10]:
model.score(Xval,yval)


Out[10]:
0.80666666666666664

Find Best Model With Sklearn


In [11]:
from sklearn.grid_search import GridSearchCV

In [31]:
param_grid = { 'C' : [ 0.1, 0.4, 0.8, 2, 5, 10, 20, 40, 100, 200, 400, 1000], 'gamma' : [ 1, 0.1, 0.01, 0.001, 0.0001,]}
grid = GridSearchCV(svm.SVC(), param_grid, verbose= 3)

In [14]:
grid.fit(X,y)


Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=0.679894 -  13.9s
[CV] C=0.1, gamma=1 ..................................................
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   13.9s remaining:    0.0s
[CV] ......................... C=0.1, gamma=1, score=0.679612 -  15.2s
[CV] C=0.1, gamma=1 ..................................................
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   29.2s remaining:    0.0s
[CV] ......................... C=0.1, gamma=1, score=0.681377 -  15.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.697531 -  13.6s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.691086 -  16.9s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.687555 -  18.3s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.888889 -  12.7s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.895852 -  12.5s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.891439 -  12.6s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.683422 -  13.3s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.686673 -  13.1s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.688438 -  13.0s
[CV] C=0.1, gamma=0.0001 .............................................
[CV] .................... C=0.1, gamma=0.0001, score=0.679894 -  13.0s
[CV] C=0.1, gamma=0.0001 .............................................
[CV] .................... C=0.1, gamma=0.0001, score=0.679612 -  12.7s
[CV] C=0.1, gamma=0.0001 .............................................
[CV] .................... C=0.1, gamma=0.0001, score=0.679612 -  12.5s
[CV] C=0.4, gamma=1 ..................................................
[CV] ......................... C=0.4, gamma=1, score=0.706349 -  17.3s
[CV] C=0.4, gamma=1 ..................................................
[CV] ......................... C=0.4, gamma=1, score=0.700794 -  17.2s
[CV] C=0.4, gamma=1 ..................................................
[CV] ......................... C=0.4, gamma=1, score=0.699029 -  17.3s
[CV] C=0.4, gamma=0.1 ................................................
[CV] ....................... C=0.4, gamma=0.1, score=0.743386 -  16.7s
[CV] C=0.4, gamma=0.1 ................................................
[CV] ....................... C=0.4, gamma=0.1, score=0.740512 -  16.4s
[CV] C=0.4, gamma=0.1 ................................................
[CV] ....................... C=0.4, gamma=0.1, score=0.745808 -  16.5s
[CV] C=0.4, gamma=0.01 ...............................................
[CV] ...................... C=0.4, gamma=0.01, score=0.955026 -   8.2s
[CV] C=0.4, gamma=0.01 ...............................................
[CV] ...................... C=0.4, gamma=0.01, score=0.947926 -   8.1s
[CV] C=0.4, gamma=0.01 ...............................................
[CV] ...................... C=0.4, gamma=0.01, score=0.956752 -   8.1s
[CV] C=0.4, gamma=0.001 ..............................................
[CV] ..................... C=0.4, gamma=0.001, score=0.925044 -  10.5s
[CV] C=0.4, gamma=0.001 ..............................................
[CV] ..................... C=0.4, gamma=0.001, score=0.921447 -  10.5s
[CV] C=0.4, gamma=0.001 ..............................................
[CV] ..................... C=0.4, gamma=0.001, score=0.913504 -  11.0s
[CV] C=0.4, gamma=0.0001 .............................................
[CV] .................... C=0.4, gamma=0.0001, score=0.679894 -  12.7s
[CV] C=0.4, gamma=0.0001 .............................................
[CV] .................... C=0.4, gamma=0.0001, score=0.679612 -  12.7s
[CV] C=0.4, gamma=0.0001 .............................................
[CV] .................... C=0.4, gamma=0.0001, score=0.679612 -  12.7s
[CV] C=0.8, gamma=1 ..................................................
[CV] ......................... C=0.8, gamma=1, score=0.767196 -  18.3s
[CV] C=0.8, gamma=1 ..................................................
[CV] ......................... C=0.8, gamma=1, score=0.756399 -  18.6s
[CV] C=0.8, gamma=1 ..................................................
[CV] ......................... C=0.8, gamma=1, score=0.752868 -  18.4s
[CV] C=0.8, gamma=0.1 ................................................
[CV] ....................... C=0.8, gamma=0.1, score=0.797178 -  18.2s
[CV] C=0.8, gamma=0.1 ................................................
[CV] ....................... C=0.8, gamma=0.1, score=0.787290 -  17.7s
[CV] C=0.8, gamma=0.1 ................................................
[CV] ....................... C=0.8, gamma=0.1, score=0.796117 -  16.8s
[CV] C=0.8, gamma=0.01 ...............................................
[CV] ...................... C=0.8, gamma=0.01, score=0.965608 -   7.4s
[CV] C=0.8, gamma=0.01 ...............................................
[CV] ...................... C=0.8, gamma=0.01, score=0.962048 -   7.4s
[CV] C=0.8, gamma=0.01 ...............................................
[CV] ...................... C=0.8, gamma=0.01, score=0.969109 -   7.6s
[CV] C=0.8, gamma=0.001 ..............................................
[CV] ..................... C=0.8, gamma=0.001, score=0.940917 -   8.3s
[CV] C=0.8, gamma=0.001 ..............................................
[CV] ..................... C=0.8, gamma=0.001, score=0.930274 -   8.2s
[CV] C=0.8, gamma=0.001 ..............................................
[CV] ..................... C=0.8, gamma=0.001, score=0.933804 -   8.4s
[CV] C=0.8, gamma=0.0001 .............................................
[CV] .................... C=0.8, gamma=0.0001, score=0.681658 -  12.1s
[CV] C=0.8, gamma=0.0001 .............................................
[CV] .................... C=0.8, gamma=0.0001, score=0.682259 -  12.1s
[CV] C=0.8, gamma=0.0001 .............................................
[CV] .................... C=0.8, gamma=0.0001, score=0.683142 -  12.1s
[CV] C=2, gamma=1 ....................................................
[CV] ........................... C=2, gamma=1, score=0.780423 -  17.2s
[CV] C=2, gamma=1 ....................................................
[CV] ........................... C=2, gamma=1, score=0.764342 -  17.3s
[CV] C=2, gamma=1 ....................................................
[CV] ........................... C=2, gamma=1, score=0.764342 -  16.8s
[CV] C=2, gamma=0.1 ..................................................
[CV] ......................... C=2, gamma=0.1, score=0.809524 -  16.7s
[CV] C=2, gamma=0.1 ..................................................
[CV] ......................... C=2, gamma=0.1, score=0.810238 -  18.0s
[CV] C=2, gamma=0.1 ..................................................
[CV] ......................... C=2, gamma=0.1, score=0.813769 -  18.3s
[CV] C=2, gamma=0.01 .................................................
[CV] ........................ C=2, gamma=0.01, score=0.977072 -   7.7s
[CV] C=2, gamma=0.01 .................................................
[CV] ........................ C=2, gamma=0.01, score=0.976169 -   7.5s
[CV] C=2, gamma=0.01 .................................................
[CV] ........................ C=2, gamma=0.01, score=0.974404 -   7.5s
[CV] C=2, gamma=0.001 ................................................
[CV] ....................... C=2, gamma=0.001, score=0.966490 -   6.8s
[CV] C=2, gamma=0.001 ................................................
[CV] ....................... C=2, gamma=0.001, score=0.955869 -   6.7s
[CV] C=2, gamma=0.001 ................................................
[CV] ....................... C=2, gamma=0.001, score=0.965578 -   6.7s
[CV] C=2, gamma=0.0001 ...............................................
[CV] ...................... C=2, gamma=0.0001, score=0.864198 -  12.6s
[CV] C=2, gamma=0.0001 ...............................................
[CV] ...................... C=2, gamma=0.0001, score=0.864960 -  12.6s
[CV] C=2, gamma=0.0001 ...............................................
[CV] ...................... C=2, gamma=0.0001, score=0.857899 -  12.7s
[CV] C=5, gamma=1 ....................................................
[CV] ........................... C=5, gamma=1, score=0.780423 -  18.9s
[CV] C=5, gamma=1 ....................................................
[CV] ........................... C=5, gamma=1, score=0.764342 -  18.9s
[CV] C=5, gamma=1 ....................................................
[CV] ........................... C=5, gamma=1, score=0.764342 -  17.7s
[CV] C=5, gamma=0.1 ..................................................
[CV] ......................... C=5, gamma=0.1, score=0.809524 -  17.4s
[CV] C=5, gamma=0.1 ..................................................
[CV] ......................... C=5, gamma=0.1, score=0.810238 -  17.1s
[CV] C=5, gamma=0.1 ..................................................
[CV] ......................... C=5, gamma=0.1, score=0.812886 -  16.5s
[CV] C=5, gamma=0.01 .................................................
[CV] ........................ C=5, gamma=0.01, score=0.975309 -   6.5s
[CV] C=5, gamma=0.01 .................................................
[CV] ........................ C=5, gamma=0.01, score=0.976169 -   6.4s
[CV] C=5, gamma=0.01 .................................................
[CV] ........................ C=5, gamma=0.01, score=0.977935 -   6.6s
[CV] C=5, gamma=0.001 ................................................
[CV] ....................... C=5, gamma=0.001, score=0.966490 -   5.0s
[CV] C=5, gamma=0.001 ................................................
[CV] ....................... C=5, gamma=0.001, score=0.965578 -   4.7s
[CV] C=5, gamma=0.001 ................................................
[CV] ....................... C=5, gamma=0.001, score=0.974404 -   4.8s
[CV] C=5, gamma=0.0001 ...............................................
[CV] ...................... C=5, gamma=0.0001, score=0.932981 -   9.1s
[CV] C=5, gamma=0.0001 ...............................................
[CV] ...................... C=5, gamma=0.0001, score=0.922330 -   9.0s
[CV] C=5, gamma=0.0001 ...............................................
[CV] ...................... C=5, gamma=0.0001, score=0.924095 -   9.1s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.780423 -  17.4s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.764342 -  17.4s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.764342 -  16.8s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.810406 -  16.7s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.810238 -  17.1s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.813769 -  16.7s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.974427 -   6.4s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.974404 -   6.1s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.976169 -   6.3s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.970018 -   4.2s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.970874 -   4.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.976169 -   4.4s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..................... C=10, gamma=0.0001, score=0.944444 -   7.4s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..................... C=10, gamma=0.0001, score=0.932039 -   7.2s
[CV] C=10, gamma=0.0001 ..............................................
[CV] ..................... C=10, gamma=0.0001, score=0.939100 -   7.7s
[CV] C=20, gamma=1 ...................................................
[CV] .......................... C=20, gamma=1, score=0.780423 -  18.0s
[CV] C=20, gamma=1 ...................................................
[CV] .......................... C=20, gamma=1, score=0.764342 -  18.3s
[CV] C=20, gamma=1 ...................................................
[CV] .......................... C=20, gamma=1, score=0.764342 -  17.7s
[CV] C=20, gamma=0.1 .................................................
[CV] ........................ C=20, gamma=0.1, score=0.811287 -  17.1s
[CV] C=20, gamma=0.1 .................................................
[CV] ........................ C=20, gamma=0.1, score=0.810238 -  17.6s
[CV] C=20, gamma=0.1 .................................................
[CV] ........................ C=20, gamma=0.1, score=0.813769 -  17.0s
[CV] C=20, gamma=0.01 ................................................
[CV] ....................... C=20, gamma=0.01, score=0.973545 -   6.2s
[CV] C=20, gamma=0.01 ................................................
[CV] ....................... C=20, gamma=0.01, score=0.977052 -   5.7s
[CV] C=20, gamma=0.01 ................................................
[CV] ....................... C=20, gamma=0.01, score=0.972639 -   6.1s
[CV] C=20, gamma=0.001 ...............................................
[CV] ...................... C=20, gamma=0.001, score=0.976190 -   3.7s
[CV] C=20, gamma=0.001 ...............................................
[CV] ...................... C=20, gamma=0.001, score=0.969109 -   3.6s
[CV] C=20, gamma=0.001 ...............................................
[CV] ...................... C=20, gamma=0.001, score=0.980583 -   3.8s
[CV] C=20, gamma=0.0001 ..............................................
[CV] ..................... C=20, gamma=0.0001, score=0.966490 -   5.9s
[CV] C=20, gamma=0.0001 ..............................................
[CV] ..................... C=20, gamma=0.0001, score=0.954987 -   5.8s
[CV] C=20, gamma=0.0001 ..............................................
[CV] ..................... C=20, gamma=0.0001, score=0.965578 -   5.9s
[CV] C=40, gamma=1 ...................................................
[CV] .......................... C=40, gamma=1, score=0.780423 -  17.0s
[CV] C=40, gamma=1 ...................................................
[CV] .......................... C=40, gamma=1, score=0.764342 -  17.1s
[CV] C=40, gamma=1 ...................................................
[CV] .......................... C=40, gamma=1, score=0.764342 -  16.9s
[CV] C=40, gamma=0.1 .................................................
[CV] ........................ C=40, gamma=0.1, score=0.811287 -  16.8s
[CV] C=40, gamma=0.1 .................................................
[CV] ........................ C=40, gamma=0.1, score=0.810238 -  16.7s
[CV] C=40, gamma=0.1 .................................................
[CV] ........................ C=40, gamma=0.1, score=0.813769 -  16.5s
[CV] C=40, gamma=0.01 ................................................
[CV] ....................... C=40, gamma=0.01, score=0.971781 -   5.9s
[CV] C=40, gamma=0.01 ................................................
[CV] ....................... C=40, gamma=0.01, score=0.976169 -   5.5s
[CV] C=40, gamma=0.01 ................................................
[CV] ....................... C=40, gamma=0.01, score=0.969109 -   5.8s
[CV] C=40, gamma=0.001 ...............................................
[CV] ...................... C=40, gamma=0.001, score=0.976190 -   3.4s
[CV] C=40, gamma=0.001 ...............................................
[CV] ...................... C=40, gamma=0.001, score=0.969991 -   3.5s
[CV] C=40, gamma=0.001 ...............................................
[CV] ...................... C=40, gamma=0.001, score=0.981465 -   3.5s
[CV] C=40, gamma=0.0001 ..............................................
[CV] ..................... C=40, gamma=0.0001, score=0.965608 -   4.9s
[CV] C=40, gamma=0.0001 ..............................................
[CV] ..................... C=40, gamma=0.0001, score=0.963813 -   4.7s
[CV] C=40, gamma=0.0001 ..............................................
[CV] ..................... C=40, gamma=0.0001, score=0.972639 -   4.8s
[CV] C=100, gamma=1 ..................................................
[CV] ......................... C=100, gamma=1, score=0.780423 -  16.9s
[CV] C=100, gamma=1 ..................................................
[CV] ......................... C=100, gamma=1, score=0.764342 -  17.0s
[CV] C=100, gamma=1 ..................................................
[CV] ......................... C=100, gamma=1, score=0.764342 -  16.8s
[CV] C=100, gamma=0.1 ................................................
[CV] ....................... C=100, gamma=0.1, score=0.811287 -  16.6s
[CV] C=100, gamma=0.1 ................................................
[CV] ....................... C=100, gamma=0.1, score=0.810238 -  17.3s
[CV] C=100, gamma=0.1 ................................................
[CV] ....................... C=100, gamma=0.1, score=0.813769 -  16.6s
[CV] C=100, gamma=0.01 ...............................................
[CV] ...................... C=100, gamma=0.01, score=0.969136 -   5.9s
[CV] C=100, gamma=0.01 ...............................................
[CV] ...................... C=100, gamma=0.01, score=0.976169 -   5.5s
[CV] C=100, gamma=0.01 ...............................................
[CV] ...................... C=100, gamma=0.01, score=0.967343 -   5.7s
[CV] C=100, gamma=0.001 ..............................................
[CV] ..................... C=100, gamma=0.001, score=0.974427 -   3.3s
[CV] C=100, gamma=0.001 ..............................................
[CV] ..................... C=100, gamma=0.001, score=0.968226 -   3.2s
[CV] C=100, gamma=0.001 ..............................................
[CV] ..................... C=100, gamma=0.001, score=0.978817 -   3.3s
[CV] C=100, gamma=0.0001 .............................................
[CV] .................... C=100, gamma=0.0001, score=0.970018 -   4.0s
[CV] C=100, gamma=0.0001 .............................................
[CV] .................... C=100, gamma=0.0001, score=0.969109 -   3.9s
[CV] C=100, gamma=0.0001 .............................................
[CV] .................... C=100, gamma=0.0001, score=0.975287 -   4.1s
[CV] C=200, gamma=1 ..................................................
[CV] ......................... C=200, gamma=1, score=0.780423 -  17.0s
[CV] C=200, gamma=1 ..................................................
[CV] ......................... C=200, gamma=1, score=0.764342 -  17.0s
[CV] C=200, gamma=1 ..................................................
[CV] ......................... C=200, gamma=1, score=0.764342 -  17.0s
[CV] C=200, gamma=0.1 ................................................
[CV] ....................... C=200, gamma=0.1, score=0.811287 -  16.6s
[CV] C=200, gamma=0.1 ................................................
[CV] ....................... C=200, gamma=0.1, score=0.810238 -  16.7s
[CV] C=200, gamma=0.1 ................................................
[CV] ....................... C=200, gamma=0.1, score=0.813769 -  16.6s
[CV] C=200, gamma=0.01 ...............................................
[CV] ...................... C=200, gamma=0.01, score=0.968254 -   5.8s
[CV] C=200, gamma=0.01 ...............................................
[CV] ...................... C=200, gamma=0.01, score=0.976169 -   5.5s
[CV] C=200, gamma=0.01 ...............................................
[CV] ...................... C=200, gamma=0.01, score=0.970874 -   5.6s
[CV] C=200, gamma=0.001 ..............................................
[CV] ..................... C=200, gamma=0.001, score=0.970899 -   3.3s
[CV] C=200, gamma=0.001 ..............................................
[CV] ..................... C=200, gamma=0.001, score=0.968226 -   3.1s
[CV] C=200, gamma=0.001 ..............................................
[CV] ..................... C=200, gamma=0.001, score=0.977052 -   3.2s
[CV] C=200, gamma=0.0001 .............................................
[CV] .................... C=200, gamma=0.0001, score=0.971781 -   3.6s
[CV] C=200, gamma=0.0001 .............................................
[CV] .................... C=200, gamma=0.0001, score=0.969991 -   3.5s
[CV] C=200, gamma=0.0001 .............................................
[CV] .................... C=200, gamma=0.0001, score=0.976169 -   3.8s
[CV] C=400, gamma=1 ..................................................
[CV] ......................... C=400, gamma=1, score=0.780423 -  18.5s
[CV] C=400, gamma=1 ..................................................
[CV] ......................... C=400, gamma=1, score=0.764342 -  17.9s
[CV] C=400, gamma=1 ..................................................
[CV] ......................... C=400, gamma=1, score=0.764342 -  17.7s
[CV] C=400, gamma=0.1 ................................................
[CV] ....................... C=400, gamma=0.1, score=0.811287 -  17.6s
[CV] C=400, gamma=0.1 ................................................
[CV] ....................... C=400, gamma=0.1, score=0.810238 -  18.8s
[CV] C=400, gamma=0.1 ................................................
[CV] ....................... C=400, gamma=0.1, score=0.813769 -  17.6s
[CV] C=400, gamma=0.01 ...............................................
[CV] ...................... C=400, gamma=0.01, score=0.959436 -   5.4s
[CV] C=400, gamma=0.01 ...............................................
[CV] ...................... C=400, gamma=0.01, score=0.976169 -   5.7s
[CV] C=400, gamma=0.01 ...............................................
[CV] ...................... C=400, gamma=0.01, score=0.970874 -   5.2s
[CV] C=400, gamma=0.001 ..............................................
[CV] ..................... C=400, gamma=0.001, score=0.969136 -   3.4s
[CV] C=400, gamma=0.001 ..............................................
[CV] ..................... C=400, gamma=0.001, score=0.964695 -   2.9s
[CV] C=400, gamma=0.001 ..............................................
[CV] ..................... C=400, gamma=0.001, score=0.969109 -   3.1s
[CV] C=400, gamma=0.0001 .............................................
[CV] .................... C=400, gamma=0.0001, score=0.975309 -   3.3s
[CV] C=400, gamma=0.0001 .............................................
[CV] .................... C=400, gamma=0.0001, score=0.969991 -   3.4s
[CV] C=400, gamma=0.0001 .............................................
[CV] .................... C=400, gamma=0.0001, score=0.977935 -   3.5s
[CV] C=1000, gamma=1 .................................................
[CV] ........................ C=1000, gamma=1, score=0.780423 -  17.8s
[CV] C=1000, gamma=1 .................................................
[CV] ........................ C=1000, gamma=1, score=0.764342 -  18.1s
[CV] C=1000, gamma=1 .................................................
[CV] ........................ C=1000, gamma=1, score=0.764342 -  16.9s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ...................... C=1000, gamma=0.1, score=0.811287 -  16.7s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ...................... C=1000, gamma=0.1, score=0.810238 -  17.0s
[CV] C=1000, gamma=0.1 ...............................................
[CV] ...................... C=1000, gamma=0.1, score=0.813769 -  16.6s
[CV] C=1000, gamma=0.01 ..............................................
[CV] ..................... C=1000, gamma=0.01, score=0.956790 -   5.1s
[CV] C=1000, gamma=0.01 ..............................................
[CV] ..................... C=1000, gamma=0.01, score=0.976169 -   5.5s
[CV] C=1000, gamma=0.01 ..............................................
[CV] ..................... C=1000, gamma=0.01, score=0.968226 -   4.8s
[CV] C=1000, gamma=0.001 .............................................
[CV] .................... C=1000, gamma=0.001, score=0.964727 -   3.1s
[CV] C=1000, gamma=0.001 .............................................
[CV] .................... C=1000, gamma=0.001, score=0.966461 -   2.8s
[CV] C=1000, gamma=0.001 .............................................
[CV] .................... C=1000, gamma=0.001, score=0.968226 -   3.1s
[CV] C=1000, gamma=0.0001 ............................................
[CV] ................... C=1000, gamma=0.0001, score=0.970899 -   3.1s
[CV] C=1000, gamma=0.0001 ............................................
[CV] ................... C=1000, gamma=0.0001, score=0.966461 -   3.1s
[CV] C=1000, gamma=0.0001 ............................................
[CV] ................... C=1000, gamma=0.0001, score=0.977052 -   3.2s
[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed: 33.0min finished
Out[14]:
GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.1, 0.4, 0.8, 2, 5, 10, 20, 40, 100, 200, 400, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=3)

In [29]:
model = svm.SVC(C=5, gamma=0.01, kernel='rbf')
model.fit(X,y)


Out[29]:
SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [30]:
model.score(Xval,yval)


Out[30]:
0.97999999999999998

In [ ]: