In [175]:
# Standard imports for Python data analysis packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.lda import LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.grid_search import GridSearchCV
# This enables inline plots
%matplotlib inline

# Limit rows displayed in notebook
pd.set_option('display.max_rows', 10)
pd.set_option('display.precision', 2)

In [176]:
!pwd


/Users/megara/DAT_SF_11/homeworks/hw2

In [189]:
# I pull in the data twice. In one copy I fill missing ('?') values: normal draws for
# continuous columns, and random integers (mapped back to category strings to mimic the
# observed distribution) for discrete columns. In the other copy I simply drop every row
# containing a '?'.
col_names=[ "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15","A16"]
credit = pd.read_csv('.//data/crx.data', names=col_names)
credit1 = pd.read_csv('.//data/crx.data', names=col_names, na_values='?')
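
In [ ]:
# Quick sanity check (a sketch, not run in the original): count the '?' entries that
# became NaN in credit1, per column.
credit1.isnull().sum()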

In [192]:
credit1.head()


Out[192]:
A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13 A14 A15 A16
0 b 30.8 0.0 u g w v 1.2 t t 1 f g 202 0 +
1 a 58.7 4.5 u g q h 3.0 t t 6 f g 43 560 +
2 a 24.5 0.5 u g q h 1.5 t f 0 f g 280 824 +
3 b 27.8 1.5 u g w v 3.8 t t 5 t g 100 3 +
4 b 20.2 5.6 u g w v 1.7 t f 0 f s 120 0 +

In [191]:
credit1=credit1.dropna()
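
In [ ]:
# Sketch: confirm how many rows survive the dropna (the Approval output further down
# shows 653 of the original 690).
credit1.shape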

In [85]:
credit.head()


Out[85]:
A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13 A14 A15 A16 Approval result
0 b 30.8 0.0 u g w v 1.2 t t 1 f g 00202 0 1 1 None
1 a 58.7 4.5 u g q h 3.0 t t 6 f g 00043 560 1 1 None
2 a 24.5 0.5 u g q h 1.5 t f 0 f g 00280 824 1 1 None
3 b 27.8 1.5 u g w v 3.8 t t 5 t g 00100 3 1 1 None
4 b 20.2 5.6 u g w v 1.7 t f 0 f s 00120 0 1 1 None

In [9]:
credit.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 690 entries, 0 to 689
Data columns (total 16 columns):
A1     690 non-null object
A2     690 non-null object
A3     690 non-null float64
A4     690 non-null object
A5     690 non-null object
A6     690 non-null object
A7     690 non-null object
A8     690 non-null float64
A9     690 non-null object
A10    690 non-null object
A11    690 non-null int64
A12    690 non-null object
A13    690 non-null object
A14    690 non-null object
A15    690 non-null int64
A16    690 non-null object
dtypes: float64(2), int64(2), object(12)
memory usage: 91.6+ KB

In [10]:
credit['A1'].unique()


Out[10]:
array(['b', 'a', '?'], dtype=object)

In [11]:
credit['A1'].value_counts().plot(kind='bar');



In [12]:
credit['A1'].replace(to_replace='?', value=np.random.randint(0,2),inplace=True)  # note: randint is evaluated once, so every '?' gets the same value

In [13]:
credit['A1'].replace(to_replace=0, value='a',inplace=True)

In [14]:
credit['A1'].replace(to_replace=1, value='b',inplace=True)
credit['A1'].replace(to_replace=2, value='b',inplace=True)
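
In [ ]:
# As flagged above, randint is drawn once per replace call, so every '?' in a column
# gets the same fill; also, randint(0, 2) only returns 0 or 1, so the to_replace=2
# lines here and for A4-A6 below never fire. A sketch of per-row imputation that
# samples each '?' independently, weighted by the observed category frequencies
# (hypothetical helper, not run in the original):
def fill_categorical(series):
    freqs = series[series != '?'].value_counts(normalize=True)
    return series.map(lambda v: np.random.choice(freqs.index, p=freqs.values)
                      if v == '?' else v)
# e.g. credit['A1'] = fill_categorical(credit['A1'])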

In [15]:
credit['A1'].unique()


Out[15]:
array(['b', 'a'], dtype=object)

In [16]:
credit['A2'].replace(to_replace='?', value=0, inplace=True)  # 0 as a temporary placeholder so A2 can be cast to float

In [17]:
credit['A2'].unique()
credit['A2'] = credit['A2'].astype(float)  # cast from string to float now that '?' is gone
#credit['A2'].replace(to_replace='?', value=credit['A2'].mean(),inplace=True)

In [18]:
listremove = credit['A2']

In [19]:
# Mean and std of A2, excluding the zeros used as placeholders for '?'
data = []
for i in listremove:
    if i != 0:  # values are floats now, so compare with 0, not the string '0'
        data.append(i)
mean = sum(data) / len(data)
std = np.std(data)

In [20]:
credit['A2'].replace(to_replace=0, value=np.random.normal(mean,std),inplace=True)
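
In [ ]:
# The same single-draw caveat applies here: np.random.normal(mean, std) above is drawn
# once, so every placeholder 0 receives the same fill value. A per-row alternative
# (a sketch; it would stand in for the replace above):
credit['A2'] = credit['A2'].map(lambda v: np.random.normal(mean, std) if v == 0 else v)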

In [21]:
credit['A4'].unique()


Out[21]:
array(['u', 'y', '?', 'l'], dtype=object)


In [22]:
credit['A4'].value_counts().plot(kind='bar');



In [23]:
credit['A4'].replace(to_replace='?', value=np.random.randint(0,2),inplace=True)

In [24]:
credit['A4'].replace(to_replace=0, value='u' ,inplace=True)
credit['A4'].replace(to_replace=1, value='u' ,inplace=True)
credit['A4'].replace(to_replace=2, value='y' ,inplace=True)
credit['A4'].replace(to_replace='l', value='y', inplace=True)  # fold the rare 'l' category into 'y'

In [25]:
credit['A5'].unique()


Out[25]:
array(['g', 'p', '?', 'gg'], dtype=object)

In [26]:
credit['A5'].value_counts().plot(kind='bar');



In [27]:
credit['A5'].replace(to_replace='?', value=np.random.randint(0,2),inplace=True)

In [28]:
credit['A5'].replace(to_replace=0, value='g' ,inplace=True)
credit['A5'].replace(to_replace=1, value='g' ,inplace=True)
credit['A5'].replace(to_replace=2, value='p' ,inplace=True)

In [29]:
credit['A6'].unique()


Out[29]:
array(['w', 'q', 'm', 'r', 'cc', 'k', 'c', 'd', 'x', 'i', 'e', 'aa', 'ff',
       'j', '?'], dtype=object)

In [30]:
credit['A6'].value_counts().plot(kind='bar');



In [31]:
credit['A6'].replace(to_replace='?', value=np.random.randint(0,2),inplace=True)

In [32]:
credit['A6'].replace(to_replace=0, value='c' ,inplace=True)
credit['A6'].replace(to_replace=1, value='q' ,inplace=True)
credit['A6'].replace(to_replace=2, value='w' ,inplace=True)

In [33]:
credit['A6'].value_counts().plot(kind='bar');



In [34]:
credit['A7'].unique()


Out[34]:
array(['v', 'h', 'bb', 'ff', 'j', 'z', '?', 'o', 'dd', 'n'], dtype=object)

In [35]:
credit['A7'].value_counts().plot(kind='bar');



In [36]:
credit['A7'].replace(to_replace='?', value=np.random.randint(0,7),inplace=True)

In [37]:
credit['A7'].replace(to_replace=0, value='v' ,inplace=True)
credit['A7'].replace(to_replace=1, value='v' ,inplace=True)
credit['A7'].replace(to_replace=2, value='v' ,inplace=True)
credit['A7'].replace(to_replace=3, value='h' ,inplace=True)
credit['A7'].replace(to_replace=4, value='h' ,inplace=True)
credit['A7'].replace(to_replace=5, value='bb' ,inplace=True)
credit['A7'].replace(to_replace=6, value='ff' ,inplace=True)
credit['A7'].replace(to_replace=7, value='z', inplace=True)  # unreachable: randint(0, 7) never returns 7

In [38]:
credit['A8'].unique()
#no missing values


Out[38]:
array([  1.25 ,   3.04 ,   1.5  ,   3.75 ,   1.71 ,   2.5  ,   6.5  ,
         0.04 ,   3.96 ,   3.165,   2.165,   4.335,   1.   ,   5.   ,
         0.25 ,   0.96 ,   3.17 ,   0.665,   0.75 ,   0.835,   7.875,
         3.085,   0.5  ,   5.165,  15.   ,   7.   ,   5.04 ,   7.96 ,
         7.585,   0.415,   2.   ,   1.835,  14.415,   4.5  ,   5.335,
         8.625,  28.5  ,   2.625,   0.125,   6.04 ,   3.5  ,   0.165,
         0.875,   1.75 ,   0.   ,   7.415,   0.085,   5.75 ,   6.   ,
         3.   ,   1.585,   4.29 ,   1.54 ,   1.46 ,   1.625,  12.5  ,
        13.5  ,  10.75 ,   0.375,   0.585,   0.455,   4.   ,   8.5  ,
         9.46 ,   2.25 ,  10.   ,   0.795,   1.375,   1.29 ,  11.5  ,
         6.29 ,  14.   ,   0.335,   1.21 ,   7.375,   7.5  ,   3.25 ,
        13.   ,   5.5  ,   4.25 ,   0.625,   5.085,   2.75 ,   2.375,
         8.   ,   1.085,   2.54 ,   4.165,   1.665,  11.   ,   9.   ,
         1.335,   1.415,   1.96 ,   2.585,   5.125,  15.5  ,   0.71 ,
         5.665,  18.   ,   5.25 ,   8.665,   2.29 ,  20.   ,   2.46 ,
        13.875,   2.085,   4.58 ,   2.71 ,   2.04 ,   0.29 ,   4.75 ,
         0.46 ,   0.21 ,   0.54 ,   3.335,   2.335,   1.165,   2.415,
         2.79 ,   4.625,   1.04 ,   6.75 ,   1.875,  16.   ,  12.75 ,
         5.375,   2.125,  17.5  ,   3.125,   0.79 ,   8.29 ])

In [39]:
credit['A9'].unique()


Out[39]:
array(['t', 'f'], dtype=object)

In [40]:
credit['A10'].unique()


Out[40]:
array(['t', 'f'], dtype=object)

In [41]:
credit['A11'].unique()


Out[41]:
array([ 1,  6,  0,  5,  7, 10,  3, 17,  2,  9,  8, 15, 11, 12, 40, 23,  4,
       20, 67, 14, 16, 13, 19])

In [42]:
credit['A12'].unique()


Out[42]:
array(['f', 't'], dtype=object)

In [43]:
credit['A13'].unique()


Out[43]:
array(['g', 's', 'p'], dtype=object)

In [44]:
credit['A14'].unique()


Out[44]:
array(['00202', '00043', '00280', '00100', '00120', '00360', '00164',
       '00080', '00180', '00052', '00128', '00260', '00000', '00320',
       '00396', '00096', '00200', '00300', '00145', '00500', '00168',
       '00434', '00583', '00030', '00240', '00070', '00455', '00311',
       '00216', '00491', '00400', '00239', '00160', '00711', '00250',
       '00520', '00515', '00420', '?', '00980', '00443', '00140', '00094',
       '00368', '00288', '00928', '00188', '00112', '00171', '00268',
       '00167', '00075', '00152', '00176', '00329', '00212', '00410',
       '00274', '00375', '00408', '00350', '00204', '00040', '00181',
       '00399', '00440', '00093', '00060', '00395', '00393', '00021',
       '00029', '00102', '00431', '00370', '00024', '00020', '00129',
       '00510', '00195', '00144', '00380', '00049', '00050', '00381',
       '00150', '00117', '00056', '00211', '00230', '00156', '00022',
       '00228', '00519', '00253', '00487', '00220', '00088', '00073',
       '00121', '00470', '00136', '00132', '00292', '00154', '00272',
       '00340', '00108', '00720', '00450', '00232', '00170', '01160',
       '00411', '00460', '00348', '00480', '00640', '00372', '00276',
       '00221', '00352', '00141', '00178', '00600', '00550', '02000',
       '00225', '00210', '00110', '00356', '00045', '00062', '00092',
       '00174', '00017', '00086', '00454', '00254', '00028', '00263',
       '00333', '00312', '00290', '00371', '00099', '00252', '00760',
       '00560', '00130', '00523', '00680', '00163', '00208', '00383',
       '00330', '00422', '00840', '00432', '00032', '00186', '00303',
       '00349', '00224', '00369', '00076', '00231', '00309', '00416',
       '00465', '00256'], dtype=object)

In [45]:
credit['A15'].unique()


Out[45]:
array([     0,    560,    824,      3,  31285,   1349,    314,   1442,
          200,   2690,    245,   1208,   1260,     11,  10000,   5000,
         4000,     35,    713,    551,    500,    300,    221,   2283,
          100,     15,    284,   1236,   5800,    730,    400,  50000,
          456,  15108,   2954,      2,     20,     27,    225,      1,
           38,      5,    130,    147,    210,  11202,   1332,     50,
          258,    567,   1000,   2510,    809,    610,    150,  51100,
          367,    600,    247,    375,    278,    827,   2072,    582,
         2300,   3065,   2200,      6,   1602,   2184,   3376,   2000,
         7544,  10561,    837,  11177,    639,   2028,   1065,    540,
          158,  15000,   3000,   3257,   1655,   1430,      7,    790,
          396,    678,   1187,   6590,    168,   1270,   1210,    742,
         8851,   7059,   1704,    857,   6700,   2503,   9800,    196,
           14,  26726,  18027,     99,    444,   1200,   2010,     13,
          120,     32,    722,     40,    484,    204,     98,   5552,
          105,   2803,    126,      4,     21,    173,     10,     25,
           42, 100000,    113,      8,     44,   2732,    179,     16,
         1062,    251,    228,     67,     12,    122,   4208,   1300,
          112,   1110,   1004,    286,   4500,   1212,    195,     87,
           17,    184,    140,     18,    146,     22,     55,     70,
           60,   1058,    769,   5200,     19,    316,    350,   3552,
          687,   1950,     53,     41,     33,     80,    351,   2100,
          475,    892,   4607,   2206,   5860,     28,   1391,   2279,
          591,    960,    690,    234,    800,    990,   2197,     90,
          340,    347,    327,   4071,    109,   1249,    134,   1344,
          321,    948,   2079,   2384,    458,   5298,    162,   1583,
           58,     59,   1400,   1465,   8000,   4700,   1097,   3290,
        13212,   5777,   5124,     23,   4159,    918,    768,    283,
          108,      9,     68,    587,    141,    501,    160,    390,
          154,    117,    246,    237,    364,    537,    394,    750])

In [46]:
type(credit['A15'][1])


Out[46]:
numpy.int64

In [47]:
credit['A16'].unique()


Out[47]:
array(['+', '-'], dtype=object)

In [48]:
credit['Approval']=credit['A16']

In [49]:
credit['Approval']=credit['Approval'].replace('+',1)
credit['Approval']=credit['Approval'].replace('-',0)
credit['Approval']


Out[49]:
0    1
1    1
2    1
...
687    0
688    0
689    0
Name: Approval, Length: 690, dtype: int64
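
In [ ]:
# A more direct equivalent of the two replaces above (a sketch):
credit['Approval'] = (credit['A16'] == '+').astype(int)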

In [194]:
credit1['Approval']=credit1['A16']
credit1['Approval']=credit1['Approval'].replace('+',1)
credit1['Approval']=credit1['Approval'].replace('-',0)
credit1['Approval']


Out[194]:
0    1
1    1
2    1
...
687    0
688    0
689    0
Name: Approval, Length: 653, dtype: int64

In [50]:
#Is A1 predictive? Only slightly: the approval rates for 'a' and 'b' are close (see creditrating below)
approved_by_a1 = credit.groupby('A1').Approval.agg(['sum', 'count'])
approved_by_a1['creditrating'] = approved_by_a1['sum'] / approved_by_a1['count']
approved_by_a1


Out[50]:
sum count creditrating
A1
a 101 222 0.5
b 206 468 0.4
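
In [ ]:
# Sketch: the same approval-rate breakdown for every categorical column in one loop,
# instead of repeating the groupby cell per column (column list assumed).
for col in ['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']:
    rates = credit.groupby(col).Approval.agg(['sum', 'count'])
    rates['creditrating'] = rates['sum'] / rates['count']
    print(rates)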

In [51]:
# Count approvals ('+') without an explicit loop
counta = (credit['A16'] == '+').sum()
counta


Out[51]:
307

In [52]:
# Count denials ('-')
countb = (credit['A16'] == '-').sum()
countb


Out[52]:
383

In [53]:
#Is A4 predictive? Yes: 'u' applicants are approved about half the time, 'y' under a third
approved_by_a4 = credit.groupby('A4').Approval.agg(['sum', 'count'])
approved_by_a4['creditrating'] = approved_by_a4['sum'] / approved_by_a4['count']
approved_by_a4


Out[53]:
sum count creditrating
A4
u 260 525 0.5
y 47 165 0.3

In [54]:
approved_by_a5 = credit.groupby('A5').Approval.agg(['sum', 'count'])
approved_by_a5['creditrating'] = approved_by_a5['sum'] / approved_by_a5['count']
approved_by_a5


Out[54]:
sum count creditrating
A5
g 260 525 0.5
gg 2 2 1.0
p 45 163 0.3

In [55]:
approved_by_a6 = credit.groupby('A6').Approval.agg(['sum', 'count'])
approved_by_a6['creditrating'] = approved_by_a6['sum'] / approved_by_a6['count']
approved_by_a6


Out[55]:
sum count creditrating
A6
aa 19 54 0.4
c 66 146 0.5
cc 29 41 0.7
d 7 30 0.2
e 14 25 0.6
... ... ... ...
m 16 38 0.4
q 51 78 0.7
r 2 3 0.7
w 33 64 0.5
x 32 38 0.8

14 rows × 3 columns


In [ ]:
# Below I fit and score several models: logistic regression, LDA, nearest neighbors,
# SVC, and LinearSVC. Logistic regression gives the best score, using a grid search
# over the parameter C. Its confusion matrix shows disapprovals predicted very well
# and approvals moderately well. Rechecking on the dataset that dropped the '?' rows
# instead of filling them, the model does even better, which makes me question
# whether filling the missing data was a good idea.

In [56]:
from sklearn.cross_validation import train_test_split

In [57]:
X_data = credit[["A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A15"]]

In [58]:
X_data = pd.get_dummies(X_data)
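
In [ ]:
# get_dummies expands each object column into one 0/1 indicator per category, named
# like 'A1_a', 'A1_b'; numeric columns pass through unchanged. A quick look (sketch):
X_data.columns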

In [119]:
y_data=credit["Approval"]

In [120]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=12, test_size=0.2)

In [195]:
X_data1 = credit1[["A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A15"]]
X_data1 = pd.get_dummies(X_data1)

In [197]:
y_data1=credit1["Approval"]

In [198]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_data1, y_data1, random_state=12, test_size=0.2)

In [121]:
from sklearn.linear_model import LogisticRegression

In [122]:
clf = LogisticRegression(C=1.5)

In [123]:
clf.fit(X_train, y_train)


Out[123]:
LogisticRegression(C=1.5, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001)

In [124]:
#looks like logistic regression is the best
clf.score(X_test, y_test)


Out[124]:
0.85507246376811596

In [155]:
param={'C':np.logspace(-3,3,100)}
lin_grid = GridSearchCV(LogisticRegression(),param)
lin_grid.fit(X_train,y_train)


Out[155]:
GridSearchCV(cv=None,
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': array([  1.00000e-03,   1.14976e-03, ...,   8.69749e+02,   1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [156]:
lin_grid.score(X_test, y_test)


Out[156]:
0.85507246376811596
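
In [ ]:
# Sketch: inspect the C value the grid search selected and its cross-validated score.
print(lin_grid.best_params_, lin_grid.best_score_)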

In [200]:
param={'C':np.logspace(-3,3,100)}
lin_grid1 = GridSearchCV(LogisticRegression(),param)
lin_grid1.fit(X_train1,y_train1)


Out[200]:
GridSearchCV(cv=None,
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': array([  1.00000e-03,   1.14976e-03, ...,   8.69749e+02,   1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [201]:
lin_grid1.score(X_test1, y_test1)


Out[201]:
0.90839694656488545

In [125]:
from sklearn.metrics import confusion_matrix, classification_report

In [126]:
y_pred = clf.predict(X_test)

In [157]:
confusion_matrix(y_test, y_pred)


Out[157]:
array([[64, 13],
       [ 7, 54]])

In [166]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)

print(cm)

# Plot the confusion matrix (renders inline because of %matplotlib inline)
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()


[[64 13]
 [ 7 54]]

In [128]:
print(classification_report(y_test, y_pred))


             precision    recall  f1-score   support

          0       0.90      0.83      0.86        77
          1       0.81      0.89      0.84        61

avg / total       0.86      0.86      0.86       138
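
In [ ]:
# Sketch: the report's class-1 precision and recall recomputed from the confusion
# matrix above (54 true positives, 13 false positives, 7 false negatives).
tp, fp, fn = 54.0, 13.0, 7.0
print(tp / (tp + fp), tp / (tp + fn))  # ~0.81 precision, ~0.89 recall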


In [207]:
y_pred2 = lin_grid.predict(X_test)

In [208]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred2)

print(cm)

# Plot the confusion matrix (renders inline because of %matplotlib inline)
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()


[[64 13]
 [ 7 54]]

In [202]:
y_pred1 = lin_grid1.predict(X_test1)

In [204]:
# Compute confusion matrix
cm = confusion_matrix(y_test1, y_pred1)

print(cm)

# Plot the confusion matrix (renders inline because of %matplotlib inline)
plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()


[[70 11]
 [ 1 49]]

In [129]:
pd.DataFrame(zip(X_data.columns, np.transpose(clf.coef_)))


Out[129]:
0 1
0 A2 [0.00421308149235]
1 A3 [0.000404238963896]
2 A8 [0.0810899397003]
3 A11 [0.143919472832]
4 A15 [0.000405937251194]
... ... ...
39 A12_f [0.0298110076537]
40 A12_t [-0.170524958022]
41 A13_g [-0.7433672102]
42 A13_p [0.907141521981]
43 A13_s [-0.304488262149]

44 rows × 2 columns
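
In [ ]:
# Sketch: a flatter view of the same coefficients. clf.coef_ is 2-D, with shape
# (1, n_features) for a binary problem, hence the bracketed values above.
pd.Series(clf.coef_[0], index=X_data.columns)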


In [158]:
y_pred_df = pd.DataFrame(clf.predict_proba(X_test))
y_pred_df.rename(columns={0: 'No', 1: 'Yes'}, inplace=True)
y_pred_df['y_pred'] = y_pred
y_pred_df['y_true'] = y_test
y_pred_df.head()


Out[158]:
No Yes y_pred y_true
0 0.0 1.0 1 0
1 1.0 0.0 0 0
2 0.2 0.8 1 1
3 0.2 0.8 1 1
4 0.3 0.7 1 1


In [131]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train,y_train)


Out[131]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [132]:
svc.score(X_test,y_test)
#not as good as logistic regression


Out[132]:
0.71014492753623193
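
In [ ]:
# The RBF SVC (and k-NN below) are distance-based and sensitive to feature scale;
# A15 runs into the tens of thousands while the dummy columns are 0/1, which likely
# drags both models down here. A sketch of standardizing first (hypothetical):
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
svc_scaled = SVC().fit(scaler.transform(X_train), y_train)
svc_scaled.score(scaler.transform(X_test), y_test)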

In [133]:
linest=LinearSVC(C=1e-3)
linest.fit(X_train, y_train)


Out[133]:
LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
     random_state=None, tol=0.0001, verbose=0)

In [134]:
#not as good as logistic regression (note this is the training score, unlike the other models)
linest.score(X_train, y_train)


Out[134]:
0.78442028985507251

In [135]:
lda = LDA()
lda.fit(X_train, y_train)


Out[135]:
LDA(n_components=None, priors=None)

In [136]:
lda.score(X_test, y_test)
#linear discriminant analysis not quite as good as logistic regression


Out[136]:
0.84782608695652173

In [137]:
neighbor = KNeighborsClassifier(n_neighbors=1)
neighbor.fit(X_train, y_train)


Out[137]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_neighbors=1, p=2, weights='uniform')

In [138]:
neighbor.score(X_test, y_test)
#nearest neighbor not as good as logistic regression


Out[138]:
0.72463768115942029

In [139]:
X_train.shape


Out[139]:
(552, 44)

In [140]:
y_train.shape


Out[140]:
(552,)

In [141]:
param={'C':np.logspace(-3,3,10), 'gamma':np.logspace(-3,3,10)}
svc_grid = GridSearchCV(SVC(),param)
svc_grid.fit(X_train,y_train)


Out[141]:
GridSearchCV(cv=None,
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': array([  1.00000e-03,   4.64159e-03,   2.15443e-02,   1.00000e-01,
         4.64159e-01,   2.15443e+00,   1.00000e+01,   4.64159e+01,
         2.15443e+02,   1.00000e+03]), 'gamma': array([  1.00000e-03,   4.64159e-03,   2.15443e-02,   1.00000e-01,
         4.64159e-01,   2.15443e+00,   1.00000e+01,   4.64159e+01,
         2.15443e+02,   1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [142]:
svc_grid.score(X_test,y_test)


Out[142]:
0.76086956521739135

In [162]:
param={'C':np.logspace(-3,3,500)}
linsvc_grid = GridSearchCV(LinearSVC(),param)
linsvc_grid.fit(X_train,y_train)


Out[162]:
GridSearchCV(cv=None,
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
     random_state=None, tol=0.0001, verbose=0),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': array([  1.00000e-03,   1.02807e-03, ...,   9.72693e+02,   1.00000e+03])},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [160]:
linsvc_grid.score(X_test,y_test)


Out[160]:
0.8188405797101449
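
In [ ]:
# Sketch: a single train/test split gives a noisy score; cross-validation on the
# training set is a steadier way to compare the models tried above (hypothetical).
from sklearn.cross_validation import cross_val_score
for model in [LogisticRegression(C=1.5), LDA(), KNeighborsClassifier(n_neighbors=1)]:
    print(cross_val_score(model, X_train, y_train, cv=5).mean())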
