Example 2: Glass identification


In [ ]:
# Load the basic libraries...
import scipy as sp
import pandas as pd
import sklearn
# %pylab pulls the numpy / matplotlib names into the notebook namespace.
# Later cells call apply_along_axis, mean and plot without importing them,
# so they only work after this line has been run.
%pylab inline

Glass Identification Data Set: chemical analysis of 214 glass samples. For the purpose of forensic analysis, we wish to classify these into six classes, each of which corresponds to one glass type.


In [ ]:
# Location of the UCI "Glass Identification" data file.
# NOTE(review): absolute, machine-specific path -- point this at your own copy.
GLASS_DATA_PATH = "/home/jan/Downloads/glass.data"

# The UCI distribution of glass.data has no header row (see glass.names).
# Without header=None pandas would use the first sample as column names and
# the frame would hold 213 rows instead of the 214 samples described above.
D = pd.read_csv(GLASS_DATA_PATH, header=None)
D

In [ ]:
# Plain 2-D array view of the frame. DataFrame.as_matrix() was deprecated in
# pandas 0.23 and removed in 1.0; .values is available in old and new
# releases alike and yields the same array.
data = D.values

In [ ]:
# Take 1 inputs: the first ten columns (0-9) are used as features,
# column 10 as the class label.
glass_X = data[:, :10]
glass_y = data[:, 10]

In [ ]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

Take 1


In [ ]:
# Default-parameter SVM fitted on every sample; fit() returns the estimator
# itself, so showing `svc` reproduces the fitted-model cell output.
svc = SVC().fit(glass_X, glass_y)
svc

In [ ]:
# Scored on the very samples the classifier was fitted on (no held-out data).
svc.score(X=glass_X, y=glass_y)

Take 2


In [ ]:
# Take 2 inputs: same label column (10), but column 0 is no longer part of
# the features -- only columns 1-9 remain.
glass_X = data[:, 1:10]
glass_y = data[:, 10]

In [ ]:
# Fresh default SVM, fitted on the reduced feature set.
# fit() hands back the estimator, which is displayed as the cell output.
svc = SVC().fit(glass_X, glass_y)
svc

In [ ]:
# Still evaluated on the data used for fitting.
svc.score(X=glass_X, y=glass_y)

Take 3


In [ ]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20. Try the current location
# first and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Keep two thirds of the samples for training, the rest for testing;
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    glass_X, glass_y, train_size=2.0/3, random_state=42)

# Show both shapes as the cell output. This replaces the Python-2-only
# `print` statement and works unchanged on Python 2 and 3.
X_train.shape, X_test.shape

In [ ]:
# Refit the existing classifier, this time on the training split only.
svc.fit(X=X_train, y=y_train)

In [ ]:
# Score on the held-out test split.
svc.score(X=X_test, y=y_test)

Take 4


In [ ]:
# GridSearchCV lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.grid_search module was removed in 0.20. Try the current
# location first and fall back only on old installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

# Search grid over powers of two: C = 2^-5 .. 2^15, gamma = 2^-15 .. 2^3.
param_grid = [{'C': [2**x for x in range(-5,16)], 'gamma': [2**x for x in range(-15,4)]}]
model = GridSearchCV(SVC(), param_grid)

# The scaler is fitted on the training split only and then re-used,
# unchanged, for the test split.
# NOTE(review): the scaler sees all of X_train before GridSearchCV forms its
# internal folds, so the cross-validation scores may be slightly optimistic.
# Wrapping scaler + SVC in a Pipeline would avoid that; left as is so the
# later cells that use X_train_scaled / X_test_scaled keep working.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Last expression: the fitted search object is shown as the cell output.
model.fit(X_train_scaled,y_train)

In [ ]:
# Show the estimator (with its parameters) that the grid search selected.
model.best_estimator_

In [ ]:
from sklearn.metrics import accuracy_score
# Score of the tuned model on the scaled training split.
model.score(X=X_train_scaled, y=y_train)

In [ ]:
# Score of the tuned model on the scaled held-out split.
model.score(X=X_test_scaled, y=y_test)

In [ ]:
# learning_curve moved to sklearn.model_selection in scikit-learn 0.18 and
# the sklearn.learning_curve module was removed in 0.20; fall back to the
# old module only when the new one is unavailable.
try:
    from sklearn.model_selection import learning_curve
except ImportError:
    from sklearn.learning_curve import learning_curve

In [ ]:
# learning_curve returns (train sizes, training scores, validation scores).
# Bind all three to real names rather than `_`: assigning to `_` in IPython
# shadows the automatic "last output" variable for the rest of the session.
# NOTE(review): this passes the unscaled X_train although `model` was tuned
# on X_train_scaled -- confirm that is intended.
train_sizes, on_train, on_test = learning_curve(model, X_train, y_train)
on_test

In [ ]:
# Average the scores along axis 1, giving one mean per row of on_test.
# The vectorised ndarray method replaces apply_along_axis(mean, 1, ...),
# which loops in Python and relies on names star-imported by %pylab.
avgs = on_test.mean(axis=1)

In [ ]:
# Draw the averaged scores against their position in the array
# (a single-argument plot uses the index as x).
plt.plot(avgs)