In [ ]:
# Load the basic libraries...
import scipy as sp
import pandas as pd
import sklearn
%pylab inline
Glass Identification Data Set: chemical analysis of 214 glass samples. For the purpose of forensic analysis, we wish to classify these into six classes, each of which corresponds to one glass type.
In [ ]:
# Load the raw glass measurements.
# NOTE(review): assumes this is the unmodified UCI `glass.data`, which has no
# header row. Without header=None pandas would consume the first sample as
# column names and only 213 of the 214 samples would be loaded.
D = pd.read_csv("/home/jan/Downloads/glass.data", header=None)
D
In [ ]:
# Convert the DataFrame to a plain NumPy array.  `.values` is used instead of
# `as_matrix()`, which is deprecated and was removed in pandas 1.0; `.values`
# is available on every pandas version.
data = D.values
In [ ]:
# First attempt: columns 0-9 are the features, column 10 is the label.
# NOTE(review): column 0 presumably holds the sample id -- confirm; a later
# cell narrows the features to columns 1-9.
glass_X = data[:, :10]
glass_y = data[:, 10]
In [ ]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
In [ ]:
# Fit a support vector classifier with default hyper-parameters on the
# complete data set (no hold-out split yet).
svc = SVC()
svc.fit(X=glass_X, y=glass_y)
In [ ]:
# Mean accuracy measured on the same samples the classifier was fitted on.
svc.score(X=glass_X, y=glass_y)
In [ ]:
# Second attempt: drop column 0 and keep columns 1-9 as features; the label
# is still column 10.
glass_X = data[:, 1:10]
glass_y = data[:, 10]
In [ ]:
# Start from a fresh default SVC and fit it on the reduced feature set.
svc = SVC()
svc.fit(X=glass_X, y=glass_y)
In [ ]:
# Mean accuracy on the fitting data itself (still not a held-out estimate).
svc.score(X=glass_X, y=glass_y)
In [ ]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; its replacement is `sklearn.model_selection`.  The old name is kept as
# an alias so any other cell that refers to `cross_validation` keeps working.
try:
    from sklearn import model_selection as cross_validation
except ImportError:  # scikit-learn < 0.18
    from sklearn import cross_validation

# Hold out one third of the samples for testing; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    glass_X, glass_y, train_size=2.0 / 3, random_state=42)

# %-formatting prints the same text under Python 2 and Python 3, unlike the
# Python 2-only `print a, b` statement.
print("%s %s" % (X_train.shape, X_test.shape))
In [ ]:
# Refit the existing classifier using only the training portion.
svc.fit(X=X_train, y=y_train)
In [ ]:
# Mean accuracy on the held-out test portion.
svc.score(X=X_test, y=y_test)
In [ ]:
# `sklearn.grid_search` was deprecated in scikit-learn 0.18 and removed in
# 0.20; GridSearchCV now lives in `sklearn.model_selection`.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:  # scikit-learn < 0.18
    from sklearn.grid_search import GridSearchCV

# Exponentially spaced candidates: C in 2^-5 .. 2^15, gamma in 2^-15 .. 2^3.
param_grid = [{
    'C': [2**x for x in range(-5, 16)],
    'gamma': [2**x for x in range(-15, 4)],
}]
model = GridSearchCV(SVC(), param_grid)

# Standardise the features: the scaler is fitted on the training split only
# and the same transformation is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Cross-validated search over the whole grid on the scaled training data.
model.fit(X_train_scaled, y_train)
In [ ]:
# Display the estimator selected by the grid search (its repr shows the
# chosen hyper-parameters).
model.best_estimator_
In [ ]:
from sklearn.metrics import accuracy_score

# Score of the tuned model on the scaled training split.
model.score(X=X_train_scaled, y=y_train)
In [ ]:
# Score of the tuned model on the scaled, held-out test split.
model.score(X=X_test_scaled, y=y_test)
In [ ]:
# `sklearn.learning_curve` was deprecated in scikit-learn 0.18 and removed in
# 0.20; the function now lives in `sklearn.model_selection`.
try:
    from sklearn.model_selection import learning_curve
except ImportError:  # scikit-learn < 0.18
    from sklearn.learning_curve import learning_curve
In [ ]:
# learning_curve returns (train_sizes, train_scores, test_scores); only the
# cross-validated test scores are kept.
# The standardised features are passed so the curve is computed on the same
# representation the grid search was fitted and scored on; the original call
# used the raw, unscaled X_train.
_, _, on_test = learning_curve(model, X_train_scaled, y_train)
on_test
In [ ]:
# Average the scores along axis 1, giving one mean value per row of on_test.
# NOTE(review): per the scikit-learn docs each row is one training-set size
# and each column one CV fold -- confirm.
avgs = mean(on_test, axis=1)
In [ ]:
# Plot the averaged scores against their position index (no explicit x
# values are supplied).
plot(avgs)