In [1]:
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from subprocess import check_output
print(check_output(["ls", "./input"]).decode("utf8"))
from pylab import rcParams
rcParams['figure.figsize'] = (6, 6) # setting default size of plots
In [216]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the train/test tables.
train = pd.read_csv('./input/train.csv')
test = pd.read_csv('./input/test.csv')

# Training features: every column except the target and the row id.
X_train = train.drop(columns=['species', 'id'])

# Encode species names as integer class labels.
le = LabelEncoder()
le.fit(train['species'])
y_train = le.transform(train['species'])

X_test = test.drop(columns=['id'])
test_ids = test.pop('id')  # kept for the submission index (also removes the column from `test`)

print(X_train.shape)
print(X_test.shape)
In [217]:
#from sklearn.decomposition import PCA
#pca = PCA(n_components=120)
#X_train = pca.fit_transform(X_train)
#print(X_train.shape)
In [429]:
from skimage import data, io
from PIL import Image
from skimage import measure
from skimage.feature import corner_harris, corner_subpix, corner_peaks

# Count contours (at iso-level 0.4) and Harris corners in each leaf image,
# as two extra shape features per image.
# NOTE(review): images are numbered 1..1584 across BOTH train and test; taking
# images 1..990 and assigning them row-by-row to `train` assumes the first 990
# images correspond to the training rows in order — verify against train['id'].
contours = []
corners = []
for i in range(1, 991):
    fname = './input/images/' + str(i) + '.jpg'
    im = Image.open(fname)
    contour = measure.find_contours(im, 0.4)
    contours.append(len(contour))
    corner = corner_peaks(corner_harris(im), min_distance=5)
    corners.append(len(corner))
    # (removed the per-image print — it flooded the output with 990 lines)

train['contour'] = contours
train['corner'] = corners
#print(contour)
#print(corners)

# Bug fix: `scaler` was used here without ever being defined earlier in a
# linear run (it only existed via an out-of-order cell, In[427]); create it
# here so Restart-and-Run-All works.
# NOTE(review): X_train was built BEFORE the contour/corner columns were added
# to `train`, so those new features are NOT in X_train — confirm intent.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
In [437]:
from sklearn.svm import SVC, NuSVC

# Linear-kernel SVM; probability=True enables predict_proba (needed for log loss).
svc_params = dict(kernel='linear', C=0.1, coef0=1, probability=True, random_state=101)
classifier = SVC(**svc_params)
#classifier = NuSVC(nu=0.1,gamma=10, probability=True,random_state=101)
classifier.fit(X_train, y_train)

# Class-probability predictions on the training set itself.
#y_pred = classifier.predict(X_test)
y_pred = classifier.predict_proba(X_train)
#print(y_pred)
#print(classifier.n_support_)
#print(classifier.support_vectors_)
#print(classifier.support_)
In [438]:
#check log loss
# Training-set metrics only — optimistic; see the cross-validation cell below.
from sklearn.metrics import log_loss, accuracy_score
#labels = LabelEncoder().fit(train['species'])

train_proba = classifier.predict_proba(X_train)
train_labels = classifier.predict(X_train)
print("Log loss: %0.4f" % log_loss(y_train, train_proba))
print("Accuracy: %0.4f" % accuracy_score(y_train, train_labels))
#best: 1.80042
#Log loss: 2.0623, 2.0455,0.1996
#Accuracy: 0.9576,0.9636
In [439]:
# Fixed: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# cross_val_score now lives in sklearn.model_selection.
from sklearn.model_selection import cross_val_score
import numpy as np

# 5-fold cross-validated accuracy of the fitted SVC on the training features.
scores = cross_val_score(classifier, X_train, y_train, cv=5)
print("score: ", np.mean(scores))
In [433]:
# Fixed: sklearn.grid_search was removed from scikit-learn;
# GridSearchCV lives in sklearn.model_selection.
from sklearn.model_selection import GridSearchCV

# 5-fold grid search over kernel and regularization strength C.
params = {"kernel": ("rbf", "linear"), "C": [0.1, 0.3, 1, 3, 10, 30, 100, 300]}
#params = {"kernel": ("rbf", "linear"),"nu":[0.1,0.2,0.4,0.6,0.8,0.9], "gamma": [0.1, 0.3, 1, 3, 10, 30, 100,300]}
clf = GridSearchCV(classifier, params, cv=5)
clf.fit(X_train, y_train)
print("Best parameters: " + str(clf.best_params_))
In [440]:
# Best mean cross-validated score and the refit estimator from the grid search.
print(clf.best_score_)
print(clf.best_estimator_)
In [441]:
print(X_test.shape)

# Bug fix: the classifier was trained on standardized features, but X_test was
# passed in raw — apply the same fitted scaler before predicting, otherwise
# train and test inputs are on different scales.
y_pred = classifier.predict_proba(scaler.transform(X_test))

# One row per test id, one probability column per species.
submission = pd.DataFrame(y_pred, index=test_ids, columns=le.classes_)
#print (submission.head(4))
submission.to_csv('submission_leaf_classification_SVC.csv')
In [108]:
# read images
# read images
from PIL import Image
import glob
from skimage import data, io
#from skimage.feature import canny
# Fixed: the module was renamed `skimage.filter` -> `skimage.filters`;
# the old name no longer exists in scikit-image.
from skimage.filters import sobel

# Sobel edge map of one sample image, displayed inline.
fname = './input/images/324.jpg'
im = Image.open(fname)
#edges = canny(im)
edges = sobel(im)
io.imshow(edges)
io.show()

print(edges.shape, edges.shape[0] * edges.shape[1])
import numpy as np
print(np.count_nonzero(edges))  # rough count of edge pixels

from skimage import io
# Fixed: the file handle was opened and never closed — use a context manager.
with open(fname, 'rb') as f:
    print(f)
In [109]:
from skimage import measure
import matplotlib.pyplot as plt

# Trace iso-valued contours (level 0.4) on one sample image and overlay them.
fname = './input/images/322.jpg'
im = Image.open(fname)
contours = measure.find_contours(im, 0.4)
#print(contours)

fig, ax = plt.subplots()
print(len(contours))
ax.imshow(im, interpolation='nearest', cmap=plt.cm.gray)
for curve in contours:
    ax.plot(curve[:, 1], curve[:, 0], linewidth=2)
In [110]:
# Contour count for every one of the 1584 images (train and test combined).
contour = []
for idx in range(1, 1585):
    image_path = './input/images/{}.jpg'.format(idx)
    leaf = Image.open(image_path)
    contour.append(len(measure.find_contours(leaf, 0.4)))
In [111]:
# Number of images processed — one count per iteration of range(1, 1585), so 1584.
len(contour)
Out[111]:
In [155]:
from skimage.feature import corner_harris, corner_subpix, corner_peaks

# Detect Harris corner peaks on one sample leaf, refine them to sub-pixel
# precision, and plot both point sets over the image.
fname = './input/images/142.jpg'
im = Image.open(fname)
contours = measure.find_contours(im, 0.4)
coords = corner_peaks(corner_harris(im), min_distance=5)
coords_subpix = corner_subpix(im, coords, window_size=13)
print(len(contours), coords.shape, coords_subpix.shape)

fig, ax = plt.subplots()
ax.imshow(im, interpolation='nearest', cmap=plt.cm.gray)
ax.plot(coords[:, 1], coords[:, 0], '+y', markersize=10)                # raw peaks (yellow)
ax.plot(coords_subpix[:, 1], coords_subpix[:, 0], '+r', markersize=15)  # refined (red)
#ax.axis((0, 350, 350, 0))
plt.show()
In [427]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Fixed: sklearn.grid_search was removed; GridSearchCV lives in model_selection.
from sklearn.model_selection import GridSearchCV

# Self-contained pipeline: load the data, standardize features, grid-search
# an SVC, and write a hard (0/1) submission file.
train = pd.read_csv('./input/train.csv')
test = pd.read_csv('./input/test.csv')

le = LabelEncoder()
le.fit(train.species)
scaler = StandardScaler()

# Fixed: DataFrame.as_matrix() was removed from pandas — use .to_numpy().
X_train = train.drop(["id", "species"], axis=1).to_numpy()
y_train = le.transform(train.species)
X_test = test.drop(["id"], axis=1).to_numpy()

scaler.fit(X_train)
X_train = scaler.transform(X_train)

print("Training model...")
params = {"kernel": ("rbf", "linear"), "C": [0.1, 0.3, 1, 3, 10, 30, 100]}
svm = SVC()
clf = GridSearchCV(svm, params, cv=5)
clf.fit(X_train, y_train)
print("Best parameters: " + str(clf.best_params_))

# Fixed: GridSearchCV.grid_scores_ was removed — report each candidate's
# mean/std test score via cv_results_ instead.
cv_results = clf.cv_results_
for mean_score, std_score, cand_params in zip(
        cv_results["mean_test_score"], cv_results["std_test_score"], cv_results["params"]):
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, std_score, cand_params))

print("Predicting test set...")
results = clf.predict(scaler.transform(X_test))

# One-hot encode the hard class predictions.
# NOTE(review): the competition metric is log loss — hard 0/1 labels are
# heavily penalized on any mistake; submitting predict_proba (as the earlier
# SVC cell does) would likely score better.
r = np.zeros([len(X_test), len(le.classes_)])
for i, v in enumerate(results):
    r[i, v] = 1

submit = pd.DataFrame(r, index=test.id, columns=le.classes_)
submit.to_csv('submit.csv')
In [ ]: