In [ ]:
# Import the libraries
# Load the dataset
In [ ]:
import numpy as np
# Optional for plotting
import matplotlib.pyplot as plt
# Optional for notebook display
%matplotlib inline
data = np.load('data.npz')
images = data['images']
labels = data['labels']
print(images.shape)
print(images.dtype)
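Before going further, it helps to look at one of the images; a minimal check using the matplotlib import above:
In [ ]:
# Display the first image to sanity-check what we loaded
plt.imshow(images[0], cmap='gray')
plt.axis('off')
plt.show()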
In [ ]:
# Image Properties
# Get the first image:
first_image = images[0] # Or equivalently: images[0, :, :]
print(first_image.shape)
# Get first three images:
several_images = images[:3]
print(several_images.shape)
In [ ]:
# Slicing and dicing
# Keep the central 16x16 region (drop a 4-pixel border on every side)
cropped_image = first_image[4:20, 4:20]
print(cropped_image.shape)
In [ ]:
# Getting our data in the right format
# Flatten each of the 730 images into a single row; -1 infers the pixel count
X = np.reshape(images, (len(images), -1))
print(X.shape)
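A quick sanity check that the flattening is reversible; this assumes each image keeps whatever per-image shape `images.shape[1:]` reports:
In [ ]:
# A flattened row should reshape back into the original image exactly
restored = X[0].reshape(images.shape[1:])
print(np.array_equal(restored, images[0]))  # Expect: True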
In [ ]:
# Modifying arrays
# Shift pixel values up or down to change brightness
increase_brightness = first_image + 30
decrease_brightness = first_image - 30
# Scale pixel values to change contrast
increase_contrast = first_image * 1.5
decrease_contrast = first_image * 0.5
brightness_compare = np.hstack((increase_brightness, decrease_brightness))
contrast_compare = np.hstack((increase_contrast, decrease_contrast))
plt.figure(figsize=(15, 12))
plt.title('Brightness')
plt.axis('off')
plt.imshow(brightness_compare, cmap='gray')
plt.figure(figsize=(15, 12))
plt.title('Contrast')
plt.axis('off')
plt.imshow(contrast_compare, cmap='gray')
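One caveat: if the images are stored as uint8, adding or subtracting 30 wraps around at 0 and 255 instead of saturating. A sketch of a safer version that works in float and clips back to the displayable range:
In [ ]:
# Convert to float first so the arithmetic cannot wrap around
safe_bright = np.clip(first_image.astype(float) + 30, 0, 255)
safe_dim = np.clip(first_image.astype(float) - 30, 0, 255)
plt.imshow(np.hstack((safe_bright, safe_dim)), cmap='gray', vmin=0, vmax=255)
plt.axis('off')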
| Tag Number | Pattern | Details |
|---|---|---|
| 0 | Rectangle | 100 bees that were the control group |
| 1 | Circle | 100 bees that were treated with caffeine |
| 2 | Blank | The single queen in the colony received this tag |
In [ ]:
# The labels to go with the images
labels = data['labels']
print(labels.shape)
print(labels[::100])
y = labels
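It is worth checking how many images carry each tag; `np.unique` gives the counts directly:
In [ ]:
# How many images fall under each tag number?
print(np.unique(labels, return_counts=True))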
In [ ]:
# Split off a testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
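Because the queen's tag is rare, a plain random split can leave it out of the test set entirely. If every tag appears at least a handful of times in `y`, `stratify` keeps the class proportions in both splits (a sketch, not part of the original analysis):
In [ ]:
# Optional: preserve the tag proportions in the train and test sets
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                          random_state=4, stratify=y)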
In [ ]:
# Visualise the data
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
fit_trans_X = pca.fit_transform(X)
plt.figure(figsize=(35, 20))
plt.scatter(fit_trans_X[:, 0], fit_trans_X[:, 1], c=y, s=400)
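To judge how faithful this 2D picture is, PCA exposes the fraction of pixel variance each component captures:
In [ ]:
# Fraction of the total variance explained by the two components
print(pca.explained_variance_ratio_)
print(pca.explained_variance_ratio_.sum())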
In [ ]:
# A better transformation
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
lda_model = lda.fit(X_train, y_train)
X_trans = lda_model.transform(X_train)
plt.figure(figsize=(35, 20))
plt.scatter(X_trans[:, 0], X_trans[:, 1], c=y_train, s=400)
In [ ]:
# Now let's make some predictions
from sklearn import svm
clf = svm.SVC(gamma=0.0001, C=10)
clf.fit(X_trans, y_train)
In [ ]:
# How well did we do?
transform_testing_set = lda.transform(X_test)
y_pred = clf.predict(transform_testing_set)
from sklearn import metrics
print(metrics.accuracy_score(y_test, y_pred))
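Accuracy alone hides which tags get confused with which; a confusion matrix breaks the errors down per class:
In [ ]:
# Rows are the true tags, columns are the predicted tags
print(metrics.confusion_matrix(y_test, y_pred))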
In [ ]:
# Let's build a pipeline
from sklearn.pipeline import Pipeline
# Start by creating the individual steps in the pipeline
# LDA can keep at most (n_classes - 1) components, so 2 for our 3 tags
lda = LDA(n_components=2)
svc = svm.SVC(gamma=0.0001, C=10)
pipeline = Pipeline([('lda', lda),
('svc', svc)])
pipeline.fit(X_train, y_train)
pipeline.score(X_test, y_test)
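Because the pipeline bundles every step behind one fit/predict interface, it can be handed straight to cross-validation (a sketch using `cross_val_score` from the same `model_selection` module):
In [ ]:
from sklearn.model_selection import cross_val_score
# Refits the whole pipeline (LDA + SVC) on each of 5 folds
scores = cross_val_score(pipeline, X, y, cv=5)
print(scores.mean(), scores.std())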
In [ ]:
# Let's write an adapter for our images to the sklearn API
from sklearn.base import TransformerMixin, BaseEstimator
class CropUnwrap(TransformerMixin, BaseEstimator):
    def __init__(self, crop_pixels=0):
        self.crop_pixels = crop_pixels

    def fit(self, X, y=None):
        # Stateless: nothing to learn from the data
        return self

    def transform(self, X, y=None):
        # Trim crop_pixels from every side of each image
        c = self.crop_pixels
        X_crop = X[:, c:X.shape[1] - c, c:X.shape[2] - c]
        # Flatten the cropped images into one row per image
        rows, height, width = X_crop.shape
        return X_crop.reshape((rows, height * width))
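A quick check that the transformer behaves as intended on the raw image stack (the expected 256 assumes the 24x24 images used above):
In [ ]:
# Cropping 4 pixels from each side of a 24x24 image leaves 16x16 = 256 features
cropper = CropUnwrap(crop_pixels=4)
print(cropper.fit_transform(images).shape)  # Expect: (730, 256)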
In [ ]:
# Put it all together
unwrap = CropUnwrap()
lda = LDA(n_components=2)
svc = svm.SVC(gamma=0.0001)
pipeline = Pipeline([('unwrap', unwrap),
('lda', lda),
('svm', svc)])
# Now we don't need X_train, we can start with our raw images
pipeline.fit(images, labels)
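The fitted pipeline now accepts raw images end to end:
In [ ]:
# Predict tag numbers straight from raw images and compare to the truth
print(pipeline.predict(images[:5]))
print(labels[:5])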
In [ ]:
# Finding good parameters
from sklearn.model_selection import RandomizedSearchCV
# LDA allows at most n_classes - 1 = 2 components for our 3 tags
search_range = {'lda__n_components': [1, 2],
                'unwrap__crop_pixels': [0, 2, 4, 6, 8],
                'svm__C': [1, 10, 100, 1e3, 1e4, 1e5]}
searcher = RandomizedSearchCV(pipeline, search_range, n_iter=20)
searcher.fit(images, labels)
In [ ]:
# Testing it out
print(searcher.best_score_)
print(searcher.best_params_)
# Note: this scores on the same images we fitted on, so it is optimistic
searcher.score(images, labels)
In [ ]:
# Solution to one:
image_train, image_test, label_train, label_test = train_test_split(images,
labels,
test_size=0.2)
searcher.fit(image_train, label_train)
searcher.score(image_test, label_test)