In [1]:
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
import os
import wandb
from wandb.keras import WandbCallback
from PIL import Image
from matplotlib import pyplot as plt
In [2]:
run = wandb.init(project="Distillation", tensorboard=True)
config = run.config
config.dropout = 0.25
config.dense_layer_nodes = 100
config.learn_rate = 0.32 #0.01
config.batch_size = 1024 #32
config.epochs = 50
In [3]:
class_names = ['airplane','automobile','bird','cat','deer',
'dog','frog','horse','ship','truck']
num_classes = len(class_names)
(X_train, y_train_original_label), (X_test, y_test) = cifar10.load_data()
# Convert class vectors to binary class matrices.
y_train_original_label = keras.utils.to_categorical(y_train_original_label, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# Normalize pixel values into [0, 1]; the defaults downstream assume that range
# We won't fully standardize the data (subtract the mean and divide by the
# standard deviation); an optional sketch of that is at the end of this cell
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.
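# (Optional sketch, not used in this run) per-channel standardization, i.e.
# subtracting the mean and dividing by the standard deviation; the
# *_standardized names are illustrative and nothing below depends on them
channel_mean = X_train.mean(axis=(0, 1, 2), keepdims=True)
channel_std = X_train.std(axis=(0, 1, 2), keepdims=True)
X_train_standardized = (X_train - channel_mean) / (channel_std + 1e-7)
X_test_standardized = (X_test - channel_mean) / (channel_std + 1e-7)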
In [4]:
# Soft targets: the teacher network's softmax outputs on the training set
y_train_softmax_output = np.load("y_train_softmax_output.npy")
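# (Hedged sanity check, added for illustration) each row should be one probability
# distribution per training image: shape (num_images, num_classes), rows summing to ~1
assert y_train_softmax_output.shape == (X_train.shape[0], num_classes)
assert np.allclose(y_train_softmax_output.sum(axis=1), 1.0, atol=1e-3)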
In [5]:
# Define Emulation Model
# Note: some of these sizes could be made config variables to make it easy to reduce capacity
model = Sequential() # Sets up model structure to pass data forward
model.add(Conv2D(32, # Number of convolutional filters
                 (3, 3), # Kernel size
                 padding='same', # Pad so the output keeps the input's spatial dims
                 input_shape=X_train.shape[1:], activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2))) # Downsamples each feature map to 16x16
model.add(Dropout(config.dropout)) # Add a dash of regularization (to taste)
# NOTE: Would be fun to try a conv from the 16x16xfilters feature map straight to a 1x1x10 tensor (sketched at the end of this cell)
model.add(Flatten()) # Because dense layers like 1D vectors
model.add(Dense(config.dense_layer_nodes, activation='relu'))
model.add(Dropout(config.dropout)) # A pinch more regularization
model.add(Dense(num_classes, activation='softmax')) # Plate and serve
# NOTE: Could also try Adam at some point
opt = keras.optimizers.SGD(lr=config.learn_rate) # No momentum
model.compile(loss='categorical_crossentropy', # Cross entropy works for the teacher's soft targets as well as one-hot labels
optimizer=opt, # Per previous decision to not use momentum
metrics=['accuracy']) # Easier to look at than cross entropy
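# (Hedged sketch of the NOTE above, not trained in this run) a fully convolutional
# head that maps the 16x16 feature map straight to a 1x1x10 tensor; the layer
# choices here are assumptions, not part of the original experiment
alt_model = Sequential()
alt_model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=X_train.shape[1:], activation='relu'))
alt_model.add(MaxPooling2D(pool_size=(2, 2))) # 16x16x32 feature map
alt_model.add(Conv2D(num_classes, (16, 16))) # 'valid' padding collapses it to 1x1x10
alt_model.add(Flatten())
alt_model.add(Activation('softmax'))
alt_model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(lr=config.learn_rate),
                  metrics=['accuracy'])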
In [6]:
data = X_train[0] # Quick look at the first training image
plt.imshow(data, interpolation='nearest') # pyplot already imported in the first cell
plt.show()
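# (Hedged aside, not in the original cell) the matching hard label for this image
print(class_names[int(np.argmax(y_train_original_label[0]))])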
In [10]:
# Load the trained teacher model and regenerate its soft targets on the training set
model2 = keras.models.load_model("lukas.h5")
y_train_softmax_output = model2.predict(X_train)
print(class_names)
y_train_softmax_output[0] # Teacher's soft probabilities for the first image
Out[10]:
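# (Hedged aside, added for illustration) how often the teacher's argmax agrees
# with the original hard labels on the training set; purely diagnostic
teacher_hard = np.argmax(y_train_softmax_output, axis=1)
true_hard = np.argmax(y_train_original_label, axis=1)
print("teacher vs. hard-label agreement:", np.mean(teacher_hard == true_hard))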
In [6]:
# Train the student (emulation) model on the teacher's soft targets
# For batch size make sure it's big enough to leverage GPU parallelism
# As a basic rule you need to reduce learning rate as you reduce batch size
# NOTE: Will be interesting to optimize for training time a bit by tweaking batch size
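# (Hedged aside, not wired into the fit call below) a stopping criterion could be
# added with Keras' EarlyStopping callback; patience=5 is an illustrative value
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=5)
# e.g. callbacks=[early_stop, WandbCallback(data_type="image", labels=class_names)]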
model.fit(
X_train,
y_train_softmax_output,
batch_size=config.batch_size,
# Use a fixed number of epochs rather than a stopping criterion (see the EarlyStopping aside above)
epochs=config.epochs,
validation_data=(X_test, y_test),
# Make sure we can see the pastries while they bake
callbacks=[WandbCallback(data_type="image", labels=class_names)]
)
Out[6]:
In [ ]: