In [ ]:
from keras.datasets import reuters
In [ ]:
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words = 10000)
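The num_words=10000 argument keeps only the 10,000 most frequent words, so no word index in the data should exceed 9,999. A quick sanity check (an aside, not part of the original walkthrough):
In [ ]:
max(max(sequence) for sequence in train_data)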
In [ ]:
len(train_data)
In [ ]:
len(test_data)
In [ ]:
train_data[0]
In [ ]:
word_index = reuters.get_word_index()
# Invert the mapping so integer indices can be decoded back to words.
reverse_word_index = dict((value, key) for key, value in word_index.items())
In [ ]:
# Indices are offset by 3 because 0, 1, and 2 are reserved for
# "padding", "start of sequence", and "unknown".
decoded_newswire = ' '.join(reverse_word_index.get(i - 3, '?') for i in train_data[0])
In [ ]:
decoded_newswire
In [ ]:
train_labels[10]
In [ ]:
import numpy as np

def vectorize_data(sequences, dimension=10000):
    # Multi-hot encode: one row per sequence, with a 1.0 at every word index present.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
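A quick check that the encoder behaves as expected, using a made-up two-word sequence (an illustrative aside):
In [ ]:
sample = vectorize_data([[3, 5]])
sample[0, 3], sample[0, 5], sample.sum()  # expect (1.0, 1.0, 2.0)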
In [ ]:
x_train = vectorize_data(train_data)
In [ ]:
x_test = vectorize_data(test_data)
In [ ]:
def one_hot_encode(labels, dimension=46):
    # One-hot encode the 46 topic labels.
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1.
    return results
In [ ]:
one_hot_train_labels = one_hot_encode(train_labels)
In [ ]:
one_hot_test_labels = one_hot_encode(test_labels)
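Note that Keras ships a built-in equivalent: assuming a Keras 2.x install, keras.utils.to_categorical produces the same one-hot encoding:
In [ ]:
from keras.utils import to_categorical
one_hot_train_labels = to_categorical(train_labels)
one_hot_test_labels = to_categorical(test_labels)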
In [ ]:
from keras import models
from keras import layers
In [ ]:
model = models.Sequential()
model.add(layers.Dense(64, activation="relu", input_shape=(10000,)))
model.add(layers.Dense(64, activation="relu"))
# 46-way softmax output: one probability per topic, summing to 1.
model.add(layers.Dense(46, activation="softmax"))
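model.summary() is a cheap way to confirm that the layer shapes and parameter counts match the intended architecture:
In [ ]:
model.summary()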
In [ ]:
model.compile(loss="categorical_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
In [ ]:
# Hold out the first 1,000 training samples for validation.
x_val = x_train[:1000]
partial_x_train = x_train[1000:]
In [ ]:
y_val = one_hot_train_labels[:1000]
partial_y_train = one_hot_train_labels[1000:]
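A quick shape check on the split (an aside):
In [ ]:
partial_x_train.shape, x_val.shape, partial_y_train.shape, y_val.shape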
In [ ]:
history = model.fit(partial_x_train, partial_y_train,
                    epochs=20, batch_size=512,
                    validation_data=(x_val, y_val))
In [ ]:
import matplotlib.pyplot as plt
In [ ]:
values = history.history
values.keys()
In [ ]:
loss = values["loss"]
val_loss = values["val_loss"]
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'bo', label="training loss")
plt.plot(epochs, val_loss, 'b', label="validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
In [ ]:
plt.show()
In [ ]:
plt.clf()
# The history key is "accuracy" in recent Keras versions, "acc" in older ones.
acc = values.get("accuracy", values.get("acc"))
val_acc = values.get("val_accuracy", values.get("val_acc"))
plt.plot(epochs, acc, 'bo', label="training accuracy")
plt.plot(epochs, val_acc, 'b', label="validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Training and validation accuracy")
In [ ]:
plt.show()
In [ ]:
# Retrain a fresh model for only 9 epochs, roughly where validation loss bottomed out above.
model = models.Sequential()
model.add(layers.Dense(64, activation="relu", input_shape=(10000,)))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(46, activation="softmax"))
In [ ]:
model.compile(loss="categorical_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
In [ ]:
model.fit(partial_x_train, partial_y_train,
          epochs=9, batch_size=512,
          validation_data=(x_val, y_val))
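As an alternative to hard-coding 9 epochs, recent Keras versions can stop training automatically once validation loss stops improving. A minimal sketch with the EarlyStopping callback, meant as a drop-in for the fixed-epoch fit above on a freshly initialized model:
In [ ]:
from keras.callbacks import EarlyStopping
# Stop once val_loss has not improved for 2 epochs and roll back to the
# best weights seen (restore_best_weights needs a reasonably recent Keras).
early_stop = EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True)
model.fit(partial_x_train, partial_y_train, epochs=20, batch_size=512,
          validation_data=(x_val, y_val), callbacks=[early_stop])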
In [ ]:
results = model.evaluate(x_test, one_hot_test_labels)
In [ ]:
results
In [ ]:
import copy
In [ ]:
# Random baseline: shuffle the test labels and measure chance-level accuracy,
# to put the model's test accuracy in context.
test_labels_copy = copy.copy(test_labels)
np.random.shuffle(test_labels_copy)
hits_array = np.array(test_labels) == np.array(test_labels_copy)
float(np.sum(hits_array)) / len(test_labels)
In [ ]:
predictions = model.predict(x_test)
In [ ]:
predictions[0].shape
In [ ]:
np.sum(predictions[0])
In [ ]:
np.argmax(predictions[0])
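As a final aside, the one-hot encoding of the labels can be skipped entirely: with integer targets, the only change needed is the loss function, sparse_categorical_crossentropy. A minimal sketch:
In [ ]:
# Same model, trained directly on the integer topic labels.
y_train_int = np.array(train_labels)
model = models.Sequential()
model.add(layers.Dense(64, activation="relu", input_shape=(10000,)))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(46, activation="softmax"))
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])
model.fit(x_train[1000:], y_train_int[1000:], epochs=9, batch_size=512,
          validation_data=(x_train[:1000], y_train_int[:1000]))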