In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This notebook trains a model to predict whether a given sonar signal is bouncing off a metal cylinder (a mine) or off a roughly cylindrical rock, using the Sonar dataset from the UCI Machine Learning Repository.
The Sonar Signals dataset that this sample uses for training is provided by the UC Irvine Machine Learning Repository. Google has hosted the data on a public GCS bucket: gs://cloud-samples-data/ml-engine/sonar/sonar.all-data.
The file sonar.all-data is split for both training and evaluation.
Note: Your typical development process with your own data would require you to upload your data to GCS so that you can access that data from inside your notebook. However, in this case, Google has put the data on GCS to avoid the steps of having you download the data from UC Irvine and then upload the data to GCS.
This dataset is provided by a third party. Google provides no representation, warranty, or other guarantees about the validity or any other aspects of this dataset.
First, you'll create the model (provided below). This is similar to your normal process for creating a PyTorch model. However, there is one key difference:
Downloading the data from GCS at the start of your file, so that you can access the data.
In [ ]:
from google.cloud import storage
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
Add code to download the data from GCS (in this case, using the publicly hosted data). You will then be able to use the data when training your model.
In [ ]:
# Public bucket holding the sonar data
bucket = storage.Client().bucket('cloud-samples-data')
# Path to the data inside the public bucket
blob = bucket.blob('ml-engine/sonar/sonar.all-data')
# Download the data to a local file named 'sonar.all-data'; the dataset
# cell below reads it back from './sonar.all-data'.
blob.download_to_filename('sonar.all-data')
Read in the data
In [ ]:
# Define the dataset to be used by PyTorch
class SonarDataset(Dataset):
    """Map-style dataset over a headerless sonar CSV file.

    Each row is read as a run of numeric feature columns followed by one
    final label column holding 'R' (rock) or 'M' (mine).

    Indexing with an integer returns a dict with two entries:

    * ``'features'`` - 1-D ``torch.float64`` tensor of the feature columns.
    * ``'target'`` - 1-element ``float64`` NumPy array, 0.0 for rock and
      1.0 for mine.
    """

    # Convert the targets to binary values:
    # R = rock --> 0
    # M = mine --> 1
    _LABEL_TO_TARGET = {'R': 0, 'M': 1}

    def __init__(self, csv_file):
        """Load ``csv_file`` (no header row) into ``self.dataframe``."""
        self.dataframe = pd.read_csv(csv_file, header=None)

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the features and binary target for row ``idx``.

        Raises:
            ValueError: if the label is neither 'R', 'M' nor something that
                converts to a float.
        """
        # astype() copies, so the tensor below owns writable memory.
        features = self.dataframe.iloc[idx, :-1].values.astype(dtype='float64')

        # Map the label without writing into the array handed out by
        # ``.values``: pandas may return that array read-only (Copy-on-Write),
        # in which case in-place assignment raises. Labels that are not
        # 'R'/'M' are passed through unchanged, so numeric labels still work.
        label = self.dataframe.iloc[idx, -1:]
        target = label.map(
            lambda value: self._LABEL_TO_TARGET.get(value, value))
        target = target.values.astype(dtype='float64')

        # Load the data as a tensor (the default DataLoader collation turns
        # the NumPy target into a tensor when batching).
        return {'features': torch.from_numpy(features),
                'target': target}
# Load the data
sonar_dataset = SonarDataset('./sonar.all-data')
# Create indices for the split
dataset_size = len(sonar_dataset)
test_size = int(0.2 * dataset_size)  # Use a test_split of 0.2
train_size = dataset_size - test_size
# Split the dataset
train_dataset, test_dataset = random_split(sonar_dataset,
                                           [train_size, test_size])
# Create our Dataloaders for training and test data.
# Pass the Subset objects returned by random_split directly: a Subset's
# `.dataset` attribute is the *full* underlying dataset, so using it would
# make both loaders iterate every row and evaluate on the training data.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=True)
This is where your model code would go. Below is an example model using the sonar dataset.
In [ ]:
# Fix PyTorch's global RNG seed so the random operations that follow (for
# example the weight initialisation of the model created below) are
# repeatable between runs.
# NOTE(review): when the cells run top to bottom, random_split has already
# executed before this line, so the train/test split itself is not made
# deterministic by this seed - confirm whether that is intended.
torch.manual_seed(42)
# Create the Deep Neural Network
class SonarDNN(nn.Module):
    """Small fully connected binary classifier for 60-feature sonar rows.

    Layer sizes are 60 -> 60 -> 30 -> 1. Each hidden layer is followed by a
    ReLU and 20% dropout; the single output goes through a sigmoid, so the
    forward pass yields one value in (0, 1) per input row.
    """

    def __init__(self):
        super(SonarDNN, self).__init__()
        # Layers are created in network order so the sub-module indices
        # (and therefore the state_dict keys net.0, net.3, net.6) stay put.
        hidden_layers = [
            nn.Linear(60, 60), nn.ReLU(), nn.Dropout(p=0.2),
            nn.Linear(60, 30), nn.ReLU(), nn.Dropout(p=0.2),
        ]
        output_layers = [nn.Linear(30, 1), nn.Sigmoid()]
        self.net = nn.Sequential(*(hidden_layers + output_layers))

    def forward(self, x):
        """Feed ``x`` through the layer stack and return the sigmoid output."""
        return self.net(x)
# Create the model. It is cast to double precision because the dataset
# produces float64 feature tensors and the layer weights must match.
net = SonarDNN().double()
# Plain SGD with classical (non-Nesterov) momentum over all model parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5, nesterov=False)
Define the training loop
In [ ]:
def train(net, train_loader, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    Args:
        net: model to optimise; switched to training mode here.
        train_loader: iterable of batches, each a dict with 'features' and
            'target' entries.
        optimizer: optimizer already bound to ``net``'s parameters.
        epoch: epoch number, used only in the progress output.

    Prints the mean binary cross-entropy loss of every completed group of
    six mini-batches. Returns nothing.
    """
    report_every = 6
    loss_fn = nn.BCELoss()
    net.train()

    window_loss = 0.0
    for step, batch in enumerate(train_loader, 1):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        predictions = net(batch['features'])
        batch_loss = loss_fn(predictions, batch['target'])
        batch_loss.backward()
        optimizer.step()

        # Accumulate until a full reporting window has been seen.
        window_loss += batch_loss.item()
        if step % report_every != 0:
            continue
        print('[%d, %5d] loss: %.3f' %
              (epoch, step, window_loss / report_every))
        window_loss = 0.0
Define the testing loop
In [ ]:
def test(net, test_loader):
"""Test the DNN"""
isp = False
net.eval()
criterion = nn.BCELoss() # https://pytorch.org/docs/stable/nn.html#bceloss
test_loss = 0
correct = 0
with torch.no_grad():
for i, data in enumerate(test_loader, 0):
features = data['features']
target = data['target']
if not isp:
isp = True
print(features)
print(target)
output = net(features)
# Binarize the output
pred = output.apply_(lambda x: 0.0 if x < 0.5 else 1.0)
test_loss += criterion(output, target) # sum up batch loss
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set:\n\tAverage loss: {:.4f}'.format(test_loss))
print('\tAccuracy: {}/{} ({:.0f}%)\n'.format(
correct,
(len(test_loader) * test_loader.batch_size),
100. * correct / (len(test_loader) * test_loader.batch_size)))
Train / Test the model
In [ ]:
# Number of full passes over the training data.
epochs = 10
# Epochs are numbered from 1 so the progress lines read naturally; every
# training pass is followed immediately by an evaluation pass.
for epoch in range(1, epochs + 1):
    train(net, train_loader, optimizer, epoch)
    test(net, test_loader)
Export the trained model
In [ ]:
# Save only the learned parameters (the state_dict), not the module object
# itself, to 'model.pth' in the working directory.
torch.save(net.state_dict(), 'model.pth')
In [ ]:
# IPython shell escape: list model.pth to confirm the file was written.
! ls -al model.pth
Run a simple prediction with set values:
In [ ]:
# Make sure dropout is disabled for inference regardless of which cell ran
# last; in training mode the dropout layers would randomise the outputs.
net.eval()

# One hand-picked sample labelled as a rock (60 features, batch of one).
rock_feature = torch.tensor([[3.6800e-02, 4.0300e-02, 3.1700e-02, 2.9300e-02, 8.2000e-02, 1.3420e-01,
                              1.1610e-01, 6.6300e-02, 1.5500e-02, 5.0600e-02, 9.0600e-02, 2.5450e-01,
                              1.4640e-01, 1.2720e-01, 1.2230e-01, 1.6690e-01, 1.4240e-01, 1.2850e-01,
                              1.8570e-01, 1.1360e-01, 2.0690e-01, 2.1900e-02, 2.4000e-01, 2.5470e-01,
                              2.4000e-02, 1.9230e-01, 4.7530e-01, 7.0030e-01, 6.8250e-01, 6.4430e-01,
                              7.0630e-01, 5.3730e-01, 6.6010e-01, 8.7080e-01, 9.5180e-01, 9.6050e-01,
                              7.7120e-01, 6.7720e-01, 6.4310e-01, 6.7200e-01, 6.0350e-01, 5.1550e-01,
                              3.8020e-01, 2.2780e-01, 1.5220e-01, 8.0100e-02, 8.0400e-02, 7.5200e-02,
                              5.6600e-02, 1.7500e-02, 5.8000e-03, 9.1000e-03, 1.6000e-02, 1.6000e-02,
                              8.1000e-03, 7.0000e-03, 1.3500e-02, 6.7000e-03, 7.8000e-03, 6.8000e-03]], dtype=torch.float64)
# No gradients are needed for a prediction, so skip autograd bookkeeping.
with torch.no_grad():
    rock_prediction = net(rock_feature)

# One hand-picked sample labelled as a mine (60 features, batch of one).
mine_feature = torch.tensor([[5.9900e-02, 4.7400e-02, 4.9800e-02, 3.8700e-02, 1.0260e-01, 7.7300e-02,
                              8.5300e-02, 4.4700e-02, 1.0940e-01, 3.5100e-02, 1.5820e-01, 2.0230e-01,
                              2.2680e-01, 2.8290e-01, 3.8190e-01, 4.6650e-01, 6.6870e-01, 8.6470e-01,
                              9.3610e-01, 9.3670e-01, 9.1440e-01, 9.1620e-01, 9.3110e-01, 8.6040e-01,
                              7.3270e-01, 5.7630e-01, 4.1620e-01, 4.1130e-01, 4.1460e-01, 3.1490e-01,
                              2.9360e-01, 3.1690e-01, 3.1490e-01, 4.1320e-01, 3.9940e-01, 4.1950e-01,
                              4.5320e-01, 4.4190e-01, 4.7370e-01, 3.4310e-01, 3.1940e-01, 3.3700e-01,
                              2.4930e-01, 2.6500e-01, 1.7480e-01, 9.3200e-02, 5.3000e-02, 8.1000e-03,
                              3.4200e-02, 1.3700e-02, 2.8000e-03, 1.3000e-03, 5.0000e-04, 2.2700e-02,
                              2.0900e-02, 8.1000e-03, 1.1700e-02, 1.1400e-02, 1.1200e-02, 1.0000e-02]], dtype=torch.float64)
with torch.no_grad():
    mine_prediction = net(mine_feature)

# Note: Try increasing the number of epochs above to see more accurate results.
# Scores below 0.5 are reported as Rock and 0.5 or above as Mine, the same
# cut-off the evaluation loop uses when it binarizes outputs.
rock_score = rock_prediction.item()
mine_score = mine_prediction.item()
print('Result Values: (Rock: 0) - (Mine: 1)')
print('Rock Prediction:\n\t{} - {}'.format('Rock' if rock_score < 0.5 else 'Mine', rock_score))
print('Mine Prediction:\n\t{} - {}'.format('Rock' if mine_score < 0.5 else 'Mine', mine_score))