In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PyTorch Training

This notebook trains a model to predict whether a given sonar signal is bouncing off a metal cylinder (a mine) or off a roughly cylindrical rock, using a dataset from the UCI Machine Learning Repository.

The data

The Sonar Signals dataset that this sample uses for training is provided by the UC Irvine Machine Learning Repository. Google hosts a copy in the public GCS bucket at gs://cloud-samples-data/ml-engine/sonar/sonar.all-data.

  • sonar.all-data contains all of the examples; this notebook splits it into training and evaluation sets

Note: In a typical development process with your own data, you would first upload that data to GCS so that you can access it from inside your notebook. In this case, however, Google has already put the data on GCS, which saves you the steps of downloading the data from UC Irvine and then uploading it yourself.
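
If you do need to stage your own data, a minimal upload sketch looks like the cell below. The bucket and file names (your-bucket-name, datasets/my-data.csv, my-data.csv) are hypothetical placeholders; replace them with your own.


In [ ]:
# Sketch: upload a local file to your own GCS bucket.
# The bucket and file names below are hypothetical placeholders.
from google.cloud import storage

client = storage.Client()
bucket = client.bucket('your-bucket-name')
blob = bucket.blob('datasets/my-data.csv')
blob.upload_from_filename('my-data.csv')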

Disclaimer

This dataset is provided by a third party. Google provides no representation, warranty, or other guarantees about the validity or any other aspects of this dataset.

Build your model

First, you'll create the model (provided below). This is similar to your normal process for creating a PyTorch model, with one key difference: you download the data from GCS at the start of the notebook so that it is available when you train.


In [ ]:
from google.cloud import storage
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

Add code to download the data from GCS (in this case, using the publicly hosted data). You will then be able to use the data when training your model.


In [ ]:
# Public bucket holding the sonar data
bucket = storage.Client().bucket('cloud-samples-data')

# Path to the data inside the public bucket
blob = bucket.blob('ml-engine/sonar/sonar.all-data')
# Download the data
blob.download_to_filename('sonar.all-data')

Read in the data


In [ ]:
# Define the dataset to be used by PyTorch
class SonarDataset(Dataset):
    def __init__(self, csv_file):
        self.dataframe = pd.read_csv(csv_file, header=None)

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # When iterating through the dataset get the features and targets
        features = self.dataframe.iloc[idx, :-1].values.astype(dtype='float64')

        # Convert the targets to binary values:
        # R = rock --> 0
        # M = mine --> 1
        target = self.dataframe.iloc[idx, -1:].values
        if target[0] == 'R':
            target[0] = 0
        elif target[0] == 'M':
            target[0] = 1
        target = target.astype(dtype='float64')

        # Return the features and target as tensors
        data = {'features': torch.from_numpy(features),
                'target': torch.from_numpy(target)}
        return data


# Load the data
sonar_dataset = SonarDataset('./sonar.all-data')
# Create indices for the split
dataset_size = len(sonar_dataset)
test_size = int(0.2 * dataset_size)  # Use a test_split of 0.2
train_size = dataset_size - test_size

# Split the dataset
train_dataset, test_dataset = random_split(sonar_dataset,
                                           [train_size, test_size])
# Create our DataLoaders for the training and test splits
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False)

This is where your model code would go. Below is an example model using the sonar dataset.


In [ ]:
torch.manual_seed(42)

# Create the Deep Neural Network
class SonarDNN(nn.Module):
    def __init__(self):
        super(SonarDNN, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(60, 60),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(60, 30),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(30, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# Create the model
net = SonarDNN().double()
optimizer = optim.SGD(net.parameters(),
                      lr=0.01,
                      momentum=0.5,
                      nesterov=False)

Define the training loop


In [ ]:
def train(net, train_loader, optimizer, epoch):
    """Create the training loop"""
    net.train()
    criterion = nn.BCELoss()
    running_loss = 0.0

    for batch_index, data in enumerate(train_loader):
        features = data['features']
        target = data['target']

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(features)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if batch_index % 6 == 5:  # print every 6 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch, batch_index + 1, running_loss / 6))
            running_loss = 0.0

Define the testing loop


In [ ]:
def test(net, test_loader):
    """Test the DNN"""
    printed_sample = False
    net.eval()
    criterion = nn.BCELoss()  # https://pytorch.org/docs/stable/nn.html#bceloss
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            features = data['features']
            target = data['target']
            # Print the first batch once as a sanity check on the inputs
            if not printed_sample:
                printed_sample = True
                print(features)
                print(target)
            output = net(features)
            # Sum up the per-batch loss before binarizing the output
            test_loss += criterion(output, target).item()
            # Binarize the output at a 0.5 threshold
            pred = (output >= 0.5).double()
            correct += pred.eq(target.view_as(pred)).sum().item()

    # Average the loss over the number of batches
    test_loss /= len(test_loader)
    print('\nTest set:\n\tAverage loss: {:.4f}'.format(test_loss))
    print('\tAccuracy: {}/{} ({:.0f}%)\n'.format(
            correct,
            len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))

Train / Test the model


In [ ]:
epochs = 10
for epoch in range(1, epochs + 1):
    train(net, train_loader, optimizer, epoch)
    test(net, test_loader)

Export the trained model


In [ ]:
torch.save(net.state_dict(), 'model.pth')

In [ ]:
! ls -al model.pth
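
To confirm that the exported weights load correctly, you can restore them into a fresh SonarDNN instance. The cell below is a minimal sketch that assumes the model.pth file written above.


In [ ]:
# Sketch: reload the exported weights into a new model instance
restored_net = SonarDNN().double()
restored_net.load_state_dict(torch.load('model.pth'))
restored_net.eval()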

Run a simple prediction with set values:


In [ ]:
rock_feature = torch.tensor([[3.6800e-02, 4.0300e-02, 3.1700e-02, 2.9300e-02, 8.2000e-02, 1.3420e-01,
         1.1610e-01, 6.6300e-02, 1.5500e-02, 5.0600e-02, 9.0600e-02, 2.5450e-01,
         1.4640e-01, 1.2720e-01, 1.2230e-01, 1.6690e-01, 1.4240e-01, 1.2850e-01,
         1.8570e-01, 1.1360e-01, 2.0690e-01, 2.1900e-02, 2.4000e-01, 2.5470e-01,
         2.4000e-02, 1.9230e-01, 4.7530e-01, 7.0030e-01, 6.8250e-01, 6.4430e-01,
         7.0630e-01, 5.3730e-01, 6.6010e-01, 8.7080e-01, 9.5180e-01, 9.6050e-01,
         7.7120e-01, 6.7720e-01, 6.4310e-01, 6.7200e-01, 6.0350e-01, 5.1550e-01,
         3.8020e-01, 2.2780e-01, 1.5220e-01, 8.0100e-02, 8.0400e-02, 7.5200e-02,
         5.6600e-02, 1.7500e-02, 5.8000e-03, 9.1000e-03, 1.6000e-02, 1.6000e-02,
         8.1000e-03, 7.0000e-03, 1.3500e-02, 6.7000e-03, 7.8000e-03, 6.8000e-03]],  dtype=torch.float64)
rock_prediction = net(rock_feature)

mine_feature = torch.tensor([[5.9900e-02, 4.7400e-02, 4.9800e-02, 3.8700e-02, 1.0260e-01, 7.7300e-02,
         8.5300e-02, 4.4700e-02, 1.0940e-01, 3.5100e-02, 1.5820e-01, 2.0230e-01,
         2.2680e-01, 2.8290e-01, 3.8190e-01, 4.6650e-01, 6.6870e-01, 8.6470e-01,
         9.3610e-01, 9.3670e-01, 9.1440e-01, 9.1620e-01, 9.3110e-01, 8.6040e-01,
         7.3270e-01, 5.7630e-01, 4.1620e-01, 4.1130e-01, 4.1460e-01, 3.1490e-01,
         2.9360e-01, 3.1690e-01, 3.1490e-01, 4.1320e-01, 3.9940e-01, 4.1950e-01,
         4.5320e-01, 4.4190e-01, 4.7370e-01, 3.4310e-01, 3.1940e-01, 3.3700e-01,
         2.4930e-01, 2.6500e-01, 1.7480e-01, 9.3200e-02, 5.3000e-02, 8.1000e-03,
         3.4200e-02, 1.3700e-02, 2.8000e-03, 1.3000e-03, 5.0000e-04, 2.2700e-02,
         2.0900e-02, 8.1000e-03, 1.1700e-02, 1.1400e-02, 1.1200e-02, 1.0000e-02]],  dtype=torch.float64)
mine_prediction = net(mine_feature)

# Note: Try increasing the number of epochs above to see more accurate results. 
print('Result Values: (Rock: 0) - (Mine: 1)')
print('Rock Prediction:\n\t{} - {}'.format('Rock' if rock_prediction <= 0.5 else 'Mine', rock_prediction.item()))
print('Mine Prediction:\n\t{} - {}'.format('Rock' if mine_prediction <= 0.5 else 'Mine', mine_prediction.item()))