In [1]:
import os
# Show the entries found directly under the ../input directory.
print(os.listdir("../input"))
In [2]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import torch
import torch.nn as nn
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
In [3]:
# Select the compute device used by every later cell.
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook still runs (slowly) instead of crashing in set_device().
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
In [4]:
class ImageDataset(Dataset):
    """Image dataset backed by the ``../input/dataset/image`` directory tree.

    Two modes are supported, selected by ``data_type``:

    * ``'train'`` - rows are read from ``csv_file``; column 0 is used as the
      image file name inside ``../input/dataset/image/train/`` and column 1 is
      returned as the label.
    * ``'test'`` - every entry of ``../input/dataset/image/test`` is treated as
      an image (``csv_file`` is ignored); items carry a placeholder label ``0``.

    Every image is converted to RGB, resized to 350x350 and turned into a
    tensor with ``transforms.ToTensor()``.

    Args:
        csv_file: Path to the label CSV (only read when ``data_type='train'``).
        data_type: ``'train'`` or ``'test'``.

    Raises:
        ValueError: If ``data_type`` is neither ``'train'`` nor ``'test'``.
    """

    def __init__(self, csv_file, data_type='train'):
        # Validate first so a typo fails fast with a real exception
        # (raising a plain string is itself a TypeError in Python 3).
        if data_type not in ('train', 'test'):
            raise ValueError('Not support data_type %s' % data_type)
        self.data_type = data_type
        self.transform_fn = transforms.Compose([
            transforms.Resize((350, 350)),
            transforms.ToTensor()
        ])
        if self.data_type == 'train':
            self.frame = pd.read_csv(csv_file).values
        else:
            # os.listdir() returns entries in arbitrary order; sort so the
            # test ordering is reproducible between runs.
            self.frame = sorted(os.listdir('../input/dataset/image/test'))

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.frame)

    def __getitem__(self, i):
        """Return ``(image_tensor, label)`` for sample ``i``.

        For the test split the label is always the placeholder ``0``.
        """
        if self.data_type == 'train':
            path = '../input/dataset/image/train/%s' % self.frame[i, 0]
            label = self.frame[i, 1]
        else:
            path = '../input/dataset/image/test/%s' % self.frame[i]
            label = 0
        # The context manager closes the underlying file as soon as the
        # pixels have been converted, rather than waiting for garbage collection.
        with Image.open(path) as img:
            feature = self.transform_fn(img.convert('RGB'))
        return (feature, label)
In [5]:
# Number of images per mini-batch, shared by both loaders.
batch_size = 64

# Build the two datasets first: the labelled training split comes from the
# CSV file, the test split is discovered from the test image directory.
trainset = ImageDataset(csv_file='../input/train label.csv', data_type='train')
testset = ImageDataset(csv_file=None, data_type='test')

# Training batches are reshuffled on every pass; test batches keep the dataset
# order so predictions can be matched back to file names.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)
In [6]:
def train(trainloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over ``trainloader``.

    The model is put in training mode and, for every batch, a forward pass,
    a backward pass and one optimizer step are performed.

    Args:
        trainloader: Iterable yielding ``(data, target)`` tensor pairs.
        model: Module being optimised.
        loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Mean of the per-batch loss values (each batch weighs the same,
        regardless of its size).

    Raises:
        ValueError: If ``trainloader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    n_batches = 0
    for data, target in trainloader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = loss_fn(outputs, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        n_batches += 1
    # Count batches explicitly: relying on the loop variable after the loop
    # fails with UnboundLocalError when the loader is empty.
    if n_batches == 0:
        raise ValueError('trainloader yielded no batches; cannot compute an average loss')
    return total_loss / n_batches
def evaluate(testloader, model, loss_fn, device):
    """Measure mean loss and accuracy of ``model`` over ``testloader``.

    The model is switched to evaluation mode (and left in it) and gradients
    are disabled for the whole pass.

    Args:
        testloader: Iterable yielding ``(data, target)`` tensor pairs.
        model: Module to evaluate.
        loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(average_loss, accuracy)`` where ``average_loss`` is the mean
        of the per-batch losses and ``accuracy`` is the percentage (0-100) of
        evaluated samples whose arg-max prediction equals the target.

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    model.eval()
    correct = 0
    n_samples = 0
    n_batches = 0
    total_loss = 0.0
    with torch.no_grad():
        for data, target in testloader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            # .item() yields a plain Python int; no CPU/NumPy round-trip needed.
            correct += (predicted == target).sum().item()
            total_loss += loss_fn(outputs, target).item()
            # Divide by the samples actually seen so the accuracy stays right
            # even when the loader does not yield the whole dataset.
            n_samples += target.size(0)
            n_batches += 1
    if n_samples == 0:
        raise ValueError('testloader yielded no samples; cannot compute loss/accuracy')
    accuracy = correct * 100.0 / n_samples
    average_loss = total_loss / n_batches
    return average_loss, accuracy
In [7]:
def fit(train_loader, model, loss_fn, optimizer, scheduler, n_epochs, device):
    """Train ``model`` for ``n_epochs`` epochs and record per-epoch metrics.

    Each epoch runs one training pass, then re-evaluates the model on the
    same ``train_loader`` to obtain the training accuracy, prints a progress
    line plus the seconds elapsed since fitting began, and finally advances
    the learning-rate ``scheduler`` by one step.

    Returns:
        tuple: ``(train_accs, train_losses)`` - two lists with one entry per
        completed epoch.
    """
    accuracy_history, loss_history = [], []
    print('Fit start')
    started_at = time.time()
    for epoch_number in range(1, n_epochs + 1):
        epoch_loss = train(train_loader, model, loss_fn, optimizer, device=device)
        # Only the accuracy of this extra pass is kept; its loss is discarded.
        epoch_accuracy = evaluate(train_loader, model, loss_fn, device=device)[1]
        print(
            'Epoch: {}/{}. Train set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
                epoch_number, n_epochs, epoch_loss, epoch_accuracy
            )
        )
        print("Cost:", time.time() - started_at)
        accuracy_history.append(epoch_accuracy)
        loss_history.append(epoch_loss)
        scheduler.step()
    print('Fit end')
    return accuracy_history, loss_history
In [8]:
def show_curve(ys, title):
    """Plot ``ys`` against its index as a blue line and display the figure.

    Args:
        ys: Sequence of values, one per epoch.
        title: Metric name, used in the figure title and the y-axis label.
    """
    values = np.array(ys)
    epochs = np.array(range(len(ys)))
    plt.plot(epochs, values, c='b')
    plt.axis()
    plt.xlabel('Epoch')
    plt.ylabel('{} Value'.format(title))
    plt.title('{} Curve:'.format(title))
    plt.show()
In [9]:
# Hyper-parameters of the training run.
n_epochs = 30
learning_rate = 0.01

# ResNet-34 created with pretrained=True, then moved to the selected device.
model = torchvision.models.resnet34(pretrained=True)
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# fit() steps the scheduler once per epoch, so the learning rate is halved
# every 6 epochs.
scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.5)

train_accs, train_losses = fit(
    trainloader, model, loss_fn, optimizer, scheduler, n_epochs, device
)

# Plot how the training accuracy and loss evolved over the epochs.
show_curve(train_accs, 'accuracy')
show_curve(train_losses, 'loss')
In [10]:
def get_test_result(testloader, model, device):
    """Predict a class index for every sample produced by ``testloader``.

    Args:
        testloader: Iterable yielding ``(inputs, label)`` pairs; the label is
            ignored.
        model: Module used for inference; it is switched to evaluation mode.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        torch.Tensor: 1-D tensor of arg-max class indices in loader order.
        When the loader yields no batches an empty ``torch.long`` tensor on
        ``device`` is returned.
    """
    model.eval()
    batch_predictions = []
    with torch.no_grad():
        for inputs, _label in testloader:
            outputs = model(inputs.to(device))
            _, predicted = outputs.max(1)
            batch_predictions.append(predicted)
    if not batch_predictions:
        # Nothing to concatenate: return an empty result instead of failing
        # on an unbound variable.
        return torch.empty(0, dtype=torch.long, device=device)
    # One concatenation at the end avoids re-copying the growing result tensor
    # on every batch.
    return torch.cat(batch_predictions, 0)
In [11]:
# Predict a label for every test image and copy the predictions to host memory.
result = get_test_result(testloader, model, device)
arr = result.to('cpu').numpy()

# Pair each test file name with its predicted label; the loader is not
# shuffled, so predictions line up with the dataset's file list.
df = pd.DataFrame(
    data={
        'Image': testloader.dataset.frame,
        'Cloth_label': arr,
    }
)
print(df)

# Write the predictions to a CSV file without the index column.
df.to_csv('./resnet.csv', index=False)
print('Finish')