In [13]:
import torch
import torch.utils.data as Data
torch.manual_seed(1) # reproducible
In [15]:
x = torch.linspace(1, 10, 10)   # x data (torch tensor): 1, 2, ..., 10
y = torch.linspace(10, 1, 10)   # y data (torch tensor): 10, 9, ..., 1
# view x and y as columns and concatenate them side by side (shape 10 x 2)
torch.cat((x.view(len(x), -1), y.view(len(y), -1)), 1)
Out[15]:
tensor([[ 1., 10.],
        [ 2.,  9.],
        [ 3.,  8.],
        [ 4.,  7.],
        [ 5.,  6.],
        [ 6.,  5.],
        [ 7.,  4.],
        [ 8.,  3.],
        [ 9.,  2.],
        [10.,  1.]])
In [ ]:
dataset = Data.TensorDataset(x, y)  # current PyTorch takes the tensors positionally; the old data_tensor=/target_tensor= keywords were removed
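A TensorDataset pairs the i-th entries of its tensors, so indexing it returns an (x, y) tuple. A minimal check (the values follow from the linspace calls above):
In [ ]:
print(len(dataset))   # 10 samples
print(dataset[0])     # (tensor(1.), tensor(10.))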
In [29]:
BATCH_SIZE = 5
loader = Data.DataLoader(
    dataset=dataset,        # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini-batch size
    shuffle=True,           # random shuffle for training
    num_workers=1,          # subprocesses for loading data
)
In [34]:
# EPOCH: one full pass over the entire dataset counts as 1 epoch.
for epoch in range(3):   # train on the entire dataset 3 times
    for step, (batch_x, batch_y) in enumerate(loader):  # for each training step
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())
        # train your model here
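With 10 samples and BATCH_SIZE = 5, each epoch yields 2 steps of 5 samples; because shuffle=True, the samples are regrouped into different batches every epoch.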
In [35]:
BATCH_SIZE = 8
loader = Data.DataLoader(
    dataset=dataset,        # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini-batch size
    shuffle=True,           # random shuffle for training
    num_workers=1,          # subprocesses for loading data
)

for epoch in range(3):   # train on the entire dataset 3 times
    for step, (batch_x, batch_y) in enumerate(loader):  # for each training step
        # train your model here...
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())
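With BATCH_SIZE = 8 and only 10 samples, the second step of every epoch holds just the 2 leftover samples. If such partial batches are unwanted, DataLoader's drop_last flag discards them; a minimal sketch:
In [ ]:
loader = Data.DataLoader(dataset=dataset, batch_size=8,
                         shuffle=True, drop_last=True)  # skip the incomplete final batch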
In [45]:
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

# Input pipeline from a folder containing one sub-folder of images per class.
# We can then inspect classes, class_to_idx, and the (filename, idx) pairs.
img_dir = "./images"
img_data = dsets.ImageFolder(img_dir, transforms.Compose([
    transforms.RandomResizedCrop(224),  # RandomSizedCrop was renamed to RandomResizedCrop
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]))
print(img_data.classes)       # list of class (sub-folder) names
print(img_data.class_to_idx)  # mapping from class name to integer label
print(img_data.imgs)          # list of (image path, class index) pairs
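ImageFolder derives the labels from the directory layout, one sub-folder per class. A hypothetical ./images tree that would satisfy the code above (class names are assumptions for illustration):

./images/cat/001.jpg   <- class "cat", index 0
./images/cat/002.jpg
./images/dog/001.jpg   <- class "dog", index 1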
In [50]:
# After we get the list of images, we can turn it into batches of images
# with torch.utils.data.DataLoader().
img_batch = Data.DataLoader(img_data, batch_size=3,
                            shuffle=True, num_workers=2)

for img, label in img_batch:
    print(img.size())   # [batch, 3, 224, 224] given the crop above; the last batch may be smaller
    print(label)
In [57]:
# MNIST Dataset
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

# Select one data pair (reads the data from disk).
image, label = train_dataset[0]
print(image.size())   # torch.Size([1, 28, 28])
print(label)          # integer class label in 0-9
In [60]:
# Data loader (this provides queueing and threading in a very simple way).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True,
                                           num_workers=2)

# When iteration starts, the queue and worker threads begin loading data from files.
data_iter = iter(train_loader)

# Fetch one mini-batch of images and labels.
images, labels = next(data_iter)   # data_iter.next() works only in Python 2

# Actual usage of the data loader is as below:
# for images, labels in train_loader:
#     # your training code goes here
#     pass

images.size()
Out[60]:
torch.Size([100, 1, 28, 28])
In [56]:
# You should build a custom dataset as below.
class CustomDataset(Data.Dataset):
    def __init__(self):
        # TODO
        # 1. Initialize file paths or a list of file names.
        pass

    def __getitem__(self, index):
        # TODO
        # 1. Read one sample from file (e.g. using numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.transforms).
        # 3. Return a data pair (e.g. image and label).
        pass

    def __len__(self):
        # Change 0 to the total size of your dataset.
        return 0

# Then you can just use torch's prebuilt data loader.
custom_dataset = CustomDataset()
train_loader = Data.DataLoader(dataset=custom_dataset,
                               batch_size=100,
                               shuffle=True,
                               num_workers=2)
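To make the skeleton concrete, here is a minimal sketch of a dataset that reads images from a list of file paths with PIL; the class name, paths, labels, and transform argument are assumptions for illustration, not part of the original notebook:
In [ ]:
from PIL import Image

class ImageListDataset(Data.Dataset):    # hypothetical example class
    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths     # e.g. ['a.jpg', 'b.jpg']  (assumed)
        self.labels = labels             # e.g. [0, 1]              (assumed)
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.file_paths[index]).convert('RGB')  # 1. read one sample
        if self.transform is not None:
            image = self.transform(image)                          # 2. preprocess
        return image, self.labels[index]                           # 3. return a data pair

    def __len__(self):
        return len(self.file_paths)      # total size of the dataset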
In [ ]:
#========================== Using a pretrained model =========================#
# Download and load a pretrained ResNet.
resnet = torchvision.models.resnet18(pretrained=True)

# If you want to finetune only the top layer of the model:
for param in resnet.parameters():
    param.requires_grad = False

# Replace the top layer for finetuning.
resnet.fc = torch.nn.Linear(resnet.fc.in_features, 100)  # 100 classes is just an example

# For testing. (Variable is deprecated; plain tensors can be fed to the model directly.)
images = torch.randn(10, 3, 256, 256)
outputs = resnet(images)
print(outputs.size())   # torch.Size([10, 100])
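Because the replacement fc layer is created after the freeze loop, its parameters still have requires_grad=True, so only the new top layer will be trained. A minimal sketch of one finetuning step (the loss function, learning rate, and dummy labels are assumptions):
In [ ]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss()                    # assumed classification loss
optimizer = torch.optim.SGD(resnet.fc.parameters(),  # optimize only the new top layer
                            lr=0.001)                # assumed learning rate

labels = torch.randint(0, 100, (10,))                # dummy targets for the 100 classes
optimizer.zero_grad()
loss = criterion(resnet(images), labels)
loss.backward()                                      # gradients flow only into resnet.fc
optimizer.step()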