In [13]:
    
import torch
import torch.utils.data as Data
SEED = 1  # fixed seed so the random draws in this notebook are reproducible
torch.manual_seed(SEED)
    
    Out[13]:
In [15]:
    
# Toy data: two 1-D tensors of ten evenly spaced points each.
x = torch.linspace(1, 10, 10)  # inputs, ascending from 1 to 10
y = torch.linspace(10, 1, 10)  # targets, descending from 10 to 1
# Preview the (x, y) pairs as a 10x2 tensor, one pair per row.
torch.stack((x, y), dim=1)
    
    Out[15]:
In [ ]:
    
# Pair every x sample with its y target so they can be indexed together.
# The tensors are passed positionally: TensorDataset's signature is
# `TensorDataset(*tensors)` since PyTorch 0.4, and the former `data_tensor=` /
# `target_tensor=` keywords raise TypeError there. The positional form is
# accepted by both the old and the new signature.
dataset = Data.TensorDataset(x, y)
    
In [29]:
    
BATCH_SIZE = 5  # samples per mini-batch

# Wrap the TensorDataset in a loader that shuffles the samples and yields them
# BATCH_SIZE at a time, using one subprocess for loading.
loader = Data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
    
In [34]:
    
# EPOCH: one complete pass over every sample in the dataset counts as 1 epoch.
for epoch in range(3):  # go through the entire dataset 3 times
    for step, batch in enumerate(loader):  # one iteration per training step
        batch_x, batch_y = batch
        # Show which samples ended up in this mini-batch.
        print('Epoch: ', epoch, '| Step: ', step,
              '| batch x: ', batch_x.numpy(),
              '| batch y: ', batch_y.numpy())
        # train your model
    
    
In [35]:
    
BATCH_SIZE = 8  # samples per mini-batch for this run

# Loader over the same dataset with the new batch size: shuffled, one loading
# subprocess.
# NOTE(review): if the dataset size is not a multiple of BATCH_SIZE, the final
# batch of each epoch should be smaller (DataLoader's default keeps it) — confirm.
loader = Data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)

for epoch in range(3):  # go through the entire dataset 3 times
    for step, batch in enumerate(loader):  # one iteration per training step
        batch_x, batch_y = batch

        # train your data...

        print('Epoch: ', epoch, '| Step: ', step,
              '| batch x: ', batch_x.numpy(),
              '| batch y: ', batch_y.numpy())
    
    
In [45]:
    
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms
# Input pipeline from a folder containing multiple folders of images
# (ImageFolder treats each sub-folder as one class).
# Afterwards we can check the classes, class_to_idx, and filename with idx.
img_dir = "./images"
img_data = dsets.ImageFolder(  # module is imported as `dsets`; `dset` was a NameError
    img_dir,
    transform=transforms.Compose([
        # RandomResizedCrop is the current name of the deprecated RandomSizedCrop:
        # it takes a random crop of the image and resizes it to 224x224.
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
)
print(img_data.classes)       # class names
print(img_data.class_to_idx)  # mapping from class name to class index
print(img_data.imgs)          # the image entries together with their class index
    
    
In [50]:
    
# Once the list of images exists, torch.utils.data.DataLoader() turns it into
# shuffled batches of 3 images, loaded by 2 subprocesses.
img_batch = Data.DataLoader(dataset=img_data, batch_size=3, shuffle=True, num_workers=2)
for batch in img_batch:
    img, label = batch
    print(img.size())  # size of the batched image tensor
    print(label)       # class indices for the images in this batch
    
    
In [57]:
    
# MNIST Dataset: training split stored under ./data (downloaded when requested),
# with every image converted to a tensor on access.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Select one data pair (read data from disk).
image, label = train_dataset[0]
print(image.size())
print(label)
    
    
In [60]:
    
# Data Loader (this provides queue and thread in a very simple way).
# Uses the `Data` alias like the other loader cells in this notebook.
train_loader = Data.DataLoader(dataset=train_dataset,
                               batch_size=100,
                               shuffle=True,
                               num_workers=2)
# When iteration starts, queue and thread start to load dataset from files.
data_iter = iter(train_loader)
# Mini-batch images and labels.
# Use the built-in next(): the iterator's Python-2-style `.next()` method is
# not available on current DataLoader iterators, while next() works everywhere.
images, labels = next(data_iter)
# # Actual usage of data loader is as below.
# for images, labels in train_loader:
#     # Your training code will be written here
#     pass
images.size()
    
    Out[60]:
In [56]:
    
# You should build custom dataset as below.
class CustomDataset(Data.Dataset):
    """Skeleton of a user-defined dataset; every method is a placeholder to fill in."""

    def __init__(self):
        """TODO: 1. Initialize file path or list of file names."""

    def __getitem__(self, index):
        """Placeholder: returns None until implemented.

        TODO:
        1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).
        2. Preprocess the data (e.g. torchvision.Transform).
        3. Return a data pair (e.g. image and label).
        """
        return None

    def __len__(self):
        """Return the dataset size; replace 0 with the total size of your dataset."""
        total_size = 0
        return total_size
# Then, you can just use prebuilt torch's data loader.
# NOTE(review): while CustomDataset.__len__ still returns 0, recent PyTorch
# releases may reject shuffle=True at construction time (the random sampler
# expects a non-empty dataset) — confirm once the TODOs are filled in.
custom_dataset = CustomDataset()
train_loader = Data.DataLoader(
    dataset=custom_dataset,
    batch_size=100,
    shuffle=True,
    num_workers=2,
)
    
In [ ]:
    
#========================== Using pretrained model ==========================#
# `Variable` is used below but is not imported by any visible cell, which made
# this cell fail with NameError; import it here so the cell runs on its own.
from torch.autograd import Variable

# Download and load pretrained resnet.
resnet = torchvision.models.resnet18(pretrained=True)
# If you want to finetune only top layer of the model:
# freeze every existing parameter so no gradients are computed for them.
for param in resnet.parameters():
    param.requires_grad = False

# Replace top layer for finetuning. The new layer is created after the freeze
# loop above, so that loop did not touch its parameters.
resnet.fc = torch.nn.Linear(resnet.fc.in_features, 100)  # 100 is for example.
# For test: push a random batch of 10 RGB images (256x256) through the network.
images = Variable(torch.randn(10, 3, 256, 256))
outputs = resnet(images)
print(outputs.size())   # (10, 100)