LeNet-5 CIFAR-10 Classifier¶
References¶
• [1] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, November 1998.
Imports¶
In [1]:
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
Model Settings¶
In [2]:
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10
# Architecture
NUM_FEATURES = 32*32
NUM_CLASSES = 10
# Other
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
GRAYSCALE = False
CIFAR-10 Dataset¶
In [3]:
##########################
### CIFAR-10 Dataset
##########################
train_mean = (0.5, 0.5, 0.5)
train_std = (0.5, 0.5, 0.5)

resize_transform = transforms.Compose([transforms.Resize((32, 32)),
                                       transforms.ToTensor(),
                                       transforms.Normalize(train_mean, train_std)])

# Note transforms.ToTensor() scales input images
# to 0-1 range

train_dataset = datasets.CIFAR10(root='data',
                                 train=True,
                                 transform=resize_transform,
                                 download=True)

test_dataset = datasets.CIFAR10(root='data',
                                train=False,
                                transform=resize_transform)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=8,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         num_workers=8,
                         shuffle=False)

# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz
Extracting data/cifar-10-python.tar.gz to data
Image batch dimensions: torch.Size([128, 3, 32, 32])
Image label dimensions: torch.Size([128])
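Since transforms.ToTensor() maps pixel values to [0, 1] and Normalize with a mean and std of 0.5 per channel rescales them to roughly [-1, 1], a quick value-range check on one batch can confirm the preprocessing. This is an illustrative sketch (not part of the original notebook) that reuses the train_loader defined above:

# Sanity check (illustrative): after ToTensor + Normalize((0.5,)*3, (0.5,)*3),
# pixel values should lie in [-1, 1].
images, labels = next(iter(train_loader))
print('min:', images.min().item(), '| max:', images.max().item())
print('per-channel mean:', images.mean(dim=(0, 2, 3)))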
In [4]:
device = torch.device(DEVICE)

torch.manual_seed(0)

for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        x = x.to(device)
        y = y.to(device)
        break
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128
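Because the DataLoader's shuffling draws from PyTorch's global RNG when no explicit generator is passed, re-seeding before iterating should reproduce the same batch order. The following is an illustrative sketch (not part of the original notebook) of that check:

# Illustrative check: with the same global seed, the first shuffled batch should repeat.
torch.manual_seed(0)
first_labels_a = next(iter(train_loader))[1]
torch.manual_seed(0)
first_labels_b = next(iter(train_loader))[1]
print('Same first-batch order:', torch.equal(first_labels_a, first_labels_b))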
In [66]:
##########################
### MODEL
##########################
class LeNet5(nn.Module):

    def __init__(self, num_classes, grayscale=False):
        super(LeNet5, self).__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        if self.grayscale:
            in_channels = 1
        else:
            in_channels = 3

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 30*in_channels, kernel_size=7), # <---- changes
            nn.ReLU(),                                             # <---- changes
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(30*in_channels, 50*in_channels, kernel_size=7), # <---- changes
            nn.ReLU(),                                                # <---- changes
            nn.MaxPool2d(kernel_size=2)
        )

        self.classifier = nn.Sequential(
            nn.Linear(450*in_channels, 300*in_channels), # <---- changes
            nn.ReLU(),                                   # <---- changes
            nn.Linear(300*in_channels, 200*in_channels), # <---- changes
            nn.ReLU(),                                   # <---- changes
            nn.Linear(200*in_channels, num_classes),     # <---- changes
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [67]:
torch.manual_seed(RANDOM_SEED)

model = LeNet5(NUM_CLASSES, GRAYSCALE)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Training¶
In [68]:
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100


start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

Epoch: 001/010 | Batch 0000/0391 | Cost: 2.3004
Epoch: 001/010 | Batch 0050/0391 | Cost: 1.8731
Epoch: 001/010 | Batch 0100/0391 | Cost: 1.6090
Epoch: 001/010 | Batch 0150/0391 | Cost: 1.6064
Epoch: 001/010 | Batch 0200/0391 | Cost: 1.4275
Epoch: 001/010 | Batch 0250/0391 | Cost: 1.2070
Epoch: 001/010 | Batch 0300/0391 | Cost: 1.2289
Epoch: 001/010 | Batch 0350/0391 | Cost: 1.2588
Epoch: 001/010 | Train: 56.422%
Time elapsed: 0.33 min
Epoch: 002/010 | Batch 0000/0391 | Cost: 1.2183
Epoch: 002/010 | Batch 0050/0391 | Cost: 1.1455
Epoch: 002/010 | Batch 0100/0391 | Cost: 1.3360
Epoch: 002/010 | Batch 0150/0391 | Cost: 1.0921
Epoch: 002/010 | Batch 0200/0391 | Cost: 1.2557
Epoch: 002/010 | Batch 0250/0391 | Cost: 1.1362
Epoch: 002/010 | Batch 0300/0391 | Cost: 0.9911
Epoch: 002/010 | Batch 0350/0391 | Cost: 1.2770
Epoch: 002/010 | Train: 64.876%
Time elapsed: 0.66 min
Epoch: 003/010 | Batch 0000/0391 | Cost: 1.0398
Epoch: 003/010 | Batch 0050/0391 | Cost: 1.0960
Epoch: 003/010 | Batch 0100/0391 | Cost: 0.7962
Epoch: 003/010 | Batch 0150/0391 | Cost: 0.8341
Epoch: 003/010 | Batch 0200/0391 | Cost: 0.9529
Epoch: 003/010 | Batch 0250/0391 | Cost: 0.8902
Epoch: 003/010 | Batch 0300/0391 | Cost: 0.9333
Epoch: 003/010 | Batch 0350/0391 | Cost: 0.7748
Epoch: 003/010 | Train: 71.132%
Time elapsed: 0.99 min
Epoch: 004/010 | Batch 0000/0391 | Cost: 0.7432
Epoch: 004/010 | Batch 0050/0391 | Cost: 0.8488
Epoch: 004/010 | Batch 0100/0391 | Cost: 0.7935
Epoch: 004/010 | Batch 0150/0391 | Cost: 0.8088
Epoch: 004/010 | Batch 0200/0391 | Cost: 0.7087
Epoch: 004/010 | Batch 0250/0391 | Cost: 0.9720
Epoch: 004/010 | Batch 0300/0391 | Cost: 0.9366
Epoch: 004/010 | Batch 0350/0391 | Cost: 0.6510
Epoch: 004/010 | Train: 75.414%
Time elapsed: 1.32 min
Epoch: 005/010 | Batch 0000/0391 | Cost: 0.7845
Epoch: 005/010 | Batch 0050/0391 | Cost: 0.7246
Epoch: 005/010 | Batch 0100/0391 | Cost: 0.6981
Epoch: 005/010 | Batch 0150/0391 | Cost: 0.7112
Epoch: 005/010 | Batch 0200/0391 | Cost: 0.7425
Epoch: 005/010 | Batch 0250/0391 | Cost: 0.5983
Epoch: 005/010 | Batch 0300/0391 | Cost: 0.6996
Epoch: 005/010 | Batch 0350/0391 | Cost: 0.7950
Epoch: 005/010 | Train: 77.872%
Time elapsed: 1.65 min
Epoch: 006/010 | Batch 0000/0391 | Cost: 0.6423
Epoch: 006/010 | Batch 0050/0391 | Cost: 0.7316
Epoch: 006/010 | Batch 0100/0391 | Cost: 0.6773
Epoch: 006/010 | Batch 0150/0391 | Cost: 0.6519
Epoch: 006/010 | Batch 0200/0391 | Cost: 0.6791
Epoch: 006/010 | Batch 0250/0391 | Cost: 0.6105
Epoch: 006/010 | Batch 0300/0391 | Cost: 0.6407
Epoch: 006/010 | Batch 0350/0391 | Cost: 0.6173
Epoch: 006/010 | Train: 78.694%
Time elapsed: 1.98 min
Epoch: 007/010 | Batch 0000/0391 | Cost: 0.6966
Epoch: 007/010 | Batch 0050/0391 | Cost: 0.5184
Epoch: 007/010 | Batch 0100/0391 | Cost: 0.4478
Epoch: 007/010 | Batch 0150/0391 | Cost: 0.5390
Epoch: 007/010 | Batch 0200/0391 | Cost: 0.6506
Epoch: 007/010 | Batch 0250/0391 | Cost: 0.4421
Epoch: 007/010 | Batch 0300/0391 | Cost: 0.6329
Epoch: 007/010 | Batch 0350/0391 | Cost: 0.5524
Epoch: 007/010 | Train: 85.764%
Time elapsed: 2.31 min
Epoch: 008/010 | Batch 0000/0391 | Cost: 0.3701
Epoch: 008/010 | Batch 0050/0391 | Cost: 0.2734
Epoch: 008/010 | Batch 0100/0391 | Cost: 0.4411
Epoch: 008/010 | Batch 0150/0391 | Cost: 0.4290
Epoch: 008/010 | Batch 0200/0391 | Cost: 0.4157
Epoch: 008/010 | Batch 0250/0391 | Cost: 0.3376
Epoch: 008/010 | Batch 0300/0391 | Cost: 0.5340
Epoch: 008/010 | Batch 0350/0391 | Cost: 0.5431
Epoch: 008/010 | Train: 87.990%
Time elapsed: 2.64 min
Epoch: 009/010 | Batch 0000/0391 | Cost: 0.4626
Epoch: 009/010 | Batch 0050/0391 | Cost: 0.1959
Epoch: 009/010 | Batch 0100/0391 | Cost: 0.2936
Epoch: 009/010 | Batch 0150/0391 | Cost: 0.2915
Epoch: 009/010 | Batch 0200/0391 | Cost: 0.3270
Epoch: 009/010 | Batch 0250/0391 | Cost: 0.4158
Epoch: 009/010 | Batch 0300/0391 | Cost: 0.4383
Epoch: 009/010 | Batch 0350/0391 | Cost: 0.3772
Epoch: 009/010 | Train: 91.526%
Time elapsed: 2.97 min
Epoch: 010/010 | Batch 0000/0391 | Cost: 0.2472
Epoch: 010/010 | Batch 0050/0391 | Cost: 0.2324
Epoch: 010/010 | Batch 0100/0391 | Cost: 0.3136
Epoch: 010/010 | Batch 0150/0391 | Cost: 0.2464
Epoch: 010/010 | Batch 0200/0391 | Cost: 0.2370
Epoch: 010/010 | Batch 0250/0391 | Cost: 0.3160
Epoch: 010/010 | Batch 0300/0391 | Cost: 0.4520
Epoch: 010/010 | Batch 0350/0391 | Cost: 0.3002
Epoch: 010/010 | Train: 93.804%
Time elapsed: 3.29 min
Total Training Time: 3.29 min

Evaluation¶
In [69]:
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
    # accuracy improved from 69.96% to 93.80%

Test accuracy: 93.80%

In [71]:
class UnNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Parameters:
        ------------
        tensor (Tensor): Normalized tensor image of size (C, H, W).

        Returns:
        ------------
        Tensor: Un-normalized image.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor

unorm = UnNormalize(mean=train_mean, std=train_std)

In [72]:
test_loader = DataLoader(dataset=train_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True)

for features, targets in test_loader:
    break

_, predictions = model.forward(features[:8].to(DEVICE))
predictions = torch.argmax(predictions, dim=1)

d = {0: 'airplane',
     1: 'automobile',
     2: 'bird',
     3: 'cat',
     4: 'deer',
     5: 'dog',
     6: 'frog',
     7: 'horse',
     8: 'ship',
     9: 'truck'}

fig, ax = plt.subplots(1, 8, figsize=(20, 10))
for i in range(8):
    img = unorm(features[i])
    ax[i].imshow(np.transpose(img, (1, 2, 0)))
    ax[i].set_xlabel(d[predictions[i].item()])

plt.show()
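As a final sanity check on the architecture, the 450*in_channels input size of the classifier can be verified by probing the feature extractor with a dummy 32x32 RGB input. This is a minimal sketch assuming the LeNet5 class and settings defined above:

# Illustrative probe of the modified LeNet5 defined above:
# 32x32 -> conv7 -> 26x26 -> pool2 -> 13x13 -> conv7 -> 7x7 -> pool2 -> 3x3,
# so the flattened feature size is 150 channels * 3 * 3 = 1350 = 450 * in_channels.
probe = LeNet5(NUM_CLASSES, GRAYSCALE)
dummy = torch.randn(1, 3, 32, 32)
print('flattened features:', torch.flatten(probe.features(dummy), 1).shape)
print('trainable parameters:', sum(p.numel() for p in probe.parameters() if p.requires_grad))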