LeNet-5 MNIST Digits Classifier
References
• [1] Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, November 1998.
Imports
In [13]:
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
Model Settings
In [20]:
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10

# Architecture
NUM_FEATURES = 32*32
NUM_CLASSES = 10

# Other
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

GRAYSCALE = True
MNIST Dataset
In [21]:
##########################
### MNIST DATASET
##########################
resize_transform = transforms.Compose([transforms.Resize((32, 32)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5,), (0.5,))])

# Note: transforms.ToTensor() scales input images to the 0-1 range;
# Normalize((0.5,), (0.5,)) then shifts them to the -1 to 1 range

train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=resize_transform,
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=resize_transform)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([128, 1, 32, 32])
Image label dimensions: torch.Size([128])
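As a quick sanity check of the comment above (a minimal sketch, not part of the original notebook), the pixel range of the batch just fetched can be inspected directly; after ToTensor and Normalize((0.5,), (0.5,)), values should lie in [-1, 1]:

# Sanity check (assumes the `images` batch from the loop above is in scope):
# ToTensor maps pixels to [0, 1]; Normalize then applies (x - 0.5) / 0.5
print('Pixel min/max:', images.min().item(), images.max().item())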
In [22]:
device = torch.device(DEVICE)
torch.manual_seed(0)

for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        x = x.to(device)
        y = y.to(device)
        break
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128
In [23]:
##########################
### MODEL
##########################
class LeNet5(nn.Module):

    def __init__(self, num_classes, grayscale=False):
        super(LeNet5, self).__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        if self.grayscale:
            in_channels = 1
        else:
            in_channels = 3

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5),  # 32x32 -> 28x28
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2),               # 28x28 -> 14x14
            nn.Conv2d(6, 16, kernel_size=5),           # 14x14 -> 10x10
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=2)                # 10x10 -> 5x5
        )

        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.Tanh(),
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
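To verify the 16*5*5 input size of the first linear layer (a minimal sketch, not part of the original notebook), a dummy forward pass through the feature extractor confirms the 5x5 spatial output:

# Shape check (assumes LeNet5 is defined as above):
# 32x32 -> conv 5x5 -> 28x28 -> pool -> 14x14 -> conv 5x5 -> 10x10 -> pool -> 5x5
tmp_model = LeNet5(num_classes=10, grayscale=True)
print(tmp_model.features(torch.zeros(1, 1, 32, 32)).shape)
# expected: torch.Size([1, 16, 5, 5])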
In [24]:
torch.manual_seed(RANDOM_SEED)
model = LeNet5(NUM_CLASSES, GRAYSCALE)
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
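For reference, the total parameter count of the instantiated model can be verified with a one-liner (a quick sketch, not part of the original notebook); for the grayscale configuration it works out to 61,706:

# 156 (conv1) + 2416 (conv2) + 48120 (fc1) + 10164 (fc2) + 850 (fc3) = 61706
print(sum(p.numel() for p in model.parameters()))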
Training
In [25]:
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100


start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        # F.cross_entropy expects the raw logits, not the softmax probabilities
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0469 | Cost: 2.2909
Epoch: 001/010 | Batch 0050/0469 | Cost: 0.4785
Epoch: 001/010 | Batch 0100/0469 | Cost: 0.2938
Epoch: 001/010 | Batch 0150/0469 | Cost: 0.2415
Epoch: 001/010 | Batch 0200/0469 | Cost: 0.1336
Epoch: 001/010 | Batch 0250/0469 | Cost: 0.1198
Epoch: 001/010 | Batch 0300/0469 | Cost: 0.0857
Epoch: 001/010 | Batch 0350/0469 | Cost: 0.1963
Epoch: 001/010 | Batch 0400/0469 | Cost: 0.0496
Epoch: 001/010 | Batch 0450/0469 | Cost: 0.1120
Epoch: 001/010 | Train: 97.588%
Time elapsed: 0.75 min
Epoch: 002/010 | Batch 0000/0469 | Cost: 0.0804
Epoch: 002/010 | Batch 0050/0469 | Cost: 0.0900
Epoch: 002/010 | Batch 0100/0469 | Cost: 0.0339
Epoch: 002/010 | Batch 0150/0469 | Cost: 0.0394
Epoch: 002/010 | Batch 0200/0469 | Cost: 0.1655
Epoch: 002/010 | Batch 0250/0469 | Cost: 0.0482
Epoch: 002/010 | Batch 0300/0469 | Cost: 0.1046
Epoch: 002/010 | Batch 0350/0469 | Cost: 0.0716
Epoch: 002/010 | Batch 0400/0469 | Cost: 0.0511
Epoch: 002/010 | Batch 0450/0469 | Cost: 0.0881
Epoch: 002/010 | Train: 98.367%
Time elapsed: 1.53 min
Epoch: 003/010 | Batch 0000/0469 | Cost: 0.0780
Epoch: 003/010 | Batch 0050/0469 | Cost: 0.0201
Epoch: 003/010 | Batch 0100/0469 | Cost: 0.0201
Epoch: 003/010 | Batch 0150/0469 | Cost: 0.0233
Epoch: 003/010 | Batch 0200/0469 | Cost: 0.0355
Epoch: 003/010 | Batch 0250/0469 | Cost: 0.0728
Epoch: 003/010 | Batch 0300/0469 | Cost: 0.0196
Epoch: 003/010 | Batch 0350/0469 | Cost: 0.0538
Epoch: 003/010 | Batch 0400/0469 | Cost: 0.0544
Epoch: 003/010 | Batch 0450/0469 | Cost: 0.0497
Epoch: 003/010 | Train: 98.915%
Time elapsed: 2.33 min
Epoch: 004/010 | Batch 0000/0469 | Cost: 0.0154
Epoch: 004/010 | Batch 0050/0469 | Cost: 0.0116
Epoch: 004/010 | Batch 0100/0469 | Cost: 0.0123
Epoch: 004/010 | Batch 0150/0469 | Cost: 0.0328
Epoch: 004/010 | Batch 0200/0469 | Cost: 0.0213
Epoch: 004/010 | Batch 0250/0469 | Cost: 0.0147
Epoch: 004/010 | Batch 0300/0469 | Cost: 0.0270
Epoch: 004/010 | Batch 0350/0469 | Cost: 0.0192
Epoch: 004/010 | Batch 0400/0469 | Cost: 0.0078
Epoch: 004/010 | Batch 0450/0469 | Cost: 0.0158
Epoch: 004/010 | Train: 99.105%
Time elapsed: 3.14 min
Epoch: 005/010 | Batch 0000/0469 | Cost: 0.0299
Epoch: 005/010 | Batch 0050/0469 | Cost: 0.0720
Epoch: 005/010 | Batch 0100/0469 | Cost: 0.0279
Epoch: 005/010 | Batch 0150/0469 | Cost: 0.0355
Epoch: 005/010 | Batch 0200/0469 | Cost: 0.0418
Epoch: 005/010 | Batch 0250/0469 | Cost: 0.0477
Epoch: 005/010 | Batch 0300/0469 | Cost: 0.0455
Epoch: 005/010 | Batch 0350/0469 | Cost: 0.0398
Epoch: 005/010 | Batch 0400/0469 | Cost: 0.0118
Epoch: 005/010 | Batch 0450/0469 | Cost: 0.0045
Epoch: 005/010 | Train: 99.468%
Time elapsed: 3.95 min
Epoch: 006/010 | Batch 0000/0469 | Cost: 0.0352
Epoch: 006/010 | Batch 0050/0469 | Cost: 0.0041
Epoch: 006/010 | Batch 0100/0469 | Cost: 0.0222
Epoch: 006/010 | Batch 0150/0469 | Cost: 0.0320
Epoch: 006/010 | Batch 0200/0469 | Cost: 0.0198
Epoch: 006/010 | Batch 0250/0469 | Cost: 0.0032
Epoch: 006/010 | Batch 0300/0469 | Cost: 0.0129
Epoch: 006/010 | Batch 0350/0469 | Cost: 0.0071
Epoch: 006/010 | Batch 0400/0469 | Cost: 0.0182
Epoch: 006/010 | Batch 0450/0469 | Cost: 0.0375
Epoch: 006/010 | Train: 99.645%
Time elapsed: 4.82 min
Epoch: 007/010 | Batch 0000/0469 | Cost: 0.0132
Epoch: 007/010 | Batch 0050/0469 | Cost: 0.0049
Epoch: 007/010 | Batch 0100/0469 | Cost: 0.0190
Epoch: 007/010 | Batch 0150/0469 | Cost: 0.0178
Epoch: 007/010 | Batch 0200/0469 | Cost: 0.0266
Epoch: 007/010 | Batch 0250/0469 | Cost: 0.0507
Epoch: 007/010 | Batch 0300/0469 | Cost: 0.0186
Epoch: 007/010 | Batch 0350/0469 | Cost: 0.0285
Epoch: 007/010 | Batch 0400/0469 | Cost: 0.0169
Epoch: 007/010 | Batch 0450/0469 | Cost: 0.0230
Epoch: 007/010 | Train: 99.497%
Time elapsed: 5.64 min
Epoch: 008/010 | Batch 0000/0469 | Cost: 0.0237
Epoch: 008/010 | Batch 0050/0469 | Cost: 0.0124
Epoch: 008/010 | Batch 0100/0469 | Cost: 0.0058
Epoch: 008/010 | Batch 0150/0469 | Cost: 0.0120
Epoch: 008/010 | Batch 0200/0469 | Cost: 0.0174
Epoch: 008/010 | Batch 0250/0469 | Cost: 0.0322
Epoch: 008/010 | Batch 0300/0469 | Cost: 0.0198
Epoch: 008/010 | Batch 0350/0469 | Cost: 0.0045
Epoch: 008/010 | Batch 0400/0469 | Cost: 0.0060
Epoch: 008/010 | Batch 0450/0469 | Cost: 0.0038
Epoch: 008/010 | Train: 99.740%
Time elapsed: 6.46 min
Epoch: 009/010 | Batch 0000/0469 | Cost: 0.0052
Epoch: 009/010 | Batch 0050/0469 | Cost: 0.0020
Epoch: 009/010 | Batch 0100/0469 | Cost: 0.0554
Epoch: 009/010 | Batch 0150/0469 | Cost: 0.0063
Epoch: 009/010 | Batch 0200/0469 | Cost: 0.0113
Epoch: 009/010 | Batch 0250/0469 | Cost: 0.0030
Epoch: 009/010 | Batch 0300/0469 | Cost: 0.0118
Epoch: 009/010 | Batch 0350/0469 | Cost: 0.0225
Epoch: 009/010 | Batch 0400/0469 | Cost: 0.0074
Epoch: 009/010 | Batch 0450/0469 | Cost: 0.0042
Epoch: 009/010 | Train: 99.617%
Time elapsed: 7.24 min
Epoch: 010/010 | Batch 0000/0469 | Cost: 0.0089
Epoch: 010/010 | Batch 0050/0469 | Cost: 0.0027
Epoch: 010/010 | Batch 0100/0469 | Cost: 0.0147
Epoch: 010/010 | Batch 0150/0469 | Cost: 0.0081
Epoch: 010/010 | Batch 0200/0469 | Cost: 0.0037
Epoch: 010/010 | Batch 0250/0469 | Cost: 0.0085
Epoch: 010/010 | Batch 0300/0469 | Cost: 0.0072
Epoch: 010/010 | Batch 0350/0469 | Cost: 0.0328
Epoch: 010/010 | Batch 0400/0469 | Cost: 0.0074
Epoch: 010/010 | Batch 0450/0469 | Cost: 0.0150
Epoch: 010/010 | Train: 99.803%
Time elapsed: 8.01 min
Total Training Time: 8.01 min
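Note that the training loop passes the raw logits, not the softmax probabilities, to F.cross_entropy, which applies log_softmax internally. A minimal illustration of this equivalence (not part of the original notebook, assumes the imports above):

# F.cross_entropy(logits, y) == F.nll_loss(F.log_softmax(logits, dim=1), y)
example_logits = torch.tensor([[2.0, 0.5, -1.0]])
example_target = torch.tensor([0])
print(torch.isclose(F.cross_entropy(example_logits, example_target),
                    F.nll_loss(F.log_softmax(example_logits, dim=1),
                               example_target)))  # tensor(True)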
Evaluation
In [26]:
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
Test accuracy: 98.88%
In [2]:
for batch_idx, (features, targets) in enumerate(test_loader):
    features = features
    targets = targets
    break

nhwc_img = np.transpose(features[0], axes=(1, 2, 0))
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys');
In [1]:
model.eval()
logits, probas = model(features.to(device)[0, None])
print('Probability 7 %.2f%%' % (probas[0][7]*100))
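To report the predicted class rather than the probability of a single digit, one could add (a hypothetical extension, assuming the cell above has run):

# Predicted label for the same test image
print('Predicted label:', torch.argmax(probas, dim=1)[0].item())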