MNIST classification using LBP features and SVM
Step 1: Import required packages
In [0]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import feature # This package is used for LBP feature extraction
from sklearn import svm # This package is used for SVM classification
from sklearn import neighbors # This package is used for KNN classification
from sklearn import metrics
%matplotlib inline
import cv2
import seaborn as sns # This package is used for better visualization of data (e.g. confusion matrices)
import tensorflow as tf
from tensorflow import keras
Step 2: Dataset preparation
In [2]:
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [11]:
cd /content/gdrive/My Drive/42028-DL-CNN/Ass1/Dataset
/content/gdrive/My Drive/42028-DL-CNN/Ass1/Dataset
In [21]:
ls
t10k-images-idx3-ubyte.gz train-labels-idx1-ubyte
t10k-labels-idx1-ubyte.gz train-labels-idx1-ubyte.gz
train-images-idx3-ubyte.gz
In [0]:
def load_mnist(path, kind='train'):
    """Load MNIST images and labels from the gzipped IDX files in `path`."""
    import os
    import gzip
    import numpy as np
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)
    # The label file has an 8-byte header, the image file a 16-byte header
    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)
    return images, labels
In [0]:
X_train, y_train = load_mnist('/content/gdrive/My Drive/42028-DL-CNN/Ass1/Dataset', kind='train')
X_test, y_test = load_mnist('/content/gdrive/My Drive/42028-DL-CNN/Ass1/Dataset', kind='t10k')
#X_train, y_train = load_mnist('./Dataset/mnist', kind='train')
#X_test, y_test = load_mnist('./Dataset/mnist', kind='t10k')
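If the Drive copy of the dataset is not available, the same MNIST data can also be downloaded directly through Keras. The sketch below is only an aside and is not used by the rest of the notebook; note that `keras.datasets.mnist.load_data()` returns images already shaped 28x28, whereas `load_mnist` above returns flattened 784-dimensional vectors.
In [ ]:
# Optional alternative (sketch, not used below): download MNIST via Keras.
# These arrays have shape (60000, 28, 28) and (10000, 28, 28) respectively.
# (X_train_k, y_train_k), (X_test_k, y_test_k) = keras.datasets.mnist.load_data()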
In [0]:
# The 28x28 images are flattened to feature vectors of size 784
# There are 60,000 training examples in the training dataset
# There are 10,000 test samples in the testing dataset
print(np.shape(X_train))
print(np.shape(X_test))
# print the labels
print(np.unique(y_train))
(60000, 784)
(10000, 784)
[0 1 2 3 4 5 6 7 8 9]
In [28]:
X_train = X_train.reshape(-1,28,28)
X_test = X_test.reshape(-1,28,28)
# print the shapes of the reshaped train and test data splits
print("Train dataset after reshaping:{}".format(np.shape(X_train)))
print("Test dataset after reshaping :{}".format(np.shape(X_test)))
Train dataset after reshaping:(60000, 28, 28)
Test dataset after reshaping :(10000, 28, 28)
2. Visualization of Dataset
In [0]:
# show one random sample from each digit class and print the corresponding labels
def show_rand_samples(X, y):
    from numpy.random import randint
    fig = plt.figure()
    labels = []
    for i in range(10):
        # keep drawing random indices until a sample of class i is found
        index = randint(0, len(X))
        while y[index] != i:
            index = randint(0, len(X))
        labels.append(i)
        ax = fig.add_subplot(1, 10, i+1)
        ax.axis('off')
        ax.imshow(X[index])
    print(labels)
In [31]:
show_rand_samples(X_train, y_train)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
(figure: one random training sample for each digit 0-9)
3. Local Binary Patterns (LBP)
In [0]:
class LocalBinaryPatterns:
    def __init__(self, numPoints, radius):
        # store the number of points and radius
        self.numPoints = numPoints
        self.radius = radius

    def LBPfeatures(self, image, eps=1e-7):
        # compute the Local Binary Pattern representation
        # of the image, and then use the LBP representation
        # to build the histogram of patterns
        lbp = feature.local_binary_pattern(image, self.numPoints,
                                           self.radius, method="uniform")
        # form the histogram (uniform LBP produces numPoints + 2 distinct labels)
        (hist, _) = np.histogram(lbp.ravel(),
                                 bins=np.arange(0, self.numPoints + 3),
                                 range=(0, self.numPoints + 2))
        # normalize the histogram so it sums to 1
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
        # return the histogram of Local Binary Patterns
        return hist
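Before extracting features for the whole dataset, it can help to sanity-check the descriptor on a single image. The sketch below is not part of the original run; `lbp_demo` is an illustrative object created with the same parameters used in the next code cell, and it assumes `X_train` has already been reshaped to 28x28. It plots one digit next to its normalized uniform-LBP histogram.
In [ ]:
# Sanity check (sketch): one digit and its uniform-LBP histogram
lbp_demo = LocalBinaryPatterns(24, 3)        # P = 8 * radius = 24, radius = 3
sample = X_train[0]
sample_hist = lbp_demo.LBPfeatures(sample)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))
ax1.imshow(sample, cmap='gray')
ax1.set_title('Digit: {}'.format(y_train[0]))
ax1.axis('off')
ax2.bar(range(len(sample_hist)), sample_hist)  # 26 bins for P = 24
ax2.set_title('Normalized LBP histogram')
plt.show()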
LBP feature extraction for the whole training and test datasets
In [34]:
# Create an object of the LocalBinaryPatterns class and initialize its parameters.
radius = 3
n_points = 8 * radius
lbp = LocalBinaryPatterns(n_points, radius)

# loop over the training images and extract an LBP histogram for each
# (each histogram has n_points + 2 = 26 bins)
data_train_lbp = []
for i in range(len(X_train)):
    image = X_train[i]
    hist = lbp.LBPfeatures(image)
    data_train_lbp.append(hist)
data_train_lbp = np.asarray(data_train_lbp)

# extract LBP features for each test sample in the same way
data_test_lbp = []
for i in range(len(X_test)):
    image = X_test[i]
    hist = lbp.LBPfeatures(image)
    data_test_lbp.append(hist)
data_test_lbp = np.asarray(data_test_lbp)

print(data_train_lbp.shape)
print(data_test_lbp.shape)
(60000, 26)
(10000, 26)
3.1 SVM Classifier
Training the SVM model
In [35]:
# train a linear SVM classifier
svm_model = svm.LinearSVC(C=100.0, random_state=42, tol=1e-5, max_iter=10000)
# svm_model = svm.SVC(kernel=”rbf”, C=100.0, random_state=42, gamma=”scale”)
# Start training the SVM classifier
svm_model.fit(data_train_lbp, y_train)
print(np.shape(data_train_lbp))
print(np.shape(y_train))
(60000, 26)
(60000,)
In [37]:
# Check the training accuracy
acc_svm_lbp = svm_model.score(data_train_lbp, y_train)
print("Train set Accuracy: {:.2f}".format(acc_svm_lbp))
# Expected training set accuracy: about 0.60
Train set Accuracy: 0.55
Evaluation of the trained SVM model on the test dataset
In [0]:
# Perform classification
predict_svm_lbp = svm_model.predict(data_test_lbp)
In [39]:
acc_svm_lbp = metrics.accuracy_score(y_test, predict_svm_lbp)
print("Accuracy on test dataset:", acc_svm_lbp)
Accuracy on test dataset: 0.5605
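seaborn was imported in Step 1 for exactly this kind of visualization; a minimal sketch of a confusion-matrix heatmap for the SVM predictions (assuming `predict_svm_lbp` from the cell above) could look like the following.
In [ ]:
# Sketch: confusion matrix of the SVM predictions as a seaborn heatmap
cm_svm = metrics.confusion_matrix(y_test, predict_svm_lbp)
plt.figure(figsize=(8, 6))
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('SVM + LBP confusion matrix')
plt.show()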
3.2 KNN Classifier
Training the KNN model
In [40]:
knn_model = neighbors.KNeighborsClassifier(n_neighbors=10, weights='distance')
knn_model.fit(data_train_lbp, y_train)
Out[40]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                     weights='distance')
Evaluation of the trained KNN model on the test dataset
In [0]:
predict_knn_lbp = knn_model.predict(data_test_lbp)
In [42]:
acc_knn_lbp = metrics.accuracy_score(y_test, predict_knn_lbp)
print("Accuracy on test dataset:", acc_knn_lbp)
Accuracy on test dataset: 0.5336
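Beyond overall accuracy, a per-class breakdown shows which digits the LBP histograms confuse most. A short sketch using scikit-learn's classification report on the `predict_knn_lbp` predictions from above:
In [ ]:
# Sketch: per-class precision, recall and F1-score for the KNN predictions
print(metrics.classification_report(y_test, predict_knn_lbp))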
3.3 Neural Network Classifier
In [43]:
ann_model = keras.models.Sequential([keras.layers.Flatten(input_shape=[26,]),
                                     keras.layers.Dense(128, activation=tf.nn.relu),
                                     keras.layers.Dense(10, activation=tf.nn.softmax)])

ann_model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
ann_model.summary()
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
flatten (Flatten)            (None, 26)                0
_________________________________________________________________
dense (Dense)                (None, 128)               3456
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290
=================================================================
Total params: 4,746
Trainable params: 4,746
Non-trainable params: 0
_________________________________________________________________
In [45]:
fit_ann_lbp = ann_model.fit(data_train_lbp[5000:], y_train[5000:], epochs=20,
                            validation_data=(data_train_lbp[:5000], y_train[:5000]))
Train on 55000 samples, validate on 5000 samples
Epoch 1/20
55000/55000 [==============================] - 3s 48us/sample - loss: 2.0198 - acc: 0.2734 - val_loss: 1.7300 - val_acc: 0.4332
Epoch 2/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.6092 - acc: 0.4574 - val_loss: 1.4884 - val_acc: 0.5198
Epoch 3/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.4474 - acc: 0.5050 - val_loss: 1.3714 - val_acc: 0.5344
Epoch 4/20
55000/55000 [==============================] - 2s 43us/sample - loss: 1.3669 - acc: 0.5229 - val_loss: 1.3096 - val_acc: 0.5574
Epoch 5/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.3229 - acc: 0.5314 - val_loss: 1.2780 - val_acc: 0.5554
Epoch 6/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2974 - acc: 0.5379 - val_loss: 1.2517 - val_acc: 0.5618
Epoch 7/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2801 - acc: 0.5421 - val_loss: 1.2375 - val_acc: 0.5710
Epoch 8/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2665 - acc: 0.5465 - val_loss: 1.2261 - val_acc: 0.5674
Epoch 9/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2549 - acc: 0.5494 - val_loss: 1.2128 - val_acc: 0.5758
Epoch 10/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2444 - acc: 0.5530 - val_loss: 1.2027 - val_acc: 0.5778
Epoch 11/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2352 - acc: 0.5584 - val_loss: 1.1925 - val_acc: 0.5796
Epoch 12/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2258 - acc: 0.5601 - val_loss: 1.1899 - val_acc: 0.5822
Epoch 13/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2177 - acc: 0.5640 - val_loss: 1.1775 - val_acc: 0.5868
Epoch 14/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2103 - acc: 0.5661 - val_loss: 1.1706 - val_acc: 0.5892
Epoch 15/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.2027 - acc: 0.5715 - val_loss: 1.1600 - val_acc: 0.5886
Epoch 16/20
55000/55000 [==============================] - 2s 41us/sample - loss: 1.1970 - acc: 0.5709 - val_loss: 1.1651 - val_acc: 0.5830
Epoch 17/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.1911 - acc: 0.5750 - val_loss: 1.1486 - val_acc: 0.5938
Epoch 18/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.1854 - acc: 0.5749 - val_loss: 1.1421 - val_acc: 0.5992
Epoch 19/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.1806 - acc: 0.5787 - val_loss: 1.1461 - val_acc: 0.5976
Epoch 20/20
55000/55000 [==============================] - 2s 42us/sample - loss: 1.1765 - acc: 0.5805 - val_loss: 1.1346 - val_acc: 0.6020
In [47]:
ann_model.evaluate(data_test_lbp, y_test)
10000/10000 [==============================] - 0s 24us/sample - loss: 1.1267 - acc: 0.5949
Out[47]:
[1.1267493160247803, 0.5949]
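The training history returned by `fit` can also be plotted to compare training and validation accuracy across the 20 epochs. A minimal sketch, assuming the `fit_ann_lbp` object from above; note that in this TensorFlow version the history keys are 'acc'/'val_acc' (as in the log), while newer versions use 'accuracy'/'val_accuracy'.
In [ ]:
# Sketch: training vs. validation accuracy curves from the Keras history
plt.plot(fit_ann_lbp.history['acc'], label='train accuracy')
plt.plot(fit_ann_lbp.history['val_acc'], label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()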