Deep Learning Assignemnt 1
1.Preparation¶
1.1Import required packages¶
In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
from skimage import feature
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from tensorflow import keras
1.2Mount the Google Drive to access the Dataset¶
In [0]:
from google.colab import drive
drive.mount(‘/content/gdrive’)
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(“/content/gdrive”, force_remount=True).
1.3Dataset preparation¶
In [0]:
def load_mnist(path, kind=’train’):
import os
import gzip
import numpy as np
“””Load MNIST data from `path`”””
labels_path = os.path.join(path,
‘%s-labels-idx1-ubyte.gz’
% kind)
images_path = os.path.join(path,
‘%s-images-idx3-ubyte.gz’
% kind)
with gzip.open(labels_path, ‘rb’) as lbpath:
labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
offset=8)
with gzip.open(images_path, ‘rb’) as imgpath:
images = np.frombuffer(imgpath.read(), dtype=np.uint8,
offset=16).reshape(len(labels), 784)
return images, labels
In [0]:
cd /content/gdrive/My Drive/42028-DL-CNN/assignment1/data/fashion
/content/gdrive/My Drive/42028-DL-CNN/assignment1/data/fashion
In [0]:
ls
t10k-images-idx3-ubyte.gz train-images-idx3-ubyte.gz
t10k-labels-idx1-ubyte.gz train-labels-idx1-ubyte.gz
1.3.1Split the dataset into train and test¶
In [0]:
X_train, y_train = load_mnist(‘/content/gdrive/My Drive/42028-DL-CNN’, kind=’train’)
X_test, y_test = load_mnist(‘/content/gdrive/My Drive/42028-DL-CNN’, kind=’t10k’)
labelNames = [“0”, “1”, “2”, “3”, “4”,”5″, “6”, “7”, “8”, “9”]
In [0]:
print(np.shape(X_train))
print(np.shape(X_test))
(60000, 784)
(10000, 784)
In [0]:
print(np.shape(train_images_raw))
(55000, 28, 28)
1.3.3 Reshaping the feature vector¶
In [0]:
X_train=X_train.reshape(-1,28,28)
X_test=X_test.reshape(-1,28,28)
print(“Train dataset after reshaping:{}”.format(np.shape(X_train)))
print(“Test dataset after reshaping :{}”.format(np.shape(X_test)))
Train dataset after reshaping:(60000, 28, 28)
Test dataset after reshaping :(10000, 28, 28)
1.3.4Visualization of Dataset¶
In [0]:
fig=plt.figure()
for i in range(100):
ax4=fig.add_subplot(10,10,i+1)
ax4.axis(‘off’)
ax4.imshow(X_train[i])
img_index=3
fig=plt.figure()
ax1=fig.add_subplot(1,10,1)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=5
ax1=fig.add_subplot(1,10,2)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=7
ax1=fig.add_subplot(1,10,3)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=9
ax1=fig.add_subplot(1,10,4)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=0
ax1=fig.add_subplot(1,10,5)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=18
ax1=fig.add_subplot(1,10,6)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=15
ax1=fig.add_subplot(1,10,7)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=17
ax1=fig.add_subplot(1,10,8)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=4
ax1=fig.add_subplot(1,10,9)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
img_index=1
ax1=fig.add_subplot(1,10,10)
ax1.axis(‘off’)
ax1.imshow(X_train[img_index])
print(labelNames[y_train[img_index]],end=”)
1234567890


Histogram-of-Oriented Gradient (HOG)¶
HOG feature extraction for the whole training dataset
In [0]:
data_train_hog = []
labels_train_hog = []
for img_index in range(len(X_train)):
image = X_train[img_index]
H = feature.hog(image, orientations=9, pixels_per_cell=(10, 10), cells_per_block=(2, 2), block_norm=”L2-Hys”, transform_sqrt=True)
data_train_hog.append(H)
labels_train_hog.append(y_train[img_index])
print(np.shape(data_train_hog))
print(np.shape(labels_train_hog))
(60000, 36)
(60000,)
Hog feature extraction for the whole testing dataset
In [0]:
data_test_hog=[]
for img_i in range(len(X_test)):
img= X_test[img_i]
feature_hog = feature.hog(img,orientations=9, pixels_per_cell=(10,10), cells_per_block=(2,2),transform_sqrt=True,block_norm=”L2-Hys”)
data_test_hog.append(feature_hog)
print(np.shape(data_test_hog))
print(np.shape(y_test))
(10000, 36)
(10000,)
In [0]:
img_index
Out[0]:
59999
2.2.1HOG with SVM¶
In [0]:
model = svm.SVC(kernel=’rbf’,C=100.0, random_state=42)
model.fit(data_train_hog, labels_train_hog)
print(“Train set Accuracy: {:.2f}”.format(model.score(data_train_hog,labels_train_hog)))
/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:196: FutureWarning: The default value of gamma will change from ‘auto’ to ‘scale’ in version 0.22 to account better for unscaled features. Set gamma explicitly to ‘auto’ or ‘scale’ to avoid this warning.
“avoid this warning.”, FutureWarning)
Train set Accuracy: 0.88
Evaluation of trained model on test dataset
In [0]:
predict_test = []
labels_test = []
data_test=[]
for img_ind in range(len(X_test)):
img=X_test[img_ind]
H = feature.hog(img, orientations=9, pixels_per_cell=(10, 10),
cells_per_block=(2, 2), transform_sqrt=True, block_norm=”L2-Hys”)
pred = model.predict(H.reshape(1, -1))[0]
predict_test.append(pred)
data_test.append(H)
labels_test.append(y_test[img_ind])
print(np.shape(predict_test))
print(np.shape(labels_test))
Extracting features from test dataset…
(10000,)
(10000,)
Testing Result
In [0]:
accuracy = metrics.accuracy_score(y_test, predict_test)
print(“Accuracy on test dataset:”,accuracy)
Accuracy on test dataset: 0.8859
2.2.2HOG with KNN¶
In [0]:
model_hog_knn = KNeighborsClassifier(n_neighbors=10,weights=’distance’)
model_hog_knn.fit(data_train_hog,labels_train_hog)
Out[0]:
KNeighborsClassifier(algorithm=’auto’, leaf_size=30, metric=’minkowski’,
metric_params=None, n_jobs=None, n_neighbors=10, p=2,
weights=’distance’)
Evaluation of trained model on test dataset
In [0]:
predictions_hog_knn = model_hog_knn.predict(data_test_hog)
In [0]:
accuracy = metrics.accuracy_score(y_test, predictions_hog_knn)
print(accuracy)
0.8811
2.2.3HOG with ANN¶
In [0]:
model_hog_ann = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=[36,]),
tf.keras.layers.Dense(128, activation=tf.nn.relu),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)])
model_hog_ann.compile(optimizer = tf.train.AdamOptimizer(),
loss = ‘sparse_categorical_crossentropy’,
metrics=[‘accuracy’])
In [0]:
valid_images_hog= np.array(data_train_hog)[:5000]/255.0
valid_labels_hog = np.array(labels_train_hog)[:5000]
train_images_hog = np.array(data_train_hog)[5000:]/255.0
train_labels_hog=np.array(labels_train_hog)[5000:]
test_images_hog = np.array(data_test_hog)/255.0
In [0]:
fit_hog_ann=model_hog_ann.fit(train_images_hog, train_labels_hog, epochs=20,validation_data=(valid_images_hog, valid_labels_hog))
Train on 55000 samples, validate on 5000 samples
Epoch 1/20
55000/55000 [==============================] – 3s 50us/sample – loss: 2.2805 – acc: 0.1362 – val_loss: 2.2255 – val_acc: 0.2238
Epoch 2/20
55000/55000 [==============================] – 2s 43us/sample – loss: 2.1027 – acc: 0.2967 – val_loss: 1.9690 – val_acc: 0.3626
Epoch 3/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.8552 – acc: 0.4165 – val_loss: 1.7473 – val_acc: 0.4522
Epoch 4/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.6707 – acc: 0.4705 – val_loss: 1.5860 – val_acc: 0.5032
Epoch 5/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.5367 – acc: 0.5113 – val_loss: 1.4688 – val_acc: 0.5364
Epoch 6/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.4298 – acc: 0.5496 – val_loss: 1.3660 – val_acc: 0.5972
Epoch 7/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.3370 – acc: 0.5864 – val_loss: 1.2804 – val_acc: 0.6042
Epoch 8/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.2529 – acc: 0.6121 – val_loss: 1.1907 – val_acc: 0.6560
Epoch 9/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.1771 – acc: 0.6369 – val_loss: 1.1218 – val_acc: 0.6716
Epoch 10/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.1095 – acc: 0.6576 – val_loss: 1.0557 – val_acc: 0.6846
Epoch 11/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.0506 – acc: 0.6770 – val_loss: 0.9946 – val_acc: 0.7058
Epoch 12/20
55000/55000 [==============================] – 2s 43us/sample – loss: 1.0001 – acc: 0.6911 – val_loss: 0.9482 – val_acc: 0.7140
Epoch 13/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.9559 – acc: 0.7059 – val_loss: 0.9062 – val_acc: 0.7260
Epoch 14/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.9178 – acc: 0.7166 – val_loss: 0.8683 – val_acc: 0.7412
Epoch 15/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.8852 – acc: 0.7257 – val_loss: 0.8345 – val_acc: 0.7482
Epoch 16/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.8564 – acc: 0.7332 – val_loss: 0.8059 – val_acc: 0.7588
Epoch 17/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.8304 – acc: 0.7402 – val_loss: 0.7826 – val_acc: 0.7626
Epoch 18/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.8080 – acc: 0.7461 – val_loss: 0.7678 – val_acc: 0.7718
Epoch 19/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.7878 – acc: 0.7524 – val_loss: 0.7428 – val_acc: 0.7688
Epoch 20/20
55000/55000 [==============================] – 2s 43us/sample – loss: 0.7700 – acc: 0.7567 – val_loss: 0.7341 – val_acc: 0.7624
Summary of the model
In [0]:
model_hog_ann.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten_5 (Flatten) (None, 36) 0
_________________________________________________________________
dense_10 (Dense) (None, 128) 4736
_________________________________________________________________
dense_11 (Dense) (None, 10) 1290
=================================================================
Total params: 6,026
Trainable params: 6,026
Non-trainable params: 0
_________________________________________________________________
Plot the learning curve
In [0]:
pd.DataFrame(fit_hog_ann.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
plt.plot(fit_hog_ann.history[‘loss’])
plt.ylabel(‘cost’)
plt.xlabel(‘Epochs’)
plt.title(“Cost/Loss Curve”)
plt.show()


Test
In [0]:
model_hog_ann.evaluate(test_images_hog, y_test)
10000/10000 [==============================] – 0s 26us/sample – loss: 0.7228 – acc: 0.7704
Out[0]:
[0.7228373427391053, 0.7704]