程序代写代做代考 deep learning In [9]:

In [9]:
”’Import data”’

import h5py
import numpy as np
import pandas as pd

”’PLEASE REPLACE THE LOCATION OF THE FOLLOWING 3 INPUT FILES ACCORDINLY ”’

with h5py.File(r”C:\Users\nqtru\Desktop\COMP5329\Assignment 1\Assignment-1-Dataset\train_128.h5″,’r’) as TU:
data = np.copy(TU[‘data’])
with h5py.File(r”C:\Users\nqtru\Desktop\COMP5329\Assignment 1\Assignment-1-Dataset\train_label.h5″,’r’) as TL:
label = np.copy(TL[‘label’])
# Import predicted outputs for test data from Predicted_labels.h5
with h5py.File(r”C:\Users\nqtru\Desktop\COMP5329\Assignment 1\470518197_470490653_308012798\Code\Output\Predicted_labels.h5″,’r’) as TB:
predicted_label = np.copy(TB[‘label’])

”’Standardise the data”’

mu = data.mean()
sigma = data.std()
data = (data – mu) / sigma
In [2]:
”’Define class for activation functions”’

class Activation(object):

def relu (self,x):
return np.maximum(x,0)

def relu_deriv (self,a):
# a = np.maximum(x,0)
a[a<0] = 0 a[a>=0] = 1
return a

def leaky_relu (self,x):
return np.maximum(x,0.1*x)

def leaky_relu_deriv (self,a):
# a = np.maximum(x,0.1*x)
a[a<0] = 0.1 a[a>=0] = 1
return a

def softmax (self,x):
# Normalise the input to prevent overflow problem in np.exp
x_max = x.max()
x_norm = x – x_max
return np.exp(x_norm) / np.sum(np.exp(x_norm), axis=0)

def softmax_deriv (self, a):
#a = np.exp(x) / np.sum(np.exp(x), axis=0)
return a * (1 – a )

def tanh(self, x):
return np.tanh(x)

def tanh_deriv(self, a):
# a = np.tanh(x)
return 1.0 – a**2

def logistic(self, x):
return 1.0 / (1.0 + np.exp(-x))

def logistic_deriv(self, a):
# a = logistic(x)
return a * (1 – a )

def __init__(self,activation=’relu’):
if activation == ‘logistic’:
self.f = self.logistic
self.f_deriv = self.logistic_deriv
elif activation == ‘softmax’:
self.f = self.softmax
self.f_deriv = self.softmax_deriv
elif activation == ‘tanh’:
self.f = self.tanh
self.f_deriv = self.tanh_deriv
elif activation == ‘relu’:
self.f = self.relu
self.f_deriv = self.relu_deriv
elif activation == ‘leaky relu’:
self.f = self.leaky_relu
self.f_deriv = self.leaky_relu_deriv

In [3]:
”’Define class for one hidden layer”’

class HiddenLayer(object):

# To keep the initialised weights and biases stable in across epochs
import random
random.seed(1)

# Initialisation
def __init__(self,n_in, n_out, W=None, b=None, activation=[0,1]):
“””
:type n_in: int
:param n_in: dimensionality of input

:type n_out: int
:param n_out: number of hidden units

:type activation: list of string
:param activation: Non linearity to be applied in the hidden
layer
“””
self.input=None
self.activation=Activation(activation[0]).f # Current layer’s activation function
self.activation_deriv=Activation(activation[1]).f_deriv # Previous layer’s derivative activation function

”’
Initialize W
Weight matrix W is of shape (n_in,n_out)
”’
# Uniformly sampled with a variance of 2/n_in (He, et al, 2015)
if activation[0] == ‘relu’:
self.W = np.random.uniform(
low=-np.sqrt(2. / (n_in)),
high=np.sqrt(2. / (n_in)),
size=(n_in, n_out)
)
# Uniformly sampled with a variance of 6/(n_in+n_out) (Glorot & Bengio, 2010)
else:
self.W = np.random.uniform(
low=-np.sqrt(6. / (n_in + n_out)),
high=np.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)
)
# 4 times larger weights for logistic activation (Glorot & Bengio, 2010)
if activation[0] == ‘logistic’:
self.W *= 4

”’
Initialize b
The bias vector b is of shape (n_out,)
”’
self.b = np.zeros(n_out,)

”’ Intialise gradients of W,b of same shape as W,b”’
self.grad_W = np.zeros(self.W.shape)
self.grad_b = np.zeros(self.b.shape)

”’ Intialise velocities of W,b for Momentum & Adam updates, same shape as W,b”’
self.velocity_W = np.zeros(self.W.shape)
self.velocity_b = np.zeros(self.b.shape)

”’ Intialise squared gradients for Adam update, same shape as W,b ”’
self.sqr_grad_W = np.zeros(self.W.shape)
self.sqr_grad_b = np.zeros(self.b.shape)

”’Forward Propagation”’
def forward(self, input):
”’
:type input: numpy.array
:input: input data/activations from previous layers
”’
#hidden_layer_input= matrix_dot_product(X,wh) + bh
lin_output = np.dot(input, self.W) + self.b
self.output = (
lin_output if self.activation is None
else self.activation(lin_output)
)
self.input=input
return self.output

”’Backward Propagation”’
def backward(self, delta, learning_rate):
# Calculate gradients of W,b
self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
self.grad_b = delta
# return delta_ for next layer
delta_ = delta.dot(self.W.T) * self.activation_deriv(self.input)
return delta_

”’Update parameters with momentum”’
def momentum_update(self, learning_rate=0.001, beta=0.9):
# Update velocities of W,b
self.velocity_W = beta*self.velocity_W + (1-beta)*self.grad_W
self.velocity_b = beta*self.velocity_b + (1-beta)*self.grad_b
# Update W,b
self.W -= learning_rate * self.velocity_W
self.b -= learning_rate * self.velocity_b

”’Update parameters with Adam”’
### Reference: Andrew Ng’s Deep Learning on Coursera
### https://www.coursera.org/specializations/deep-learning
def adam_update(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
”’
:beta1, beta2: values of beta (float) for Adam update
:epsilon: a very small float
”’
# Update velocity of W,b
self.velocity_W = beta1*self.velocity_W + (1-beta1)*self.grad_W
self.velocity_b = beta1*self.velocity_b + (1-beta1)*self.grad_b
# Correct the velocity of W,b
velocity_corrected_W = self.velocity_W / (1-beta1**2+epsilon) # added epsilon to prevent 0 denominator
velocity_corrected_b = self.velocity_b / (1-beta1**2+epsilon) # added epsilon to prevent 0 denominator
# Calculate squared gradients of W,b
self.sqr_grad_W = beta2 * self.sqr_grad_W + (1-beta2) * np.power(self.grad_W,2)
self.sqr_grad_b = beta2 * self.sqr_grad_b + (1-beta2) * np.power(self.grad_b,2)
# Correct the squared gradients of W,b
sqr_grad_corrected_W = self.sqr_grad_W / (1-beta2**2+epsilon) # added epsilon to prevent 0 denominator
sqr_grad_corrected_b = self.sqr_grad_b / (1-beta2**2+epsilon) # added epsilon to prevent 0 denominator
# Update W,b
self.W -= learning_rate * (velocity_corrected_W/(np.sqrt(sqr_grad_corrected_W)+epsilon)) # added epsilon to prevent 0 denominator
self.b -= learning_rate * (velocity_corrected_b/(np.sqrt(sqr_grad_corrected_b)+epsilon)) # added epsilon to prevent 0 denominator

In [4]:
”’Define class for the neural network”’

class MLP:

# Initialisation
def __init__(self, layers, activations):
“””
:param layers: A list containing the number of units in each layer.
Should be at least two values
:param activations: The list of activation functions to be used. Can be
“logistic”, “tanh”, “relu” , “leaky relu” or “softmax
“””
### initialize layers
self.layers=[]
self.activations = activations # activation functions of each layer, index corresponds layer number
self.output_count = layers[-1] # number of output in the output layer
self.params=[]
self.dropout_fraction = 1 # 1 means no dropout, otherwise would be between 0 and 1

# Create hidden layers
for i in range(len(layers)-1):
# Create a new layer with params: number of inputs, outputs, activation of itself and of previous layer
self.layers.append(HiddenLayer(layers[i],layers[i+1],activation=[activations[i],activations[i-1]]))

”’Forward propagation”’
def forward(self,input,predict):
“””
:type input: numpy.array
:input: input data/activations from previous layers
:type input: boolean
:predict: True means forward step in prediction,
False means forward step in training
“””
for i in range(len(self.layers)):
output = self.layers[i].forward(input)
# Apply dropout between hidden layers only
if i > 0 and i < len(self.layers)-1 and not predict: # Dropout in training steps output = self.dropout(output) elif predict: # Dropout in predict step output = output * self.dropout_fraction input=output return output '''Dropout module''' ### Reference: Andrew Ng's Deep Learning on Coursera ### https://www.coursera.org/specializations/deep-learning def dropout(self,input): # Create a random dropout array with given dropout percentage dropout_array = np.random.binomial(1, self.dropout_fraction, size=input.shape[0]) input = input * dropout_array return input '''Mean Squared Error''' def criterion_MSE(self,y,y_hat): # Convert y_hat into an array of size 10 consisting of 0,1 (probability of each class) y_true = np.zeros(y_hat.shape) y_true[y] = 1 activation_deriv=Activation(self.activations[-1]).f_deriv # MSE error = y_true-y_hat loss=error**2 # write down the delta in the last layer delta=-error*activation_deriv(y_hat) # return loss and delta return loss,delta '''Cross Entropy''' def cross_entropy_loss(self,y,y_hat): # Convert y_hat into an array of size 10 consisting of 0,1 (probability of each class) y_true = np.zeros(y_hat.shape) y_true[y] = 1 # Cross entropy loss loss = -np.log(y_hat[y]+1e-15) # adding 1e-15 to prevent log(0) # write down the delta in the last layer delta = y_hat-y_true return loss,delta '''Backward propagation''' def backward(self,delta,learning_rate=0.001): for layer in reversed(self.layers): delta = layer.backward(delta,learning_rate) '''Gradient Descent parameters W,b updates''' def update(self,learning_rate=0.001,beta1=0.9,beta2=0.999,optimizer=None): """ :learning_rate: learning rate (float) :beta1, beta2: beta values (float) for Momentum & Adam updates :optimizer: Gradient Descent optimizer, can be "momentum", "adam" or None """ for layer in self.layers: # Momentum if optimizer == 'momentum': layer.momentum_update(learning_rate,beta1) # Adam elif optimizer == 'adam': layer.adam_update(learning_rate,beta1,beta2,1e-8) # Stochastic only elif optimizer == None: layer.W -= learning_rate * layer.grad_W layer.b -= learning_rate * layer.grad_b '''Fit training data''' def fit(self,X_train,y_train,X_test,y_test,learning_rate=0.001, beta1=0.9, beta2=0.999, epochs=100, optimizer=None, dropout=1): """ Online learning. :param X_train: Train data or features :param y_train: Train targets :param X_test: Test data or features :param y_test: Test targets :param learning_rate: parameters defining the speed of learning :param beta1, beta2: Beta values for Momentum / Adam updates :param epochs: number of times the dataset is presented to the network for learning :optimizer: Gradient Descent optimizer, can be "momentum", "adam" or None :dropout: Dropout fraction between 0 to 1, 1 means no dropout """ # Store dropout fraction self.dropout_fraction = dropout # To store test accuracy of each epoch accuracies = [] # To store max test accuracy, min loss and their epoch max_accuracy = 0 max_epoch = -1 min_loss = 1e20 min_epoch = -1 # Convert inputs into arrays X=np.array(X_train) y=np.array(y_train) # Array to store the loss of each epoch to_return = np.zeros(epochs) '''Train data for each epoch''' for k in range(epochs): # Initialise loss for each training sample loss=np.zeros(X.shape[0]) # For each training sample for it in range(X.shape[0]): i=np.random.randint(X.shape[0]) # forward pass y_hat = self.forward(X[i],False) # backward pass loss[it],delta=self.cross_entropy_loss(y[i],y_hat) self.backward(delta,learning_rate) # update parameters self.update(learning_rate,beta1,beta2,optimizer) # Calculate the mean loss over all training samples to_return[k] = np.mean(loss) '''Calculate and print accuracy, loss in each epoch''' # Predict on test data y_pred_test = self.predict(X_test) y_pred_test = np.argmax(y_pred_test,axis=1) # Predict on train data y_pred_train = self.predict(X_train) y_pred_train = np.argmax(y_pred_train,axis=1) # Print test/train accuracies and loss print('Epoch: {}\t| Test Accuracy: {:0.2f}% | Train Accuracy: {:0.2f}% | Loss: {:0.4f}'.format(k+1,getAccuracy(y_test, y_pred_test)*100,getAccuracy(y_train, y_pred_train)*100,to_return[k])) # Keep test accuracy accuracies.append(getAccuracy(y_test, y_pred_test)*100) # Obtain max accuracy and min loss values if getAccuracy(y_test, y_pred_test)*100 > max_accuracy:
max_accuracy = getAccuracy(y_test, y_pred_test)*100
max_epoch = k+1
if to_return[k] < min_loss: min_loss = to_return[k] min_epoch = k+1 # Calculate and print mean accuracy, loss over all epochs print('Mean Accuracy: {:0.2f}%'.format(np.mean(accuracies))) print('Maximum Accuracy: {:0.2f}% reached at epoch {}'.format(max_accuracy,max_epoch)) print('Mean Loss: {:0.4f}'.format(np.mean(to_return))) print('Minimum Loss: {:0.4f} reached at epoch {}'.format(min_loss,min_epoch)) # Return loss values of all epochs return to_return '''Predict output for test data''' def predict(self, x): x = np.array(x) output = np.zeros((x.shape[0],self.output_count)) for i in np.arange(x.shape[0]): output[i] = nn.forward(x[i,:],True) return output In [5]: '''Calculate accuracy between predict and true outputs''' def getAccuracy(y_true, y_pred): correct = np.sum(y_true == y_pred) return (correct/float(len(y_true))) In [97]: %%time '''Define training and test data of ratio 9:1''' # Test x_train = data[0:54000,:] y_train = label[0:54000] # Train x_test = data[54000:60000,:] y_test = label[54000:60000] '''Initialise the proposed neural network''' nn = MLP([128,100,100,10],['tanh','relu','softmax']) '''Fit data to train the model''' loss = nn.fit(x_train, y_train, x_test, y_test, learning_rate=0.001, beta1=0.9, beta2=0.999, epochs=50, optimizer='momentum', dropout=1) '''Plot loss''' import matplotlib.pyplot as pl from ipywidgets import interact, widgets from matplotlib import animation pl.figure(figsize=(15,4)) pl.plot(loss) pl.grid() Epoch: 1 | Test Accuracy: 84.83% | Train Accuracy: 85.06% | Loss: 0.5350 Epoch: 2 | Test Accuracy: 85.80% | Train Accuracy: 86.53% | Loss: 0.3970 Epoch: 3 | Test Accuracy: 86.53% | Train Accuracy: 87.31% | Loss: 0.3616 Epoch: 4 | Test Accuracy: 87.33% | Train Accuracy: 88.24% | Loss: 0.3446 Epoch: 5 | Test Accuracy: 87.42% | Train Accuracy: 88.64% | Loss: 0.3254 Epoch: 6 | Test Accuracy: 87.48% | Train Accuracy: 88.81% | Loss: 0.3060 Epoch: 7 | Test Accuracy: 87.10% | Train Accuracy: 88.83% | Loss: 0.2998 Epoch: 8 | Test Accuracy: 87.97% | Train Accuracy: 89.67% | Loss: 0.2886 Epoch: 9 | Test Accuracy: 88.02% | Train Accuracy: 89.86% | Loss: 0.2825 Epoch: 10 | Test Accuracy: 88.07% | Train Accuracy: 90.19% | Loss: 0.2751 Epoch: 11 | Test Accuracy: 88.10% | Train Accuracy: 90.17% | Loss: 0.2662 Epoch: 12 | Test Accuracy: 88.28% | Train Accuracy: 90.76% | Loss: 0.2574 Epoch: 13 | Test Accuracy: 88.12% | Train Accuracy: 90.71% | Loss: 0.2521 Epoch: 14 | Test Accuracy: 88.52% | Train Accuracy: 90.91% | Loss: 0.2464 Epoch: 15 | Test Accuracy: 88.05% | Train Accuracy: 91.39% | Loss: 0.2416 Epoch: 16 | Test Accuracy: 88.40% | Train Accuracy: 91.12% | Loss: 0.2401 Epoch: 17 | Test Accuracy: 88.28% | Train Accuracy: 91.45% | Loss: 0.2317 Epoch: 18 | Test Accuracy: 88.50% | Train Accuracy: 91.73% | Loss: 0.2277 Epoch: 19 | Test Accuracy: 88.30% | Train Accuracy: 91.91% | Loss: 0.2211 Epoch: 20 | Test Accuracy: 88.02% | Train Accuracy: 91.99% | Loss: 0.2215 Epoch: 21 | Test Accuracy: 88.47% | Train Accuracy: 91.65% | Loss: 0.2132 Epoch: 22 | Test Accuracy: 88.10% | Train Accuracy: 91.66% | Loss: 0.2149 Epoch: 23 | Test Accuracy: 88.27% | Train Accuracy: 92.24% | Loss: 0.2060 Epoch: 24 | Test Accuracy: 88.00% | Train Accuracy: 92.37% | Loss: 0.2067 Epoch: 25 | Test Accuracy: 88.42% | Train Accuracy: 92.69% | Loss: 0.1994 Epoch: 26 | Test Accuracy: 88.18% | Train Accuracy: 92.65% | Loss: 0.2006 Epoch: 27 | Test Accuracy: 88.42% | Train Accuracy: 92.87% | Loss: 0.1991 Epoch: 28 | Test Accuracy: 88.55% | Train Accuracy: 92.63% | Loss: 0.1907 Epoch: 29 | Test Accuracy: 88.33% | Train Accuracy: 92.62% | Loss: 0.1873 Epoch: 30 | Test Accuracy: 88.30% | Train Accuracy: 92.93% | Loss: 0.1939 Epoch: 31 | Test Accuracy: 88.48% | Train Accuracy: 93.24% | Loss: 0.1840 Epoch: 32 | Test Accuracy: 88.27% | Train Accuracy: 93.44% | Loss: 0.1846 Epoch: 33 | Test Accuracy: 88.58% | Train Accuracy: 93.47% | Loss: 0.1766 Epoch: 34 | Test Accuracy: 87.98% | Train Accuracy: 93.28% | Loss: 0.1775 Epoch: 35 | Test Accuracy: 88.25% | Train Accuracy: 93.89% | Loss: 0.1757 Epoch: 36 | Test Accuracy: 88.07% | Train Accuracy: 93.63% | Loss: 0.1743 Epoch: 37 | Test Accuracy: 88.33% | Train Accuracy: 93.57% | Loss: 0.1698 Epoch: 38 | Test Accuracy: 88.30% | Train Accuracy: 93.51% | Loss: 0.1690 Epoch: 39 | Test Accuracy: 88.22% | Train Accuracy: 94.08% | Loss: 0.1665 Epoch: 40 | Test Accuracy: 88.32% | Train Accuracy: 94.36% | Loss: 0.1652 Epoch: 41 | Test Accuracy: 88.30% | Train Accuracy: 94.11% | Loss: 0.1613 Epoch: 42 | Test Accuracy: 88.53% | Train Accuracy: 94.01% | Loss: 0.1625 Epoch: 43 | Test Accuracy: 88.17% | Train Accuracy: 94.55% | Loss: 0.1596 Epoch: 44 | Test Accuracy: 88.38% | Train Accuracy: 94.42% | Loss: 0.1560 Epoch: 45 | Test Accuracy: 88.32% | Train Accuracy: 94.61% | Loss: 0.1574 Epoch: 46 | Test Accuracy: 88.10% | Train Accuracy: 94.57% | Loss: 0.1570 Epoch: 47 | Test Accuracy: 88.20% | Train Accuracy: 94.46% | Loss: 0.1522 Epoch: 48 | Test Accuracy: 88.20% | Train Accuracy: 94.82% | Loss: 0.1559 Epoch: 49 | Test Accuracy: 88.07% | Train Accuracy: 94.89% | Loss: 0.1445 Epoch: 50 | Test Accuracy: 88.13% | Train Accuracy: 94.87% | Loss: 0.1451 Mean Accuracy: 88.03% Maximum Accuracy: 88.58% reached at epoch 33 Mean Loss: 0.2226 Minimum Loss: 0.1445 reached at epoch 49 Wall time: 28min 4s  In [7]: '''Check predicted outputs for test data from Predicted_labels.h5''' # Print its type, shape, first 100 predicted labels print(type(predicted_label)) print(predicted_label.shape) print(predicted_label[0:100])
(10000,)
[9 2 1 1 0 1 4 6 5 7 4 5 5 3 4 1 2 2 8 0 2 5 7 5 1 4 4 0 9 4 8 8 3 3 8 0 7
5 7 9 0 1 6 5 4 9 2 1 2 6 4 4 5 0 2 2 8 4 8 0 7 7 8 5 1 1 0 4 7 8 7 0 6 6
2 1 1 2 8 4 1 8 5 9 5 0 1 2 0 0 5 1 6 7 1 8 0 1 4 2]