IN2064 Practical Session 9 — Deep Learning 2
In this programming exercise your task is to use TensorFlow to classify reviews in the IMDB dataset. The possible labels are positive and negative, i.e. the task is binary classification.
- Fill in the required parts of the skeleton below.
- Train feed forward neural networks of different sizes (specified below) on the IMDB dataset
- Use TensorBoard to visualize the computation graph.
- Plot the training and validation losses for the different neural networks. What are the influences of the neural network capacity, L2 regularization and dropout?
In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
First download the data from https://syncandshare.lrz.de/dl/fiWqeMXAwSqt4Nr3u6b1Jr9S/imdb_data.npz
In [2]:
# Load the pre-processed IMDB splits from the downloaded archive.
# np.load on an .npz file returns a lazily-read NpzFile that keeps the archive
# open; reading the arrays inside a `with` block releases the file handle as
# soon as all four arrays have been materialised.
with np.load("imdb_data.npz") as loader:
    train_data = loader['train_data']
    train_labels = loader['train_labels']
    validation_data = loader['validation_data']
    validation_labels = loader['validation_labels']
In [3]:
# Mini-batch size. NOTE(review): the train() calls in the cells below do not
# pass this variable; they rely on train()'s own default of 512.
batch_size = 512
In [4]:
def batch_data(num_data, batch_size):
    """Yield shuffled index batches until one epoch is over.

    The indices ``0 .. num_data - 1`` are shuffled once and then handed out
    in consecutive, non-overlapping slices of exactly ``batch_size`` entries.
    A trailing remainder with fewer than ``batch_size`` indices is dropped so
    that every yielded batch has the same shape.

    Parameters
    ----------
    num_data: int
        The number of samples in the dataset.
    batch_size: int
        The batch size used during training. Must be positive.

    Yields
    ------
    batch_ixs: np.array of ints with shape [batch_size,]
        Arrays of indices of size of the batch size until the epoch is over.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive (raised on first iteration).
    """
    if batch_size < 1:
        # A non-positive step would never advance through the data.
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    data_ixs = np.random.permutation(np.arange(num_data))
    # The stop value is inclusive of the last start position that still
    # leaves room for a full batch; the previous strict `<` comparison
    # skipped the final full batch whenever it ended exactly at num_data.
    for start in range(0, num_data - batch_size + 1, batch_size):
        yield data_ixs[start:start + batch_size]
class FeedForwardNet:
    """
    Simple feed forward neural network class for binary classification.

    The network is built with the TensorFlow 1.x graph API: ``build`` creates
    the placeholders, variables, loss and optimizer ops, and ``train`` runs
    them in a ``tf.Session`` that stays available as ``self.session``.
    """

    def __init__(self, hidden_sizes, name, l2_reg=0.0):
        """ FeedForwardNet constructor.

        Parameters
        ----------
        hidden_sizes: list of ints
            The sizes of the hidden layers of the network.
        name: str
            The name of the network (used for a VariableScope)
        l2_reg: float
            The strength of L2 regularization (0 means no regularization)
        """
        self.hidden_sizes = hidden_sizes
        self.name = name
        # Defaults to 0.0 so that evaluation runs without dropout unless a
        # rate is fed explicitly (as done for the optimisation steps).
        self.dropout = tf.placeholder_with_default(0.0, shape=(), name="dropout")
        self.l2_reg = l2_reg
        self.weights = []
        self.biases = []
        # Populated by train().
        self.session = None
        self.hist = None

    def build(self, data_dim):
        """ Construct the model.

        Creates one ReLU layer (followed by dropout) per entry of
        ``hidden_sizes`` and a final linear layer with a single output logit,
        then defines the loss, accuracy and optimisation ops.

        Parameters
        ----------
        data_dim: int
            The dimensions of the data samples.

        Returns
        -------
        None
        """
        self.X = tf.placeholder(shape=[None, data_dim], dtype=tf.float32, name="data")  # [NxD]
        self.Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name="labels")  # [Nx1]

        with tf.variable_scope(self.name):
            hidden = self.X
            in_dim = data_dim
            for ix, hidden_size in enumerate(self.hidden_sizes):
                W = tf.get_variable(f"W_{ix}", shape=[in_dim, hidden_size],
                                    initializer=tf.glorot_uniform_initializer())
                b = tf.get_variable(f"b_{ix}", shape=[hidden_size],
                                    initializer=tf.zeros_initializer())
                self.weights.append(W)
                self.biases.append(b)

                hidden = tf.nn.relu(tf.matmul(hidden, W) + b)
                # `keep_prob` is accepted by every TF 1.x release; the
                # placeholder holds the drop rate, hence the `1 - rate`.
                hidden = tf.nn.dropout(hidden, keep_prob=1.0 - self.dropout)
                in_dim = hidden_size

            # Output layer: a single logit per sample.
            W_out = tf.get_variable("W_out", shape=[in_dim, 1],
                                    initializer=tf.glorot_uniform_initializer())
            b_out = tf.get_variable("b_out", shape=[1],
                                    initializer=tf.zeros_initializer())
            self.weights.append(W_out)
            self.biases.append(b_out)

            self.logits = tf.matmul(hidden, W_out) + b_out  # [Nx1]

            # Sum of squared weight entries (squared L2 norm); biases are
            # not regularised.
            self.l2_norm = tf.add_n([tf.reduce_sum(tf.square(w)) for w in self.weights])

            self.cross_entropy_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.Y, logits=self.logits))

            # A logit > 0 corresponds to a predicted probability > 0.5.
            predictions = tf.cast(tf.greater(self.logits, 0.0), tf.float32)
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(predictions, self.Y), tf.float32))

            self.loss = self.cross_entropy_loss + self.l2_reg * self.l2_norm

            self.optimizer = tf.train.AdamOptimizer()
            self.opt_op = self.optimizer.minimize(self.loss, var_list=[*self.weights, *self.biases])

    def number_trainable_parameters(self):
        """ Compute number of trainable parameters in the model.

        Returns
        -------
        num_params: int
            The number of trainable parameters (entries of all weight
            matrices plus entries of all bias vectors).
        """
        num_params = np.sum([np.prod(x.shape) for x in self.weights])
        num_params += np.sum([np.prod(x.shape) for x in self.biases])
        return int(num_params)

    def train(self, train_data, train_labels, val_data, val_labels, epochs=20, dropout=0.0, batch_size=512):
        """ Train the feed forward neural network.

        Metrics are evaluated once before training and once after every
        epoch, so each history array has ``epochs + 1`` entries. The recorded
        losses are the plain cross-entropy (without the L2 penalty) so that
        differently regularised networks can be compared; the weight norm is
        tracked separately.

        Parameters
        ----------
        train_data: np.array, dtype float32, shape [N, D]
            The training data. N corresponds to the number of training samples, D to the dimensionality of the data samples.
        train_labels: np.array, shape [N, 1]
            The labels of the training data.
        val_data: np.array, dtype float32, shape [N_val, D]
            The validation data. N_val corresponds to the number of validation samples, D to the dimensionality of the data samples.
        val_labels: np.array, shape [N_val, 1]
            The labels of the validation data.
        epochs: int
            The number of epochs to train for.
        dropout: float
            The dropout rate used during training. 0 corresponds to no dropout.
        batch_size: int
            The batch size used for training.

        Returns
        -------
        None
            The results are stored in ``self.hist`` (dict of np.arrays with
            keys 'train_loss', 'train_accuracy', 'val_loss', 'val_accuracy'
            and 'weight_norms'); the session is kept in ``self.session``.
        """
        train_losses = []
        train_accs = []
        val_losses = []
        val_accs = []
        weight_norms = []

        # Evaluation feeds leave the dropout placeholder at its default of 0.
        train_feed = {self.X: train_data, self.Y: train_labels}
        val_feed = {self.X: val_data, self.Y: val_labels}
        metrics = [self.cross_entropy_loss, self.accuracy]

        # Release the session of a previous train() call instead of leaking it.
        if getattr(self, "session", None) is not None:
            self.session.close()
        self.session = tf.Session()
        session = self.session

        def record_metrics():
            """Evaluate both splits and append the results to the history."""
            tr_loss, tr_acc = session.run(metrics, feed_dict=train_feed)
            val_loss, val_acc = session.run(metrics, feed_dict=val_feed)
            train_losses.append(tr_loss)
            train_accs.append(tr_acc)
            val_losses.append(val_loss)
            val_accs.append(val_acc)
            weight_norms.append(session.run(self.l2_norm))

        with session.as_default():
            session.run(tf.global_variables_initializer())

            # Baseline metrics of the freshly initialised network.
            record_metrics()

            for epoch in range(epochs):
                print(f"Epoch {epoch+1}/{epochs}")
                for batch_ixs in batch_data(len(train_data), batch_size):
                    _ = session.run(self.opt_op,
                                    feed_dict={self.X: train_data[batch_ixs],
                                               self.Y: train_labels[batch_ixs],
                                               self.dropout: dropout})
                record_metrics()

        self.hist = {'train_loss': np.array(train_losses),
                     'train_accuracy': np.array(train_accs),
                     'val_loss': np.array(val_losses),
                     'val_accuracy': np.array(val_accs),
                     'weight_norms': np.array(weight_norms)}
In [5]:
# Smallest capacity: two hidden layers with 4 units each, no regularization.
NN_small = FeedForwardNet(hidden_sizes=[4, 4], name="small")
NN_small.build(data_dim=train_data.shape[1])
print(f"{NN_small.name} neural network has {NN_small.number_trainable_parameters()} trainable parameters.")
NN_small.train(train_data, train_labels,
               val_data=validation_data, val_labels=validation_labels)
small neural network has 4029 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [ ]:
# Export the computation graph of the small network to the 'logs' directory
# so it can be inspected with TensorBoard (`tensorboard --logdir logs`).
summary_writer = tf.summary.FileWriter('logs', graph=NN_small.session.graph)
# FileWriter buffers events and writes them asynchronously; flush explicitly
# so the graph definition is on disk before TensorBoard is started.
summary_writer.flush()
In [6]:
# Medium capacity: two hidden layers with 16 units each, no regularization.
NN_medium = FeedForwardNet(hidden_sizes=[16, 16], name="medium")
NN_medium.build(data_dim=train_data.shape[1])
print(f"{NN_medium.name} neural network has {NN_medium.number_trainable_parameters()} trainable parameters.")
NN_medium.train(train_data, train_labels,
                val_data=validation_data, val_labels=validation_labels)
medium neural network has 16305 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [7]:
# Large capacity: two hidden layers with 512 units each, no regularization.
NN_large = FeedForwardNet(hidden_sizes=[512, 512], name="large")
NN_large.build(data_dim=train_data.shape[1])
print(f"{NN_large.name} neural network has {NN_large.number_trainable_parameters()} trainable parameters.")
NN_large.train(train_data, train_labels,
               val_data=validation_data, val_labels=validation_labels)
large neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [8]:
# Same architecture as the large network, with an L2 penalty of strength 1e-2.
NN_large_reg = FeedForwardNet(hidden_sizes=[512, 512], name="large_reg", l2_reg=1e-2)
NN_large_reg.build(data_dim=train_data.shape[1])
print(f"{NN_large_reg.name} neural network has {NN_large_reg.number_trainable_parameters()} trainable parameters.")
NN_large_reg.train(train_data, train_labels,
                   val_data=validation_data, val_labels=validation_labels)
large_reg neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [11]:
# Same architecture as the large network, trained with a dropout rate of 0.5.
NN_large_dropout = FeedForwardNet(hidden_sizes=[512, 512], name="large_dropout")
NN_large_dropout.build(data_dim=train_data.shape[1])
print(f"{NN_large_dropout.name} neural network has {NN_large_dropout.number_trainable_parameters()} trainable parameters.")
NN_large_dropout.train(train_data, train_labels,
                       val_data=validation_data, val_labels=validation_labels,
                       dropout=0.5)
large_dropout2 neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [ ]:
# Loss curves of all five networks: solid line = training, dashed line =
# validation, one colour per network.
fig = plt.figure(figsize=(10, 6))
loss_curves = [
    (NN_small, "small", "darkgreen"),
    (NN_medium, "medium", "royalblue"),
    (NN_large, "large", "darkred"),
    (NN_large_reg, "large w/ regularization", "orange"),
    (NN_large_dropout, "large w/ dropout", "purple"),
]
for net, tag, colour in loss_curves:
    plt.plot(net.hist['train_loss'], label=f"Training ({tag})", c=colour)
    plt.plot(net.hist['val_loss'], label=f"Validation ({tag})", c=colour, linestyle="--")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
In [ ]:
# Accuracy curves of all five networks: solid line = training, dashed line =
# validation, one colour per network.
fig = plt.figure(figsize=(10, 6))
accuracy_curves = [
    (NN_small, "small", "darkgreen"),
    (NN_medium, "medium", "royalblue"),
    (NN_large, "large", "darkred"),
    (NN_large_reg, "large w/ regularization", "orange"),
    (NN_large_dropout, "large w/ dropout", "purple"),
]
for net, tag, colour in accuracy_curves:
    plt.plot(net.hist['train_accuracy'], label=f"Training ({tag})", c=colour)
    plt.plot(net.hist['val_accuracy'], label=f"Validation ({tag})", c=colour, linestyle="--")
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
In [ ]:
# Weight norm per epoch for the large network with and without L2
# regularization.
fig = plt.figure(figsize=(10, 6))
norm_curves = [
    (NN_large, "large", "darkred"),
    (NN_large_reg, "large w/ regularization", "orange"),
]
for net, tag, colour in norm_curves:
    plt.plot(net.hist['weight_norms'], label=f"Weight norm ({tag})", c=colour)
plt.xlabel('Epoch')
plt.ylabel('L2 norm of weights')
plt.legend()
plt.show()