IN2064 Practical Session 9 — Deep Learning 2
In this programming exercise your task is to use Tensorflow to classify reviews in the IMDB dataset. The possible labels are positive and negative, i.e. the task is binary classification.
- Fill in the required parts of the skeleton below.
- Train feed forward neural networks of different sizes (specified below) on the IMDB dataset
- Use Tensorboard to visualize the computation graph.
- Plot the training and validation losses for the different neural networks. What are the influences of the neural network capacity, L2 regularization and dropout?
In [1]:
import tensorflow as tf import numpy as np import matplotlib.pyplot as plt
First download the data from https://syncandshare.lrz.de/dl/fiWqeMXAwSqt4Nr3u6b1Jr9S/imdb_data.npz
In [2]:
# Load the pre-processed IMDB data and unpack the four arrays it contains.
loader = np.load("imdb_data.npz")
train_data, train_labels = loader['train_data'], loader['train_labels']
validation_data, validation_labels = (loader['validation_data'],
                                      loader['validation_labels'])
In [3]:
# Mini-batch size used when iterating over the training set.
batch_size = 512
In [4]:
def batch_data(num_data, batch_size):
    """
    Yield batches with indices until epoch is over.

    Parameters
    ----------
    num_data: int
        The number of samples in the dataset.
    batch_size: int
        The batch size used during training.

    Yields
    ------
    batch_ixs: np.array of ints with shape [batch_size,]
        Arrays of indices of size of the batch size until the epoch is over.
    """
    data_ixs = np.random.permutation(np.arange(num_data))
    ix = 0
    # NOTE(review): the strict `<` drops a trailing partial batch (and, when
    # num_data is an exact multiple of batch_size, even the last full batch).
    # Kept as-is since data is reshuffled every epoch, so all samples are
    # seen over time.
    while ix + batch_size < num_data:
        batch_ixs = data_ixs[ix:ix + batch_size]
        ix += batch_size
        yield batch_ixs


class FeedForwardNet:
    """
    Simple feed forward neural network class
    """

    def __init__(self, hidden_sizes, name, l2_reg=0.0):
        """ FeedForwardNet constructor.

        Parameters
        ----------
        hidden_sizes: list of ints
            The sizes of the hidden layers of the network.
        name: str
            The name of the network (used for a VariableScope)
        l2_reg: float
            The strength of L2 regularization (0 means no regularization)
        """
        self.hidden_sizes = hidden_sizes
        self.name = name
        # Dropout *rate* (fraction of units dropped). Defaults to 0 so that
        # evaluation runs dropout-free unless a rate is fed explicitly.
        self.dropout = tf.placeholder_with_default(0.0, shape=(), name="dropout")
        self.l2_reg = l2_reg
        self.weights = []
        self.biases = []

    def build(self, data_dim):
        """ Construct the model.

        Parameters
        ----------
        data_dim: int
            The dimensions of the data samples.

        Returns
        -------
        None
        """
        self.X = tf.placeholder(shape=[None, data_dim], dtype=tf.float32, name="data")  # [NxD]
        self.Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name="labels")  # [Nx1]

        with tf.variable_scope(self.name):
            hidden = self.X
            for ix, hidden_size in enumerate(self.hidden_sizes):
                in_dim = int(hidden.shape[1])
                # Scaled-normal init keeps pre-activations in a sane range.
                W = tf.Variable(
                    tf.truncated_normal([in_dim, hidden_size],
                                        stddev=1.0 / np.sqrt(in_dim)),
                    name=f"W_{ix}")
                b = tf.Variable(tf.zeros([hidden_size]), name=f"b_{ix}")
                self.weights.append(W)
                self.biases.append(b)
                hidden = tf.nn.relu(tf.matmul(hidden, W) + b)
                # TF1 dropout takes the *keep* probability, hence 1 - rate.
                hidden = tf.nn.dropout(hidden, keep_prob=1.0 - self.dropout)

            # Output layer: a single logit per sample (binary classification).
            in_dim = int(hidden.shape[1])
            W_out = tf.Variable(
                tf.truncated_normal([in_dim, 1], stddev=1.0 / np.sqrt(in_dim)),
                name="W_out")
            b_out = tf.Variable(tf.zeros([1]), name="b_out")
            self.weights.append(W_out)
            self.biases.append(b_out)
            self.logits = tf.matmul(hidden, W_out) + b_out

            # Squared L2 norm of all weight matrices (biases not regularized).
            self.l2_norm = tf.add_n(
                [tf.reduce_sum(tf.square(W)) for W in self.weights])
            self.cross_entropy_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.Y,
                                                        logits=self.logits))
            # Predict positive iff the logit is >= 0 (i.e. sigmoid >= 0.5).
            predictions = tf.cast(tf.greater_equal(self.logits, 0.0), tf.float32)
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(predictions, self.Y), tf.float32))
            self.loss = self.cross_entropy_loss + self.l2_reg * self.l2_norm

        self.optimizer = tf.train.AdamOptimizer()
        self.opt_op = self.optimizer.minimize(
            self.loss, var_list=[*self.weights, *self.biases])

    def number_trainable_parameters(self):
        """ Compute number of trainable parameters in the model.

        Returns
        -------
        num_params: int
            The number of trainable parameters.
        """
        num_params = np.sum([np.prod(x.shape) for x in self.weights])
        num_params += np.sum([np.prod(x.shape) for x in self.biases])
        return int(num_params)

    def train(self, train_data, train_labels, val_data, val_labels, epochs=20,
              dropout=0.0, batch_size=512):
        """ Train the feed forward neural network.

        Parameters
        ----------
        train_data: np.array, dtype float32, shape [N, D]
            The training data. N corresponds to the number of training samples,
            D to the dimensionality of the data samples.
        train_labels: np.array, shape [N, 1]
            The labels of the training data.
        val_data: np.array, dtype float32, shape [N_val, D]
            The validation data. N_val corresponds to the number of validation
            samples, D to the dimensionality of the data samples.
        val_labels: np.array, shape [N_val, 1]
            The labels of the validation data.
        epochs: int
            The number of epochs to train for.
        dropout: float
            The dropout rate used during training. 0 corresponds to no dropout.
        batch_size: int
            The batch size used for training.

        Returns
        -------
        None
        """
        train_losses = []
        train_accs = []
        val_losses = []
        val_accs = []
        weight_norms = []

        self.session = tf.Session()
        session = self.session
        with session.as_default():
            session.run(tf.global_variables_initializer())

            # Record the losses/accuracies before any training step so the
            # history has an "epoch 0" entry.
            tr_loss, tr_acc = session.run(
                [self.loss, self.accuracy],
                feed_dict={self.X: train_data, self.Y: train_labels})
            val_loss, val_acc = session.run(
                [self.loss, self.accuracy],
                feed_dict={self.X: val_data, self.Y: val_labels})
            train_losses.append(tr_loss)
            train_accs.append(tr_acc)
            val_losses.append(val_loss)
            val_accs.append(val_acc)
            weight_norms.append(session.run(self.l2_norm))

            for epoch in range(epochs):
                print(f"Epoch {epoch+1}/{epochs}")
                for batch_ixs in batch_data(len(train_data), batch_size):
                    # Dropout is only fed during the optimization step;
                    # evaluation below uses the placeholder default of 0.
                    _ = session.run(
                        self.opt_op,
                        feed_dict={self.X: train_data[batch_ixs],
                                   self.Y: train_labels[batch_ixs],
                                   self.dropout: dropout})
                tr_loss, tr_acc = session.run(
                    [self.loss, self.accuracy],
                    feed_dict={self.X: train_data, self.Y: train_labels})
                val_loss, val_acc = session.run(
                    [self.loss, self.accuracy],
                    feed_dict={self.X: val_data, self.Y: val_labels})
                train_losses.append(tr_loss)
                train_accs.append(tr_acc)
                val_losses.append(val_loss)
                val_accs.append(val_acc)
                weight_norms.append(session.run(self.l2_norm))

        self.hist = {'train_loss': np.array(train_losses),
                     'train_accuracy': np.array(train_accs),
                     'val_loss': np.array(val_losses),
                     'val_accuracy': np.array(val_accs),
                     'weight_norms': np.array(weight_norms)}
In [5]:
# Small capacity model: two hidden layers of 4 units each.
NN_small = FeedForwardNet([4, 4], "small")
NN_small.build(train_data.shape[1])
print(f"{NN_small.name} neural network has {NN_small.number_trainable_parameters()} trainable parameters.")
NN_small.train(train_data, train_labels, validation_data, validation_labels)
small neural network has 4029 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [ ]:
# Dump the computation graph of the small network so it can be inspected
# with TensorBoard (run: tensorboard --logdir logs).
summary_writer = tf.summary.FileWriter('logs', graph=NN_small.session.graph)
In [6]:
# Medium capacity model: two hidden layers of 16 units each.
NN_medium = FeedForwardNet([16, 16], "medium")
NN_medium.build(train_data.shape[1])
print(f"{NN_medium.name} neural network has {NN_medium.number_trainable_parameters()} trainable parameters.")
NN_medium.train(train_data, train_labels, validation_data, validation_labels)
medium neural network has 16305 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [7]:
# Large capacity model: two hidden layers of 512 units each, no regularization.
NN_large = FeedForwardNet([512, 512], "large")
NN_large.build(train_data.shape[1])
print(f"{NN_large.name} neural network has {NN_large.number_trainable_parameters()} trainable parameters.")
NN_large.train(train_data, train_labels, validation_data, validation_labels)
large neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [8]:
# Same large architecture, but with L2 weight regularization (strength 1e-2).
NN_large_reg = FeedForwardNet([512, 512], "large_reg", l2_reg=1e-2)
NN_large_reg.build(train_data.shape[1])
print(f"{NN_large_reg.name} neural network has {NN_large_reg.number_trainable_parameters()} trainable parameters.")
NN_large_reg.train(train_data, train_labels, validation_data, validation_labels)
large_reg neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [11]:
# Same large architecture, regularized with dropout (rate 0.5) instead of L2.
NN_large_dropout = FeedForwardNet([512, 512], "large_dropout")
NN_large_dropout.build(train_data.shape[1])
print(f"{NN_large_dropout.name} neural network has {NN_large_dropout.number_trainable_parameters()} trainable parameters.")
NN_large_dropout.train(train_data, train_labels, validation_data, validation_labels, dropout=0.5)
large_dropout neural network has 775681 trainable parameters. Epoch 1/20 Epoch 2/20 Epoch 3/20 Epoch 4/20 Epoch 5/20 Epoch 6/20 Epoch 7/20 Epoch 8/20 Epoch 9/20 Epoch 10/20 Epoch 11/20 Epoch 12/20 Epoch 13/20 Epoch 14/20 Epoch 15/20 Epoch 16/20 Epoch 17/20 Epoch 18/20 Epoch 19/20 Epoch 20/20
In [ ]:
# Training vs. validation loss curves for all five models, one color per model
# (solid = training, dashed = validation).
fig = plt.figure(figsize=(10, 6))
for net, desc, color in [
    (NN_small, "small", "darkgreen"),
    (NN_medium, "medium", "royalblue"),
    (NN_large, "large", "darkred"),
    (NN_large_reg, "large w/ regularization", "orange"),
    (NN_large_dropout, "large w/ dropout", "purple"),
]:
    plt.plot(net.hist['train_loss'], label=f"Training ({desc})", c=color)
    plt.plot(net.hist['val_loss'], label=f"Validation ({desc})", c=color, linestyle="--")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
In [ ]:
# Training vs. validation accuracy curves for all five models, one color per
# model (solid = training, dashed = validation).
fig = plt.figure(figsize=(10, 6))
for net, desc, color in [
    (NN_small, "small", "darkgreen"),
    (NN_medium, "medium", "royalblue"),
    (NN_large, "large", "darkred"),
    (NN_large_reg, "large w/ regularization", "orange"),
    (NN_large_dropout, "large w/ dropout", "purple"),
]:
    plt.plot(net.hist['train_accuracy'], label=f"Training ({desc})", c=color)
    plt.plot(net.hist['val_accuracy'], label=f"Validation ({desc})", c=color, linestyle="--")
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
In [ ]:
# Compare how the L2 norm of the weights evolves with and without
# explicit L2 regularization.
fig = plt.figure(figsize=(10, 6))
for net, lbl, color in [(NN_large, "Weight norm (large)", "darkred"),
                        (NN_large_reg, "Weight norm (large w/ regularization)", "orange")]:
    plt.plot(net.hist['weight_norms'], label=lbl, c=color)
plt.xlabel('Epoch')
plt.ylabel('L2 norm of weights')
plt.legend()
plt.show()