topic1-2-checkpoint
In [1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import logging
from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer
from mlp.errors import CrossEntropyError, CrossEntropySoftmaxError
from mlp.models import SingleLayerModel, MultipleLayerModel
from mlp.initialisers import UniformInit
from mlp.learning_rules import GradientDescentLearningRule
from mlp.data_providers import MNISTDataProvider
from mlp.optimisers import Optimiser
# Draw every figure in this notebook with matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')
In [2]:
# Route INFO-level log records from the training run to the notebook output.
# The handler list is replaced (not appended to) so that re-running this cell
# does not produce duplicated log lines. Note that logging.StreamHandler()
# writes to stderr when no stream is given.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Seeded random number generator; it is handed to the data providers below so
# that runs can be reproduced by re-seeding it.
seed = 6102016
rng = np.random.RandomState(seed)

# Create data provider objects for the MNIST training and validation sets
train_data = MNISTDataProvider('train', rng=rng)
valid_data = MNISTDataProvider('valid', rng=rng)

# Model input / output dimensionality.
# NOTE(review): presumably 784 = 28 * 28 flattened pixels and 10 = number of
# digit classes -- confirm against MNISTDataProvider.
input_dim, output_dim = 784, 10
In [11]:
# Hyperparameters for this training run
num_epochs = 100      # number of training epochs to perform
stats_interval = 5    # epoch interval between recording and printing stats
batch_size = 100      # number of data points in a batch
learning_rate = 0.2   # learning rate for gradient descent
init_scale = 0.5      # parameters are initialised in [-init_scale, init_scale]
hidden_dim = 100      # output dimension of every hidden affine layer

# Put the random number generator and both data providers back into their
# starting state so that re-running this cell gives the same results.
# The order of these calls is kept exactly as it was, since they share `rng`.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Apply the chosen batch size to both data providers
train_data.batch_size = batch_size
valid_data.batch_size = batch_size

# Uniform initialiser over [-init_scale, init_scale]; the same object is
# passed as both initialiser arguments of every affine layer below.
param_init = UniformInit(-init_scale, init_scale, rng=rng)

# Five affine layers with a sigmoid non-linearity between consecutive ones.
# The model has no softmax output layer; the error object used below is
# CrossEntropySoftmaxError.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, output_dim, param_init, param_init)
])

# Error function and plain gradient descent learning rule
error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

# NOTE(review): train_model_and_plot_stats is neither defined nor imported in
# the visible cells, so it must already exist in the kernel. Its final
# positional argument looks like an early-stopping switch (the run that passes
# True logs "use early stop") -- confirm against its definition.
res = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data,
    num_epochs, stats_interval, False)
Epoch 0:
error(train)=3.36e+00, acc(train)=1.04e-01, error(valid)=3.33e+00, acc(valid)=1.09e-01, params_penalty=0.00e+00
Epoch 5: 0.89s to complete
error(train)=2.16e-01, acc(train)=9.36e-01, error(valid)=2.05e-01, acc(valid)=9.40e-01, params_penalty=0.00e+00
Epoch 10: 0.89s to complete
error(train)=1.41e-01, acc(train)=9.57e-01, error(valid)=1.52e-01, acc(valid)=9.57e-01, params_penalty=0.00e+00
Epoch 15: 0.89s to complete
error(train)=1.02e-01, acc(train)=9.69e-01, error(valid)=1.29e-01, acc(valid)=9.62e-01, params_penalty=0.00e+00
Epoch 20: 0.90s to complete
error(train)=7.68e-02, acc(train)=9.78e-01, error(valid)=1.21e-01, acc(valid)=9.66e-01, params_penalty=0.00e+00
Epoch 25: 1.03s to complete
error(train)=5.93e-02, acc(train)=9.83e-01, error(valid)=1.20e-01, acc(valid)=9.65e-01, params_penalty=0.00e+00
Epoch 30: 0.99s to complete
error(train)=4.85e-02, acc(train)=9.86e-01, error(valid)=1.23e-01, acc(valid)=9.66e-01, params_penalty=0.00e+00
Epoch 35: 0.87s to complete
error(train)=3.23e-02, acc(train)=9.92e-01, error(valid)=1.15e-01, acc(valid)=9.69e-01, params_penalty=0.00e+00
Epoch 40: 0.88s to complete
error(train)=2.37e-02, acc(train)=9.95e-01, error(valid)=1.13e-01, acc(valid)=9.69e-01, params_penalty=0.00e+00
Epoch 45: 0.87s to complete
error(train)=1.78e-02, acc(train)=9.97e-01, error(valid)=1.20e-01, acc(valid)=9.68e-01, params_penalty=0.00e+00
Epoch 50: 0.86s to complete
error(train)=1.40e-02, acc(train)=9.98e-01, error(valid)=1.23e-01, acc(valid)=9.69e-01, params_penalty=0.00e+00
Epoch 55: 0.89s to complete
error(train)=1.00e-02, acc(train)=9.99e-01, error(valid)=1.25e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 60: 1.04s to complete
error(train)=8.09e-03, acc(train)=9.99e-01, error(valid)=1.29e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 65: 1.06s to complete
error(train)=6.33e-03, acc(train)=1.00e+00, error(valid)=1.33e-01, acc(valid)=9.69e-01, params_penalty=0.00e+00
Epoch 70: 0.99s to complete
error(train)=4.86e-03, acc(train)=1.00e+00, error(valid)=1.35e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 75: 0.89s to complete
error(train)=5.00e-03, acc(train)=1.00e+00, error(valid)=1.41e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 80: 0.90s to complete
error(train)=3.28e-03, acc(train)=1.00e+00, error(valid)=1.39e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 85: 0.89s to complete
error(train)=2.70e-03, acc(train)=1.00e+00, error(valid)=1.42e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 90: 0.89s to complete
error(train)=2.37e-03, acc(train)=1.00e+00, error(valid)=1.44e-01, acc(valid)=9.71e-01, params_penalty=0.00e+00
Epoch 95: 0.88s to complete
error(train)=2.15e-03, acc(train)=1.00e+00, error(valid)=1.46e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 100: 0.90s to complete
error(train)=1.81e-03, acc(train)=1.00e+00, error(valid)=1.47e-01, acc(valid)=9.71e-01, params_penalty=0.00e+00
In [19]:
# Pick up edits made to mlp/optimisers.py since the kernel started.
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib,
# so import it from there when available and otherwise keep the builtin.
try:
    from importlib import reload
except ImportError:  # Python 2: the builtin `reload` is used instead
    pass
import mlp.optimisers
mlp.optimisers = reload(mlp.optimisers)
from mlp.optimisers import Optimiser

# Restore the random number generator and data providers to their starting
# state so this run begins from the same point as the previous one.
# NOTE: batch_size, init_scale, learning_rate, num_epochs and stats_interval
# are not set in this cell; they are whatever the kernel currently holds.
rng.seed(seed)
train_data.reset()
valid_data.reset()

# Uniform initialiser over [-init_scale, init_scale], used for every
# initialiser argument of the affine layers below.
param_init = UniformInit(-init_scale, init_scale, rng=rng)

# Five affine layers with a sigmoid non-linearity between consecutive ones
# (no softmax output layer; see CrossEntropySoftmaxError below).
hidden_dim = 100
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, hidden_dim, param_init, param_init),
    SigmoidLayer(),
    AffineLayer(hidden_dim, output_dim, param_init, param_init)
])

error = CrossEntropySoftmaxError()
# Use a basic gradient descent learning rule
learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)

# NOTE(review): train_model_and_plot_stats is not defined in the visible
# cells. The final positional argument is True here and the captured log
# starts with "use early stop", so it appears to enable early stopping --
# confirm against its definition.
earlyStopRes = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data,
    num_epochs, stats_interval, True)
use early stop
Epoch 0:
error(train)=3.36e+00, acc(train)=1.04e-01, error(valid)=3.33e+00, acc(valid)=1.09e-01, params_penalty=0.00e+00
Epoch 5: 0.92s to complete
error(train)=2.16e-01, acc(train)=9.36e-01, error(valid)=2.05e-01, acc(valid)=9.40e-01, params_penalty=0.00e+00
cur error 0.204874124958 pre inf
Epoch 10: 0.93s to complete
error(train)=1.41e-01, acc(train)=9.57e-01, error(valid)=1.52e-01, acc(valid)=9.57e-01, params_penalty=0.00e+00
cur error 0.152051258802 pre 0.204874124958
Epoch 15: 1.65s to complete
error(train)=1.02e-01, acc(train)=9.69e-01, error(valid)=1.29e-01, acc(valid)=9.62e-01, params_penalty=0.00e+00
cur error 0.129276403969 pre 0.152051258802
Epoch 20: 0.87s to complete
error(train)=7.68e-02, acc(train)=9.78e-01, error(valid)=1.21e-01, acc(valid)=9.66e-01, params_penalty=0.00e+00
cur error 0.121025104022 pre 0.129276403969
Epoch 25: 1.26s to complete
error(train)=5.93e-02, acc(train)=9.83e-01, error(valid)=1.20e-01, acc(valid)=9.65e-01, params_penalty=0.00e+00
cur error 0.11995645528 pre 0.121025104022
Epoch 30: 0.89s to complete
error(train)=4.85e-02, acc(train)=9.86e-01, error(valid)=1.23e-01, acc(valid)=9.66e-01, params_penalty=0.00e+00
cur error 0.123395421498 pre 0.11995645528
In [32]:
# First two items of the result of the run that passed False as the last
# argument. `stats` is later indexed as stats[row, keys[name]], so `keys`
# maps a statistic name to its column in `stats`.
stats, keys = res[0], res[1]
In [33]:
# First two items of the result of the run that passed True as the last
# argument. `stats2` is later indexed as stats2[row, keys2[name]], so `keys2`
# maps a statistic name to its column in `stats2`.
stats2, keys2 = earlyStopRes[0], earlyStopRes[1]
In [34]:
# Print the last recorded statistics of both runs as a Markdown table.
print('| with_early_stop | final error(train) | final error(valid) | final acc(train) | final acc(valid) |')
print('|-----------------|--------------------|--------------------|------------------|------------------|')
# Each run's statistics array is indexed with that run's own key mapping;
# looping over (label, stats, keys) triples keeps the two from being mixed up.
for label, run_stats, run_keys in (("No", stats, keys), ("Yes", stats2, keys2)):
    print('| {0:8s} | {1:.2e} | {2:.2e} | {3:.2f} | {4:.2f} |'.format(
        label,
        run_stats[-1, run_keys['error(train)']],
        run_stats[-1, run_keys['error(valid)']],
        run_stats[-1, run_keys['acc(train)']],
        run_stats[-1, run_keys['acc(valid)']]))
| with_early_stop | final error(train) | final error(valid) | final acc(train) | final acc(valid) |
|—————–|——————–|——————–|——————|——————|
| No | 1.81e-03 | 1.47e-01 | 1.00 | 0.97 |
| Yes | 4.85e-02 | 1.23e-01 | 0.99 | 0.97 |
In [46]:
from scipy.ndimage.interpolation import rotate, shift
def random_rotate_shift(inputs, rng, max_angle=30., max_shift=4.):
    """Randomly rotates and shifts subsets of the images in a batch.

    Half of the images (chosen without replacement) are rotated by an angle
    drawn uniformly from [-max_angle, max_angle] degrees. A second half,
    chosen independently of the first, is then shifted by an offset drawn
    uniformly from [-max_shift, max_shift] pixels; the same offset is applied
    along both image axes. Any given image may therefore end up rotated,
    shifted, both, or untouched.

    Args:
        inputs: Input image batch, an array of shape (batch_size, 784).
        rng: A seeded random number generator providing `choice` and
            `uniform` (e.g. a `numpy.random.RandomState`).
        max_angle: Largest rotation magnitude in degrees.
        max_shift: Largest shift magnitude in pixels.

    Returns:
        An array of shape (batch_size, 784) holding a transformed copy of
        `inputs`. The original `inputs` array is not modified.
    """
    orig_ims = inputs.reshape((-1, 28, 28))
    # Work on a copy so the caller's batch is left untouched.
    new_ims = orig_ims.copy()
    num_images = orig_ims.shape[0]
    num_selected = num_images // 2

    # Rotation pass: always rotates the *original* image.
    indices = rng.choice(num_images, num_selected, False)
    angles = rng.uniform(-1., 1., size=indices.shape[0]) * max_angle
    for i, j in enumerate(indices):
        new_ims[j] = rotate(orig_ims[j], angles[i], order=1, reshape=False)

    # Shift pass: operates on the (possibly already rotated) copy so that the
    # two transformations compose for images selected in both passes.
    indices = rng.choice(num_images, num_selected, False)
    shifts = rng.uniform(-1., 1., size=indices.shape[0]) * max_shift
    for i, j in enumerate(indices):
        new_ims[j] = shift(new_ims[j], shifts[i], order=1)

    return new_ims.reshape((-1, 784))
from mlp.data_providers import AugmentedMNISTDataProvider
# Training-set provider configured with random_rotate_shift as its transformer.
# NOTE(review): presumably the provider applies `transformer` to each input
# batch it yields -- confirm in mlp.data_providers.
aug_train_data = AugmentedMNISTDataProvider('train', rng=rng, transformer=random_rotate_shift)
from mlp.initialisers import GlorotUniformInit, ConstantInit
from mlp.layers import ReluLayer
from mlp.learning_rules import MomentumLearningRule
In [50]:
# Hyperparameters for the ReLU / momentum runs
num_epochs = 100
stats_interval = 5
batch_size = 100
learning_rate = 0.01
mom_coeff = 0.9

# Restore the random number generator and data providers to their starting
# state; the order of these calls is unchanged because they share `rng`.
rng.seed(seed)
aug_train_data.reset()
valid_data.reset()

aug_train_data.batch_size = batch_size
valid_data.batch_size = batch_size

# NOTE(review): the 0.5 passed to GlorotUniformInit is presumably a gain /
# scale factor -- confirm in mlp.initialisers.
weights_init = GlorotUniformInit(0.5, rng=rng)
biases_init = ConstantInit(0.)

# Three affine layers with ReLU non-linearities in between.
# `hidden_dim` is not set in this cell; it comes from an earlier cell.
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
learning_rule = MomentumLearningRule(
    learning_rate=learning_rate, mom_coeff=mom_coeff)

# Train on the augmented training provider.
# NOTE(review): train_model_and_plot_stats is not defined in the visible
# cells; it must already exist in the kernel.
withAugRes = train_model_and_plot_stats(
    model, error, learning_rule, aug_train_data, valid_data,
    num_epochs, stats_interval, False)
Epoch 0:
error(train)=2.30e+00, acc(train)=6.78e-02, error(valid)=2.30e+00, acc(valid)=5.97e-02, params_penalty=0.00e+00
Epoch 5: 4.02s to complete
error(train)=2.60e-01, acc(train)=9.20e-01, error(valid)=1.38e-01, acc(valid)=9.59e-01, params_penalty=0.00e+00
Epoch 10: 3.99s to complete
error(train)=1.75e-01, acc(train)=9.47e-01, error(valid)=1.05e-01, acc(valid)=9.70e-01, params_penalty=0.00e+00
Epoch 15: 4.33s to complete
error(train)=1.48e-01, acc(train)=9.55e-01, error(valid)=9.33e-02, acc(valid)=9.72e-01, params_penalty=0.00e+00
Epoch 20: 4.50s to complete
error(train)=1.39e-01, acc(train)=9.57e-01, error(valid)=8.78e-02, acc(valid)=9.73e-01, params_penalty=0.00e+00
Epoch 25: 4.50s to complete
error(train)=1.24e-01, acc(train)=9.61e-01, error(valid)=8.46e-02, acc(valid)=9.76e-01, params_penalty=0.00e+00
Epoch 30: 4.31s to complete
error(train)=1.13e-01, acc(train)=9.65e-01, error(valid)=7.94e-02, acc(valid)=9.77e-01, params_penalty=0.00e+00
Epoch 35: 4.60s to complete
error(train)=9.89e-02, acc(train)=9.69e-01, error(valid)=7.33e-02, acc(valid)=9.78e-01, params_penalty=0.00e+00
Epoch 40: 4.95s to complete
error(train)=9.12e-02, acc(train)=9.71e-01, error(valid)=7.14e-02, acc(valid)=9.78e-01, params_penalty=0.00e+00
Epoch 45: 4.15s to complete
error(train)=8.86e-02, acc(train)=9.73e-01, error(valid)=6.72e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 50: 4.33s to complete
error(train)=8.59e-02, acc(train)=9.73e-01, error(valid)=6.52e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 55: 5.02s to complete
error(train)=8.21e-02, acc(train)=9.75e-01, error(valid)=6.49e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 60: 4.95s to complete
error(train)=7.85e-02, acc(train)=9.76e-01, error(valid)=6.57e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 65: 5.00s to complete
error(train)=7.37e-02, acc(train)=9.77e-01, error(valid)=6.09e-02, acc(valid)=9.83e-01, params_penalty=0.00e+00
Epoch 70: 4.64s to complete
error(train)=7.19e-02, acc(train)=9.78e-01, error(valid)=5.80e-02, acc(valid)=9.83e-01, params_penalty=0.00e+00
Epoch 75: 5.10s to complete
error(train)=7.17e-02, acc(train)=9.78e-01, error(valid)=6.31e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 80: 5.47s to complete
error(train)=6.89e-02, acc(train)=9.79e-01, error(valid)=5.83e-02, acc(valid)=9.82e-01, params_penalty=0.00e+00
Epoch 85: 4.66s to complete
error(train)=6.61e-02, acc(train)=9.80e-01, error(valid)=5.64e-02, acc(valid)=9.84e-01, params_penalty=0.00e+00
Epoch 90: 4.42s to complete
error(train)=6.71e-02, acc(train)=9.79e-01, error(valid)=6.26e-02, acc(valid)=9.83e-01, params_penalty=0.00e+00
Epoch 95: 4.40s to complete
error(train)=5.88e-02, acc(train)=9.81e-01, error(valid)=5.69e-02, acc(valid)=9.84e-01, params_penalty=0.00e+00
Epoch 100: 4.52s to complete
error(train)=6.88e-02, acc(train)=9.79e-01, error(valid)=6.17e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
In [49]:
# Same model and learning rule as the augmented run, but trained on the plain
# (non-augmented) training provider.
# NOTE: batch_size, learning_rate, mom_coeff, num_epochs, stats_interval and
# hidden_dim are not set in this cell; they are whatever the kernel holds.

# Restore the random number generator and data providers to their starting
# state; the order of these calls is unchanged because they share `rng`.
rng.seed(seed)
train_data.reset()
valid_data.reset()

train_data.batch_size = batch_size
valid_data.batch_size = batch_size

# NOTE(review): the 0.5 passed to GlorotUniformInit is presumably a gain /
# scale factor -- confirm in mlp.initialisers.
weights_init = GlorotUniformInit(0.5, rng=rng)
biases_init = ConstantInit(0.)

# Three affine layers with ReLU non-linearities in between
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init)
])

error = CrossEntropySoftmaxError()
learning_rule = MomentumLearningRule(
    learning_rate=learning_rate, mom_coeff=mom_coeff)

# NOTE(review): train_model_and_plot_stats is not defined in the visible
# cells; it must already exist in the kernel.
withOutAugRes = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data,
    num_epochs, stats_interval, False)
Epoch 0:
error(train)=2.30e+00, acc(train)=6.02e-02, error(valid)=2.30e+00, acc(valid)=5.97e-02, params_penalty=0.00e+00
Epoch 5: 1.04s to complete
error(train)=1.13e-01, acc(train)=9.67e-01, error(valid)=1.23e-01, acc(valid)=9.66e-01, params_penalty=0.00e+00
Epoch 10: 0.70s to complete
error(train)=5.94e-02, acc(train)=9.83e-01, error(valid)=9.17e-02, acc(valid)=9.75e-01, params_penalty=0.00e+00
Epoch 15: 0.92s to complete
error(train)=3.34e-02, acc(train)=9.90e-01, error(valid)=8.12e-02, acc(valid)=9.76e-01, params_penalty=0.00e+00
Epoch 20: 0.80s to complete
error(train)=1.90e-02, acc(train)=9.96e-01, error(valid)=7.81e-02, acc(valid)=9.77e-01, params_penalty=0.00e+00
Epoch 25: 0.67s to complete
error(train)=1.32e-02, acc(train)=9.97e-01, error(valid)=8.60e-02, acc(valid)=9.78e-01, params_penalty=0.00e+00
Epoch 30: 0.76s to complete
error(train)=5.28e-03, acc(train)=1.00e+00, error(valid)=8.31e-02, acc(valid)=9.79e-01, params_penalty=0.00e+00
Epoch 35: 0.70s to complete
error(train)=3.31e-03, acc(train)=1.00e+00, error(valid)=8.63e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 40: 0.77s to complete
error(train)=2.45e-03, acc(train)=1.00e+00, error(valid)=8.89e-02, acc(valid)=9.79e-01, params_penalty=0.00e+00
Epoch 45: 0.69s to complete
error(train)=1.65e-03, acc(train)=1.00e+00, error(valid)=9.00e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 50: 0.66s to complete
error(train)=1.32e-03, acc(train)=1.00e+00, error(valid)=9.30e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 55: 0.66s to complete
error(train)=1.08e-03, acc(train)=1.00e+00, error(valid)=9.57e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 60: 0.68s to complete
error(train)=9.44e-04, acc(train)=1.00e+00, error(valid)=9.70e-02, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 65: 0.81s to complete
error(train)=8.01e-04, acc(train)=1.00e+00, error(valid)=9.86e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 70: 0.74s to complete
error(train)=6.93e-04, acc(train)=1.00e+00, error(valid)=9.98e-02, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 75: 0.74s to complete
error(train)=6.34e-04, acc(train)=1.00e+00, error(valid)=1.02e-01, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 80: 0.82s to complete
error(train)=5.61e-04, acc(train)=1.00e+00, error(valid)=1.02e-01, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 85: 0.69s to complete
error(train)=5.04e-04, acc(train)=1.00e+00, error(valid)=1.04e-01, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 90: 0.67s to complete
error(train)=4.62e-04, acc(train)=1.00e+00, error(valid)=1.05e-01, acc(valid)=9.80e-01, params_penalty=0.00e+00
Epoch 95: 0.84s to complete
error(train)=4.26e-04, acc(train)=1.00e+00, error(valid)=1.06e-01, acc(valid)=9.81e-01, params_penalty=0.00e+00
Epoch 100: 0.83s to complete
error(train)=3.92e-04, acc(train)=1.00e+00, error(valid)=1.06e-01, acc(valid)=9.80e-01, params_penalty=0.00e+00
In [60]:
# First two items of each run's result: the statistics array and the mapping
# from statistic name to column index (see the indexing below).
stats = withAugRes[0]
keys = withAugRes[1]
stats2 = withOutAugRes[0]
keys2 = withOutAugRes[1]

# Print the last recorded statistics of both runs as a Markdown table.
print('|data augmentation | final error(train) | final error(valid) | final acc(train) | final acc(valid) |')
print('|------------------|--------------------|--------------------|------------------|------------------|')
# Each run's statistics array is indexed with that run's own key mapping;
# looping over (label, stats, keys) triples keeps the two from being mixed up.
for label, run_stats, run_keys in (("Yes", stats, keys), ("No", stats2, keys2)):
    print('| {0:8s} | {1:.2e} | {2:.2e} | {3:.4f} | {4:.4f} |'.format(
        label,
        run_stats[-1, run_keys['error(train)']],
        run_stats[-1, run_keys['error(valid)']],
        run_stats[-1, run_keys['acc(train)']],
        run_stats[-1, run_keys['acc(valid)']]))
|data augmentation | final error(train) | final error(valid) | final acc(train) | final acc(valid) |
|——————|——————–|——————–|——————|——————|
| Yes | 6.88e-02 | 6.17e-02 | 0.9788 | 0.9808 |
| No | 3.92e-04 | 1.06e-01 | 1.0000 | 0.9803 |
In [ ]: