proj2
COMP90051 Project 2
Copyright statement: This notebook is copyright University of Melbourne.
It is licensed for the sole purpose of your assessment in COMP90051.
You are not permitted to share or publish derived versions of this notebook, other than with COMP90051 staff for assessment.
The code block below imports the namespaces/functions/classes you may use in the project.
Additional imports are not permitted.
In [ ]:
# Do not edit. These are the only imports permitted.
import numpy as np
from abc import ABC, abstractmethod
# Type annotations
from numpy import ndarray
from numpy.random import Generator
from typing import List, Deque, Optional, Tuple
from sklearn.cluster import KMeans # for Task 3
from sklearn.linear_model import LogisticRegression # for Task 4
from collections import deque # for Task 5
from scipy.stats import multivariate_normal # for Task 5
import matplotlib.pyplot as plt # for Task 6
The base MAB class below defines a common interface for a contextual multi-armed bandit.
Your bandit implementations in Tasks 1-4 should inherit from this class.
In [ ]:
class MAB(ABC):
    """Base class for a contextual multi-armed bandit (MAB)

    Parameters
    ----------
    n_arms : int
        Number of arms.

    rng : Generator, optional
        A `Generator` used as an internal source of randomness. If None, a
        default `Generator` will be constructed using `np.random.default_rng`.

    Raises
    ------
    ValueError
        If `n_arms` does not satisfy `n_arms >= 0`.
    """

    def __init__(self, n_arms: int, rng: Optional[Generator] = None) -> None:
        # Written as `not n_arms >= 0` so that values which fail the
        # comparison altogether (e.g. NaN) are rejected as well.
        if not n_arms >= 0:
            raise ValueError("`n_arms` must be non-negative")
        self.n_arms = n_arms
        # `default_rng` builds a fresh Generator for None and hands back an
        # existing Generator as-is, so callers may share one random stream.
        self.rng = np.random.default_rng(rng)

    @abstractmethod
    def play(self, context: Optional[ndarray]) -> int:
        """Play a round

        Parameters
        ----------
        context : float numpy.ndarray, shape (n_arms, n_dims), optional
            An array of context vectors presented to the MAB. The 0-th
            axis indexes the arms, and the 1-st axis indexes the features.
            Non-contextual bandits accept a context of None.

        Returns
        -------
        arm : int
            Integer index of the arm played this round. Should be in the set
            {0, ..., n_arms - 1}.
        """

    @abstractmethod
    def update(self, arm: int, context: Optional[ndarray],
               reward: Optional[float] = None) -> None:
        """Update the internal state of the MAB after a play

        This base implementation only validates `arm`; subclasses can call it
        via `super().update(...)` before applying their own update.

        Parameters
        ----------
        arm : int
            Integer index of the played arm in the set {0, ..., n_arms - 1}.

        context : float numpy.ndarray, shape (n_arms, n_dims), optional
            An array of context vectors that was presented to the MAB. The
            0-th axis indexes the arms, and the 1-st axis indexes the
            features. Non-contextual bandits accept a context of None.

        reward : float, optional
            Reward received from the arm. If None, the reward is missing.

        Raises
        ------
        ValueError
            If `arm` is outside {0, ..., n_arms - 1}.
        """
        if arm >= self.n_arms or arm < 0:
            raise ValueError("`arm` must be in the range "
                             "[0, {}]".format(self.n_arms - 1))
The function below implements off-policy evaluation as described in Appendix A of the project spec.
You should use it—along with the provided dataset—to evaluate the bandits in Tasks 1-4.
In [ ]:
def offline_eval(mab: MAB, arms: ndarray, rewards: ndarray, contexts: ndarray,
                 n_rounds: Optional[int] = None) -> Tuple[ndarray, ndarray]:
    """Offline evaluation of a multi-armed bandit

    Replays the logged events in order. For each event the bandit is asked to
    play on the logged context; only when its choice equals the logged arm is
    the event kept ("matched") and the bandit updated with the logged reward.

    Parameters
    ----------
    mab : instance of MAB
        MAB to evaluate.

    arms : int ndarray, shape (n_events,)
        Array containing the history of pulled arms, represented as integer
        indices in the set {0, ..., mab.n_arms - 1}

    rewards : float ndarray, shape (n_events,)
        Array containing the history of rewards. If a reward is missing, it
        should be represented by `np.nan`.

    contexts : float ndarray, shape (n_events, n_arms, n_dims)
        Array containing the history of contexts presented to the arms.
        The 0-th axis indexes the events in the history, the 1-st axis
        indexes the arms and the 2-nd axis indexes the features.

    n_rounds : int, optional
        Number of matching events to evaluate the MAB on. If None,
        continue evaluating until the historical events are exhausted.

    Returns
    -------
    matching_rewards : ndarray
        Rewards of matched events.

    matching_ids : ndarray
        Indices of matched events.

    Raises
    ------
    TypeError
        If `mab` is not an instance of MAB.
    ValueError
        If `n_rounds` is negative, or the input arrays have the wrong number
        of dimensions, inconsistent lengths, or out-of-range arm ids.
    """
    # Check types
    if not isinstance(mab, MAB):
        raise TypeError("`mab` must be an instance of MAB")
    arms = np.asarray(arms)
    rewards = np.asarray(rewards)
    contexts = np.asarray(contexts)
    if n_rounds is not None and n_rounds < 0:
        raise ValueError("`n_rounds` must be non-negative")

    # Check array dimensions
    if arms.ndim != 1:
        raise ValueError("`arms` must be a 1D array")
    if rewards.ndim != 1:
        raise ValueError("`rewards` must be a 1D array")
    if contexts.ndim != 3:
        raise ValueError("`contexts` must be a 3D array")
    if not (arms.shape[0] == rewards.shape[0] == contexts.shape[0]):
        raise ValueError("first dimension of input arrays are inconsistent")
    if contexts.shape[1] != mab.n_arms:
        raise ValueError("`contexts` has inconsistent second dimension")
    # `max`/`min` raise on zero-size arrays, and an empty history has no ids
    # to range-check anyway.
    if arms.size > 0 and (arms.max() >= mab.n_arms or arms.min() < 0):
        raise ValueError("`arms` contains ids that are out-of-range")

    matched: List[int] = []
    for i in range(arms.size):
        # `n_rounds=None` means "no cap"; comparing an int with None would
        # raise TypeError, so the cap is only tested when one was given.
        if n_rounds is not None and len(matched) >= n_rounds:
            break
        arm_id = mab.play(contexts[i])
        if arm_id == arms[i]:
            # Missing rewards are logged as NaN but passed to the MAB as None.
            reward = None if np.isnan(rewards[i]) else rewards[i]
            mab.update(arm_id, contexts[i], reward)
            matched.append(i)

    # An explicit integer dtype keeps the index array usable when nothing
    # matched (an empty list would otherwise become a float64 array, which
    # cannot be used for indexing).
    matched_ids = np.array(matched, dtype=int)
    return rewards[matched_ids], matched_ids
Please define any static functions/variables (used across multiple tasks) in the code block below.
In [ ]:
# Random generator used throughout: seeded with a fixed value and passed to
# each bandit constructed in the evaluation cells of this notebook.
rng = np.random.default_rng(90051)

# Define additional static functions/variables here, if required
Task 1: Implement LinUCB
In [ ]:
class LinUCB(MAB):
    """LinUCB

    Template for Task 1: the methods below are unimplemented placeholders.
    The class derives from `MAB`, the common bandit interface that the
    notebook requires task implementations to inherit from.

    Parameters
    ----------
    n_arms : int
        Number of arms.

    n_dims : int
        Number of features for each arm's context.

    alpha : float
        Positive real explore-exploit parameter.

    rng : Generator, optional
        A `Generator` used as an internal source of randomness. If None, a
        default `Generator` will be constructed using `np.random.default_rng`.
    """

    def __init__(self, n_arms: int, n_dims: int, alpha: float,
                 rng: Optional[Generator] = None) -> None:
        ...  # implement or remove this method

    def play(self, context: ndarray) -> int:
        ...  # implement or remove this method

    def update(self, arm: int, context: ndarray,
               reward: Optional[float] = None) -> None:
        ...  # implement or remove this method
In [ ]:
# Load dataset here
# Placeholders to be replaced with the logged history. `offline_eval` expects
# `arms` and `rewards` to be 1D arrays of length n_events and `contexts` to be
# a 3D array of shape (n_events, n_arms, n_dims).
arms = ...
rewards = ...
contexts = ...
In [ ]:
# Evaluate LinUCB (10 arms, 10 features per arm, alpha = 1.0) on the logged
# data, stopping after 800 matched events.
mab = LinUCB(10, 10, 1.0, rng)
LinUCB_rewards, _ = offline_eval(mab, arms, rewards, contexts, 800)
print('LinUCB average reward', np.mean(LinUCB_rewards))
In [ ]:
# Use additional cells here for experimentation
Task 2: Implement MLinUCB
In [ ]:
class MLinUCB(MAB):
    """MLinUCB

    Template for Task 2: the methods below are unimplemented placeholders.
    The class derives from `MAB`, the common bandit interface that the
    notebook requires task implementations to inherit from.

    Parameters
    ----------
    n_arms : int
        Number of arms.

    n_dims : int
        Number of features for each arm's context.

    alpha : float
        Positive real explore-exploit parameter.

    N : int
        Number of clusters

    m : int
        Number of closest clusters to use when imputing the reward.

    rng : Generator, optional
        A `Generator` used as an internal source of randomness. If None, a
        default `Generator` will be constructed using `np.random.default_rng`.
    """

    def __init__(self, n_arms: int, n_dims: int, alpha: float, N: int, m: int,
                 rng: Optional[Generator] = None) -> None:
        ...  # implement or remove this method

    def play(self, context: ndarray) -> int:
        ...  # implement or remove this method

    def update(self, arm: int, context: ndarray,
               reward: Optional[float] = None) -> None:
        ...  # implement or remove this method
In [ ]:
# Define rewards_missing here - the rewards array from above where missing values are represented as np.nan
rewards_missing = ...
# Alias for the name this placeholder was previously defined under.
missing_rewards = rewards_missing
In [ ]:
# Both bandits are trained on the rewards with missing values
# (`rewards_missing`), but the reported average is taken over the complete
# `rewards` array at the matched event indices.
mab = MLinUCB(10, 10, 1.0, 10, 3, rng)
MLinUCB_rewards, MLinUCB_ids = offline_eval(mab, arms, rewards_missing, contexts, 800)
print('MLinUCB average reward', np.mean(rewards[MLinUCB_ids]))

mab = LinUCB(10, 10, 1.0, rng)
LinUCB_rewards, LinUCB_ids = offline_eval(mab, arms, rewards_missing, contexts, 800)
print('LinUCB average reward', np.mean(rewards[LinUCB_ids]))
In [ ]:
# Use additional cells here for experimentation
Task 3: Implement SquareCB
In [ ]:
class SquareCB(MAB):
    """SquareCB with a logistic regression oracle

    Template for Task 3: the methods below are unimplemented placeholders.
    The class derives from `MAB`, the common bandit interface that the
    notebook requires task implementations to inherit from.

    Parameters
    ----------
    n_arms : int
        Number of arms.

    n_dims : int
        Number of features for each arm's context.

    gamma : float
        Learning rate parameter.

    rng : Generator, optional
        A `Generator` used as an internal source of randomness. If None, a
        default `Generator` will be constructed using `np.random.default_rng`.
    """

    def __init__(self, n_arms: int, n_dims: int, gamma: float,
                 rng: Optional[Generator] = None) -> None:
        ...  # implement or remove this method

    def play(self, context: ndarray) -> int:
        ...  # implement or remove this method

    def update(self, arm: int, context: ndarray,
               reward: Optional[float] = None) -> None:
        ...  # implement or remove this method
In [ ]:
# Evaluate SquareCB (10 arms, 10 features per arm, gamma = 18.0) on the logged
# data, stopping after 800 matched events.
mab = SquareCB(10, 10, 18.0, rng)
SquareCB_rewards, _ = offline_eval(mab, arms, rewards, contexts, 800)
print('SquareCB average reward', np.mean(SquareCB_rewards))
In [ ]:
# Use additional cells here for experimentation