
CS6700 : Tutorial 1 - Multi-Arm Bandits



Goal: Analyze three types of sampling strategies in a multi-arm bandit (MAB) setting.

Import dependencies


# !pip install seaborn

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import NamedTuple, List

Gaussian Bandit Environment


class GaussianArm(NamedTuple):
    mean: float
    std: float

class Env:
    def __init__(self, num_arms: int, mean_reward_range: tuple, std: float):
        """
        num_arms: number of bandit arms
        mean_reward_range: mean reward of an arm should lie within the given range
        std: standard deviation of the reward for each arm
        """
        self.num_arms = num_arms
        self.arms = self.create_arms(num_arms, mean_reward_range, std)

    def create_arms(self, n: int, mean_reward_range: tuple, std: float) -> dict:
        low_rwd, high_rwd = mean_reward_range
        # create "n" mean rewards, one per arm
        means = np.random.uniform(low=low_rwd, high=high_rwd, size=(n,))
        arms = {id: GaussianArm(mu, std) for id, mu in enumerate(means)}
        return arms

    @property
    def arm_ids(self):
        return list(self.arms.keys())

    def step(self, arm_id: int) -> float:
        arm = self.arms[arm_id]
        return np.random.normal(arm.mean, arm.std)  # Reward

    def get_best_arm_and_expected_reward(self):
        best_arm_id = max(self.arms, key=lambda x: self.arms[x].mean)
        return best_arm_id, self.arms[best_arm_id].mean

    def get_avg_arm_reward(self):
        arm_mean_rewards = [v.mean for v in self.arms.values()]
        return np.mean(arm_mean_rewards)

    def plot_arms_reward_distribution(self, num_samples=1000):
        """
        This function is only used to visualize each arm's reward distribution.
        """
        fig, ax = plt.subplots(1, 1, sharex=False, sharey=False, figsize=(9, 5))
        colors = sns.color_palette("hls", self.num_arms)
        for i, arm_id in enumerate(self.arm_ids):
            reward_samples = [self.step(arm_id) for _ in range(num_samples)]
            sns.histplot(reward_samples, ax=ax, stat="density", kde=True, bins=100, color=colors[i], label=f'arm_{arm_id}')
        ax.legend()
        plt.show()


Policy

class BasePolicy:
    @property
    def name(self):
        return 'base_policy'

    def reset(self):
        """
        This function resets the internal variables.
        """
        pass

    def update_arm(self, *args):
        """
        This function keeps track of the estimates
        that we may want to update during training.
        """
        pass

    def select_arm(self) -> int:
        """
        Returns an arm_id.
        """
        raise NotImplementedError

Random Policy

class RandomPolicy(BasePolicy):
    def __init__(self, arm_ids: List[int]):
        self.arm_ids = arm_ids

    @property
    def name(self):
        return 'random'

    def reset(self) -> None:
        """No use."""
        pass

    def update_arm(self, *args) -> None:
        """No use."""
        pass

    def select_arm(self) -> int:
        return np.random.choice(self.arm_ids)

class EpGreedyPolicy(BasePolicy):
    def __init__(self, epsilon: float, arm_ids: List[int]):
        self.epsilon = epsilon
        self.arm_ids = arm_ids
        self.Q = {id: 0 for id in self.arm_ids}
        self.num_pulls_per_arm = {id: 0 for id in self.arm_ids}

    @property
    def name(self):
        return f'ep-greedy ep:{self.epsilon}'

    def reset(self) -> None:
        self.Q = {id: 0 for id in self.arm_ids}
        self.num_pulls_per_arm = {id: 0 for id in self.arm_ids}

    def update_arm(self, arm_id: int, arm_reward: float) -> None:
        # your code for updating the Q values of each arm
        pass

    def select_arm(self) -> int:
        # your code for selecting arm based on epsilon greedy policy
        pass
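
A minimal sketch of how the two stubs above could be filled in, assuming incremental sample-average Q estimates; the subclass name EpGreedySketch is ours, and this is one reasonable answer rather than the reference solution.

# Hypothetical sketch (not the reference solution): sample-average Q updates
# plus epsilon-greedy action selection on top of the EpGreedyPolicy stub.
class EpGreedySketch(EpGreedyPolicy):
    def update_arm(self, arm_id: int, arm_reward: float) -> None:
        # incremental sample average: Q <- Q + (r - Q) / n
        self.num_pulls_per_arm[arm_id] += 1
        n = self.num_pulls_per_arm[arm_id]
        self.Q[arm_id] += (arm_reward - self.Q[arm_id]) / n

    def select_arm(self) -> int:
        # explore uniformly with probability epsilon, otherwise exploit the best estimate
        if np.random.random() < self.epsilon:
            return np.random.choice(self.arm_ids)
        return max(self.Q, key=self.Q.get)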

class SoftmaxPolicy(BasePolicy):
    def __init__(self, tau, arm_ids):
        self.tau = tau
        self.arm_ids = arm_ids
        self.Q = {id: 0 for id in self.arm_ids}
        self.num_pulls_per_arm = {id: 0 for id in self.arm_ids}

    @property
    def name(self):
        return f'softmax tau:{self.tau}'

    def reset(self):
        self.Q = {id: 0 for id in self.arm_ids}
        self.num_pulls_per_arm = {id: 0 for id in self.arm_ids}

    def update_arm(self, arm_id: int, arm_reward: float) -> None:
        # your code for updating the Q values of each arm
        pass

    def select_arm(self) -> int:
        # your code for selecting arm based on softmax policy
        pass
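
A sketch of one way to complete the softmax stubs: the same sample-average update, and Boltzmann action selection over Q/tau with the maximum subtracted for numerical stability. The subclass name SoftmaxSketch is ours.

# Hypothetical sketch (not the reference solution): Boltzmann exploration
# over the current Q estimates, on top of the SoftmaxPolicy stub.
class SoftmaxSketch(SoftmaxPolicy):
    def update_arm(self, arm_id: int, arm_reward: float) -> None:
        # incremental sample average, as in the epsilon-greedy sketch
        self.num_pulls_per_arm[arm_id] += 1
        n = self.num_pulls_per_arm[arm_id]
        self.Q[arm_id] += (arm_reward - self.Q[arm_id]) / n

    def select_arm(self) -> int:
        # sample an arm with probability proportional to exp(Q / tau)
        prefs = np.array([self.Q[id] for id in self.arm_ids]) / self.tau
        prefs -= prefs.max()  # shift by the max for numerical stability
        probs = np.exp(prefs) / np.exp(prefs).sum()
        return np.random.choice(self.arm_ids, p=probs)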

class UCB(BasePolicy):
    # your code here
    pass
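
For the UCB stub, a UCB1-style sketch under the usual assumptions (pull every arm once, then maximize Q plus a confidence bonus). The class name UCBSketch and the exploration constant c are our choices; the cell at the end of the notebook that constructs UCB() expects your own completed class.

# Hypothetical UCB1-style sketch (not the reference solution):
# select argmax_a [ Q(a) + c * sqrt(ln(t) / n(a)) ] after trying every arm once.
class UCBSketch(BasePolicy):
    def __init__(self, arm_ids: List[int], c: float = 2.0):
        self.arm_ids = arm_ids
        self.c = c
        self.reset()

    @property
    def name(self):
        return f'ucb c:{self.c}'

    def reset(self) -> None:
        self.t = 0
        self.Q = {id: 0.0 for id in self.arm_ids}
        self.num_pulls_per_arm = {id: 0 for id in self.arm_ids}

    def update_arm(self, arm_id: int, arm_reward: float) -> None:
        # incremental sample average, as in the sketches above
        self.num_pulls_per_arm[arm_id] += 1
        n = self.num_pulls_per_arm[arm_id]
        self.Q[arm_id] += (arm_reward - self.Q[arm_id]) / n

    def select_arm(self) -> int:
        self.t += 1
        # make sure every arm has been pulled at least once
        for arm_id in self.arm_ids:
            if self.num_pulls_per_arm[arm_id] == 0:
                return arm_id
        # otherwise pick the arm with the largest upper confidence bound
        ucb = {
            arm_id: self.Q[arm_id]
            + self.c * np.sqrt(np.log(self.t) / self.num_pulls_per_arm[arm_id])
            for arm_id in self.arm_ids
        }
        return max(ucb, key=ucb.get)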

Trainer

def train(env, policy: BasePolicy, timesteps):
    policy_reward = np.zeros((timesteps,))
    for t in range(timesteps):
        arm_id = policy.select_arm()
        reward = env.step(arm_id)
        policy.update_arm(arm_id, reward)
        policy_reward[t] = reward
    return policy_reward

def avg_over_runs(env, policy: BasePolicy, timesteps, num_runs):
    _, expected_max_reward = env.get_best_arm_and_expected_reward()
    policy_reward_each_run = np.zeros((num_runs, timesteps))
    for run in range(num_runs):
        policy.reset()
        policy_reward = train(env, policy, timesteps)
        policy_reward_each_run[run, :] = policy_reward

    # calculate avg policy reward from policy_reward_each_run
    avg_policy_rewards = None  # your code here (type: np.ndarray, shape: (timesteps,))
    total_policy_regret = None  # your code here (type: float)

    return avg_policy_rewards, total_policy_regret
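
A sketch of the two placeholders, under our reading that regret per run is the shortfall relative to always pulling the best arm, averaged over runs; avg_over_runs_sketch is our name for this hypothetical completed version.

# Hypothetical completed version of avg_over_runs (not the reference solution).
def avg_over_runs_sketch(env, policy: BasePolicy, timesteps, num_runs):
    _, expected_max_reward = env.get_best_arm_and_expected_reward()
    policy_reward_each_run = np.zeros((num_runs, timesteps))
    for run in range(num_runs):
        policy.reset()
        policy_reward_each_run[run, :] = train(env, policy, timesteps)

    # average reward at each timestep, taken across runs
    avg_policy_rewards = policy_reward_each_run.mean(axis=0)
    # total regret: expected best-arm return minus realized return, averaged over runs
    total_policy_regret = float(
        (expected_max_reward * timesteps - policy_reward_each_run.sum(axis=1)).mean()
    )
    return avg_policy_rewards, total_policy_regret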

def plot_reward_curve_and_print_regret(env, policies, timesteps=200, num_runs=500):
    fig, ax = plt.subplots(1, 1, sharex=False, sharey=False, figsize=(10, 6))
    for policy in policies:
        avg_policy_rewards, total_policy_regret = avg_over_runs(env, policy, timesteps, num_runs)
        print('regret for {}: {:.3f}'.format(policy.name, total_policy_regret))
        ax.plot(np.arange(timesteps), avg_policy_rewards, '-', label=policy.name)

    _, expected_max_reward = env.get_best_arm_and_expected_reward()
    ax.plot(np.arange(timesteps), [expected_max_reward]*timesteps, 'g-')

    avg_arm_reward = env.get_avg_arm_reward()
    ax.plot(np.arange(timesteps), [avg_arm_reward]*timesteps, 'r-')

    plt.legend(loc='lower right')
    plt.show()

Experiments
seed = 42
np.random.seed(seed)

num_arms = 5
mean_reward_range = (-25, 25)
std = 2.0

env = Env(num_arms, mean_reward_range, std)

env.plot_arms_reward_distribution()

best_arm, max_mean_reward = env.get_best_arm_and_expected_reward()


print(best_arm, max_mean_reward)

1 22.53571532049581

print(env.get_avg_arm_reward())

3.119254917081568

Please explore the following values:

Epsilon greedy: [0.001, 0.01, 0.5, 0.9]


Softmax: [0.001, 1.0, 5.0, 50.0]

random_policy = RandomPolicy(env.arm_ids)
plot_reward_curve_and_print_regret(env, [random_policy], timesteps=200, num_runs=500)

regret for random: 3883.660

explore_epgreedy_epsilons = [0.001, 0.01, 0.5, 0.9]


epgreedy_policies = [EpGreedyPolicy(ep, env.arm_ids) for ep in explore_epgreedy_epsilons]
plot_reward_curve_and_print_regret(env, epgreedy_policies, timesteps=200, num_runs=500)


regret for ep-greedy ep:0.001: 31.418
regret for ep-greedy ep:0.01: 85.106
regret for ep-greedy ep:0.5: 1979.134
regret for ep-greedy ep:0.9: 3515.911

explore_softmax_taus = [0.001, 1.0, 5.0, 50.0]


softmax_polices = [SoftmaxPolicy(tau, env.arm_ids) for tau in explore_softmax_taus]
plot_reward_curve_and_print_regret(env, softmax_polices, timesteps=200, num_runs=500)

regret for softmax tau:0.001: 1919.966
regret for softmax tau:1.0: 1307.562
regret for softmax tau:5.0: 414.835
regret for softmax tau:50.0: 3169.759

plot_reward_curve_and_print_regret(env, [UCB()], timesteps=200, num_runs=500)

Optional: Please explore different values of epsilon and tau, and verify how the behaviour changes.
