Fix scoping of variables (#670)

Co-authored-by: Martin van Hensbergen <martin@mvhensbergen.com>
Author: mvhensbergen
Date: 2023-08-13 18:03:05 +02:00
Committed by: GitHub
Parent: f26cbe13e9
Commit: 7012d22902


@@ -176,6 +176,7 @@ observation, reward, terminated, truncated, info = env.step(action)
 class BlackjackAgent:
     def __init__(
         self,
+        env,
         learning_rate: float,
         initial_epsilon: float,
         epsilon_decay: float,
@@ -203,7 +204,7 @@ class BlackjackAgent:
         self.training_error = []

-    def get_action(self, obs: tuple[int, int, bool]) -> int:
+    def get_action(self, env, obs: tuple[int, int, bool]) -> int:
         """
         Returns the best action with probability (1 - epsilon)
         otherwise a random action with probability epsilon to ensure exploration.
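
For reference, this is roughly what the epsilon-greedy body behind the new signature looks like. A sketch only, assuming the agent stores per-observation action values in a q_values mapping and that numpy is imported as np, as in the surrounding tutorial:

    def get_action(self, env, obs: tuple[int, int, bool]) -> int:
        # With probability epsilon, explore: sample uniformly from the
        # environment's action space (the reason env is now a parameter).
        if np.random.random() < self.epsilon:
            return env.action_space.sample()
        # Otherwise exploit: pick the action with the highest learned Q-value.
        return int(np.argmax(self.q_values[obs]))

Passing env in explicitly keeps the method free of the module-level env global, which is the pattern this commit applies throughout.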
@@ -236,7 +237,7 @@ class BlackjackAgent:
         self.training_error.append(temporal_difference)

     def decay_epsilon(self):
-        self.epsilon = max(self.final_epsilon, self.epsilon - epsilon_decay)
+        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)

 # %%
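
The decay_epsilon change is the heart of the scoping fix: inside a method, the bare name epsilon_decay never resolves to the instance attribute; Python looks it up in the enclosing module scope, so the old code silently read the global defined by the training script (and would raise NameError if no such global existed). A minimal, self-contained illustration of the pitfall, with hypothetical names:

    epsilon_decay = 0.5  # module-level global, like the one in the training script

    class Agent:
        def __init__(self, epsilon_decay: float):
            self.epsilon = 1.0
            self.epsilon_decay = epsilon_decay  # instance attribute

        def decay_buggy(self):
            # Bare name: picks up the module-level 0.5 and silently
            # ignores the value passed to the constructor.
            return self.epsilon - epsilon_decay

        def decay_fixed(self):
            # Attribute access: uses the value this instance was built with.
            return self.epsilon - self.epsilon_decay

    agent = Agent(epsilon_decay=0.01)
    print(agent.decay_buggy())  # 0.5  -- used the global
    print(agent.decay_fixed())  # 0.99 -- used the instance attribute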
@@ -258,6 +259,7 @@ epsilon_decay = start_epsilon / (n_episodes / 2)  # reduce the exploration over time
 final_epsilon = 0.1

 agent = BlackjackAgent(
+    env=env,
     learning_rate=learning_rate,
     initial_epsilon=start_epsilon,
     epsilon_decay=epsilon_decay,
@@ -280,7 +282,7 @@ for episode in tqdm(range(n_episodes)):

     # play one episode
     while not done:
-        action = agent.get_action(obs)
+        action = agent.get_action(env, obs)
         next_obs, reward, terminated, truncated, info = env.step(action)

         # update the agent
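
Taken together, the fixed pieces wire up as below. A sketch only: the values of n_episodes and learning_rate, the update() signature, and the sab=True flag are assumptions carried over from the standard version of this tutorial, not part of this diff:

    import gymnasium as gym
    from tqdm import tqdm

    env = gym.make("Blackjack-v1", sab=True)

    learning_rate = 0.01          # assumed value
    n_episodes = 100_000          # assumed value
    start_epsilon = 1.0
    epsilon_decay = start_epsilon / (n_episodes / 2)  # reduce the exploration over time
    final_epsilon = 0.1

    agent = BlackjackAgent(
        env=env,                  # env is now a constructor argument
        learning_rate=learning_rate,
        initial_epsilon=start_epsilon,
        epsilon_decay=epsilon_decay,
        final_epsilon=final_epsilon,
    )

    for episode in tqdm(range(n_episodes)):
        obs, info = env.reset()
        done = False

        # play one episode
        while not done:
            action = agent.get_action(env, obs)  # env passed explicitly
            next_obs, reward, terminated, truncated, info = env.step(action)

            # update the agent (signature assumed from the tutorial)
            agent.update(obs, action, reward, terminated, next_obs)

            done = terminated or truncated
            obs = next_obs

        agent.decay_epsilon()  # now reads self.epsilon_decay, not the global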