From 86054f7a980e4738f2b38de3465bbf55990cda84 Mon Sep 17 00:00:00 2001 From: Quan Vuong Date: Tue, 30 May 2017 11:49:25 +0800 Subject: [PATCH] Consistent initial type (float) for episode_rewards --- baselines/deepq/simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/deepq/simple.py b/baselines/deepq/simple.py index 2e90d71..a4f3ab3 100644 --- a/baselines/deepq/simple.py +++ b/baselines/deepq/simple.py @@ -222,7 +222,7 @@ def learn(env, episode_rewards[-1] += rew if done: obs = env.reset() - episode_rewards.append(0) + episode_rewards.append(0.0) if t > learning_starts and t % train_freq == 0: # Minimize the error in Bellman's equation on a batch sampled from replay buffer.