diff --git a/baselines/deepq/simple.py b/baselines/deepq/simple.py index 2e90d71..a4f3ab3 100644 --- a/baselines/deepq/simple.py +++ b/baselines/deepq/simple.py @@ -222,7 +222,7 @@ def learn(env, episode_rewards[-1] += rew if done: obs = env.reset() - episode_rewards.append(0) + episode_rewards.append(0.0) if t > learning_starts and t % train_freq == 0: # Minimize the error in Bellman's equation on a batch sampled from replay buffer.