Merge pull request #14 from quanvuong/master

Consistent initial type (float) for episode_rewards
2017-06-04 00:40:42 -07:00
parent 9c10c2fc27 86054f7a98
commit 584261a94a
1 changed file with 1 addition and 1 deletion
--- a/baselines/deepq/simple.py
+++ b/baselines/deepq/simple.py
@@ -222,7 +222,7 @@ def learn(env,
            episode_rewards[-1] += rew
            if done:
                obs = env.reset()
-                episode_rewards.append(0)
+                episode_rewards.append(0.0)

            if t > learning_starts and t % train_freq == 0:
                # Minimize the error in Bellman's equation on a batch sampled from replay buffer.