From f49a9c3d857201e4e6c2e69cdf4246269c3d84ce Mon Sep 17 00:00:00 2001
From: Daniel Ziegler <dmz@mit.edu>
Date: Thu, 1 Mar 2018 09:00:34 -0800
Subject: [PATCH] Fix bug in DDPG parameter space noise adaptation (#306)

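The training loop checked the rollout step variable `t` rather than
its own training step variable `t_train` when deciding whether to adapt
the scale of the parameter space noise, so the adaptation interval
(`param_noise_adaption_interval`) was measured against the wrong
counter.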
---
 baselines/ddpg/training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baselines/ddpg/training.py b/baselines/ddpg/training.py
index c30f720..35388a2 100644
--- a/baselines/ddpg/training.py
+++ b/baselines/ddpg/training.py
@@ -109,7 +109,7 @@ def train(env, nb_epochs, nb_epoch_cycles, render_eval, reward_scale, render, pa
                 epoch_adaptive_distances = []
                 for t_train in range(nb_train_steps):
                     # Adapt param noise, if necessary.
-                    if memory.nb_entries >= batch_size and t % param_noise_adaption_interval == 0:
+                    if memory.nb_entries >= batch_size and t_train % param_noise_adaption_interval == 0:
                         distance = agent.adapt_param_noise()
                         epoch_adaptive_distances.append(distance)