From f49a9c3d857201e4e6c2e69cdf4246269c3d84ce Mon Sep 17 00:00:00 2001
From: Daniel Ziegler
Date: Thu, 1 Mar 2018 09:00:34 -0800
Subject: [PATCH] Fix bug in DDPG parameter space noise adaptation (#306)

The training loop used the rollout step variable `t` rather than the training
step variable `t_train` to decide when to adapt the scale of the parameter
space noise.
---
 baselines/ddpg/training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baselines/ddpg/training.py b/baselines/ddpg/training.py
index c30f720..35388a2 100644
--- a/baselines/ddpg/training.py
+++ b/baselines/ddpg/training.py
@@ -109,7 +109,7 @@ def train(env, nb_epochs, nb_epoch_cycles, render_eval, reward_scale, render, pa
                 epoch_adaptive_distances = []
                 for t_train in range(nb_train_steps):
                     # Adapt param noise, if necessary.
-                    if memory.nb_entries >= batch_size and t % param_noise_adaption_interval == 0:
+                    if memory.nb_entries >= batch_size and t_train % param_noise_adaption_interval == 0:
                         distance = agent.adapt_param_noise()
                         epoch_adaptive_distances.append(distance)
 