From 59662fff7837fb051e5d5ad57689807b5d11de0a Mon Sep 17 00:00:00 2001 From: pzhokhov Date: Tue, 18 Sep 2018 14:13:05 -0700 Subject: [PATCH] rename entcoeff to ent_coef in trpo_mpi for compatibility with other algos (#581) --- baselines/trpo_mpi/trpo_mpi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index 2e49ab6..ec0a991 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -92,7 +92,7 @@ def learn(*, gamma=0.99, lam=1.0, # advantage estimation seed=None, - entcoeff=0.0, + ent_coef=0.0, cg_damping=1e-2, vf_stepsize=3e-4, vf_iters =3, @@ -117,7 +117,7 @@ def learn(*, max_kl max KL divergence between old policy and new policy ( KL(pi_old || pi) ) - entcoeff coefficient of policy entropy term in the optimization objective + ent_coef coefficient of policy entropy term in the optimization objective cg_iters number of iterations of conjugate gradient algorithm @@ -182,7 +182,7 @@ def learn(*, ent = pi.pd.entropy() meankl = tf.reduce_mean(kloldnew) meanent = tf.reduce_mean(ent) - entbonus = entcoeff * meanent + entbonus = ent_coef * meanent vferr = tf.reduce_mean(tf.square(pi.vf - ret))