rename entcoeff to ent_coef in trpo_mpi for compatibility with other algos (#581)
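The change is a pure keyword rename: trpo_mpi was spelling the entropy coefficient entcoeff while other algorithms such as ppo2 and a2c already accept ent_coef, so a shared hyperparameter dict now works across learners. A minimal before/after sketch of a call site, assuming the refactored baselines learn API (the environment and values here are illustrative, not part of the commit):

    import gym
    from baselines.trpo_mpi import trpo_mpi

    env = gym.make('CartPole-v1')

    # Before this commit, trpo_mpi alone used a different keyword:
    #   trpo_mpi.learn(network='mlp', env=env, total_timesteps=10000, entcoeff=0.01)

    # After the rename, the keyword matches the other algorithms:
    model = trpo_mpi.learn(network='mlp', env=env, total_timesteps=10000, ent_coef=0.01)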
@@ -92,7 +92,7 @@ def learn(*,
         gamma=0.99,
         lam=1.0, # advantage estimation
         seed=None,
-        entcoeff=0.0,
+        ent_coef=0.0,
         cg_damping=1e-2,
         vf_stepsize=3e-4,
         vf_iters =3,
@@ -117,7 +117,7 @@ def learn(*,

     max_kl                  max KL divergence between old policy and new policy ( KL(pi_old || pi) )

-    entcoeff                coefficient of policy entropy term in the optimization objective
+    ent_coef                coefficient of policy entropy term in the optimization objective

     cg_iters                number of iterations of conjugate gradient algorithm

@@ -182,7 +182,7 @@ def learn(*,
     ent = pi.pd.entropy()
     meankl = tf.reduce_mean(kloldnew)
     meanent = tf.reduce_mean(ent)
-    entbonus = entcoeff * meanent
+    entbonus = ent_coef * meanent

     vferr = tf.reduce_mean(tf.square(pi.vf - ret))
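In the last hunk, the renamed coefficient scales the batch-mean policy entropy; elsewhere in trpo_mpi the resulting entbonus is added to the surrogate policy gain, so a positive ent_coef rewards more exploratory policies and the default 0.0 leaves the plain TRPO objective unchanged. A NumPy sketch of that objective shape (surrgain and the addition step are paraphrased from the surrounding file, not shown in this hunk):

    import numpy as np

    def entropy_regularized_gain(surrgain, per_state_entropy, ent_coef=0.0):
        # Batch-mean entropy, as in: meanent = tf.reduce_mean(ent)
        meanent = np.mean(per_state_entropy)
        # Entropy bonus, as in: entbonus = ent_coef * meanent
        entbonus = ent_coef * meanent
        # With the default ent_coef == 0.0 the gain reduces to the plain surrogate.
        return surrgain + entbonus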