From 59662fff7837fb051e5d5ad57689807b5d11de0a Mon Sep 17 00:00:00 2001
From: pzhokhov <peterzhokhoff@gmail.com>
Date: Tue, 18 Sep 2018 14:13:05 -0700
Subject: [PATCH] rename entcoeff to ent_coef in trpo_mpi for compatibility
 with other algos (#581)

---
 baselines/trpo_mpi/trpo_mpi.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py
index 2e49ab6..ec0a991 100644
--- a/baselines/trpo_mpi/trpo_mpi.py
+++ b/baselines/trpo_mpi/trpo_mpi.py
@@ -92,7 +92,7 @@ def learn(*,
         gamma=0.99,
         lam=1.0, # advantage estimation
         seed=None,
-        entcoeff=0.0,
+        ent_coef=0.0,
         cg_damping=1e-2,
         vf_stepsize=3e-4,
         vf_iters =3,
@@ -117,7 +117,7 @@ def learn(*,
 
     max_kl                  max KL divergence between old policy and new policy ( KL(pi_old || pi) )
 
-    entcoeff                coefficient of policy entropy term in the optimization objective
+    ent_coef                coefficient of policy entropy term in the optimization objective
 
     cg_iters                number of iterations of conjugate gradient algorithm
 
@@ -182,7 +182,7 @@ def learn(*,
     ent = pi.pd.entropy()
     meankl = tf.reduce_mean(kloldnew)
     meanent = tf.reduce_mean(ent)
-    entbonus = entcoeff * meanent
+    entbonus = ent_coef * meanent
 
     vferr = tf.reduce_mean(tf.square(pi.vf - ret))