change humanoid hyperparameters, get rid of clip_param annealing, as it's apparently dangerous
committed by Peter Zhokhov
parent 8c547e5973
commit 45063be393
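For context on the annealing being removed: in PPO1-style code the schedule argument controls a per-iteration learning-rate multiplier, and before this commit the same multiplier also scaled clip_param. A minimal sketch of that standard schedule logic (the function name lr_multiplier and its arguments are illustrative, not from this diff):

    def lr_multiplier(schedule, timesteps_so_far, max_timesteps):
        # Multiplier applied to the optimizer step size each iteration
        # (and, before this commit, to clip_param as well).
        if schedule == 'constant':
            return 1.0
        elif schedule == 'linear':
            return max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError(schedule)

Under a 'linear' schedule this multiplier decays to zero, so an annealed clip_param shrinks the trust region toward nothing late in training; that is presumably the "dangerous" interaction the commit message refers to.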
@@ -97,7 +97,6 @@ def learn(env, policy_fn, *,
     ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return

     lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
-    clip_param = clip_param * lrmult # Annealed clipping parameter epsilon

     ob = U.get_placeholder_cached(name="ob")
     ac = pi.pdtype.sample_placeholder([None])
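The deleted line was the only place clip_param was annealed; the parameter itself feeds the clipped surrogate objective a few lines below this hunk. A sketch of that objective under baselines-style conventions (logp_new, logp_old, and atarg stand in for tensors built elsewhere in learn):

    import tensorflow as tf

    def clipped_surrogate_loss(logp_new, logp_old, atarg, clip_param):
        # Probability ratio pi_new(a|s) / pi_old(a|s), formed in log space.
        ratio = tf.exp(logp_new - logp_old)
        surr1 = ratio * atarg  # unclipped surrogate
        surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
        # PPO ascends the pessimistic minimum of the two; the loss negates it.
        return -tf.reduce_mean(tf.minimum(surr1, surr2))

With the multiplication by lrmult gone, clip_param stays fixed at whatever value is passed to learn (0.1 for the humanoid below) instead of shrinking over the run.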
@@ -19,16 +19,17 @@ def train(num_timesteps, seed, model_path=None):
     # these are good enough to make humanoid walk, but whether those are
     # an absolute best or not is not certain
     env = RewScale(env, 0.1)
+    logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
     pi = pposgd_simple.learn(env, policy_fn,
             max_timesteps=num_timesteps,
             timesteps_per_actorbatch=2048,
-            clip_param=0.2, entcoeff=0.0,
+            clip_param=0.1, entcoeff=0.0,
             optim_epochs=10,
-            optim_stepsize=3e-4,
+            optim_stepsize=1e-4,
             optim_batchsize=64,
             gamma=0.99,
             lam=0.95,
-            schedule='linear',
+            schedule='constant',
         )
     env.close()
     if model_path:
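RewScale is not part of this diff; in this training script it is presumably a small gym.RewardWrapper along these lines (a sketch, not the verbatim class):

    import gym

    class RewScale(gym.RewardWrapper):
        """Scale every environment reward by a constant factor."""
        def __init__(self, env, scale):
            gym.RewardWrapper.__init__(self, env)
            self.scale = scale

        def reward(self, r):
            return r * self.scale

Scaling Humanoid rewards by 0.1 keeps value targets in a range the default network and the smaller 1e-4 step size handle well, and the logger.log call added above reminds users that logged episode rewards are consequently 10x smaller than the monitor's unscaled numbers.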