Change humanoid hyperparameters and get rid of clip_param annealing, as it's apparently dangerous

This commit is contained in:
John Schulman
2019-01-17 16:34:49 -08:00
committed by Peter Zhokhov
parent 8c547e5973
commit 45063be393
2 changed files with 4 additions and 4 deletions

View File

@@ -97,7 +97,6 @@ def learn(env, policy_fn, *,
ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return
lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
clip_param = clip_param * lrmult # Annealed clipping parameter epsilon
ob = U.get_placeholder_cached(name="ob")
ac = pi.pdtype.sample_placeholder([None])

View File

@@ -19,16 +19,17 @@ def train(num_timesteps, seed, model_path=None):
# these are good enough to make humanoid walk, but whether those are
# an absolute best or not is not certain
env = RewScale(env, 0.1)
logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
pi = pposgd_simple.learn(env, policy_fn,
max_timesteps=num_timesteps,
timesteps_per_actorbatch=2048,
clip_param=0.2, entcoeff=0.0,
clip_param=0.1, entcoeff=0.0,
optim_epochs=10,
optim_stepsize=3e-4,
optim_stepsize=1e-4,
optim_batchsize=64,
gamma=0.99,
lam=0.95,
schedule='linear',
schedule='constant',
)
env.close()
if model_path: