change humanoid hyperparameters, get rid of clip_param annealing, as it's apparently dangerous
committed by Peter Zhokhov
parent 8c547e5973
commit 45063be393
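For context on the annealing being removed: in PPO1-style code the schedule argument controls a per-iteration learning-rate multiplier, and before this commit the same multiplier also scaled clip_param. A minimal sketch of that standard schedule logic (the function name lr_multiplier and its arguments are illustrative, not from this diff):

    def lr_multiplier(schedule, timesteps_so_far, max_timesteps):
        # Multiplier applied to the optimizer step size each iteration
        # (and, before this commit, to clip_param as well).
        if schedule == 'constant':
            return 1.0
        elif schedule == 'linear':
            return max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError(schedule)

Under a 'linear' schedule this multiplier decays to zero, so an annealed clip_param shrinks the trust region toward nothing late in training; that is presumably the "dangerous" interaction the commit message refers to.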
@@ -97,7 +97,6 @@ def learn(env, policy_fn, *,
     ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return

     lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
-    clip_param = clip_param * lrmult # Annealed clipping parameter epsilon

     ob = U.get_placeholder_cached(name="ob")
     ac = pi.pdtype.sample_placeholder([None])
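The deleted line was the only place clip_param was annealed; the parameter itself feeds the clipped surrogate objective a few lines below this hunk. A sketch of that objective under baselines-style conventions (logp_new, logp_old, and atarg stand in for tensors built elsewhere in learn):

    import tensorflow as tf

    def clipped_surrogate_loss(logp_new, logp_old, atarg, clip_param):
        # Probability ratio pi_new(a|s) / pi_old(a|s), formed in log space.
        ratio = tf.exp(logp_new - logp_old)
        surr1 = ratio * atarg  # unclipped surrogate
        surr2 = tf.clip_by_value(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg
        # PPO ascends the pessimistic minimum of the two; the loss negates it.
        return -tf.reduce_mean(tf.minimum(surr1, surr2))

With the multiplication by lrmult gone, clip_param stays fixed at whatever value is passed to learn (0.1 for the humanoid below) instead of shrinking over the run.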
@@ -19,16 +19,17 @@ def train(num_timesteps, seed, model_path=None):
     # these are good enough to make humanoid walk, but whether those are
     # an absolute best or not is not certain
     env = RewScale(env, 0.1)
+    logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
     pi = pposgd_simple.learn(env, policy_fn,
             max_timesteps=num_timesteps,
             timesteps_per_actorbatch=2048,
-            clip_param=0.2, entcoeff=0.0,
+            clip_param=0.1, entcoeff=0.0,
             optim_epochs=10,
-            optim_stepsize=3e-4,
+            optim_stepsize=1e-4,
             optim_batchsize=64,
             gamma=0.99,
             lam=0.95,
-            schedule='linear',
+            schedule='constant',
         )
     env.close()
     if model_path:
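RewScale is not part of this diff; in this training script it is presumably a small gym.RewardWrapper along these lines (a sketch, not the verbatim class):

    import gym

    class RewScale(gym.RewardWrapper):
        """Scale every environment reward by a constant factor."""
        def __init__(self, env, scale):
            gym.RewardWrapper.__init__(self, env)
            self.scale = scale

        def reward(self, r):
            return r * self.scale

Scaling Humanoid rewards by 0.1 keeps value targets in a range the default network and the smaller 1e-4 step size handle well, and the logger.log call added above reminds users that logged episode rewards are consequently 10x smaller than the monitor's unscaled numbers.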