Change humanoid hyperparameters and remove clip_param annealing (scaling the clipping parameter by the learning-rate multiplier), as it is apparently dangerous
This commit is contained in:
committed by
Peter Zhokhov
parent
8c547e5973
commit
45063be393
@@ -97,7 +97,6 @@ def learn(env, policy_fn, *,
|
|||||||
ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return
|
ret = tf.placeholder(dtype=tf.float32, shape=[None]) # Empirical return
|
||||||
|
|
||||||
lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
|
lrmult = tf.placeholder(name='lrmult', dtype=tf.float32, shape=[]) # learning rate multiplier, updated with schedule
|
||||||
clip_param = clip_param * lrmult # Annealed clipping parameter epsilon
|
|
||||||
|
|
||||||
ob = U.get_placeholder_cached(name="ob")
|
ob = U.get_placeholder_cached(name="ob")
|
||||||
ac = pi.pdtype.sample_placeholder([None])
|
ac = pi.pdtype.sample_placeholder([None])
|
||||||
|
@@ -19,16 +19,17 @@ def train(num_timesteps, seed, model_path=None):
|
|||||||
# these are good enough to make humanoid walk, but whether those are
|
# these are good enough to make humanoid walk, but whether those are
|
||||||
# an absolute best or not is not certain
|
# an absolute best or not is not certain
|
||||||
env = RewScale(env, 0.1)
|
env = RewScale(env, 0.1)
|
||||||
|
logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
|
||||||
pi = pposgd_simple.learn(env, policy_fn,
|
pi = pposgd_simple.learn(env, policy_fn,
|
||||||
max_timesteps=num_timesteps,
|
max_timesteps=num_timesteps,
|
||||||
timesteps_per_actorbatch=2048,
|
timesteps_per_actorbatch=2048,
|
||||||
clip_param=0.2, entcoeff=0.0,
|
clip_param=0.1, entcoeff=0.0,
|
||||||
optim_epochs=10,
|
optim_epochs=10,
|
||||||
optim_stepsize=3e-4,
|
optim_stepsize=1e-4,
|
||||||
optim_batchsize=64,
|
optim_batchsize=64,
|
||||||
gamma=0.99,
|
gamma=0.99,
|
||||||
lam=0.95,
|
lam=0.95,
|
||||||
schedule='linear',
|
schedule='constant',
|
||||||
)
|
)
|
||||||
env.close()
|
env.close()
|
||||||
if model_path:
|
if model_path:
|
||||||
|
Reference in New Issue
Block a user