From ad43fd9a35f3848f5c22aea7a91dc4021ca3f9fd Mon Sep 17 00:00:00 2001
From: Peter Zhokhov
Date: Wed, 1 Aug 2018 16:15:59 -0700
Subject: [PATCH] add defaults

---
NOTE(review): this patch adds per-algorithm default hyperparameter
dictionaries (functions named after the environment family: atari, mujoco).
baselines/trpo_mpi/defaults.py imports `mlp` and `cnn_small` from
`rl_common.models`, while every file touched here lives under `baselines/`;
confirm that this module path resolves in the target tree before applying.

 baselines/acer/defaults.py     |  4 ++++
 baselines/ppo2/defaults.py     | 22 ++++++++++++++++++++++
 baselines/trpo_mpi/defaults.py | 30 ++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)
 create mode 100644 baselines/acer/defaults.py
 create mode 100644 baselines/ppo2/defaults.py
 create mode 100644 baselines/trpo_mpi/defaults.py

diff --git a/baselines/acer/defaults.py b/baselines/acer/defaults.py
new file mode 100644
index 0000000..0334bae
--- /dev/null
+++ b/baselines/acer/defaults.py
@@ -0,0 +1,4 @@
+def atari():
+    return dict(
+        lrschedule='constant'
+    )
diff --git a/baselines/ppo2/defaults.py b/baselines/ppo2/defaults.py
new file mode 100644
index 0000000..f8c23a5
--- /dev/null
+++ b/baselines/ppo2/defaults.py
@@ -0,0 +1,22 @@
+def mujoco():
+    return dict(
+        nsteps=2048,
+        nminibatches=32,
+        lam=0.95,
+        gamma=0.99,
+        noptepochs=10,
+        log_interval=1,
+        ent_coef=0.0,
+        lr=3e-4,
+        cliprange=0.2,
+        value_network='copy'
+    )
+
+def atari():
+    return dict(
+        nsteps=128, nminibatches=4,
+        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
+        ent_coef=.01,
+        lr=lambda f : f * 2.5e-4,
+        cliprange=lambda f : f * 0.1,
+    )
diff --git a/baselines/trpo_mpi/defaults.py b/baselines/trpo_mpi/defaults.py
new file mode 100644
index 0000000..96b6cb3
--- /dev/null
+++ b/baselines/trpo_mpi/defaults.py
@@ -0,0 +1,30 @@
+from rl_common.models import mlp, cnn_small
+
+
+def atari():
+    return dict(
+        network = cnn_small(),
+        timesteps_per_batch=512,
+        max_kl=0.001,
+        cg_iters=10,
+        cg_damping=1e-3,
+        gamma=0.98,
+        lam=1.0,
+        vf_iters=3,
+        vf_stepsize=1e-4,
+        entcoeff=0.00,
+    )
+
+def mujoco():
+    return dict(
+        network = mlp(num_hidden=32, num_layers=2),
+        timesteps_per_batch=1024,
+        max_kl=0.01,
+        cg_iters=10,
+        cg_damping=0.1,
+        gamma=0.99,
+        lam=0.98,
+        vf_iters=5,
+        vf_stepsize=1e-3,
+        normalize_observations=True,
+    )