Fix atari wrapper (affecting a2c perf) and pposgd mujoco performance
- removed vf clipping in pposgd - that was severely degrading performance on mujoco because it didn't account for scale of returns - switched adam epsilon in pposgd_simple - brought back no-ops in atari wrapper (oops) - added readmes - revamped run_X_benchmark scripts to have standard form - cleaned up DDPG a little, removed deprecated SimpleMonitor and non-idiomatic usage of logger
This commit is contained in:
5
baselines/ddpg/README.md
Normal file
5
baselines/ddpg/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# DDPG
|
||||
|
||||
- Original paper: https://arxiv.org/abs/1509.02971
|
||||
- Baselines post: https://blog.openai.com/better-exploration-with-parameter-noise/
|
||||
- `python -m baselines.ddpg.main` runs the algorithm for 1M frames = 10M timesteps on a MuJoCo environment. See help (`-h`) for more options.
|
@@ -1,19 +1,11 @@
|
||||
import argparse
|
||||
import time
|
||||
import os
|
||||
from tempfile import mkdtemp
|
||||
import sys
|
||||
import subprocess
|
||||
import threading
|
||||
import json
|
||||
|
||||
from baselines.common.mpi_fork import mpi_fork
|
||||
from baselines import logger
|
||||
from baselines.logger import Logger
|
||||
import logging
|
||||
from baselines import logger, bench
|
||||
from baselines.common.misc_util import (
|
||||
set_global_seeds,
|
||||
boolean_flag,
|
||||
SimpleMonitor
|
||||
)
|
||||
import baselines.ddpg.training as training
|
||||
from baselines.ddpg.models import Actor, Critic
|
||||
@@ -24,42 +16,22 @@ import gym
|
||||
import tensorflow as tf
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def run(env_id, seed, noise_type, num_cpu, layer_norm, logdir, gym_monitor, evaluation, bind_to_core, **kwargs):
|
||||
kwargs['logdir'] = logdir
|
||||
whoami = mpi_fork(num_cpu, bind_to_core=bind_to_core)
|
||||
if whoami == 'parent':
|
||||
sys.exit(0)
|
||||
|
||||
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
|
||||
# Configure things.
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
if rank != 0:
|
||||
# Write to temp directory for all non-master workers.
|
||||
actual_dir = None
|
||||
Logger.CURRENT.close()
|
||||
Logger.CURRENT = Logger(dir=mkdtemp(), output_formats=[])
|
||||
logger.set_level(logger.DISABLED)
|
||||
|
||||
# Create envs.
|
||||
if rank == 0:
|
||||
env = gym.make(env_id)
|
||||
if gym_monitor and logdir:
|
||||
env = gym.wrappers.Monitor(env, os.path.join(logdir, 'gym_train'), force=True)
|
||||
env = SimpleMonitor(env)
|
||||
if rank != 0: logger.set_level(logger.DISABLED)
|
||||
|
||||
if evaluation:
|
||||
eval_env = gym.make(env_id)
|
||||
if gym_monitor and logdir:
|
||||
eval_env = gym.wrappers.Monitor(eval_env, os.path.join(logdir, 'gym_eval'), force=True)
|
||||
eval_env = SimpleMonitor(eval_env)
|
||||
else:
|
||||
eval_env = None
|
||||
# Create envs.
|
||||
env = gym.make(env_id)
|
||||
env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), "%i.monitor.json"%rank))
|
||||
gym.logger.setLevel(logging.WARN)
|
||||
|
||||
if evaluation and rank==0:
|
||||
eval_env = gym.make(env_id)
|
||||
eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))
|
||||
env = bench.Monitor(env, None)
|
||||
else:
|
||||
env = gym.make(env_id)
|
||||
if evaluation:
|
||||
eval_env = gym.make(env_id)
|
||||
else:
|
||||
eval_env = None
|
||||
eval_env = None
|
||||
|
||||
# Parse noise_type
|
||||
action_noise = None
|
||||
@@ -103,22 +75,20 @@ def run(env_id, seed, noise_type, num_cpu, layer_norm, logdir, gym_monitor, eval
|
||||
env.close()
|
||||
if eval_env is not None:
|
||||
eval_env.close()
|
||||
Logger.CURRENT.close()
|
||||
if rank == 0:
|
||||
logger.info('total runtime: {}s'.format(time.time() - start_time))
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
||||
parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
|
||||
boolean_flag(parser, 'render-eval', default=False)
|
||||
boolean_flag(parser, 'layer-norm', default=True)
|
||||
boolean_flag(parser, 'render', default=False)
|
||||
parser.add_argument('--num-cpu', type=int, default=1)
|
||||
boolean_flag(parser, 'normalize-returns', default=False)
|
||||
boolean_flag(parser, 'normalize-observations', default=True)
|
||||
parser.add_argument('--seed', type=int, default=0)
|
||||
parser.add_argument('--seed', help='RNG seed', type=int, default=0)
|
||||
parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
|
||||
parser.add_argument('--batch-size', type=int, default=64) # per MPI worker
|
||||
parser.add_argument('--actor-lr', type=float, default=1e-4)
|
||||
@@ -133,29 +103,11 @@ def parse_args():
|
||||
parser.add_argument('--nb-eval-steps', type=int, default=100) # per epoch cycle and MPI worker
|
||||
parser.add_argument('--nb-rollout-steps', type=int, default=100) # per epoch cycle and MPI worker
|
||||
parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2') # choices are adaptive-param_xx, ou_xx, normal_xx, none
|
||||
parser.add_argument('--logdir', type=str, default=None)
|
||||
boolean_flag(parser, 'gym-monitor', default=False)
|
||||
boolean_flag(parser, 'evaluation', default=True)
|
||||
boolean_flag(parser, 'bind-to-core', default=False)
|
||||
|
||||
boolean_flag(parser, 'evaluation', default=False)
|
||||
return vars(parser.parse_args())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
|
||||
# Figure out what logdir to use.
|
||||
if args['logdir'] is None:
|
||||
args['logdir'] = os.getenv('OPENAI_LOGDIR')
|
||||
|
||||
# Print and save arguments.
|
||||
logger.info('Arguments:')
|
||||
for key in sorted(args.keys()):
|
||||
logger.info('{}: {}'.format(key, args[key]))
|
||||
logger.info('')
|
||||
if args['logdir']:
|
||||
with open(os.path.join(args['logdir'], 'args.json'), 'w') as f:
|
||||
json.dump(args, f)
|
||||
|
||||
# Run actual script.
|
||||
run(**args)
|
||||
|
@@ -14,7 +14,7 @@ from mpi4py import MPI
|
||||
|
||||
|
||||
def train(env, nb_epochs, nb_epoch_cycles, render_eval, reward_scale, render, param_noise, actor, critic,
|
||||
normalize_returns, normalize_observations, critic_l2_reg, actor_lr, critic_lr, action_noise, logdir,
|
||||
normalize_returns, normalize_observations, critic_l2_reg, actor_lr, critic_lr, action_noise,
|
||||
popart, gamma, clip_norm, nb_train_steps, nb_rollout_steps, nb_eval_steps, batch_size, memory,
|
||||
tau=0.01, eval_env=None, param_noise_adaption_interval=50):
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
@@ -178,7 +178,7 @@ def train(env, nb_epochs, nb_epoch_cycles, render_eval, reward_scale, render, pa
|
||||
logger.record_tabular(key, combined_stats[key])
|
||||
logger.dump_tabular()
|
||||
logger.info('')
|
||||
|
||||
logdir = logger.get_dir()
|
||||
if rank == 0 and logdir:
|
||||
if hasattr(env, 'get_state'):
|
||||
with open(os.path.join(logdir, 'env_state.pkl'), 'wb') as f:
|
||||
|
Reference in New Issue
Block a user