From ba2b01782034bcbdb73a2e744cff4cb1c99ab612 Mon Sep 17 00:00:00 2001 From: albert Date: Fri, 7 Jun 2019 15:05:52 -0700 Subject: [PATCH] add log_path flag to command line utility (#917) * add log_path flag to command line utility * Update README with log_path flag * clarify logg and viz docs --- README.md | 15 +++++++++++++-- baselines/common/cmd_util.py | 3 ++- baselines/logger.py | 3 ++- baselines/run.py | 10 ++++++++-- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index dc3c8b6..6a0b08d 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,8 @@ python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6 ``` ## Saving, loading and visualizing models + +### Saving and loading the model The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models. `--save_path` and `--load_path` command-line option loads the tensorflow state from a given path before training, and saves it after the training, respectively. Let's imagine you'd like to train ppo2 on Atari Pong, save the model and then later visualize what has it learnt. @@ -111,8 +113,17 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 -- *NOTE:* Mujoco environments require normalization to work properly, so we wrap them with VecNormalize wrapper. Currently, to ensure the models are saved with normalization (so that trained models can be restored and run without further training) the normalization coefficients are saved as tensorflow variables. This can decrease the performance somewhat, so if you require high-throughput steps with Mujoco and do not need saving/restoring the models, it may make sense to use numpy normalization instead. To do that, set 'use_tf=False` in [baselines/run.py](baselines/run.py#L116). 
-## Loading and vizualizing learning curves and other training metrics -See [here](docs/viz/viz.ipynb) for instructions on how to load and display the training data. +### Logging and visualizing learning curves and other training metrics +By default, all summary data, including progress, standard output, is saved to a unique directory in a temp folder, specified by a call to Python's [tempfile.gettempdir()](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir). +The directory can be changed with the `--log_path` command-line option. +```bash +python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2 --log_path=~/logs/Pong/ +``` +*NOTE:* Please be aware that the logger will overwrite files of the same name in an existing directory, thus it's recommended that folder names be given a unique timestamp to prevent logs from being overwritten. + +Another way the temp directory can be changed is through the use of the `$OPENAI_LOGDIR` environment variable. + +For examples on how to load and display the training data, see [here](docs/viz/viz.ipynb). ## Subpackages diff --git a/baselines/common/cmd_util.py b/baselines/common/cmd_util.py index 28cb0a7..586480c 100644 --- a/baselines/common/cmd_util.py +++ b/baselines/common/cmd_util.py @@ -170,6 +170,7 @@ def common_arg_parser(): parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str) parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int) parser.add_argument('--save_video_length', help='Length of recorded video. 
Default: 200', default=200, type=int) + parser.add_argument('--log_path', help='Directory to save learning curve data.', default=None, type=str) parser.add_argument('--play', default=False, action='store_true') return parser @@ -186,7 +187,7 @@ def robotics_arg_parser(): def parse_unknown_args(args): """ - Parse arguments not consumed by arg parser into a dicitonary + Parse arguments not consumed by arg parser into a dictionary """ retval = {} preceded_by_key = False diff --git a/baselines/logger.py b/baselines/logger.py index 77d40ba..59cf8c7 100644 --- a/baselines/logger.py +++ b/baselines/logger.py @@ -379,7 +379,8 @@ def configure(dir=None, format_strs=None, comm=None, log_suffix=''): dir = osp.join(tempfile.gettempdir(), datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")) assert isinstance(dir, str) - os.makedirs(dir, exist_ok=True) + dir = os.path.expanduser(dir) + os.makedirs(os.path.expanduser(dir), exist_ok=True) rank = get_rank_without_mpi_import() if rank > 0: diff --git a/baselines/run.py b/baselines/run.py index a295873..13f7f6c 100644 --- a/baselines/run.py +++ b/baselines/run.py @@ -192,6 +192,12 @@ def parse_cmdline_kwargs(args): return {k: parse(v) for k,v in parse_unknown_args(args).items()} +def configure_logger(log_path, **kwargs): + if log_path is not None: + logger.configure(log_path) + else: + logger.configure(**kwargs) + def main(args): # configure logger, disable logging in child MPI processes (with rank > 0) @@ -202,10 +208,10 @@ def main(args): if MPI is None or MPI.COMM_WORLD.Get_rank() == 0: rank = 0 - logger.configure() + configure_logger(args.log_path) else: - logger.configure(format_strs=[]) rank = MPI.COMM_WORLD.Get_rank() + configure_logger(args.log_path, format_strs=[]) model, env = train(args, extra_args)