add log_path flag to command line utility (#917)

* add log_path flag to command line utility

* Update README with log_path flag

* clarify logging and viz docs
albert
2019-06-07 15:05:52 -07:00
committed by pzhokhov
parent 7c520852d9
commit ba2b017820
4 changed files with 25 additions and 6 deletions


@@ -98,6 +98,8 @@ python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6
```
## Saving, loading and visualizing models
### Saving and loading the model
The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models.
The `--save_path` and `--load_path` command-line options save the TensorFlow state to a given path after training and load it before training, respectively.
Let's imagine you'd like to train ppo2 on Atari Pong, save the model, and then later visualize what it has learnt.
@@ -111,8 +113,17 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --
*NOTE:* Mujoco environments require normalization to work properly, so we wrap them with the VecNormalize wrapper. Currently, to ensure the models are saved with normalization (so that trained models can be restored and run without further training), the normalization coefficients are saved as TensorFlow variables. This can decrease performance somewhat, so if you require high-throughput steps with Mujoco and do not need to save/restore models, it may make sense to use numpy normalization instead. To do that, set `use_tf=False` in [baselines/run.py](baselines/run.py#L116).
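For reference, a hedged sketch of the wrapping described above (the import paths and the `use_tf` keyword are assumptions inferred from the note's reference to [baselines/run.py](baselines/run.py#L116); they are not shown in this diff):

```python
import gym
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize

# Wrap a Mujoco environment in a vectorized env, then normalize it.
# With use_tf=True the running statistics live in TensorFlow variables,
# so they are serialized together with the model by --save_path.
env = DummyVecEnv([lambda: gym.make('Reacher-v2')])
env = VecNormalize(env, use_tf=True)
```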
### Logging and visualizing learning curves and other training metrics
By default, all summary data, including progress and standard output, is saved to a unique directory in a temp folder, specified by a call to Python's [tempfile.gettempdir()](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir).
The directory can be changed with the `--log_path` command-line option.
```bash
python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2 --log_path=~/logs/Pong/
```
*NOTE:* The logger will overwrite files of the same name in an existing directory, so it is recommended to give log folders a unique timestamp to avoid overwriting old logs (one way to do this is sketched below).
The log directory can also be set via the `$OPENAI_LOGDIR` environment variable.
For examples on how to load and display the training data, see [here](docs/viz/viz.ipynb).
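Following the *NOTE* above, a minimal sketch (not part of this commit) that builds a timestamped `--log_path` value in Python, reusing the `strftime` pattern of the logger's default directory:

```python
import datetime
import os.path

# Build a unique, timestamped log directory under the user's home folder.
stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
log_path = os.path.expanduser(os.path.join("~/logs/Pong", stamp))
print(log_path)  # e.g. /home/user/logs/Pong/2019-06-07-15-05-52
```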
## Subpackages


@@ -170,6 +170,7 @@ def common_arg_parser():
    parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str)
    parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
    parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
    parser.add_argument('--log_path', help='Directory to save learning curve data.', default=None, type=str)
    parser.add_argument('--play', default=False, action='store_true')
    return parser
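As a quick sanity check of the new flag, a hypothetical snippet (not part of the commit; it assumes `common_arg_parser` is importable from `baselines.common.cmd_util`):

```python
from baselines.common.cmd_util import common_arg_parser

# Known flags are consumed by the parser; anything else is left for
# parse_unknown_args (shown in the next hunk).
parser = common_arg_parser()
args, unknown = parser.parse_known_args(
    ['--alg=ppo2', '--env=PongNoFrameskip-v4', '--log_path=~/logs/Pong/'])
print(args.log_path)  # ~/logs/Pong/
```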
@@ -186,7 +187,7 @@ def robotics_arg_parser():
def parse_unknown_args(args):
    """
    Parse arguments not consumed by arg parser into a dictionary
    """
    retval = {}
    preceded_by_key = False
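The loop body is truncated by this hunk; a minimal standalone sketch of the same idea (hypothetical, not the actual implementation) would be:

```python
def parse_unknown_args_sketch(args):
    # Pair "--key value" and "--key=value" tokens into a dictionary.
    retval = {}
    key = None
    for arg in args:
        if arg.startswith('--'):
            if '=' in arg:
                k, v = arg.split('=', 1)
                retval[k[2:]] = v
                key = None
            else:
                key = arg[2:]
        elif key is not None:
            retval[key] = arg
            key = None
    return retval

# Example: prints {'lr': '3e-4', 'nsteps': '2048'}
print(parse_unknown_args_sketch(['--lr', '3e-4', '--nsteps=2048']))
```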


@@ -379,7 +379,8 @@ def configure(dir=None, format_strs=None, comm=None, log_suffix=''):
        dir = osp.join(tempfile.gettempdir(),
            datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    assert isinstance(dir, str)
    dir = os.path.expanduser(dir)
    os.makedirs(os.path.expanduser(dir), exist_ok=True)
    rank = get_rank_without_mpi_import()
    if rank > 0:
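The `expanduser` call added here is what makes `--log_path=~/logs/Pong/` work even when the shell does not expand `~` itself. A quick illustration (not from the commit):

```python
import os.path

# Without expansion, "~" would be treated as a literal directory name.
print(os.path.expanduser('~/logs/Pong'))  # e.g. /home/user/logs/Pong
```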


@@ -192,6 +192,12 @@ def parse_cmdline_kwargs(args):
    return {k: parse(v) for k,v in parse_unknown_args(args).items()}

def configure_logger(log_path, **kwargs):
    if log_path is not None:
        logger.configure(log_path)
    else:
        logger.configure(**kwargs)
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
@@ -202,10 +208,10 @@ def main(args):
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)
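Net effect: only the MPI rank-0 process writes full logs to `--log_path`, while child processes pass `format_strs=[]` to suppress their output. Outside of `baselines.run`, the helper could also be called directly; a hypothetical usage sketch (it assumes `configure_logger` is importable from the run module):

```python
from baselines import logger
from baselines.run import configure_logger

configure_logger('~/logs/Pong/')  # log files (e.g. progress.csv, log.txt) land here
logger.log('logging to', logger.get_dir())
```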