add log_path flag to command line utility (#917)

* add log_path flag to command line utility

* Update README with log_path flag

* clarify logging and viz docs
albert
2019-06-07 15:05:52 -07:00
committed by pzhokhov
parent 7c520852d9
commit ba2b017820
4 changed files with 25 additions and 6 deletions


@@ -98,6 +98,8 @@ python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6
```
## Saving, loading and visualizing models
### Saving and loading the model
The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models.
The `--save_path` and `--load_path` command-line options save the TensorFlow state to a given path after training and load it before training, respectively.
Let's imagine you'd like to train ppo2 on Atari Pong, save the model, and then later visualize what it has learnt.
@@ -111,8 +113,17 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --
*NOTE:* Mujoco environments require normalization to work properly, so we wrap them with the VecNormalize wrapper. Currently, to ensure the models are saved with normalization (so that trained models can be restored and run without further training), the normalization coefficients are saved as TensorFlow variables. This can decrease performance somewhat, so if you require high-throughput steps with Mujoco and do not need to save/restore models, it may make sense to use numpy normalization instead. To do that, set `use_tf=False` in [baselines/run.py](baselines/run.py#L116).
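For reference, a hedged sketch of the wrapping described above (the import paths and the `use_tf` keyword are assumptions inferred from the note's reference to [baselines/run.py](baselines/run.py#L116); they are not shown in this diff):

```python
import gym
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize

# Wrap a Mujoco environment in a vectorized env, then normalize it.
# With use_tf=True the running statistics live in TensorFlow variables,
# so they are serialized together with the model by --save_path.
env = DummyVecEnv([lambda: gym.make('Reacher-v2')])
env = VecNormalize(env, use_tf=True)
```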
### Logging and visualizing learning curves and other training metrics
By default, all summary data, including progress and standard output, is saved to a unique directory in a temp folder, specified by a call to Python's [tempfile.gettempdir()](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir).
The directory can be changed with the `--log_path` command-line option.
```bash
python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2 --log_path=~/logs/Pong/
```
*NOTE:* The logger will overwrite files of the same name in an existing directory, so it is recommended to give log folders a unique timestamp to avoid overwriting old logs (one way to do this is sketched below).
The log directory can also be set via the `$OPENAI_LOGDIR` environment variable.
For examples on how to load and display the training data, see [here](docs/viz/viz.ipynb).
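Following the *NOTE* above, a minimal sketch (not part of this commit) that builds a timestamped `--log_path` value in Python, reusing the `strftime` pattern of the logger's default directory:

```python
import datetime
import os.path

# Build a unique, timestamped log directory under the user's home folder.
stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
log_path = os.path.expanduser(os.path.join("~/logs/Pong", stamp))
print(log_path)  # e.g. /home/user/logs/Pong/2019-06-07-15-05-52
```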
## Subpackages


@@ -170,6 +170,7 @@ def common_arg_parser():
    parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str)
    parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
    parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
    parser.add_argument('--log_path', help='Directory to save learning curve data.', default=None, type=str)
    parser.add_argument('--play', default=False, action='store_true')
    return parser
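As a quick sanity check of the new flag, a hypothetical snippet (not part of the commit; it assumes `common_arg_parser` is importable from `baselines.common.cmd_util`):

```python
from baselines.common.cmd_util import common_arg_parser

# Known flags are consumed by the parser; anything else is left for
# parse_unknown_args (shown in the next hunk).
parser = common_arg_parser()
args, unknown = parser.parse_known_args(
    ['--alg=ppo2', '--env=PongNoFrameskip-v4', '--log_path=~/logs/Pong/'])
print(args.log_path)  # ~/logs/Pong/
```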
@@ -186,7 +187,7 @@ def robotics_arg_parser():
def parse_unknown_args(args):
    """
    Parse arguments not consumed by arg parser into a dictionary
    """
    retval = {}
    preceded_by_key = False
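The loop body is truncated by this hunk; a minimal standalone sketch of the same idea (hypothetical, not the actual implementation) would be:

```python
def parse_unknown_args_sketch(args):
    # Pair "--key value" and "--key=value" tokens into a dictionary.
    retval = {}
    key = None
    for arg in args:
        if arg.startswith('--'):
            if '=' in arg:
                k, v = arg.split('=', 1)
                retval[k[2:]] = v
                key = None
            else:
                key = arg[2:]
        elif key is not None:
            retval[key] = arg
            key = None
    return retval

# Example: prints {'lr': '3e-4', 'nsteps': '2048'}
print(parse_unknown_args_sketch(['--lr', '3e-4', '--nsteps=2048']))
```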


@@ -379,7 +379,8 @@ def configure(dir=None, format_strs=None, comm=None, log_suffix=''):
        dir = osp.join(tempfile.gettempdir(),
            datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    assert isinstance(dir, str)
    dir = os.path.expanduser(dir)
    os.makedirs(os.path.expanduser(dir), exist_ok=True)
    rank = get_rank_without_mpi_import()
    if rank > 0:
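The `expanduser` call added here is what makes `--log_path=~/logs/Pong/` work even when the shell does not expand `~` itself. A quick illustration (not from the commit):

```python
import os.path

# Without expansion, "~" would be treated as a literal directory name.
print(os.path.expanduser('~/logs/Pong'))  # e.g. /home/user/logs/Pong
```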


@@ -192,6 +192,12 @@ def parse_cmdline_kwargs(args):
    return {k: parse(v) for k,v in parse_unknown_args(args).items()}

def configure_logger(log_path, **kwargs):
    if log_path is not None:
        logger.configure(log_path)
    else:
        logger.configure(**kwargs)
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
@@ -202,10 +208,10 @@ def main(args):
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)
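Net effect: only the MPI rank-0 process writes full logs to `--log_path`, while child processes pass `format_strs=[]` to suppress their output. Outside of `baselines.run`, the helper could also be called directly; a hypothetical usage sketch (it assumes `configure_logger` is importable from the run module):

```python
from baselines import logger
from baselines.run import configure_logger

configure_logger('~/logs/Pong/')  # log files (e.g. progress.csv, log.txt) land here
logger.log('logging to', logger.get_dir())
```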