diff --git a/README.md b/README.md
index a285912..79e9ffe 100644
--- a/README.md
+++ b/README.md
@@ -38,20 +38,27 @@ More thorough tutorial on virtualenvs and options can be found [here](https://vi
 
 ## Installation
-Clone the repo and cd into it:
-```bash
-git clone https://github.com/openai/baselines.git
-cd baselines
-```
-If using virtualenv, create a new virtualenv and activate it
-```bash
-virtualenv env --python=python3
-. env/bin/activate
-```
-Install baselines package
-```bash
-pip install -e .
-```
+- Clone the repo and cd into it:
+    ```bash
+    git clone https://github.com/openai/baselines.git
+    cd baselines
+    ```
+- If you don't have TensorFlow installed already, install your favourite flavor of TensorFlow. In most cases,
+    ```bash
+    pip install tensorflow-gpu # if you have a CUDA-compatible GPU and proper drivers
+    ```
+    or
+    ```bash
+    pip install tensorflow
+    ```
+    should be sufficient. Refer to the [TensorFlow installation guide](https://www.tensorflow.org/install/)
+    for more details.
+
+- Install the baselines package:
+    ```bash
+    pip install -e .
+    ```
+
 ### MuJoCo
 Some of the baselines examples use [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py)
@@ -103,6 +110,18 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --
 *NOTE:* At the moment Mujoco training uses VecNormalize wrapper for the environment which is not being saved correctly; so loading the models trained on Mujoco will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd by TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](baselines/common/vec_env/vec_normalize.py#L12). This way, mean and std of environment normalizing wrapper will be saved in tensorflow variables and included in the model file; however, training is slower that way - hence not including it by default
+## Using baselines with TensorBoard
+The baselines logger can save data in the TensorBoard format. To do so, set the environment variables `OPENAI_LOG_FORMAT` and `OPENAI_LOGDIR`:
+```bash
+export OPENAI_LOG_FORMAT='stdout,log,csv,tensorboard' # formats are comma-separated, but for tensorboard you only really need the last one
+export OPENAI_LOGDIR=path/to/tensorboard/data
+```
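+For a quick check that event files are being written, point TensorBoard at the same directory after a short run (a sketch: the training command is the one used elsewhere in this README, the step count is an arbitrary example, and `tensorboard` is the viewer that ships with TensorFlow):
+```bash
+python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=1e6
+tensorboard --logdir=$OPENAI_LOGDIR  # serves on http://localhost:6006 by default
+```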
+
 ## Subpackages
 
 - [A2C](baselines/a2c)
diff --git a/baselines/run.py b/baselines/run.py
index faa2786..a4bdde2 100644
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -13,7 +13,6 @@ from baselines import bench, logger
 from importlib import import_module
 from baselines.common.vec_env.vec_normalize import VecNormalize
-from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 from baselines.common import atari_wrappers, retro_wrappers
 
 try:
@@ -92,19 +91,8 @@ def build_env(args):
     seed = args.seed
 
     env_type, env_id = get_env_type(args.env)
-    if env_type == 'mujoco':
-        get_session(tf.ConfigProto(allow_soft_placement=True,
-                                   intra_op_parallelism_threads=1,
-                                   inter_op_parallelism_threads=1))
-        if args.num_env:
-            env = make_vec_env(env_id, env_type, nenv, seed, reward_scale=args.reward_scale)
-        else:
-            env = make_vec_env(env_id, env_type, 1, seed, reward_scale=args.reward_scale)
-
-        env = VecNormalize(env)
-
-    elif env_type == 'atari':
+    if env_type == 'atari':
         if alg == 'acer':
             env = make_vec_env(env_id, env_type, nenv, seed)
         elif alg == 'deepq':
@@ -132,17 +120,15 @@ def build_env(args):
             env = bench.Monitor(env, logger.get_dir())
             env = retro_wrappers.wrap_deepmind_retro(env)
 
-    elif env_type == 'classic_control':
-        def make_env():
-            e = gym.make(env_id)
-            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
-            e.seed(seed)
-            return e
+    else:
+        get_session(tf.ConfigProto(allow_soft_placement=True,
+                                   intra_op_parallelism_threads=1,
+                                   inter_op_parallelism_threads=1))
 
-        env = DummyVecEnv([make_env])
+        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)
 
-    else:
-        raise ValueError('Unknown env_type {}'.format(env_type))
+    if env_type == 'mujoco':
+        env = VecNormalize(env)
 
     return env
 
@@ -163,10 +149,10 @@ def get_env_type(env_id):
 
 def get_default_network(env_type):
-    if env_type == 'mujoco' or env_type == 'classic_control':
-        return 'mlp'
     if env_type == 'atari':
         return 'cnn'
+    else:
+        return 'mlp'
 
     raise ValueError('Unknown env_type {}'.format(env_type))
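The `build_env` consolidation above means atari (and retro) keep their per-algorithm special cases, while every other env type, mujoco and classic_control included, now goes through `make_vec_env`, with `VecNormalize` layered on only for mujoco. A minimal smoke test of the two merged branches might look like this (a sketch: the env ids and step counts are arbitrary examples, and Humanoid-v2 needs the MuJoCo setup from the README):
```bash
# classic_control: now vectorized via make_vec_env instead of a hand-rolled DummyVecEnv
python -m baselines.run --alg=ppo2 --env=CartPole-v0 --num_timesteps=1e4
# mujoco: same path plus VecNormalize; --num_env sets the number of parallel envs (defaults to 1)
python -m baselines.run --alg=ppo2 --env=Humanoid-v2 --num_timesteps=1e6 --num_env=4
```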