fixes to readme and baselines/run.py (#80)
* fixes to readme and baselines/run.py
* polish installation section of baselines README
* polish installation section of baselines README
This commit is contained in:
42
README.md
42
README.md
@@ -38,20 +38,27 @@ More thorough tutorial on virtualenvs and options can be found [here](https://vi
|
|||||||
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
Clone the repo and cd into it:
|
- Clone the repo and cd into it:
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/openai/baselines.git
|
git clone https://github.com/openai/baselines.git
|
||||||
cd baselines
|
cd baselines
|
||||||
```
|
```
|
||||||
If using virtualenv, create a new virtualenv and activate it
|
- If you don't have TensorFlow installed already, install your favourite flavor of TensorFlow. In most cases,
|
||||||
```bash
|
```bash
|
||||||
virtualenv env --python=python3
|
pip install tensorflow-gpu # if you have a CUDA-compatible gpu and proper drivers
|
||||||
. env/bin/activate
|
```
|
||||||
```
|
or
|
||||||
Install baselines package
|
```bash
|
||||||
```bash
|
pip install tensorflow
|
||||||
pip install -e .
|
```
|
||||||
```
|
should be sufficient. Refer to the [TensorFlow installation guide](https://www.tensorflow.org/install/)
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
- Install baselines package
|
||||||
|
```bash
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
|
|
||||||
### MuJoCo
|
### MuJoCo
|
||||||
Some of the baselines examples use the [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (a temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py).
|
Some of the baselines examples use the [MuJoCo](http://www.mujoco.org) (multi-joint dynamics in contact) physics simulator, which is proprietary and requires binaries and a license (a temporary 30-day license can be obtained from [www.mujoco.org](http://www.mujoco.org)). Instructions on setting up MuJoCo can be found [here](https://github.com/openai/mujoco-py).
|
||||||
|
|
||||||
@@ -103,6 +110,13 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --
|
|||||||
*NOTE:* At the moment, MuJoCo training uses the VecNormalize wrapper for the environment, which is not being saved correctly; so loading the models trained on MuJoCo will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd with TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](baselines/common/vec_env/vec_normalize.py#L12). This way, the mean and std of the environment-normalizing wrapper will be saved in TensorFlow variables and included in the model file; however, training is slower that way, which is why it is not enabled by default.
|
*NOTE:* At the moment, MuJoCo training uses the VecNormalize wrapper for the environment, which is not being saved correctly; so loading the models trained on MuJoCo will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd with TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](baselines/common/vec_env/vec_normalize.py#L12). This way, the mean and std of the environment-normalizing wrapper will be saved in TensorFlow variables and included in the model file; however, training is slower that way, which is why it is not enabled by default.
|
||||||
|
|
||||||
|
|
||||||
|
## Using baselines with TensorBoard
|
||||||
|
The baselines logger can save data in the TensorBoard format. To do so, set the environment variables `OPENAI_LOG_FORMAT` and `OPENAI_LOGDIR`:
|
||||||
|
```bash
|
||||||
|
export OPENAI_LOG_FORMAT='stdout,log,csv,tensorboard' # formats are comma-separated, but for tensorboard you only really need the last one
|
||||||
|
export OPENAI_LOGDIR=path/to/tensorboard/data
|
||||||
|
```
|
||||||
|
|
||||||
## Subpackages
|
## Subpackages
|
||||||
|
|
||||||
- [A2C](baselines/a2c)
|
- [A2C](baselines/a2c)
|
||||||
|
@@ -13,7 +13,6 @@ from baselines import bench, logger
|
|||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
from baselines.common.vec_env.vec_normalize import VecNormalize
|
from baselines.common.vec_env.vec_normalize import VecNormalize
|
||||||
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
|
|
||||||
from baselines.common import atari_wrappers, retro_wrappers
|
from baselines.common import atari_wrappers, retro_wrappers
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -92,19 +91,8 @@ def build_env(args):
|
|||||||
seed = args.seed
|
seed = args.seed
|
||||||
|
|
||||||
env_type, env_id = get_env_type(args.env)
|
env_type, env_id = get_env_type(args.env)
|
||||||
if env_type == 'mujoco':
|
|
||||||
get_session(tf.ConfigProto(allow_soft_placement=True,
|
|
||||||
intra_op_parallelism_threads=1,
|
|
||||||
inter_op_parallelism_threads=1))
|
|
||||||
|
|
||||||
if args.num_env:
|
if env_type == 'atari':
|
||||||
env = make_vec_env(env_id, env_type, nenv, seed, reward_scale=args.reward_scale)
|
|
||||||
else:
|
|
||||||
env = make_vec_env(env_id, env_type, 1, seed, reward_scale=args.reward_scale)
|
|
||||||
|
|
||||||
env = VecNormalize(env)
|
|
||||||
|
|
||||||
elif env_type == 'atari':
|
|
||||||
if alg == 'acer':
|
if alg == 'acer':
|
||||||
env = make_vec_env(env_id, env_type, nenv, seed)
|
env = make_vec_env(env_id, env_type, nenv, seed)
|
||||||
elif alg == 'deepq':
|
elif alg == 'deepq':
|
||||||
@@ -132,17 +120,15 @@ def build_env(args):
|
|||||||
env = bench.Monitor(env, logger.get_dir())
|
env = bench.Monitor(env, logger.get_dir())
|
||||||
env = retro_wrappers.wrap_deepmind_retro(env)
|
env = retro_wrappers.wrap_deepmind_retro(env)
|
||||||
|
|
||||||
elif env_type == 'classic_control':
|
else:
|
||||||
def make_env():
|
get_session(tf.ConfigProto(allow_soft_placement=True,
|
||||||
e = gym.make(env_id)
|
intra_op_parallelism_threads=1,
|
||||||
e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
|
inter_op_parallelism_threads=1))
|
||||||
e.seed(seed)
|
|
||||||
return e
|
|
||||||
|
|
||||||
env = DummyVecEnv([make_env])
|
env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)
|
||||||
|
|
||||||
else:
|
if env_type == 'mujoco':
|
||||||
raise ValueError('Unknown env_type {}'.format(env_type))
|
env = VecNormalize(env)
|
||||||
|
|
||||||
return env
|
return env
|
||||||
|
|
||||||
@@ -163,10 +149,10 @@ def get_env_type(env_id):
|
|||||||
|
|
||||||
|
|
||||||
def get_default_network(env_type):
|
def get_default_network(env_type):
|
||||||
if env_type == 'mujoco' or env_type == 'classic_control':
|
|
||||||
return 'mlp'
|
|
||||||
if env_type == 'atari':
|
if env_type == 'atari':
|
||||||
return 'cnn'
|
return 'cnn'
|
||||||
|
else:
|
||||||
|
return 'mlp'
|
||||||
|
|
||||||
raise ValueError('Unknown env_type {}'.format(env_type))
|
raise ValueError('Unknown env_type {}'.format(env_type))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user