Merge branch 'master' of github.com:openai/baselines into internal

Author: Peter Zhokhov
Date:   2019-01-24 14:28:35 -08:00
6 changed files with 16 additions and 10 deletions

View File

@@ -37,9 +37,6 @@ class Runner(AbstractEnvRunner):
             obs, rewards, dones, _ = self.env.step(actions)
             self.states = states
             self.dones = dones
-            for n, done in enumerate(dones):
-                if done:
-                    self.obs[n] = self.obs[n]*0
             self.obs = obs
             mb_rewards.append(rewards)
             mb_dones.append(self.dones)
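
Note: the three deleted lines were a dead store. The loop zeroed entries of self.obs in place, but the very next line rebinds self.obs to the obs just returned by env.step, and nothing in these lines reads the old array in between. A minimal runnable sketch of the same pattern, with toy values rather than baselines code:

```python
# Illustrative sketch of the dead store removed above (toy values, not the
# baselines runner): an in-place write that is immediately shadowed by
# rebinding the same name has no observable effect here.
obs = [10, 20, 30]           # stands in for self.obs
new_obs = [11, 21, 31]       # stands in for the obs returned by env.step
obs[1] = obs[1] * 0          # zero an entry "on episode end"...
obs = new_obs                # ...then rebind, discarding the mutation
assert obs == [11, 21, 31]   # the zeroing was never visible
```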

View File

@@ -75,8 +75,8 @@ class Model(object):
         train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype, shape=(nenvs*(nsteps+1),) + ob_space.shape)
         with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE):
-            step_model = policy(observ_placeholder=step_ob_placeholder, sess=sess)
-            train_model = policy(observ_placeholder=train_ob_placeholder, sess=sess)
+            step_model = policy(nbatch=nenvs, nsteps=1, observ_placeholder=step_ob_placeholder, sess=sess)
+            train_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess)

         params = find_trainable_variables("acer_model")
@@ -94,7 +94,7 @@ class Model(object):
                 return v

         with tf.variable_scope("acer_model", custom_getter=custom_getter, reuse=True):
-            polyak_model = policy(observ_placeholder=train_ob_placeholder, sess=sess)
+            polyak_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess)

         # Notation: (var) = batch variable, (var)s = sequence variable, (var)_i = variable indexed by action at step i
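Note: all three instantiations (step, train, polyak) now pass nbatch and nsteps explicitly, so the policy builder can size batch-dependent pieces per instantiation instead of inferring them. A hedged sketch of that builder pattern, using a toy stand-in rather than the real baselines.common.policies API:

```python
import numpy as np
from types import SimpleNamespace

# Toy stand-in for the policy builder (illustrative, not the real baselines
# API): step/train/polyak models share one builder but differ in batch
# geometry, which nbatch/nsteps communicate, e.g. so a recurrent policy can
# size its initial state for each instantiation.
def make_policy(nlstm=64):
    def policy_fn(nbatch, nsteps, observ_placeholder=None, sess=None):
        return SimpleNamespace(
            nsteps=nsteps,
            initial_state=np.zeros((nbatch, 2 * nlstm)),  # depends on nbatch
        )
    return policy_fn

policy = make_policy()
nenvs, nsteps = 4, 20
step_model = policy(nbatch=nenvs, nsteps=1)                 # one step per env
train_model = policy(nbatch=nenvs * nsteps, nsteps=nsteps)  # full rollout batch
assert step_model.initial_state.shape == (4, 128)
```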

View File

@@ -145,6 +145,7 @@ def common_arg_parser():
     parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
     parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
     parser.add_argument('--play', default=False, action='store_true')
+    parser.add_argument('--extra_import', help='Extra module to import to access external environments', type=str, default=None)
     return parser

 def robotics_arg_parser():
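
Note: --extra_import names a module to import before environments are built (see the run.py hunk below). The expected use, an assumption since the diff doesn't show one, is a module that registers custom gym environments as an import side effect:

```python
# Hypothetical module my_envs.py (module name, env id, and entry point are
# all illustrative): importing it registers a custom environment, after
# which --env=MyEnv-v0 resolves through the normal gym registry.
from gym.envs.registration import register

register(
    id='MyEnv-v0',
    entry_point='my_envs.core:MyEnv',
    max_episode_steps=1000,
)
```

With such a module on the PYTHONPATH, an invocation would look something like `python -m baselines.run --alg=ppo2 --env=MyEnv-v0 --extra_import=my_envs`.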

View File

@@ -24,7 +24,7 @@ Hopper-v1, Walker2d-v1, HalfCheetah-v1, Humanoid-v1, HumanoidStandup-v1. Every i
 For details (e.g., adversarial loss, discriminator accuracy, etc.) about GAIL training, please see [here](https://drive.google.com/drive/folders/1nnU8dqAV9i37-_5_vWIspyFUJFQLCsDD?usp=sharing)

-### Determinstic Polciy (Set std=0)
+### Deterministic Policy (Set std=0)

 | | Un-normalized | Normalized |
 |---|---|---|
 | Hopper-v1 | <img src='Hopper-unnormalized-deterministic-scores.png'> | <img src='Hopper-normalized-deterministic-scores.png'> |

View File

@@ -120,6 +120,11 @@ def build_env(args):

 def get_env_type(env_id):
+    # Re-parse the gym registry, since we could have new envs since last time.
+    for env in gym.envs.registry.all():
+        env_type = env._entry_point.split(':')[0].split('.')[-1]
+        _game_envs[env_type].add(env.id)  # This is a set so add is idempotent
+
     if env_id in _game_envs.keys():
         env_type = env_id
         env_id = [g for g in _game_envs[env_type]][0]

@@ -189,6 +194,9 @@ def main(args):
     args, unknown_args = arg_parser.parse_known_args(args)
     extra_args = parse_cmdline_kwargs(unknown_args)

+    if args.extra_import is not None:
+        import_module(args.extra_import)
+
     if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
         rank = 0
         logger.configure()
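
Note: get_env_type now re-scans gym.envs.registry on every call, so environments registered after startup (e.g. via --extra_import) are classified too, and because _game_envs maps each type to a set, repeated scans are idempotent. A simplified, runnable sketch of the lookup (it omits the real function's branch where env_id itself names an env type):

```python
from collections import defaultdict

# Simplified sketch of the dynamic env-type lookup (registry_entries stands
# in for gym.envs.registry.all(); the entry point below is illustrative).
def get_env_type(env_id, registry_entries):
    game_envs = defaultdict(set)
    for entry_point, eid in registry_entries:
        env_type = entry_point.split(':')[0].split('.')[-1]  # '...atari:AtariEnv' -> 'atari'
        game_envs[env_type].add(eid)  # set add is idempotent across re-scans
    for env_type, ids in game_envs.items():
        if env_id in ids:
            return env_type
    raise ValueError('could not find env type for {}'.format(env_id))

assert get_env_type('Pong-v4', [('gym.envs.atari:AtariEnv', 'Pong-v4')]) == 'atari'
```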

View File

@@ -53,11 +53,11 @@ setup(name='baselines',
# ensure there is some tensorflow build with version above 1.4
import pkg_resources
tf_pkg = None
for tf_pkg_name in ['tensorflow', 'tensorflow-gpu']:
for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']:
try:
tf_pkg = pkg_resources.get_distribution(tf_pkg_name)
except pkg_resources.DistributionNotFound:
pass
assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4'
from distutils.version import StrictVersion
assert StrictVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= StrictVersion('1.4.0')
from distutils.version import LooseVersion
assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')
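
Note: the switch from StrictVersion to LooseVersion is what lets the newly whitelisted nightly packages pass the check. StrictVersion accepts only X.Y[.Z] with an optional aN/bN suffix and raises ValueError on dev-tagged versions, while LooseVersion parses arbitrary dotted strings and still orders them numerically here. A small demonstration (the nightly version string is illustrative):

```python
from distutils.version import LooseVersion, StrictVersion

nightly = '1.13.0.dev20190125'  # illustrative tf-nightly version string

try:
    StrictVersion(nightly)      # does not match X.Y[.Z][aN|bN]
except ValueError:
    print('StrictVersion rejects nightly version strings')

assert LooseVersion(nightly) >= LooseVersion('1.4.0')  # parses and orders fine
```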