From 9f9835fe386458138f7e6e8068f6932a130b9ebd Mon Sep 17 00:00:00 2001 From: pzhokhov Date: Wed, 21 Nov 2018 12:51:15 -0800 Subject: [PATCH 1/4] Update __init__.py --- baselines/deepq/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/deepq/__init__.py b/baselines/deepq/__init__.py index 6859c05..7f1a15c 100644 --- a/baselines/deepq/__init__.py +++ b/baselines/deepq/__init__.py @@ -5,4 +5,4 @@ from baselines.deepq.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer def wrap_atari_dqn(env): from baselines.common.atari_wrappers import wrap_deepmind - return wrap_deepmind(env, frame_stack=True, scale=True) + return wrap_deepmind(env, frame_stack=True, scale=False) From 8607dca99ee31f8fb3f8d58ea2cbe44f393c7cc1 Mon Sep 17 00:00:00 2001 From: Christopher Hesse Date: Wed, 21 Nov 2018 14:57:10 -0800 Subject: [PATCH 2/4] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e4f8697..23487b6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +**Status:** Active (under active development, breaking changes may occur) + [![Build status](https://travis-ci.org/openai/baselines.svg?branch=master)](https://travis-ci.org/openai/baselines) # Baselines From 7dc6bc7c702d4fd69a6c5a13c1c4cfc93ee073a5 Mon Sep 17 00:00:00 2001 From: Prabhat Nagarajan Date: Tue, 27 Nov 2018 09:19:09 +0900 Subject: [PATCH 3/4] fixes typo (#732) * fixes typo * adds apostrophe --- baselines/common/atari_wrappers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baselines/common/atari_wrappers.py b/baselines/common/atari_wrappers.py index 6f551a7..a08ab8f 100644 --- a/baselines/common/atari_wrappers.py +++ b/baselines/common/atari_wrappers.py @@ -72,8 +72,8 @@ class EpisodicLifeEnv(gym.Wrapper): # then update lives to handle bonus lives lives = self.env.unwrapped.ale.lives() if lives < self.lives and lives > 0: - # for Qbert sometimes we stay in lives == 0 condtion for a few frames - # so its important to keep lives > 0, so that we only reset once + # for Qbert sometimes we stay in lives == 0 condition for a few frames + # so it's important to keep lives > 0, so that we only reset once # the environment advertises done. done = True self.lives = lives From 25ecb64821056c4421d0dd87364794f6ff868e0e Mon Sep 17 00:00:00 2001 From: pzhokhov Date: Mon, 26 Nov 2018 16:30:37 -0800 Subject: [PATCH 4/4] fixed issue with wrong output layer variable names in ddpg (#733) --- baselines/ddpg/ddpg_learner.py | 2 +- baselines/ddpg/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/baselines/ddpg/ddpg_learner.py b/baselines/ddpg/ddpg_learner.py index a065c98..3fc8a77 100755 --- a/baselines/ddpg/ddpg_learner.py +++ b/baselines/ddpg/ddpg_learner.py @@ -185,7 +185,7 @@ class DDPG(object): normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1]) self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf)) if self.critic_l2_reg > 0.: - critic_reg_vars = [var for var in self.critic.trainable_vars if 'kernel' in var.name and 'output' not in var.name] + critic_reg_vars = [var for var in self.critic.trainable_vars if var.name.endswith('/w:0') and 'output' not in var.name] for var in critic_reg_vars: logger.info(' regularizing: {}'.format(var.name)) logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg)) diff --git a/baselines/ddpg/models.py b/baselines/ddpg/models.py index 329c168..bfde840 100755 --- a/baselines/ddpg/models.py +++ b/baselines/ddpg/models.py @@ -42,7 +42,7 @@ class Critic(Model): with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE): x = tf.concat([obs, action], axis=-1) # this assumes observation and action can be concatenated x = self.network_builder(x) - x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)) + x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='output') return x @property