Merge branch 'master' of github.com:openai/baselines into internal
@@ -1,3 +1,5 @@
+**Status:** Active (under active development, breaking changes may occur)
+
 <img src="data/logo.jpg" width=25% align="right" /> [](https://travis-ci.org/openai/baselines)

 # Baselines
@@ -72,8 +72,8 @@ class EpisodicLifeEnv(gym.Wrapper):
         # then update lives to handle bonus lives
         lives = self.env.unwrapped.ale.lives()
         if lives < self.lives and lives > 0:
-            # for Qbert sometimes we stay in lives == 0 condtion for a few frames
-            # so its important to keep lives > 0, so that we only reset once
+            # for Qbert sometimes we stay in lives == 0 condition for a few frames
+            # so it's important to keep lives > 0, so that we only reset once
             # the environment advertises done.
             done = True
         self.lives = lives
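For context, this hunk only corrects comment typos inside an episodic-life wrapper: losing a life is reported as the end of an episode for training purposes, but a real reset happens only once the game is actually over. A minimal sketch of such a wrapper (modeled on the method the hunk touches, not a verbatim copy; it assumes an ALE-backed gym env exposing `env.unwrapped.ale.lives()` and the old 4-tuple step API):

```python
import gym

class EpisodicLifeSketch(gym.Wrapper):
    """Sketch: end an episode when a life is lost, reset only on true game over."""

    def __init__(self, env):
        super(EpisodicLifeSketch, self).__init__(env)
        self.lives = 0
        self.was_real_done = True

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condition for a few frames,
            # so keeping lives > 0 ensures we only reset once the environment
            # advertises done
            done = True
        self.lives = lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        if self.was_real_done:
            obs = self.env.reset(**kwargs)
        else:
            # no-op step to advance from the lost-life terminal state
            obs, _, _, _ = self.env.step(0)
        self.lives = self.env.unwrapped.ale.lives()
        return obs
```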
@@ -185,7 +185,7 @@ class DDPG(object):
         normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
         self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
         if self.critic_l2_reg > 0.:
-            critic_reg_vars = [var for var in self.critic.trainable_vars if 'kernel' in var.name and 'output' not in var.name]
+            critic_reg_vars = [var for var in self.critic.trainable_vars if var.name.endswith('/w:0') and 'output' not in var.name]
             for var in critic_reg_vars:
                 logger.info('  regularizing: {}'.format(var.name))
             logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
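The changed line only affects which critic variables receive an L2 penalty: weight matrices, but not biases and not the final output layer (weight names differ between `tf.layers.dense`, which uses `kernel`, and older contrib-style layers, which use `w`). A rough sketch of how such a selection could feed into the loss; the `trainable_vars` and `critic_l2_reg` names come from the hunk, but the penalty construction here is an illustrative assumption rather than the exact baselines code:

```python
import tensorflow as tf

def add_l2_penalty(critic_loss, trainable_vars, critic_l2_reg):
    """Sketch: add an L2 penalty over weight matrices, skipping biases and the output layer."""
    reg_vars = [v for v in trainable_vars
                if ('kernel' in v.name or v.name.endswith('/w:0')) and 'output' not in v.name]
    if not reg_vars:
        return critic_loss
    l2 = tf.add_n([tf.nn.l2_loss(v) for v in reg_vars]) * critic_l2_reg
    return critic_loss + l2
```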
@@ -42,7 +42,7 @@ class Critic(Model):
         with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
             x = tf.concat([obs, action], axis=-1) # this assumes observation and action can be concatenated
             x = self.network_builder(x)
-            x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3))
+            x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3), name='output')
             return x

     @property
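Naming the final dense layer matters because the regularization filter in the previous hunk excludes any variable with `output` in its name, so the small final layer stays unregularized. A small TF1-style sketch of the resulting variable names (scope and layer sizes are made up for illustration):

```python
import tensorflow as tf

# Sketch: how naming the last layer changes the variable names the
# regularization filter sees (hypothetical scope and sizes).
with tf.variable_scope('critic'):
    x = tf.placeholder(tf.float32, [None, 64])
    h = tf.layers.dense(x, 64, name='dense_hidden')
    q = tf.layers.dense(h, 1, name='output')

for v in tf.trainable_variables():
    print(v.name)
# critic/dense_hidden/kernel:0  -> regularized ('kernel' in name, no 'output')
# critic/dense_hidden/bias:0    -> skipped (not a weight matrix)
# critic/output/kernel:0        -> skipped ('output' in name)
# critic/output/bias:0          -> skipped
```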
@@ -5,4 +5,4 @@ from baselines.deepq.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer
 
 def wrap_atari_dqn(env):
     from baselines.common.atari_wrappers import wrap_deepmind
-    return wrap_deepmind(env, frame_stack=True, scale=True)
+    return wrap_deepmind(env, frame_stack=True, scale=False)
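The `scale` flag controls whether the wrapper converts frames to floats in [0, 1]; with `scale=False` observations stay uint8, which keeps a large replay buffer roughly 4x smaller and leaves any scaling to the model. A hedged usage sketch (assumes gym with the Atari extras installed; the env id is just an example):

```python
import numpy as np
from baselines.common.atari_wrappers import make_atari, wrap_deepmind

# Usage sketch: build a DeepMind-style Atari env without float scaling.
env = make_atari('BreakoutNoFrameskip-v4')  # example id; make_atari expects a NoFrameskip variant
env = wrap_deepmind(env, frame_stack=True, scale=False)

obs = np.array(env.reset())
print(obs.shape, obs.dtype)  # expected: (84, 84, 4) uint8 when scale=False
```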