From 624231827c6fc852969e2664b52bf5e0690b0676 Mon Sep 17 00:00:00 2001 From: Peter Zhokhov Date: Mon, 13 Aug 2018 09:28:10 -0700 Subject: [PATCH] merged benchmarks branch --- .benchmark_pattern | 2 +- README.md | 1 + baselines/common/atari_wrappers.py | 3 ++- baselines/common/models.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.benchmark_pattern b/.benchmark_pattern index e53df25..8b13789 100644 --- a/.benchmark_pattern +++ b/.benchmark_pattern @@ -1 +1 @@ -ppo2 + diff --git a/README.md b/README.md index e8a4abb..92b5a5a 100644 --- a/README.md +++ b/README.md @@ -139,3 +139,4 @@ To cite this repository in publications: journal = {GitHub repository}, howpublished = {\url{https://github.com/openai/baselines}}, } + diff --git a/baselines/common/atari_wrappers.py b/baselines/common/atari_wrappers.py index 4598e23..6be3582 100644 --- a/baselines/common/atari_wrappers.py +++ b/baselines/common/atari_wrappers.py @@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper): self.k = k self.frames = deque([], maxlen=k) shp = env.observation_space.shape - self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8) + self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype) def reset(self): ob = self.env.reset() @@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper): class ScaledFloatFrame(gym.ObservationWrapper): def __init__(self, env): gym.ObservationWrapper.__init__(self, env) + self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) def observation(self, observation): # careful! This undoes the memory optimization, use diff --git a/baselines/common/models.py b/baselines/common/models.py index 0763095..edaa2eb 100644 --- a/baselines/common/models.py +++ b/baselines/common/models.py @@ -138,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs): ''' def network_fn(X): - out = X + out = tf.cast(X, tf.float32) / 255. with tf.variable_scope("convnet"): for num_outputs, kernel_size, stride in convs: out = layers.convolution2d(out,