From 624231827c6fc852969e2664b52bf5e0690b0676 Mon Sep 17 00:00:00 2001
From: Peter Zhokhov <peterz@openai.com>
Date: Mon, 13 Aug 2018 09:28:10 -0700
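Subject: [PATCH] merged benchmarks branch

Changes carried by this merge:
- .benchmark_pattern: replace the "ppo2" entry with an empty line.
- README.md: append a blank line after the citation entry.
- baselines/common/atari_wrappers.py: FrameStack now takes the dtype of its
  observation space from the wrapped env instead of hard-coding np.uint8;
  ScaledFloatFrame now declares a float32 Box(low=0, high=1) observation
  space with the wrapped env's shape.
- baselines/common/models.py: conv_only's network_fn casts its input to
  float32 and divides it by 255 before the convolution layers.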

---
 .benchmark_pattern                 | 2 +-
 README.md                          | 1 +
 baselines/common/atari_wrappers.py | 3 ++-
 baselines/common/models.py         | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.benchmark_pattern b/.benchmark_pattern
index e53df25..8b13789 100644
--- a/.benchmark_pattern
+++ b/.benchmark_pattern
@@ -1 +1 @@
-ppo2
+
diff --git a/README.md b/README.md
index e8a4abb..92b5a5a 100644
--- a/README.md
+++ b/README.md
@@ -139,3 +139,4 @@ To cite this repository in publications:
       journal = {GitHub repository},
       howpublished = {\url{https://github.com/openai/baselines}},
     }
+
diff --git a/baselines/common/atari_wrappers.py b/baselines/common/atari_wrappers.py
index 4598e23..6be3582 100644
--- a/baselines/common/atari_wrappers.py
+++ b/baselines/common/atari_wrappers.py
@@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper):
         self.k = k
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
-        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8)
+        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype)
 
     def reset(self):
         ob = self.env.reset()
@@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper):
 class ScaledFloatFrame(gym.ObservationWrapper):
     def __init__(self, env):
         gym.ObservationWrapper.__init__(self, env)
+        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
 
     def observation(self, observation):
         # careful! This undoes the memory optimization, use
diff --git a/baselines/common/models.py b/baselines/common/models.py
index 0763095..edaa2eb 100644
--- a/baselines/common/models.py
+++ b/baselines/common/models.py
@@ -138,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
     '''
 
     def network_fn(X):
-        out = X
+        out = tf.cast(X, tf.float32) / 255.
         with tf.variable_scope("convnet"):
             for num_outputs, kernel_size, stride in convs:
                 out = layers.convolution2d(out,