Fix autodetect dtype warnings (#1234)

* Fix autodetect dtype warnings
* Use warnings module for gym logger
* Fix warning in tests
2025-07-31 13:54:31 +00:00 · 2018-11-29 02:27:27 +01:00
parent e09b0f50ef
commit cdd212db4b
12 changed files with 65 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 *.py~
 .DS_Store
 .cache
+.pytest_cache/

 # Setuptools distribution and build folders.
 /dist/
--- a/gym/envs/box2d/bipedal_walker.py
+++ b/gym/envs/box2d/bipedal_walker.py
@@ -1,6 +1,7 @@
-import sys, math
-import numpy as np
+import sys
+import math

+import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)

@@ -137,9 +138,9 @@ class BipedalWalker(gym.Env, EzPickle):

        self.reset()

-        high = np.array([np.inf]*24)
-        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
-        self.observation_space = spaces.Box(-high, high)
+        high = np.array([np.inf] * 24)
+        self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
+        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
--- a/gym/envs/classic_control/acrobot.py
+++ b/gym/envs/classic_control/acrobot.py
@@ -1,9 +1,10 @@
 """classic Acrobot task"""
-from gym import core, spaces
-from gym.utils import seeding
 import numpy as np
 from numpy import sin, cos, pi

+from gym import core, spaces
+from gym.utils import seeding
+
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
 __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
               "William Dabney", "Jonathan P. How"]
@@ -86,7 +87,7 @@ class AcrobotEnv(core.Env):
        self.viewer = None
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
-        self.observation_space = spaces.Box(low=low, high=high)
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.state = None
        self.seed()
--- a/gym/envs/classic_control/continuous_mountain_car.py
+++ b/gym/envs/classic_control/continuous_mountain_car.py
@@ -9,16 +9,18 @@ of Jose Antonio Martin H. (version 1.0), adapted by  'Tom Schaul, tom@idsia.ch'
 and then modified by Arnaud de Broissia

 * the OpenAI/gym MountainCar environment
-itself from 
+itself from
 http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
 permalink: https://perma.cc/6Z2N-PFWC
 """

 import math
+
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np

 class Continuous_MountainCarEnv(gym.Env):
    metadata = {
@@ -40,8 +42,10 @@ class Continuous_MountainCarEnv(gym.Env):

        self.viewer = None

-        self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
-        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
+        self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
+                                       shape=(1,), dtype=np.float32)
+        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
+                                            dtype=np.float32)

        self.seed()
        self.reset()
--- a/gym/envs/classic_control/mountain_car.py
+++ b/gym/envs/classic_control/mountain_car.py
@@ -4,10 +4,12 @@ permalink: https://perma.cc/6Z2N-PFWC
 """

 import math
+
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np

 class MountainCarEnv(gym.Env):
    metadata = {
@@ -27,7 +29,7 @@ class MountainCarEnv(gym.Env):
        self.viewer = None

        self.action_space = spaces.Discrete(3)
-        self.observation_space = spaces.Box(self.low, self.high)
+        self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)

        self.seed()
        self.reset()
--- a/gym/envs/tests/test_envs.py
+++ b/gym/envs/tests/test_envs.py
@@ -1,5 +1,6 @@
-import numpy as np
 import pytest
+import numpy as np
+
 from gym import envs
 from gym.envs.tests.spec_list import spec_list

@@ -8,7 +9,14 @@ from gym.envs.tests.spec_list import spec_list
 # envs.
@pytest.mark.parametrize("spec", spec_list)
 def test_env(spec):
-    env = spec.make()
+    # Capture warnings
+    with pytest.warns(None) as warnings:
+        env = spec.make()
+
+    # Check that dtype is explicitly declared for gym.Box spaces
+    for warning_msg in warnings:
+        assert not 'autodetected dtype' in str(warning_msg.message)
+
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
@@ -40,4 +48,3 @@ def test_random_rollout():
            (ob, _reward, done, _info) = env.step(a)
            if done: break
        env.close()
-
--- a/gym/envs/toy_text/guessing_game.py
+++ b/gym/envs/toy_text/guessing_game.py
@@ -1,7 +1,8 @@
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np


 class GuessingGame(gym.Env):
@@ -40,7 +41,8 @@ class GuessingGame(gym.Env):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 10000

-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
+                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        self.number = 0
--- a/gym/envs/toy_text/hotter_colder.py
+++ b/gym/envs/toy_text/hotter_colder.py
@@ -1,7 +1,8 @@
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np


 class HotterColder(gym.Env):
@@ -25,7 +26,8 @@ class HotterColder(gym.Env):
        self.range = 1000  # +/- value the randomly select number can be between
        self.bounds = 2000  # Action space bounds

-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
+                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        self.number = 0
--- a/gym/envs/toy_text/kellycoinflip.py
+++ b/gym/envs/toy_text/kellycoinflip.py
@@ -1,15 +1,18 @@
-import gym
-from gym import spaces
-from gym.utils import seeding
-from gym.spaces import prng
 # for Generalized Kelly coinflip game distributions:
 from scipy.stats import genpareto
 import numpy as np
 import numpy.random

+import gym
+from gym import spaces
+from gym.utils import seeding
+from gym.spaces import prng
+
+
 def flip(edge, np_random):
    return np_random.uniform() < edge

+
 class KellyCoinflipEnv(gym.Env):
    """The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
    metadata = {'render.modes': ['human']}
@@ -17,7 +20,7 @@ class KellyCoinflipEnv(gym.Env):

        self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
        self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, [1]), # (w,b)
+            spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
            spaces.Discrete(maxRounds+1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
@@ -95,11 +98,11 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth*100))
        self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, shape=[1]), # current wealth
+            spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
            spaces.Discrete(maxRounds+1), # rounds elapsed
            spaces.Discrete(maxRounds+1), # wins
            spaces.Discrete(maxRounds+1), # losses
-            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
+            spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
--- a/gym/logger.py
+++ b/gym/logger.py
@@ -1,3 +1,5 @@
+import warnings
+
 from gym.utils import colorize

 DEBUG = 10
@@ -25,7 +27,7 @@ def info(msg, *args):

 def warn(msg, *args):
    if MIN_LEVEL <= WARN:
-        print(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
+        warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))

 def error(msg, *args):
    if MIN_LEVEL <= ERROR:
--- a/gym/spaces/box.py
+++ b/gym/spaces/box.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 import gym
 from gym import logger

@@ -28,7 +29,7 @@ class Box(gym.Space):
                dtype = np.uint8
            else:
                dtype = np.float32
-            logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
+            logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
        self.low = low.astype(dtype)
        self.high = high.astype(dtype)
        gym.Space.__init__(self, shape, dtype)
@@ -47,6 +48,6 @@ class Box(gym.Space):

    def __repr__(self):
        return "Box" + str(self.shape)
-        
+
    def __eq__(self, other):
        return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
--- a/gym/spaces/tests/test_spaces.py
+++ b/gym/spaces/tests/test_spaces.py
@@ -10,10 +10,11 @@ from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [
              Discrete(3),
              Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
              Tuple((Discrete(5), Discrete(2), Discrete(2))),
              MultiDiscrete([2, 2, 100]),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
+                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
              ])
 def test_roundtripping(space):
    sample_1 = space.sample()
@@ -39,11 +40,12 @@ def test_roundtripping(space):
              Discrete(3),
              Box(low=np.array([-10, 0]),high=np.array([10, 10])),
              Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
              Tuple((Discrete(5), Discrete(2), Discrete(2))),
              MultiDiscrete([2, 2, 100]),
              MultiBinary(6),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
+                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
              ])
 def test_equality(space):
    space1 = space
@@ -55,8 +57,8 @@ def test_equality(space):
              (Discrete(3), Discrete(4)),
              (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
              (MultiBinary(8), MultiBinary(7)),
-              (Box(low=np.array([-10, 0]),high=np.array([10, 10])),
-                Box(low=np.array([-10, 0]),high=np.array([10, 9]))),
+              (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
+                Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
              (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
              (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
              (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),