Fix autodetect dtype warnings (#1234)

* Fix autodetect dtype warnings
* Use warnings module for gym logger
* Fix warning in tests
2025-08-01 14:10:30 +00:00 · 2018-11-29 02:27:27 +01:00
parent e09b0f50ef
commit cdd212db4b
12 changed files with 65 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 *.py~
 .DS_Store
 .cache
 .pytest_cache/
 # Setuptools distribution and build folders.
 /dist/
--- a/gym/envs/box2d/bipedal_walker.py
+++ b/gym/envs/box2d/bipedal_walker.py
@@ -1,6 +1,7 @@
-import sys, math
+import sys
-import numpy as np
+import math
 import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
@@ -137,9 +138,9 @@ class BipedalWalker(gym.Env, EzPickle):
        self.reset()
-        high = np.array([np.inf]*24)
+        high = np.array([np.inf] * 24)
-        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
+        self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
-        self.observation_space = spaces.Box(-high, high)
+        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
--- a/gym/envs/classic_control/acrobot.py
+++ b/gym/envs/classic_control/acrobot.py
@@ -1,9 +1,10 @@
 """classic Acrobot task"""
 from gym import core, spaces
 from gym.utils import seeding
 import numpy as np
 from numpy import sin, cos, pi
 from gym import core, spaces
 from gym.utils import seeding
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
 __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
               "William Dabney", "Jonathan P. How"]
@@ -86,7 +87,7 @@ class AcrobotEnv(core.Env):
        self.viewer = None
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
-        self.observation_space = spaces.Box(low=low, high=high)
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.state = None
        self.seed()
--- a/gym/envs/classic_control/continuous_mountain_car.py
+++ b/gym/envs/classic_control/continuous_mountain_car.py
@@ -9,16 +9,18 @@ of Jose Antonio Martin H. (version 1.0), adapted by  'Tom Schaul, tom@idsia.ch'
 and then modified by Arnaud de Broissia
 * the OpenAI/gym MountainCar environment
-itself from 
+itself from
 http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
 permalink: https://perma.cc/6Z2N-PFWC
 """
 import math
 import numpy as np
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class Continuous_MountainCarEnv(gym.Env):
    metadata = {
@@ -40,8 +42,10 @@ class Continuous_MountainCarEnv(gym.Env):
        self.viewer = None
-        self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
+        self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
-        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
+                                       shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
                                            dtype=np.float32)
        self.seed()
        self.reset()
--- a/gym/envs/classic_control/mountain_car.py
+++ b/gym/envs/classic_control/mountain_car.py
@@ -4,10 +4,12 @@ permalink: https://perma.cc/6Z2N-PFWC
 """
 import math
 import numpy as np
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class MountainCarEnv(gym.Env):
    metadata = {
@@ -27,7 +29,7 @@ class MountainCarEnv(gym.Env):
        self.viewer = None
        self.action_space = spaces.Discrete(3)
-        self.observation_space = spaces.Box(self.low, self.high)
+        self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
        self.seed()
        self.reset()
--- a/gym/envs/tests/test_envs.py
+++ b/gym/envs/tests/test_envs.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pytest
 import numpy as np
 from gym import envs
 from gym.envs.tests.spec_list import spec_list
@@ -8,7 +9,14 @@ from gym.envs.tests.spec_list import spec_list
 # envs.
@pytest.mark.parametrize("spec", spec_list)
 def test_env(spec):
-    env = spec.make()
+    # Capture warnings
    with pytest.warns(None) as warnings:
        env = spec.make()
    # Check that dtype is explicitly declared for gym.Box spaces
    for warning_msg in warnings:
        assert not 'autodetected dtype' in str(warning_msg.message)
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
@@ -40,4 +48,3 @@ def test_random_rollout():
            (ob, _reward, done, _info) = env.step(a)
            if done: break
        env.close()
--- a/gym/envs/toy_text/guessing_game.py
+++ b/gym/envs/toy_text/guessing_game.py
@@ -1,7 +1,8 @@
 import numpy as np
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class GuessingGame(gym.Env):
@@ -40,7 +41,8 @@ class GuessingGame(gym.Env):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 10000
-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)
        self.number = 0
--- a/gym/envs/toy_text/hotter_colder.py
+++ b/gym/envs/toy_text/hotter_colder.py
@@ -1,7 +1,8 @@
 import numpy as np
 import gym
 from gym import spaces
 from gym.utils import seeding
 import numpy as np
 class HotterColder(gym.Env):
@@ -25,7 +26,8 @@ class HotterColder(gym.Env):
        self.range = 1000  # +/- value the randomly select number can be between
        self.bounds = 2000  # Action space bounds
-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)
        self.number = 0
--- a/gym/envs/toy_text/kellycoinflip.py
+++ b/gym/envs/toy_text/kellycoinflip.py
@@ -1,15 +1,18 @@
 import gym
 from gym import spaces
 from gym.utils import seeding
 from gym.spaces import prng
 # for Generalized Kelly coinflip game distributions:
 from scipy.stats import genpareto
 import numpy as np
 import numpy.random
 import gym
 from gym import spaces
 from gym.utils import seeding
 from gym.spaces import prng
 def flip(edge, np_random):
    return np_random.uniform() < edge
 class KellyCoinflipEnv(gym.Env):
    """The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
    metadata = {'render.modes': ['human']}
@@ -17,7 +20,7 @@ class KellyCoinflipEnv(gym.Env):
        self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
        self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, [1]), # (w,b)
+            spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
            spaces.Discrete(maxRounds+1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
@@ -95,11 +98,11 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth*100))
        self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, shape=[1]), # current wealth
+            spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
            spaces.Discrete(maxRounds+1), # rounds elapsed
            spaces.Discrete(maxRounds+1), # wins
            spaces.Discrete(maxRounds+1), # losses
-            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
+            spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
--- a/gym/logger.py
+++ b/gym/logger.py
@@ -1,3 +1,5 @@
 import warnings
 from gym.utils import colorize
 DEBUG = 10
@@ -25,7 +27,7 @@ def info(msg, *args):
 def warn(msg, *args):
    if MIN_LEVEL <= WARN:
-        print(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
+        warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
 def error(msg, *args):
    if MIN_LEVEL <= ERROR:
--- a/gym/spaces/box.py
+++ b/gym/spaces/box.py
@@ -1,4 +1,5 @@
 import numpy as np
 import gym
 from gym import logger
@@ -28,7 +29,7 @@ class Box(gym.Space):
                dtype = np.uint8
            else:
                dtype = np.float32
-            logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
+            logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
        self.low = low.astype(dtype)
        self.high = high.astype(dtype)
        gym.Space.__init__(self, shape, dtype)
@@ -47,6 +48,6 @@ class Box(gym.Space):
    def __repr__(self):
        return "Box" + str(self.shape)
-        
+
    def __eq__(self, other):
        return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
--- a/gym/spaces/tests/test_spaces.py
+++ b/gym/spaces/tests/test_spaces.py
@@ -10,10 +10,11 @@ from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [
              Discrete(3),
              Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
              Tuple((Discrete(5), Discrete(2), Discrete(2))),
              MultiDiscrete([2, 2, 100]),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
              ])
 def test_roundtripping(space):
    sample_1 = space.sample()
@@ -39,11 +40,12 @@ def test_roundtripping(space):
              Discrete(3),
              Box(low=np.array([-10, 0]),high=np.array([10, 10])),
              Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
              Tuple((Discrete(5), Discrete(2), Discrete(2))),
              MultiDiscrete([2, 2, 100]),
              MultiBinary(6),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
              ])
 def test_equality(space):
    space1 = space
@@ -55,8 +57,8 @@ def test_equality(space):
              (Discrete(3), Discrete(4)),
              (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
              (MultiBinary(8), MultiBinary(7)),
-              (Box(low=np.array([-10, 0]),high=np.array([10, 10])),
+              (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
-                Box(low=np.array([-10, 0]),high=np.array([10, 9]))),
+                Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
              (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
              (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
              (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),