Fix autodetect dtype warnings (#1234)

* Fix autodetect dtype warnings

* Use warnings module for gym logger

* Fix warning in tests
This commit is contained in:
Antonin RAFFIN
2018-11-29 02:27:27 +01:00
committed by pzhokhov
parent e09b0f50ef
commit cdd212db4b
12 changed files with 65 additions and 37 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
*.py~
.DS_Store
.cache
.pytest_cache/
# Setuptools distribution and build folders.
/dist/

View File

@@ -1,6 +1,7 @@
import sys, math
import numpy as np
import sys
import math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
@@ -137,9 +138,9 @@ class BipedalWalker(gym.Env, EzPickle):
self.reset()
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)
high = np.array([np.inf] * 24)
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)

View File

@@ -1,9 +1,10 @@
"""classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np
from numpy import sin, cos, pi
from gym import core, spaces
from gym.utils import seeding
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
@@ -86,7 +87,7 @@ class AcrobotEnv(core.Env):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low=low, high=high)
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
self.state = None
self.seed()

View File

@@ -9,16 +9,18 @@ of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class Continuous_MountainCarEnv(gym.Env):
metadata = {
@@ -40,8 +42,10 @@ class Continuous_MountainCarEnv(gym.Env):
self.viewer = None
self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
dtype=np.float32)
self.seed()
self.reset()

View File

@@ -4,10 +4,12 @@ permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class MountainCarEnv(gym.Env):
metadata = {
@@ -27,7 +29,7 @@ class MountainCarEnv(gym.Env):
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
self.seed()
self.reset()

View File

@@ -1,5 +1,6 @@
import numpy as np
import pytest
import numpy as np
from gym import envs
from gym.envs.tests.spec_list import spec_list
@@ -8,7 +9,14 @@ from gym.envs.tests.spec_list import spec_list
# envs.
@pytest.mark.parametrize("spec", spec_list)
def test_env(spec):
env = spec.make()
# Capture warnings
with pytest.warns(None) as warnings:
env = spec.make()
# Check that dtype is explicitly declared for gym.Box spaces
for warning_msg in warnings:
assert not 'autodetected dtype' in str(warning_msg.message)
ob_space = env.observation_space
act_space = env.action_space
ob = env.reset()
@@ -40,4 +48,3 @@ def test_random_rollout():
(ob, _reward, done, _info) = env.step(a)
if done: break
env.close()

View File

@@ -1,7 +1,8 @@
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class GuessingGame(gym.Env):
@@ -40,7 +41,8 @@ class GuessingGame(gym.Env):
self.range = 1000 # Randomly selected number is within +/- this value
self.bounds = 10000
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4)
self.number = 0

View File

@@ -1,7 +1,8 @@
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class HotterColder(gym.Env):
@@ -25,7 +26,8 @@ class HotterColder(gym.Env):
self.range = 1000 # +/- value the randomly selected number can be between
self.bounds = 2000 # Action space bounds
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4)
self.number = 0

View File

@@ -1,15 +1,18 @@
import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng
# for Generalized Kelly coinflip game distributions:
from scipy.stats import genpareto
import numpy as np
import numpy.random
import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng
def flip(edge, np_random):
return np_random.uniform() < edge
class KellyCoinflipEnv(gym.Env):
"""The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
metadata = {'render.modes': ['human']}
@@ -17,7 +20,7 @@ class KellyCoinflipEnv(gym.Env):
self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, [1]), # (w,b)
spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(maxRounds+1)))
self.reward_range = (0, maxWealth)
self.edge = edge
@@ -95,11 +98,11 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
# the rest proceeds as before:
self.action_space = spaces.Discrete(int(maxWealth*100))
self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, shape=[1]), # current wealth
spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
spaces.Discrete(maxRounds+1), # rounds elapsed
spaces.Discrete(maxRounds+1), # wins
spaces.Discrete(maxRounds+1), # losses
spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
self.reward_range = (0, maxWealth)
self.edge = edge
self.wealth = self.initialWealth

View File

@@ -1,3 +1,5 @@
import warnings
from gym.utils import colorize
DEBUG = 10
@@ -25,7 +27,7 @@ def info(msg, *args):
def warn(msg, *args):
if MIN_LEVEL <= WARN:
print(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
def error(msg, *args):
if MIN_LEVEL <= ERROR:

View File

@@ -1,4 +1,5 @@
import numpy as np
import gym
from gym import logger
@@ -28,7 +29,7 @@ class Box(gym.Space):
dtype = np.uint8
else:
dtype = np.float32
logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
self.low = low.astype(dtype)
self.high = high.astype(dtype)
gym.Space.__init__(self, shape, dtype)
@@ -47,6 +48,6 @@ class Box(gym.Space):
def __repr__(self):
return "Box" + str(self.shape)
def __eq__(self, other):
return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)

View File

@@ -10,10 +10,11 @@ from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [
Discrete(3),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
def test_roundtripping(space):
sample_1 = space.sample()
@@ -39,11 +40,12 @@ def test_roundtripping(space):
Discrete(3),
Box(low=np.array([-10, 0]),high=np.array([10, 10])),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
def test_equality(space):
space1 = space
@@ -55,8 +57,8 @@ def test_equality(space):
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(Box(low=np.array([-10, 0]),high=np.array([10, 10])),
Box(low=np.array([-10, 0]),high=np.array([10, 9]))),
(Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),