Fix autodetect dtype warnings (#1234)

* Fix autodetect dtype warnings

* Use warnings module for gym logger

* Fix warning in tests
This commit is contained in:
Antonin RAFFIN
2018-11-29 02:27:27 +01:00
committed by pzhokhov
parent e09b0f50ef
commit cdd212db4b
12 changed files with 65 additions and 37 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
*.py~ *.py~
.DS_Store .DS_Store
.cache .cache
.pytest_cache/
# Setuptools distribution and build folders. # Setuptools distribution and build folders.
/dist/ /dist/

View File

@@ -1,6 +1,7 @@
import sys, math import sys
import numpy as np import math
import numpy as np
import Box2D import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener) from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
@@ -137,9 +138,9 @@ class BipedalWalker(gym.Env, EzPickle):
self.reset() self.reset()
high = np.array([np.inf]*24) high = np.array([np.inf] * 24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1])) self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
self.observation_space = spaces.Box(-high, high) self.observation_space = spaces.Box(-high, high, dtype=np.float32)
def seed(self, seed=None): def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)

View File

@@ -1,9 +1,10 @@
"""classic Acrobot task""" """classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np import numpy as np
from numpy import sin, cos, pi from numpy import sin, cos, pi
from gym import core, spaces
from gym.utils import seeding
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy" __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann", __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"] "William Dabney", "Jonathan P. How"]
@@ -86,7 +87,7 @@ class AcrobotEnv(core.Env):
self.viewer = None self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2]) high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high low = -high
self.observation_space = spaces.Box(low=low, high=high) self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3) self.action_space = spaces.Discrete(3)
self.state = None self.state = None
self.seed() self.seed()

View File

@@ -9,16 +9,18 @@ of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment * the OpenAI/gym MountainCar environment
itself from itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC permalink: https://perma.cc/6Z2N-PFWC
""" """
import math import math
import numpy as np
import gym import gym
from gym import spaces from gym import spaces
from gym.utils import seeding from gym.utils import seeding
import numpy as np
class Continuous_MountainCarEnv(gym.Env): class Continuous_MountainCarEnv(gym.Env):
metadata = { metadata = {
@@ -40,8 +42,10 @@ class Continuous_MountainCarEnv(gym.Env):
self.viewer = None self.viewer = None
self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,)) self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state) shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
dtype=np.float32)
self.seed() self.seed()
self.reset() self.reset()

View File

@@ -4,10 +4,12 @@ permalink: https://perma.cc/6Z2N-PFWC
""" """
import math import math
import numpy as np
import gym import gym
from gym import spaces from gym import spaces
from gym.utils import seeding from gym.utils import seeding
import numpy as np
class MountainCarEnv(gym.Env): class MountainCarEnv(gym.Env):
metadata = { metadata = {
@@ -27,7 +29,7 @@ class MountainCarEnv(gym.Env):
self.viewer = None self.viewer = None
self.action_space = spaces.Discrete(3) self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high) self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
self.seed() self.seed()
self.reset() self.reset()

View File

@@ -1,5 +1,6 @@
import numpy as np
import pytest import pytest
import numpy as np
from gym import envs from gym import envs
from gym.envs.tests.spec_list import spec_list from gym.envs.tests.spec_list import spec_list
@@ -8,7 +9,14 @@ from gym.envs.tests.spec_list import spec_list
# envs. # envs.
@pytest.mark.parametrize("spec", spec_list) @pytest.mark.parametrize("spec", spec_list)
def test_env(spec): def test_env(spec):
env = spec.make() # Capture warnings
with pytest.warns(None) as warnings:
env = spec.make()
# Check that dtype is explicitly declared for gym.Box spaces
for warning_msg in warnings:
assert not 'autodetected dtype' in str(warning_msg.message)
ob_space = env.observation_space ob_space = env.observation_space
act_space = env.action_space act_space = env.action_space
ob = env.reset() ob = env.reset()
@@ -40,4 +48,3 @@ def test_random_rollout():
(ob, _reward, done, _info) = env.step(a) (ob, _reward, done, _info) = env.step(a)
if done: break if done: break
env.close() env.close()

View File

@@ -1,7 +1,8 @@
import numpy as np
import gym import gym
from gym import spaces from gym import spaces
from gym.utils import seeding from gym.utils import seeding
import numpy as np
class GuessingGame(gym.Env): class GuessingGame(gym.Env):
@@ -40,7 +41,8 @@ class GuessingGame(gym.Env):
self.range = 1000 # Randomly selected number is within +/- this value self.range = 1000 # Randomly selected number is within +/- this value
self.bounds = 10000 self.bounds = 10000
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4) self.observation_space = spaces.Discrete(4)
self.number = 0 self.number = 0

View File

@@ -1,7 +1,8 @@
import numpy as np
import gym import gym
from gym import spaces from gym import spaces
from gym.utils import seeding from gym.utils import seeding
import numpy as np
class HotterColder(gym.Env): class HotterColder(gym.Env):
@@ -25,7 +26,8 @@ class HotterColder(gym.Env):
self.range = 1000 # +/- value the randomly select number can be between self.range = 1000 # +/- value the randomly select number can be between
self.bounds = 2000 # Action space bounds self.bounds = 2000 # Action space bounds
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds])) self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4) self.observation_space = spaces.Discrete(4)
self.number = 0 self.number = 0

View File

@@ -1,15 +1,18 @@
import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng
# for Generalized Kelly coinflip game distributions: # for Generalized Kelly coinflip game distributions:
from scipy.stats import genpareto from scipy.stats import genpareto
import numpy as np import numpy as np
import numpy.random import numpy.random
import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng
def flip(edge, np_random): def flip(edge, np_random):
return np_random.uniform() < edge return np_random.uniform() < edge
class KellyCoinflipEnv(gym.Env): class KellyCoinflipEnv(gym.Env):
"""The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game.""" """The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
metadata = {'render.modes': ['human']} metadata = {'render.modes': ['human']}
@@ -17,7 +20,7 @@ class KellyCoinflipEnv(gym.Env):
self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
self.observation_space = spaces.Tuple(( self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, [1]), # (w,b) spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(maxRounds+1))) spaces.Discrete(maxRounds+1)))
self.reward_range = (0, maxWealth) self.reward_range = (0, maxWealth)
self.edge = edge self.edge = edge
@@ -95,11 +98,11 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
# the rest proceeds as before: # the rest proceeds as before:
self.action_space = spaces.Discrete(int(maxWealth*100)) self.action_space = spaces.Discrete(int(maxWealth*100))
self.observation_space = spaces.Tuple(( self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, shape=[1]), # current wealth spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
spaces.Discrete(maxRounds+1), # rounds elapsed spaces.Discrete(maxRounds+1), # rounds elapsed
spaces.Discrete(maxRounds+1), # wins spaces.Discrete(maxRounds+1), # wins
spaces.Discrete(maxRounds+1), # losses spaces.Discrete(maxRounds+1), # losses
spaces.Box(0, maxWealth, [1]))) # maximum observed wealth spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
self.reward_range = (0, maxWealth) self.reward_range = (0, maxWealth)
self.edge = edge self.edge = edge
self.wealth = self.initialWealth self.wealth = self.initialWealth

View File

@@ -1,3 +1,5 @@
import warnings
from gym.utils import colorize from gym.utils import colorize
DEBUG = 10 DEBUG = 10
@@ -25,7 +27,7 @@ def info(msg, *args):
def warn(msg, *args): def warn(msg, *args):
if MIN_LEVEL <= WARN: if MIN_LEVEL <= WARN:
print(colorize('%s: %s'%('WARN', msg % args), 'yellow')) warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
def error(msg, *args): def error(msg, *args):
if MIN_LEVEL <= ERROR: if MIN_LEVEL <= ERROR:

View File

@@ -1,4 +1,5 @@
import numpy as np import numpy as np
import gym import gym
from gym import logger from gym import logger
@@ -28,7 +29,7 @@ class Box(gym.Space):
dtype = np.uint8 dtype = np.uint8
else: else:
dtype = np.float32 dtype = np.float32
logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype) logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
self.low = low.astype(dtype) self.low = low.astype(dtype)
self.high = high.astype(dtype) self.high = high.astype(dtype)
gym.Space.__init__(self, shape, dtype) gym.Space.__init__(self, shape, dtype)
@@ -47,6 +48,6 @@ class Box(gym.Space):
def __repr__(self): def __repr__(self):
return "Box" + str(self.shape) return "Box" + str(self.shape)
def __eq__(self, other): def __eq__(self, other):
return np.allclose(self.low, other.low) and np.allclose(self.high, other.high) return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)

View File

@@ -10,10 +10,11 @@ from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [ @pytest.mark.parametrize("space", [
Discrete(3), Discrete(3),
Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]), Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))), Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 100]),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}), Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
]) ])
def test_roundtripping(space): def test_roundtripping(space):
sample_1 = space.sample() sample_1 = space.sample()
@@ -39,11 +40,12 @@ def test_roundtripping(space):
Discrete(3), Discrete(3),
Box(low=np.array([-10, 0]),high=np.array([10, 10])), Box(low=np.array([-10, 0]),high=np.array([10, 10])),
Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]), Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))), Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 100]),
MultiBinary(6), MultiBinary(6),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}), Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
]) ])
def test_equality(space): def test_equality(space):
space1 = space space1 = space
@@ -55,8 +57,8 @@ def test_equality(space):
(Discrete(3), Discrete(4)), (Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])), (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)), (MultiBinary(8), MultiBinary(7)),
(Box(low=np.array([-10, 0]),high=np.array([10, 10])), (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]),high=np.array([10, 9]))), Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])), (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})), (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})), (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),