Files
Gymnasium/gym/vector/utils/spaces.py
Mark Towers 3354451300 Fixed batch spaces where the original space's seed was ignored. Issue 2680 (#2727)
* Add a case for the Box shape where the low and high values are both scalars

* Add seeding.RandomNumberGenerator parameter to Dict seed. Modify __repr__ for the dictionary space string looks similar to an actual dictionary

* Add seeding.RandomNumberGenerator parameter to Multi Binary seed

* Add seeding.RandomNumberGenerator parameter to Multi Binary seed. Modify nvec typing to include np.ndarray

* Space seed typing can be a seeding.RandomNumberGenerator. If a seeding.RNG is provided then it is assigned to _np_random and .seed is not run

* Fixed the tuple seeding type as List[int] is not a valid Space seed type

* Added typing to batch_space. The batch_space seed is equal to the space's seeding

* Fixed the seeding type

* Add test for batch space seeds are identical to the original space's seeding

* Add equivalence function for RandomNumberGenerator comparing the bit_generator.state

* The batch_space functions uses a copy of the seed for the original space

* Set the action space seed for sync_vector_env seed testing

* Add test for the seeding of the sync vector environment

* Update the test_batch_space_seed to check the resulting sampling are equivalent for testing

* Revert representation back to the original version

* Remove additional Box shape initialisation

* Remove additional typing of MultiDiscrete

* Fixed bug of Space batch space where the original space's np_random is not a complete copy of the original space

* Add CustomSpace to the batched space seed test

* Modify the CustomSpace sample to produce a random number not a static value

* Fix CustomSpace to reflect the sample function

* Copy the space.np_random for the batched_space seed to ensure that the original space doesn't sampling doesn't effect the batched_space

* Parameterized the batch_space_seed, added testing for rng_different_at_each_index and test_deterministic

* Black and isort pre-commit changes

* Pre-commit fix

* MacOS, test_read_from_shared_memory throws an error that the inner _process_write function was unpicklable. Making the function a top-level function solves this error

* Fixed typing of seed where a space's seed function differs from Space.seed's typing

* Added check that the sample lengths are equal and explicitly provided the number of batched spaces n=1

* Removed relative imports for absolute imports

* Use deepcopy instead of copy

* Replaces `from numpy.testing._private.utils import assert_array_equal` with `from numpy.testing import assert_array_equal`

* Using the seeding `__eq__` function, replace `np_random.bit_generator.state` with `np_random`

* Added docstrings and comments to the tests to explain their purpose

* Remove __eq__ from RandomNumberGenerator and add to tests/vector/utils

* Add sync vector determinism test for issue #2680

* Fixed bug for 462101d384 (r850740825)

* Made the new seeds a list of integers
2022-04-24 12:14:33 -04:00

216 lines
6.1 KiB
Python

from collections import OrderedDict
from copy import deepcopy
from functools import singledispatch
import numpy as np
from gym.error import CustomSpaceError
from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Space, Tuple
_BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary)
__all__ = ["_BaseGymSpaces", "batch_space", "iterate"]
@singledispatch
def batch_space(space, n=1):
"""Create a (batched) space, containing multiple copies of a single space.
Parameters
----------
space : `gym.spaces.Space` instance
Space (e.g. the observation space) for a single environment in the
vectorized environment.
n : int
Number of environments in the vectorized environment.
Returns
-------
batched_space : `gym.spaces.Space` instance
Space (e.g. the observation space) for a batch of environments in the
vectorized environment.
Example
-------
>>> from gym.spaces import Box, Dict
>>> space = Dict({
... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32),
... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)})
>>> batch_space(space, n=5)
Dict(position:Box(5, 3), velocity:Box(5, 2))
"""
raise ValueError(
f"Cannot batch space with type `{type(space)}`. The space must be a valid `gym.Space` instance."
)
@batch_space.register(Box)
def _batch_space_box(space, n=1):
repeats = tuple([n] + [1] * space.low.ndim)
low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
return Box(low=low, high=high, dtype=space.dtype, seed=deepcopy(space.np_random))
@batch_space.register(Discrete)
def _batch_space_discrete(space, n=1):
if space.start == 0:
return MultiDiscrete(
np.full((n,), space.n, dtype=space.dtype),
dtype=space.dtype,
seed=deepcopy(space.np_random),
)
else:
return Box(
low=space.start,
high=space.start + space.n - 1,
shape=(n,),
dtype=space.dtype,
seed=deepcopy(space.np_random),
)
@batch_space.register(MultiDiscrete)
def _batch_space_multidiscrete(space, n=1):
repeats = tuple([n] + [1] * space.nvec.ndim)
high = np.tile(space.nvec, repeats) - 1
return Box(
low=np.zeros_like(high),
high=high,
dtype=space.dtype,
seed=deepcopy(space.np_random),
)
@batch_space.register(MultiBinary)
def _batch_space_multibinary(space, n=1):
return Box(
low=0,
high=1,
shape=(n,) + space.shape,
dtype=space.dtype,
seed=deepcopy(space.np_random),
)
@batch_space.register(Tuple)
def _batch_space_tuple(space, n=1):
return Tuple(
tuple(batch_space(subspace, n=n) for subspace in space.spaces),
seed=deepcopy(space.np_random),
)
@batch_space.register(Dict)
def _batch_space_dict(space, n=1):
return Dict(
OrderedDict(
[
(key, batch_space(subspace, n=n))
for (key, subspace) in space.spaces.items()
]
),
seed=deepcopy(space.np_random),
)
@batch_space.register(Space)
def _batch_space_custom(space, n=1):
# Without deepcopy, then the space.np_random is batched_space.spaces[0].np_random
# Which is an issue if you are sampling actions of both the original space and the batched space
batched_space = Tuple(
tuple(deepcopy(space) for _ in range(n)), seed=deepcopy(space.np_random)
)
new_seeds = list(map(int, batched_space.np_random.integers(0, 1e8, n)))
batched_space.seed(new_seeds)
return batched_space
@singledispatch
def iterate(space, items):
"""Iterate over the elements of a (batched) space.
Parameters
----------
space : `gym.spaces.Space` instance
Space to which `items` belong to.
items : samples of `space`
Items to be iterated over.
Returns
-------
iterator : `Iterable` instance
Iterator over the elements in `items`.
Example
-------
>>> from gym.spaces import Box, Dict
>>> space = Dict({
... 'position': Box(low=0, high=1, shape=(2, 3), dtype=np.float32),
... 'velocity': Box(low=0, high=1, shape=(2, 2), dtype=np.float32)})
>>> items = space.sample()
>>> it = iterate(space, items)
>>> next(it)
{'position': array([-0.99644893, -0.08304597, -0.7238421 ], dtype=float32),
'velocity': array([0.35848552, 0.1533453 ], dtype=float32)}
>>> next(it)
{'position': array([-0.67958736, -0.49076623, 0.38661423], dtype=float32),
'velocity': array([0.7975036 , 0.93317133], dtype=float32)}
>>> next(it)
StopIteration
"""
raise ValueError(
"Space of type `{}` is not a valid `gym.Space` " "instance.".format(type(space))
)
@iterate.register(Discrete)
def _iterate_discrete(space, items):
raise TypeError("Unable to iterate over a space of type `Discrete`.")
@iterate.register(Box)
@iterate.register(MultiDiscrete)
@iterate.register(MultiBinary)
def _iterate_base(space, items):
try:
return iter(items)
except TypeError:
raise TypeError(f"Unable to iterate over the following elements: {items}")
@iterate.register(Tuple)
def _iterate_tuple(space, items):
# If this is a tuple of custom subspaces only, then simply iterate over items
if all(
isinstance(subspace, Space)
and (not isinstance(subspace, _BaseGymSpaces + (Tuple, Dict)))
for subspace in space.spaces
):
return iter(items)
return zip(
*[iterate(subspace, items[i]) for i, subspace in enumerate(space.spaces)]
)
@iterate.register(Dict)
def _iterate_dict(space, items):
keys, values = zip(
*[
(key, iterate(subspace, items[key]))
for key, subspace in space.spaces.items()
]
)
for item in zip(*values):
yield OrderedDict([(key, value) for (key, value) in zip(keys, item)])
@iterate.register(Space)
def _iterate_custom(space, items):
raise CustomSpaceError(
f"Unable to iterate over {items}, since {space} "
"is a custom `gym.Space` instance (i.e. not one of "
"`Box`, `Dict`, etc...)."
)