mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-23 15:04:20 +00:00
* Moved pygame imports into render
* Formatting
* Make pygame optional for box2d, try to make formatting work
* fix tests, fix pre-commit.
* Update ci linter config.
* fix type hints for latest pyright version and backward compatibility with numpy <= 1.21.5
* pre-commit.

Co-authored-by: Ariel Kwiatkowski <ariel.j.kwiatkowski@gmail.com>
Co-authored-by: Gianluca De Cola <gianluca.decola@ags-it.com>
This commit is contained in:
6
.github/workflows/lint_python.yml
vendored
6
.github/workflows/lint_python.yml
vendored
@@ -8,13 +8,15 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-platform: ["Linux"]
|
python-platform: ["Linux"]
|
||||||
python-version: ["3.7"]
|
python-version: ["3.7", "3.8", "3.9", "3.10"]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
env:
|
env:
|
||||||
PYRIGHT_VERSION: 1.1.204
|
PYRIGHT_VERSION: 1.1.235
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/setup-python@v2
|
- uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
- run: pip install -e .[nomujoco]
|
- run: pip install -e .[nomujoco]
|
||||||
- uses: jakebailey/pyright-action@v1
|
- uses: jakebailey/pyright-action@v1
|
||||||
with:
|
with:
|
||||||
|
@@ -78,7 +78,7 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
reward (float) : amount of reward returned after previous action
|
reward (float) : amount of reward returned after previous action
|
||||||
done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
|
done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
|
||||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
|
info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
|
||||||
|
|
||||||
- metrics that describe the agent's performance or
|
- metrics that describe the agent's performance or
|
||||||
- state variables that are hidden from observations or
|
- state variables that are hidden from observations or
|
||||||
- information that distinguishes truncation and termination or
|
- information that distinguishes truncation and termination or
|
||||||
@@ -106,7 +106,8 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
integer seed right after initialization and then never again.
|
integer seed right after initialization and then never again.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is pased, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
|
seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
|
||||||
|
If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
|
||||||
return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
|
return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
|
||||||
options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
|
options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
|
||||||
|
|
||||||
@@ -135,7 +136,7 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
- ansi: Return a string (str) or StringIO.StringIO containing a
|
- ansi: Return a string (str) or StringIO.StringIO containing a
|
||||||
terminal-style text representation. The text can include newlines
|
terminal-style text representation. The text can include newlines
|
||||||
and ANSI escape sequences (e.g. for colors).
|
and ANSI escape sequences (e.g. for colors).
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
Make sure that your class's metadata 'render_modes' key includes
|
Make sure that your class's metadata 'render_modes' key includes
|
||||||
the list of supported modes. It's recommended to call super()
|
the list of supported modes. It's recommended to call super()
|
||||||
|
@@ -125,7 +125,7 @@ class Continuous_MountainCarEnv(gym.Env):
|
|||||||
low=self.low_state, high=self.high_state, dtype=np.float32
|
low=self.low_state, high=self.high_state, dtype=np.float32
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action: np.ndarray):
|
||||||
|
|
||||||
position = self.state[0]
|
position = self.state[0]
|
||||||
velocity = self.state[1]
|
velocity = self.state[1]
|
||||||
|
@@ -112,7 +112,7 @@ class MountainCarEnv(gym.Env):
|
|||||||
self.action_space = spaces.Discrete(3)
|
self.action_space = spaces.Discrete(3)
|
||||||
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
|
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action: int):
|
||||||
assert self.action_space.contains(
|
assert self.action_space.contains(
|
||||||
action
|
action
|
||||||
), f"{action!r} ({type(action)}) invalid"
|
), f"{action!r} ({type(action)}) invalid"
|
||||||
|
@@ -29,12 +29,12 @@ class Box(Space[np.ndarray]):
|
|||||||
There are two common use cases:
|
There are two common use cases:
|
||||||
|
|
||||||
* Identical bound for each dimension::
|
* Identical bound for each dimension::
|
||||||
|
|
||||||
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
||||||
Box(3, 4)
|
Box(3, 4)
|
||||||
|
|
||||||
* Independent bound for each dimension::
|
* Independent bound for each dimension::
|
||||||
|
|
||||||
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
||||||
Box(2,)
|
Box(2,)
|
||||||
|
|
||||||
@@ -66,9 +66,9 @@ class Box(Space[np.ndarray]):
|
|||||||
|
|
||||||
# Capture the boundedness information before replacing np.inf with get_inf
|
# Capture the boundedness information before replacing np.inf with get_inf
|
||||||
_low = np.full(shape, low, dtype=float) if np.isscalar(low) else low
|
_low = np.full(shape, low, dtype=float) if np.isscalar(low) else low
|
||||||
self.bounded_below = -np.inf < _low
|
self.bounded_below = -np.inf < _low # type: ignore
|
||||||
_high = np.full(shape, high, dtype=float) if np.isscalar(high) else high
|
_high = np.full(shape, high, dtype=float) if np.isscalar(high) else high
|
||||||
self.bounded_above = np.inf > _high
|
self.bounded_above = np.inf > _high # type: ignore
|
||||||
|
|
||||||
low = _broadcast(low, dtype, shape, inf_sign="-") # type: ignore
|
low = _broadcast(low, dtype, shape, inf_sign="-") # type: ignore
|
||||||
high = _broadcast(high, dtype, shape, inf_sign="+") # type: ignore
|
high = _broadcast(high, dtype, shape, inf_sign="+") # type: ignore
|
||||||
|
@@ -18,7 +18,7 @@ class Dict(Space[TypingDict[str, Space]], Mapping):
|
|||||||
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||||
|
|
||||||
Example usage [nested]::
|
Example usage [nested]::
|
||||||
|
|
||||||
self.nested_observation_space = spaces.Dict({
|
self.nested_observation_space = spaces.Dict({
|
||||||
'sensors': spaces.Dict({
|
'sensors': spaces.Dict({
|
||||||
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
||||||
|
@@ -35,7 +35,8 @@ class MultiBinary(Space[np.ndarray]):
|
|||||||
self.n = n = int(n)
|
self.n = n = int(n)
|
||||||
input_n = (n,)
|
input_n = (n,)
|
||||||
|
|
||||||
assert (np.asarray(input_n) > 0).all(), "n (counts) have to be positive"
|
# n (counts) have to be positive
|
||||||
|
assert (np.asarray(input_n) > 0).all() # type: ignore
|
||||||
|
|
||||||
super().__init__(input_n, np.int8, seed)
|
super().__init__(input_n, np.int8, seed)
|
||||||
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -13,9 +14,9 @@ from .space import Space
|
|||||||
class MultiDiscrete(Space[np.ndarray]):
|
class MultiDiscrete(Space[np.ndarray]):
|
||||||
"""
|
"""
|
||||||
The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space.
|
The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
|
|
||||||
Some environment wrappers assume a value of 0 always represents the NOOP action.
|
Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||||
|
|
||||||
e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
|
e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
|
||||||
@@ -52,7 +53,7 @@ class MultiDiscrete(Space[np.ndarray]):
|
|||||||
# is within correct bounds for space dtype (even though x does not have to be unsigned)
|
# is within correct bounds for space dtype (even though x does not have to be unsigned)
|
||||||
return bool(x.shape == self.shape and (0 <= x).all() and (x < self.nvec).all())
|
return bool(x.shape == self.shape and (0 <= x).all() and (x < self.nvec).all())
|
||||||
|
|
||||||
def to_jsonable(self, sample_n):
|
def to_jsonable(self, sample_n: Iterable[np.ndarray]):
|
||||||
return [sample.tolist() for sample in sample_n]
|
return [sample.tolist() for sample in sample_n]
|
||||||
|
|
||||||
def from_jsonable(self, sample_n):
|
def from_jsonable(self, sample_n):
|
||||||
@@ -66,7 +67,7 @@ class MultiDiscrete(Space[np.ndarray]):
|
|||||||
if nvec.ndim == 0:
|
if nvec.ndim == 0:
|
||||||
subspace = Discrete(nvec)
|
subspace = Discrete(nvec)
|
||||||
else:
|
else:
|
||||||
subspace = MultiDiscrete(nvec, self.dtype)
|
subspace = MultiDiscrete(nvec, self.dtype) # type: ignore
|
||||||
subspace.np_random.bit_generator.state = self.np_random.bit_generator.state
|
subspace.np_random.bit_generator.state = self.np_random.bit_generator.state
|
||||||
return subspace
|
return subspace
|
||||||
|
|
||||||
|
@@ -72,7 +72,7 @@ class Tuple(Space[tuple], Sequence):
|
|||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")"
|
return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")"
|
||||||
|
|
||||||
def to_jsonable(self, sample_n) -> list:
|
def to_jsonable(self, sample_n: Sequence) -> list:
|
||||||
# serialize as list-repr of tuple of vectors
|
# serialize as list-repr of tuple of vectors
|
||||||
return [
|
return [
|
||||||
space.to_jsonable([sample[i] for sample in sample_n])
|
space.to_jsonable([sample[i] for sample in sample_n])
|
||||||
|
@@ -19,7 +19,7 @@ def flatdim(space: Space) -> int:
|
|||||||
the space is not defined in ``gym.spaces``.
|
the space is not defined in ``gym.spaces``.
|
||||||
|
|
||||||
Example usage::
|
Example usage::
|
||||||
|
|
||||||
>>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
>>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||||
>>> spaces.flatdim(s)
|
>>> spaces.flatdim(s)
|
||||||
5
|
5
|
||||||
@@ -134,7 +134,7 @@ def _unflatten_multidiscrete(space: MultiDiscrete, x: np.ndarray) -> np.ndarray:
|
|||||||
offsets[1:] = np.cumsum(space.nvec.flatten())
|
offsets[1:] = np.cumsum(space.nvec.flatten())
|
||||||
|
|
||||||
(indices,) = np.nonzero(x)
|
(indices,) = np.nonzero(x)
|
||||||
return np.asarray(indices - offsets[:-1], dtype=space.dtype).reshape(space.shape)
|
return np.asarray(indices - offsets[:-1], dtype=space.dtype).reshape(space.shape) # type: ignore
|
||||||
|
|
||||||
|
|
||||||
@unflatten.register(Tuple)
|
@unflatten.register(Tuple)
|
||||||
|
Reference in New Issue
Block a user