Adds wrappers for jax environments converting to numpy and pytorch (#168)

Co-authored-by: Justin Deutsch <djustin8@vt.edu> Co-authored-by: Gianluca De Cola <42657588+gianlucadecola@users.noreply.github.com>
2025-07-31 13:54:31 +00:00 · 2022-12-03 19:46:01 +00:00
parent e2caec7c06
commit 9157a97c80
7 changed files with 503 additions and 8 deletions
--- a/docs/api/experimental.md
+++ b/docs/api/experimental.md
@@ -175,21 +175,21 @@ Gymnasium already contains a large collection of wrappers, but we believe that t
    * - :class:`wrappers.EnvCompatibility`
      - Moved to `shimmy <https://github.com/Farama-Foundation/Shimmy/blob/main/shimmy/openai_gym_compatibility.py>`_
      - Not Implemented
-    * - :class:`RecordEpisodeStatistics`
+    * - :class:`wrappers.RecordEpisodeStatistics`
      - RecordEpisodeStatistics
      - VectorRecordEpisodeStatistics
-    * - :class:`RenderCollection`
+    * - :class:`wrappers.RenderCollection`
      - RenderCollection
      - VectorRenderCollection
-    * - :class:`HumanRendering`
+    * - :class:`wrappers.HumanRendering`
      - HumanRendering
      - Not Implemented
    * - Not Implemented
-      - JaxToNumpy
+      - :class:`experimental.wrappers.JaxToNumpy`
-      - VectorJaxToNumpy
+      - VectorJaxToNumpy (*)
    * - Not Implemented
-      - JaxToTorch
+      - :class:`experimental.wrappers.JaxToTorch`
-      - VectorJaxToTorch
+      - VectorJaxToTorch (*)
 ```
 ### Vector Only Wrappers
--- a/gymnasium/experimental/wrappers/init.py
+++ b/gymnasium/experimental/wrappers/init.py
@@ -12,6 +12,8 @@ from gymnasium.experimental.wrappers.lambda_action import (
 )
 from gymnasium.experimental.wrappers.lambda_observations import LambdaObservationV0
 from gymnasium.experimental.wrappers.lambda_reward import ClipRewardV0, LambdaRewardV0
 from gymnasium.experimental.wrappers.numpy_to_jax import JaxToNumpyV0
 from gymnasium.experimental.wrappers.torch_to_jax import JaxToTorchV0
 from gymnasium.experimental.wrappers.sticky_action import StickyActionV0
 from gymnasium.experimental.wrappers.time_aware_observation import (
    TimeAwareObservationV0,
@@ -32,4 +34,7 @@ __all__ = [
    # Lambda Reward
    "LambdaRewardV0",
    "ClipRewardV0",
    # Jax conversion wrappers
    "JaxToNumpyV0",
    "JaxToTorchV0",
 ]
--- a/gymnasium/experimental/wrappers/numpy_to_jax.py
+++ b/gymnasium/experimental/wrappers/numpy_to_jax.py
@@ -0,0 +1,134 @@
 """Helper functions and wrapper class for converting between numpy and Jax."""
 from __future__ import annotations
 import functools
 import numbers
 from collections import abc
 from typing import Any, Iterable, Mapping, SupportsFloat
 import jax.numpy as jnp
 import numpy as np
 from gymnasium import Env, Wrapper
 from gymnasium.core import RenderFrame, WrapperActType, WrapperObsType
@functools.singledispatch
 def numpy_to_jax(value: Any) -> Any:
    """Converts a value to a Jax DeviceArray."""
    raise Exception(
        f"No conversion for Numpy to Jax registered for type: {type(value)}"
    )
@numpy_to_jax.register(numbers.Number)
@numpy_to_jax.register(np.ndarray)
 def _number_ndarray_numpy_to_jax(value: np.ndarray | numbers.Number) -> jnp.DeviceArray:
    """Converts a numpy array or  number (int, float, etc.) to a Jax DeviceArray."""
    return jnp.array(value)
@numpy_to_jax.register(abc.Mapping)
 def _mapping_numpy_to_jax(value: Mapping[str, Any]) -> Mapping[str, Any]:
    """Converts a dictionary of numpy arrays to a mapping of Jax DeviceArrays."""
    return type(value)(**{k: numpy_to_jax(v) for k, v in value.items()})
@numpy_to_jax.register(abc.Iterable)
 def _iterable_numpy_to_jax(
    value: Iterable[np.ndarray | Any],
 ) -> Iterable[jnp.DeviceArray | Any]:
    """Converts an Iterable from Numpy Arrays to an iterable of Jax DeviceArrays."""
    return type(value)(numpy_to_jax(v) for v in value)
@functools.singledispatch
 def jax_to_numpy(value: Any) -> Any:
    """Converts a value to a numpy array."""
    raise Exception(
        f"No conversion for Jax to Numpy registered for type: {type(value)}"
    )
@jax_to_numpy.register(jnp.DeviceArray)
 def _devicearray_jax_to_numpy(value: jnp.DeviceArray) -> np.ndarray:
    """Converts a Jax DeviceArray to a numpy array."""
    return np.array(value)
@jax_to_numpy.register(abc.Mapping)
 def _mapping_jax_to_numpy(
    value: Mapping[str, jnp.DeviceArray | Any]
 ) -> Mapping[str, np.ndarray | Any]:
    """Converts a dictionary of Jax DeviceArrays to a mapping of numpy arrays."""
    return type(value)(**{k: jax_to_numpy(v) for k, v in value.items()})
@jax_to_numpy.register(abc.Iterable)
 def _iterable_jax_to_numpy(
    value: Iterable[np.ndarray | Any],
 ) -> Iterable[jnp.DeviceArray | Any]:
    """Converts an Iterable from Numpy arrays to an iterable of Jax DeviceArrays."""
    return type(value)(jax_to_numpy(v) for v in value)
 class JaxToNumpyV0(Wrapper):
    """Wraps a jax environment so that it can be interacted with through numpy arrays.
    Actions must be provided as numpy arrays and observations will be returned as numpy arrays.
    Notes:
        The Jax To Numpy and Numpy to Jax conversion does not guarantee a roundtrip (jax -> numpy -> jax) and vice versa.
        The reason for this is jax does not support non-array values, therefore numpy ``int_32(5) -> DeviceArray([5], dtype=jnp.int23)``
    """
    def __init__(self, env: Env):
        """Wraps an environment such that the input and outputs are numpy arrays.
        Args:
            env: the environment to wrap
        """
        super().__init__(env)
    def step(
        self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict]:
        """Transforms the action to a jax array .
        Args:
            action: the action to perform as a numpy array
        Returns:
            A tuple containing the next observation, reward, termination, truncation, and extra info.
        """
        jax_action = numpy_to_jax(action)
        obs, reward, terminated, truncated, info = self.env.step(jax_action)
        return (
            jax_to_numpy(obs),
            float(reward),
            bool(terminated),
            bool(truncated),
            jax_to_numpy(info),
        )
    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Resets the environment returning numpy-based observation and info.
        Args:
            seed: The seed for resetting the environment
            options: The options for resetting the environment, these are converted to jax arrays.
        Returns:
            Numpy-based observations and info
        """
        if options:
            options = numpy_to_jax(options)
        return jax_to_numpy(self.env.reset(seed=seed, options=options))
    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Returns the rendered frames as a numpy array."""
        return jax_to_numpy(self.env.render())
--- a/gymnasium/experimental/wrappers/torch_to_jax.py
+++ b/gymnasium/experimental/wrappers/torch_to_jax.py
@@ -0,0 +1,163 @@
 # This wrapper will convert torch inputs for the actions and observations to Jax arrays
 # for an underlying Jax environment then convert the return observations from Jax arrays
 # back to torch tensors.
 #
 # Functionality for converting between torch and jax types originally copied from
 # https://github.com/google/brax/blob/9d6b7ced2a13da0d074b5e9fbd3aad8311e26997/brax/io/torch.py
 # Under the Apache 2.0 license. Copyright is held by the authors
 """Helper functions and wrapper class for converting between PyTorch and Jax."""
 from __future__ import annotations
 import functools
 import numbers
 from collections import abc
 from typing import Any, Iterable, Mapping, SupportsFloat, Union
 import jax.numpy as jnp
 from jax import dlpack as jax_dlpack
 from gymnasium import Env, Wrapper
 from gymnasium.core import RenderFrame, WrapperActType, WrapperObsType
 from gymnasium.error import DependencyNotInstalled
 from gymnasium.experimental.wrappers.numpy_to_jax import jax_to_numpy
 try:
    import torch
    from torch.utils import dlpack as torch_dlpack
 except ImportError:
    raise DependencyNotInstalled("torch is not installed, run `pip install torch`")
 Device = Union[str, torch.device]
@functools.singledispatch
 def torch_to_jax(value: Any) -> Any:
    """Converts a PyTorch Tensor into a Jax DeviceArray."""
    raise Exception(
        f"No conversion for PyTorch to Jax registered for type: {type(value)}"
    )
@torch_to_jax.register(numbers.Number)
 def _number_torch_to_jax(value: numbers.Number) -> Any:
    return jnp.array(value)
@torch_to_jax.register(torch.Tensor)
 def _tensor_torch_to_jax(value: torch.Tensor) -> jnp.DeviceArray:
    """Converts a PyTorch Tensor into a Jax DeviceArray."""
    tensor = torch_dlpack.to_dlpack(value)  # pyright: ignore[reportPrivateImportUsage]
    tensor = jax_dlpack.from_dlpack(tensor)  # pyright: ignore[reportPrivateImportUsage]
    return tensor
@torch_to_jax.register(abc.Mapping)
 def _mapping_torch_to_jax(value: Mapping[str, Any]) -> Mapping[str, Any]:
    """Converts a mapping of PyTorch Tensors into a Dictionary of Jax DeviceArrays."""
    return type(value)(**{k: torch_to_jax(v) for k, v in value.items()})
@torch_to_jax.register(abc.Iterable)
 def _iterable_torch_to_jax(value: Iterable[Any]) -> Iterable[Any]:
    """Converts an Iterable from PyTorch Tensors to an iterable of Jax DeviceArrays."""
    return type(value)(torch_to_jax(v) for v in value)
@functools.singledispatch
 def jax_to_torch(value: Any, device: Device | None = None) -> Any:
    """Converts a Jax DeviceArray into a PyTorch Tensor."""
    raise Exception(
        f"No conversion for Jax to PyTorch registered for type={type(value)} and device: {device}"
    )
@jax_to_torch.register(jnp.DeviceArray)
 def _devicearray_jax_to_torch(
    value: jnp.DeviceArray, device: Device | None = None
 ) -> torch.Tensor:
    """Converts a Jax DeviceArray into a PyTorch Tensor."""
    dlpack = jax_dlpack.to_dlpack(value)  # pyright: ignore[reportPrivateImportUsage]
    tensor = torch_dlpack.from_dlpack(dlpack)
    if device:
        return tensor.to(device=device)
    return tensor
@jax_to_torch.register(abc.Mapping)
 def _jax_mapping_to_torch(
    value: Mapping[str, Any], device: Device | None = None
 ) -> Mapping[str, Any]:
    """Converts a mapping of Jax DeviceArrays into a Dictionary of PyTorch Tensors."""
    return type(value)(**{k: jax_to_torch(v, device) for k, v in value.items()})
@jax_to_torch.register(abc.Iterable)
 def _jax_iterable_to_torch(
    value: Iterable[Any], device: Device | None = None
 ) -> Iterable[Any]:
    """Converts an Iterable from Jax DeviceArrays to an iterable of PyTorch Tensors."""
    return type(value)(jax_to_torch(v, device) for v in value)
 class JaxToTorchV0(Wrapper):
    """Wraps a jax-based environment so that it can be interacted with through PyTorch Tensors.
    Actions must be provided as PyTorch Tensors and observations will be returned as PyTorch Tensors.
    For ``rendered`` this is returned as a NumPy array not a pytorch Tensor.
    """
    def __init__(self, env: Env, device: Device | None = None):
        """Wrapper class to change inputs and outputs of environment to PyTorch tensors.
        Args:
            env: The Jax-based environment to wrap
            device: The device the torch Tensors should be moved to
        """
        super().__init__(env)
        self.device: Device | None = device
    def step(
        self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict]:
        """Performs the given action within the environment.
        Args:
            action: The action to perform as a PyTorch Tensor
        Returns:
            The next observation, reward, termination, truncation, and extra info
        """
        jax_action = torch_to_jax(action)
        obs, reward, terminated, truncated, info = self.env.step(jax_action)
        return (
            jax_to_torch(obs, self.device),
            float(reward),
            bool(terminated),
            bool(truncated),
            jax_to_torch(info, self.device),
        )
    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Resets the environment returning PyTorch-based observation and info.
        Args:
            seed: The seed for resetting the environment
            options: The options for resetting the environment, these are converted to jax arrays.
        Returns:
            PyTorch-based observations and info
        """
        if options:
            options = torch_to_jax(options)
        return jax_to_torch(self.env.reset(seed=seed, options=options), self.device)
    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Returns the rendered frames as a NumPy array."""
        return jax_to_numpy(self.env.render())
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,14 @@ extras: Dict[str, List[str]] = {
    "mujoco": ["mujoco>=2.3.0", "imageio>=2.14.1"],
    "toy_text": ["pygame==2.1.0"],
    "jax": ["jax==0.3.20", "jaxlib==0.3.20"],
-    "other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"],
+    "other": [
        "lz4>=3.1.0",
        "opencv-python>=3.0",
        "matplotlib>=3.0",
        "moviepy>=1.0.0",
        "tensorflow>=2.1.0",
        "torch>=1.0.0",
    ],
 }
 # All dependency groups - accept rom license as requires user to run
--- a/tests/experimental/wrappers/test_numpy_to_jax.py
+++ b/tests/experimental/wrappers/test_numpy_to_jax.py
@@ -0,0 +1,83 @@
 import jax.numpy as jnp
 import numpy as np
 import pytest
 from gymnasium.experimental.wrappers import JaxToNumpyV0
 from gymnasium.experimental.wrappers.numpy_to_jax import jax_to_numpy, numpy_to_jax
 from gymnasium.utils.env_checker import data_equivalence
 from tests.testing_env import GenericTestEnv
@pytest.mark.parametrize(
    "value, expected_value",
    [
        (1.0, np.array(1.0)),
        (2, np.array(2)),
        ((3.0, 4), (np.array(3.0), np.array(4))),
        ([3.0, 4], [np.array(3.0), np.array(4)]),
        (
            {
                "a": 6.0,
                "b": 7,
            },
            {"a": np.array(6.0), "b": np.array(7)},
        ),
        (np.array(1.0), np.array(1.0)),
        (np.array([1, 2]), np.array([1, 2])),
        (np.array([[1.0], [2.0]]), np.array([[1.0], [2.0]])),
        (
            {"a": (1, np.array(2.0), np.array([3, 4])), "b": {"c": 5}},
            {
                "a": (np.array(1), np.array(2.0), np.array([3, 4])),
                "b": {"c": np.array(5)},
            },
        ),
    ],
 )
 def test_roundtripping(value, expected_value):
    """We test numpy -> jax -> numpy as this is direction in the NumpyToJax wrapper."""
    assert data_equivalence(jax_to_numpy(numpy_to_jax(value)), expected_value)
 def jax_reset_func(self, seed=None, options=None):
    return jnp.array([1.0, 2.0, 3.0]), {"data": jnp.array([1, 2, 3])}
 def jax_step_func(self, action):
    assert isinstance(action, jnp.DeviceArray), type(action)
    return (
        jnp.array([1, 2, 3]),
        jnp.array(5.0),
        jnp.array(True),
        jnp.array(False),
        {"data": jnp.array([1.0, 2.0])},
    )
 def test_jax_to_numpy():
    jax_env = GenericTestEnv(reset_fn=jax_reset_func, step_fn=jax_step_func)
    # Check that the reset and step for jax environment are as expected
    obs, info = jax_env.reset()
    assert isinstance(obs, jnp.DeviceArray)
    assert isinstance(info, dict) and isinstance(info["data"], jnp.DeviceArray)
    obs, reward, terminated, truncated, info = jax_env.step(jnp.array([1, 2]))
    assert isinstance(obs, jnp.DeviceArray)
    assert isinstance(reward, jnp.DeviceArray)
    assert isinstance(terminated, jnp.DeviceArray) and isinstance(
        truncated, jnp.DeviceArray
    )
    assert isinstance(info, dict) and isinstance(info["data"], jnp.DeviceArray)
    # Check that the wrapped version is correct.
    numpy_env = JaxToNumpyV0(jax_env)
    obs, info = numpy_env.reset()
    assert isinstance(obs, np.ndarray)
    assert isinstance(info, dict) and isinstance(info["data"], np.ndarray)
    obs, reward, terminated, truncated, info = numpy_env.step(np.array([1, 2]))
    assert isinstance(obs, np.ndarray)
    assert isinstance(reward, float)
    assert isinstance(terminated, bool) and isinstance(truncated, bool)
    assert isinstance(info, dict) and isinstance(info["data"], np.ndarray)
--- a/tests/experimental/wrappers/test_torch_to_jax.py
+++ b/tests/experimental/wrappers/test_torch_to_jax.py
@@ -0,0 +1,103 @@
 import jax.numpy as jnp
 import numpy as np
 import pytest
 import torch
 from gymnasium.experimental.wrappers import JaxToTorchV0
 from gymnasium.experimental.wrappers.torch_to_jax import jax_to_torch, torch_to_jax
 from tests.testing_env import GenericTestEnv
 def torch_data_equivalence(data_1, data_2) -> bool:
    if type(data_1) == type(data_2):
        if isinstance(data_1, dict):
            return data_1.keys() == data_2.keys() and all(
                torch_data_equivalence(data_1[k], data_2[k]) for k in data_1.keys()
            )
        elif isinstance(data_1, (tuple, list)):
            return len(data_1) == len(data_2) and all(
                torch_data_equivalence(o_1, o_2) for o_1, o_2 in zip(data_1, data_2)
            )
        elif isinstance(data_1, torch.Tensor):
            return data_1.shape == data_2.shape and np.allclose(
                data_1, data_2, atol=0.00001
            )
        else:
            return data_1 == data_2
    else:
        return False
@pytest.mark.parametrize(
    "value, expected_value",
    [
        (1.0, torch.tensor(1.0)),
        (2, torch.tensor(2)),
        ((3.0, 4), (torch.tensor(3.0), torch.tensor(4))),
        ([3.0, 4], [torch.tensor(3.0), torch.tensor(4)]),
        (
            {
                "a": 6.0,
                "b": 7,
            },
            {"a": torch.tensor(6.0), "b": torch.tensor(7)},
        ),
        (torch.tensor(1.0), torch.tensor(1.0)),
        (torch.tensor([1, 2]), torch.tensor([1, 2])),
        (torch.tensor([[1.0], [2.0]]), torch.tensor([[1.0], [2.0]])),
        (
            {"a": (1, torch.tensor(2.0), torch.tensor([3, 4])), "b": {"c": 5}},
            {
                "a": (torch.tensor(1), torch.tensor(2.0), torch.tensor([3, 4])),
                "b": {"c": torch.tensor(5)},
            },
        ),
    ],
 )
 def test_roundtripping(value, expected_value):
    """We test numpy -> jax -> numpy as this is direction in the NumpyToJax wrapper."""
    assert torch_data_equivalence(jax_to_torch(torch_to_jax(value)), expected_value)
 def jax_reset_func(self, seed=None, options=None):
    return jnp.array([1.0, 2.0, 3.0]), {"data": jnp.array([1, 2, 3])}
 def jax_step_func(self, action):
    assert isinstance(action, jnp.DeviceArray), type(action)
    return (
        jnp.array([1, 2, 3]),
        jnp.array(5.0),
        jnp.array(True),
        jnp.array(False),
        {"data": jnp.array([1.0, 2.0])},
    )
 def test_jax_to_torch():
    env = GenericTestEnv(reset_fn=jax_reset_func, step_fn=jax_step_func)
    # Check that the reset and step for jax environment are as expected
    obs, info = env.reset()
    assert isinstance(obs, jnp.DeviceArray)
    assert isinstance(info, dict) and isinstance(info["data"], jnp.DeviceArray)
    obs, reward, terminated, truncated, info = env.step(jnp.array([1, 2]))
    assert isinstance(obs, jnp.DeviceArray)
    assert isinstance(reward, jnp.DeviceArray)
    assert isinstance(terminated, jnp.DeviceArray) and isinstance(
        truncated, jnp.DeviceArray
    )
    assert isinstance(info, dict) and isinstance(info["data"], jnp.DeviceArray)
    # Check that the wrapped version is correct.
    wrapped_env = JaxToTorchV0(env)
    obs, info = wrapped_env.reset()
    assert isinstance(obs, torch.Tensor)
    assert isinstance(info, dict) and isinstance(info["data"], torch.Tensor)
    obs, reward, terminated, truncated, info = wrapped_env.step(torch.tensor([1, 2]))
    assert isinstance(obs, torch.Tensor)
    assert isinstance(reward, float)
    assert isinstance(terminated, bool) and isinstance(truncated, bool)
    assert isinstance(info, dict) and isinstance(info["data"], torch.Tensor)