# Gymnasium/gymnasium/wrappers/numpy_to_torch.py

"""Helper functions and wrapper class for converting between PyTorch and NumPy."""

from __future__ import annotations

import functools
import numbers
from collections import abc
from typing import Any, Iterable, Mapping, SupportsFloat, Union

import numpy as np

import gymnasium as gym
from gymnasium.core import WrapperActType, WrapperObsType
from gymnasium.error import DependencyNotInstalled


# Torch is an optional dependency: if it cannot be imported, fail at import time
# with a message that tells the user how to install it.
try:
    import torch

    # A device can be given either as a string or as a ``torch.device`` object.
    Device = Union[str, torch.device]
except ImportError:
    raise DependencyNotInstalled(
        'Torch is not installed therefore cannot call `torch_to_numpy`, run `pip install "gymnasium[torch]"`'
    )


# Public names of this module (what ``from ... import *`` exports).
__all__ = ["NumpyToTorch", "torch_to_numpy", "numpy_to_torch"]


@functools.singledispatch
def torch_to_numpy(value: Any) -> Any:
    """Convert a PyTorch-based value into its NumPy equivalent.

    This is the ``functools.singledispatch`` fallback: it only runs when no
    converter has been registered (via ``torch_to_numpy.register``) for the
    type of ``value``, and it therefore always raises.

    Args:
        value: The object to convert.

    Raises:
        Exception: Always, naming the type that has no registered conversion.
    """
    unsupported_type = type(value)
    raise Exception(
        f"No known conversion for Torch type ({unsupported_type}) to NumPy registered. Report as issue on github."
    )


@torch_to_numpy.register(numbers.Number)
@torch_to_numpy.register(torch.Tensor)
def _number_torch_to_numpy(value: numbers.Number | torch.Tensor) -> Any:
    """Convert a python number (int, float, complex) or a torch.Tensor to a NumPy array.

    Args:
        value: A Python number or a PyTorch tensor. Tensors may require grad
            and may live on any device.

    Returns:
        A NumPy array holding a copy of the data in ``value``.
    """
    if isinstance(value, torch.Tensor):
        # NumPy can only read CPU tensors that are not part of an autograd graph;
        # without this, tensors that require grad or live on an accelerator raise.
        value = value.detach().cpu()
    return np.array(value)


@torch_to_numpy.register(abc.Mapping)
def _mapping_torch_to_numpy(value: Mapping[str, Any]) -> Mapping[str, Any]:
    """Converts a mapping of PyTorch Tensors into a mapping of NumPy arrays.

    Args:
        value: The mapping whose values are converted with ``torch_to_numpy``.

    Returns:
        A new mapping of the same type as ``value`` with the same keys and
        converted values.
    """
    converted = {key: torch_to_numpy(item) for key, item in value.items()}
    if all(isinstance(key, str) for key in converted):
        return type(value)(**converted)
    # Keyword expansion only accepts string keys; for any other key type build the
    # result from the converted dict positionally instead of raising ``TypeError``.
    return type(value)(converted)


@torch_to_numpy.register(abc.Iterable)
def _iterable_torch_to_numpy(value: Iterable[Any]) -> Iterable[Any]:
    """Converts an Iterable of PyTorch Tensors to an iterable of NumPy arrays.

    Args:
        value: The iterable whose elements are converted with ``torch_to_numpy``.

    Returns:
        A new container of the same type as ``value`` holding the converted
        elements. ``str`` and ``bytes`` are returned unchanged.
    """
    if isinstance(value, (str, bytes)):
        # Text is iterable but is not a container of tensors: rebuilding a ``str``
        # from a generator would yield the generator's repr, so pass it through.
        return value
    if hasattr(value, "_make"):
        # namedtuple - underline used to prevent potential name conflicts
        # noinspection PyProtectedMember
        return type(value)._make(torch_to_numpy(v) for v in value)
    else:
        return type(value)(torch_to_numpy(v) for v in value)


@functools.singledispatch
def numpy_to_torch(value: Any, device: Device | None = None) -> Any:
    """Convert a NumPy-based value into its PyTorch equivalent.

    This is the ``functools.singledispatch`` fallback: it only runs when no
    converter has been registered (via ``numpy_to_torch.register``) for the
    type of ``value``, and it therefore always raises.

    Args:
        value: The object to convert.
        device: The device the resulting tensors should be moved to (unused by
            this fallback).

    Raises:
        Exception: Always, naming the type that has no registered conversion.
    """
    unsupported_type = type(value)
    raise Exception(
        f"No known conversion for NumPy type ({unsupported_type}) to PyTorch registered. Report as issue on github."
    )


@numpy_to_torch.register(numbers.Number)
@numpy_to_torch.register(np.ndarray)
def _numpy_to_torch(value: np.ndarray, device: Device | None = None) -> torch.Tensor:
    """Converts a NumPy array (or a Python number) into a PyTorch Tensor.

    Args:
        value: The array or number handed to ``torch.tensor``.
        device: Optional device to move the new tensor to.

    Returns:
        The tensor built from ``value``, moved to ``device`` when one is given.
    """
    assert torch is not None
    converted = torch.tensor(value)
    # A falsy ``device`` (e.g. ``None``) leaves the tensor where ``torch.tensor`` created it.
    return converted.to(device=device) if device else converted


@numpy_to_torch.register(abc.Mapping)
def _numpy_mapping_to_torch(
    value: Mapping[str, Any], device: Device | None = None
) -> Mapping[str, Any]:
    """Converts a mapping of NumPy arrays into a mapping of PyTorch Tensors.

    Args:
        value: The mapping whose values are converted with ``numpy_to_torch``.
        device: Optional device forwarded to every value conversion.

    Returns:
        A new mapping of the same type as ``value`` with the same keys and
        converted values.
    """
    converted = {key: numpy_to_torch(item, device) for key, item in value.items()}
    if all(isinstance(key, str) for key in converted):
        return type(value)(**converted)
    # Keyword expansion only accepts string keys; for any other key type build the
    # result from the converted dict positionally instead of raising ``TypeError``.
    return type(value)(converted)


@numpy_to_torch.register(abc.Iterable)
def _numpy_iterable_to_torch(
    value: Iterable[Any], device: Device | None = None
) -> Iterable[Any]:
    """Converts an Iterable of NumPy arrays to an iterable of PyTorch Tensors.

    Args:
        value: The iterable whose elements are converted with ``numpy_to_torch``.
        device: Optional device forwarded to every element conversion.

    Returns:
        A new container of the same type as ``value`` holding the converted
        elements. ``str`` and ``bytes`` are returned unchanged.
    """
    if isinstance(value, (str, bytes)):
        # Text is iterable but is not a container of arrays: rebuilding a ``str``
        # from a generator would yield the generator's repr, so pass it through.
        return value
    if hasattr(value, "_make"):
        # namedtuple - underline used to prevent potential name conflicts
        # noinspection PyProtectedMember
        return type(value)._make(numpy_to_torch(v, device) for v in value)
    else:
        # ``device`` must be forwarded, otherwise nested elements ignore the request.
        return type(value)(numpy_to_torch(v, device) for v in value)


class NumpyToTorch(gym.Wrapper, gym.utils.RecordConstructorArgs):
    """Wraps a NumPy-based environment such that it can be interacted with PyTorch Tensors.

    Actions must be provided as PyTorch Tensors and observations will be returned as PyTorch Tensors.
    A vector version of the wrapper exists, :class:`gymnasium.wrappers.vector.NumpyToTorch`.

    Note:
        ``render`` is not overridden by this wrapper, so rendered frames are returned as a NumPy array not a PyTorch Tensor.

    Example:
        >>> import torch
        >>> import gymnasium as gym
        >>> env = gym.make("CartPole-v1")
        >>> env = NumpyToTorch(env)
        >>> obs, _ = env.reset(seed=123)
        >>> type(obs)
        <class 'torch.Tensor'>
        >>> action = torch.tensor(env.action_space.sample())
        >>> obs, reward, terminated, truncated, info = env.step(action)
        >>> type(obs)
        <class 'torch.Tensor'>
        >>> type(reward)
        <class 'float'>
        >>> type(terminated)
        <class 'bool'>
        >>> type(truncated)
        <class 'bool'>

    Change logs:
     * v1.0.0 - Initially added
    """

    def __init__(self, env: gym.Env, device: Device | None = None):
        """Wrapper class to change inputs and outputs of environment to PyTorch tensors.

        Args:
            env: The NumPy-based environment to wrap
            device: The device the torch Tensors should be moved to
        """
        gym.utils.RecordConstructorArgs.__init__(self, device=device)
        gym.Wrapper.__init__(self, env)

        self.device: Device | None = device

    def step(
        self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict]:
        """Using a PyTorch based action that is converted to NumPy to be used by the environment.

        Args:
            action: A PyTorch-based action

        Returns:
            The PyTorch-based Tensor next observation, reward, termination, truncation, and extra info
        """
        numpy_action = torch_to_numpy(action)
        obs, reward, terminated, truncated, info = self.env.step(numpy_action)

        return (
            numpy_to_torch(obs, self.device),
            float(reward),
            bool(terminated),
            bool(truncated),
            numpy_to_torch(info, self.device),
        )

    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Resets the environment returning PyTorch-based observation and info.

        Args:
            seed: The seed for resetting the environment
            options: The options for resetting the environment, these are converted to NumPy arrays.

        Returns:
            PyTorch-based observations and info
        """
        if options:
            options = torch_to_numpy(options)

        obs, info = self.env.reset(seed=seed, options=options)
        # Convert each element explicitly (as ``step`` does) so that ``self.device``
        # is applied to both the observation and the info dictionary.
        return numpy_to_torch(obs, self.device), numpy_to_torch(info, self.device)
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`"""Helper functions and wrapper class for converting between PyTorch and NumPy."""`
Pre commit autoupdate (#1082) 2024-06-10 17:07:47 +01:00
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`from __future__ import annotations`

			`import functools`
			`import numbers`
			`from collections import abc`
			`from typing import Any, Iterable, Mapping, SupportsFloat, Union`

			`import numpy as np`

Update `EnvSpec` and `make` to support reproducing the "whole" environment spec including wrappers (#292) Co-authored-by: will <will2346@live.co.uk> Co-authored-by: Will Dudley <14932240+WillDudley@users.noreply.github.com> Co-authored-by: Mark Towers <marktowers@Marks-MacBook-Pro.local> 2023-02-24 11:34:20 +00:00			`import gymnasium as gym`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`from gymnasium.core import WrapperActType, WrapperObsType`
			`from gymnasium.error import DependencyNotInstalled`


			`try:`
			`import torch`

			`Device = Union[str, torch.device]`
			`except ImportError:`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`raise DependencyNotInstalled(`
Update requirement list and add quotes around pip install for macos users (#1001) 2024-04-06 15:44:09 +01:00			'Torch is not installed therefore cannot call `torch_to_numpy`, run `pip install "gymnasium[torch]"`'
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`)`


Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`__all__ = ["NumpyToTorch", "torch_to_numpy", "numpy_to_torch"]`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00

			`@functools.singledispatch`
			`def torch_to_numpy(value: Any) -> Any:`
			`"""Converts a PyTorch Tensor into a NumPy Array."""`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`raise Exception(`
			`f"No known conversion for Torch type ({type(value)}) to NumPy registered. Report as issue on github."`
			`)`

Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`@torch_to_numpy.register(numbers.Number)`
			`@torch_to_numpy.register(torch.Tensor)`
			`def _number_torch_to_numpy(value: numbers.Number \| torch.Tensor) -> Any:`
			`"""Convert a python number (int, float, complex) and torch.Tensor to a numpy array."""`
			`return np.array(value)`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00

Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`@torch_to_numpy.register(abc.Mapping)`
			`def _mapping_torch_to_numpy(value: Mapping[str, Any]) -> Mapping[str, Any]:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts a mapping of PyTorch Tensors into a Dictionary of Jax Array."""`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`return type(value)(**{k: torch_to_numpy(v) for k, v in value.items()})`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00

Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`@torch_to_numpy.register(abc.Iterable)`
			`def _iterable_torch_to_numpy(value: Iterable[Any]) -> Iterable[Any]:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts an Iterable from PyTorch Tensors to an iterable of Jax Array."""`
Add support for NamedTuple in jax->torch and numpy->torch (#811) 2023-12-04 12:14:19 +00:00			`if hasattr(value, "_make"):`
			`# namedtuple - underline used to prevent potential name conflicts`
			`# noinspection PyProtectedMember`
			`return type(value)._make(torch_to_numpy(v) for v in value)`
			`else:`
			`return type(value)(torch_to_numpy(v) for v in value)`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00

			`@functools.singledispatch`
			`def numpy_to_torch(value: Any, device: Device \| None = None) -> Any:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts a Jax Array into a PyTorch Tensor."""`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`raise Exception(`
			`f"No known conversion for NumPy type ({type(value)}) to PyTorch registered. Report as issue on github."`
			`)`


Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`@numpy_to_torch.register(numbers.Number)`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`@numpy_to_torch.register(np.ndarray)`
			`def _numpy_to_torch(value: np.ndarray, device: Device \| None = None) -> torch.Tensor:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts a Jax Array into a PyTorch Tensor."""`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`assert torch is not None`
			`tensor = torch.tensor(value)`
			`if device:`
			`return tensor.to(device=device)`
			`return tensor`


			`@numpy_to_torch.register(abc.Mapping)`
			`def _numpy_mapping_to_torch(`
			`value: Mapping[str, Any], device: Device \| None = None`
			`) -> Mapping[str, Any]:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts a mapping of Jax Array into a Dictionary of PyTorch Tensors."""`
Add `__get_attr__` for experimental wrappers for generic solution to optimise extra module imports (#392) 2023-03-17 21:00:48 +00:00			`return type(value)(**{k: numpy_to_torch(v, device) for k, v in value.items()})`


			`@numpy_to_torch.register(abc.Iterable)`
			`def _numpy_iterable_to_torch(`
			`value: Iterable[Any], device: Device \| None = None`
			`) -> Iterable[Any]:`
Add changes to support Jax 0.4 (#373) 2023-07-03 23:53:57 +02:00			`"""Converts an Iterable from Jax Array to an iterable of PyTorch Tensors."""`
Add support for NamedTuple in jax->torch and numpy->torch (#811) 2023-12-04 12:14:19 +00:00			`if hasattr(value, "_make"):`
			`# namedtuple - underline used to prevent potential name conflicts`
			`# noinspection PyProtectedMember`
			`return type(value)._make(numpy_to_torch(v) for v in value)`
			`else:`
			`return type(value)(numpy_to_torch(v) for v in value)`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00

Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`class NumpyToTorch(gym.Wrapper, gym.utils.RecordConstructorArgs):`
			`"""Wraps a NumPy-based environment such that it can be interacted with PyTorch Tensors.`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00
			`Actions must be provided as PyTorch Tensors and observations will be returned as PyTorch Tensors.`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			A vector version of the wrapper exists, :class:`gymnasium.wrappers.vector.NumpyToTorch`.
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00
			`Note:`
			For ``rendered`` this is returned as a NumPy array not a pytorch Tensor.
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00
			`Example:`
			`>>> import torch`
			`>>> import gymnasium as gym`
			`>>> env = gym.make("CartPole-v1")`
			`>>> env = NumpyToTorch(env)`
			`>>> obs, _ = env.reset(seed=123)`
			`>>> type(obs)`
			`<class 'torch.Tensor'>`
			`>>> action = torch.tensor(env.action_space.sample())`
			`>>> obs, reward, terminated, truncated, info = env.step(action)`
			`>>> type(obs)`
			`<class 'torch.Tensor'>`
			`>>> type(reward)`
			`<class 'float'>`
			`>>> type(terminated)`
			`<class 'bool'>`
			`>>> type(truncated)`
			`<class 'bool'>`

			`Change logs:`
			`* v1.0.0 - Initially added`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`"""`

Update `EnvSpec` and `make` to support reproducing the "whole" environment spec including wrappers (#292) Co-authored-by: will <will2346@live.co.uk> Co-authored-by: Will Dudley <14932240+WillDudley@users.noreply.github.com> Co-authored-by: Mark Towers <marktowers@Marks-MacBook-Pro.local> 2023-02-24 11:34:20 +00:00			`def __init__(self, env: gym.Env, device: Device \| None = None):`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`"""Wrapper class to change inputs and outputs of environment to PyTorch tensors.`

			`Args:`
			`env: The Jax-based environment to wrap`
			`device: The device the torch Tensors should be moved to`
			`"""`
Update `EnvSpec` and `make` to support reproducing the "whole" environment spec including wrappers (#292) Co-authored-by: will <will2346@live.co.uk> Co-authored-by: Will Dudley <14932240+WillDudley@users.noreply.github.com> Co-authored-by: Mark Towers <marktowers@Marks-MacBook-Pro.local> 2023-02-24 11:34:20 +00:00			`gym.utils.RecordConstructorArgs.__init__(self, device=device)`
			`gym.Wrapper.__init__(self, env)`

Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`self.device: Device \| None = device`

			`def step(`
			`self, action: WrapperActType`
			`) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict]:`
Experimental wrapper changes (#517) 2023-05-23 15:46:04 +01:00			`"""Using a PyTorch based action that is converted to NumPy to be used by the environment.`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00
			`Args:`
Experimental wrapper changes (#517) 2023-05-23 15:46:04 +01:00			`action: A PyTorch-based action`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00
			`Returns:`
Experimental wrapper changes (#517) 2023-05-23 15:46:04 +01:00			`The PyTorch-based Tensor next observation, reward, termination, truncation, and extra info`
Add wrappers to experimental (#201) 2022-12-10 22:04:14 +00:00			`"""`
			`jax_action = torch_to_numpy(action)`
			`obs, reward, terminated, truncated, info = self.env.step(jax_action)`

			`return (`
			`numpy_to_torch(obs, self.device),`
			`float(reward),`
			`bool(terminated),`
			`bool(truncated),`
			`numpy_to_torch(info, self.device),`
			`)`

			`def reset(`
			`self, *, seed: int \| None = None, options: dict[str, Any] \| None = None`
			`) -> tuple[WrapperObsType, dict[str, Any]]:`
			`"""Resets the environment returning PyTorch-based observation and info.`

			`Args:`
			`seed: The seed for resetting the environment`
			`options: The options for resetting the environment, these are converted to jax arrays.`

			`Returns:`
			`PyTorch-based observations and info`
			`"""`
			`if options:`
			`options = torch_to_numpy(options)`

			`return numpy_to_torch(self.env.reset(seed=seed, options=options), self.device)`