Gymnasium/gymnasium/utils/step_api_compatibility.py

"""Contains methods for step compatibility, from old-to-new and new-to-old API."""

from __future__ import annotations

from typing import SupportsFloat, Tuple, Union

import numpy as np

from gymnasium.core import ObsType


# Old ("done") step API: the 4-tuple ``(obs, rew, done, info)``.
# Every element also admits a container form (``np.ndarray`` / ``list``);
# presumably these cover batched vector-environment returns - confirm with callers.
DoneStepType = Tuple[
    Union[ObsType, np.ndarray],  # observation(s)
    Union[SupportsFloat, np.ndarray],  # reward(s)
    Union[bool, np.ndarray],  # done flag(s)
    Union[dict, list],  # info: a dict, or a list of per-env dicts
]

# New step API: the 5-tuple ``(obs, rew, terminated, truncated, info)``.
# Every element also admits a container form (``np.ndarray`` / ``list``);
# presumably these cover batched vector-environment returns - confirm with callers.
TerminatedTruncatedStepType = Tuple[
    Union[ObsType, np.ndarray],  # observation(s)
    Union[SupportsFloat, np.ndarray],  # reward(s)
    Union[bool, np.ndarray],  # terminated flag(s)
    Union[bool, np.ndarray],  # truncated flag(s)
    Union[dict, list],  # info: a dict, or a list of per-env dicts
]


def convert_to_terminated_truncated_step_api(
    step_returns: DoneStepType | TerminatedTruncatedStepType,
    is_vector_env: bool = False,
) -> TerminatedTruncatedStepType:
    """Function to transform step returns to new step API irrespective of input API.

    .. py:currentmodule:: gymnasium.Env

    A 5-element input is returned unchanged. For a 4-element input the ``"TimeLimit.truncated"``
    entry is popped from ``infos`` (so ``infos`` is modified in place) and used to split ``done``
    into ``terminated = done and not truncated`` and ``truncated = done and truncated``.
    When the key is absent, truncation defaults to ``False``.

    Args:
        step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
        is_vector_env (bool): Whether the ``step_returns`` are from a vector environment

    Returns:
        step_returns (tuple): ``(obs, rew, terminated, truncated, info)``

    Raises:
        AssertionError: If ``step_returns`` has neither 4 nor 5 elements
        TypeError: If ``is_vector_env`` is true and ``info`` is neither a list nor a dict
    """
    if len(step_returns) == 5:
        # Already in the new API, hand it back untouched.
        return step_returns

    assert (
        len(step_returns) == 4
    ), f"Expected step returns of length 4 or 5, actual length: {len(step_returns)}"
    observations, rewards, dones, infos = step_returns

    # Cases to handle - info single env /  info vector env (list) / info vector env (dict)
    # Truthiness (rather than ``is False``) so that falsy flags such as ``np.False_``
    # or ``0`` are still treated as a single environment.
    if not is_vector_env:
        truncated = infos.pop("TimeLimit.truncated", False)
        return (
            observations,
            rewards,
            dones and not truncated,
            dones and truncated,
            infos,
        )

    if isinstance(infos, list):
        # One info dict per sub-environment: collect the per-env flags into an array.
        truncated = np.array(
            [info.pop("TimeLimit.truncated", False) for info in infos]
        )
    elif isinstance(infos, dict):
        # A single dict for all sub-environments: the flag is stored as one array-like entry.
        truncated = infos.pop("TimeLimit.truncated", np.zeros(len(dones), dtype=bool))
    else:
        raise TypeError(
            f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
        )

    return (
        observations,
        rewards,
        np.logical_and(dones, np.logical_not(truncated)),
        np.logical_and(dones, truncated),
        infos,
    )


def convert_to_done_step_api(
    step_returns: TerminatedTruncatedStepType | DoneStepType,
    is_vector_env: bool = False,
) -> DoneStepType:
    """Function to transform step returns to old step API irrespective of input API.

    .. py:currentmodule:: gymnasium.Env

    A 4-element input is returned unchanged. For a 5-element input, ``done`` is computed as
    ``terminated or truncated``. Whenever an episode ended, ``"TimeLimit.truncated"`` is written
    into ``infos`` (so ``infos`` is modified in place) as ``truncated and not terminated``.

    Args:
        step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
        is_vector_env (bool): Whether the ``step_returns`` are from a vector environment

    Returns:
        step_returns (tuple): ``(obs, rew, done, info)``

    Raises:
        AssertionError: If ``step_returns`` has neither 4 nor 5 elements
        TypeError: If ``is_vector_env`` is true and ``info`` is neither a list nor a dict
    """
    if len(step_returns) == 4:
        # Already in the old API, hand it back untouched.
        return step_returns

    assert (
        len(step_returns) == 5
    ), f"Expected step returns of length 4 or 5, actual length: {len(step_returns)}"
    observations, rewards, terminated, truncated, infos = step_returns

    # Cases to handle - info single env /  info vector env (list) / info vector env (dict)
    # Truthiness (rather than ``is False``) so that falsy flags such as ``np.False_``
    # or ``0`` are still treated as a single environment.
    if not is_vector_env:
        if truncated or terminated:
            infos["TimeLimit.truncated"] = truncated and not terminated
        return (
            observations,
            rewards,
            terminated or truncated,
            infos,
        )

    if isinstance(infos, list):
        # One info dict per sub-environment: only the finished ones receive the key.
        for info, env_truncated, env_terminated in zip(infos, truncated, terminated):
            if env_truncated or env_terminated:
                info["TimeLimit.truncated"] = env_truncated and not env_terminated
    elif isinstance(infos, dict):
        # A single dict for all sub-environments: store the whole flag array
        # as soon as any sub-environment finished.
        if np.any(truncated) or np.any(terminated):
            infos["TimeLimit.truncated"] = np.logical_and(
                truncated, np.logical_not(terminated)
            )
    else:
        raise TypeError(
            f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
        )

    return (
        observations,
        rewards,
        np.logical_or(terminated, truncated),
        infos,
    )


def step_api_compatibility(
    step_returns: TerminatedTruncatedStepType | DoneStepType,
    output_truncation_bool: bool = True,
    is_vector_env: bool = False,
) -> TerminatedTruncatedStepType | DoneStepType:
    """Convert step returns into whichever step API ``output_truncation_bool`` selects.

    .. py:currentmodule:: gymnasium.Env

    The old ("done") step API is :meth:`step` returning ``(observation, reward, done, info)``.
    The new ("terminated / truncated") step API is :meth:`step` returning ``(observation, reward, terminated, truncated, info)``.
    (Refer to docs for details on the API change)

    Args:
        step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
        output_truncation_bool (bool): ``True`` (the default) to get the two-boolean new API, ``False`` to get the single-boolean old API
        is_vector_env (bool): Whether the ``step_returns`` are from a vector environment

    Returns:
        step_returns (tuple): ``(obs, rew, terminated, truncated, info)`` when ``output_truncation_bool`` is true, otherwise ``(obs, rew, done, info)``

    Example:
        Useful for bridging step interfaces that disagree on the API, e.g. an environment written against the old API,
        wrapped by code written against the new API, whose final output should again follow the old API.

        >>> import gymnasium as gym
        >>> env = gym.make("CartPole-v0")
        >>> _, _ = env.reset()
        >>> obs, reward, done, info = step_api_compatibility(env.step(0), output_truncation_bool=False)
        >>> obs, reward, terminated, truncated, info = step_api_compatibility(env.step(0), output_truncation_bool=True)

        >>> vec_env = gym.make_vec("CartPole-v0", vectorization_mode="sync")
        >>> _, _ = vec_env.reset()
        >>> obs, rewards, dones, infos = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=False)
        >>> obs, rewards, terminations, truncations, infos = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=True)

    """
    # Select the target converter first, then delegate with the same arguments.
    converter = (
        convert_to_terminated_truncated_step_api
        if output_truncation_bool
        else convert_to_done_step_api
    )
    return converter(step_returns, is_vector_env)
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`"""Contains methods for step compatibility, from old-to-new and new-to-old API."""`
Pre commit autoupdate (#1082) 2024-06-10 17:07:47 +01:00
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`from __future__ import annotations`

Use SupportsFloat in step_api_compatibility (#368) Co-authored-by: Mark Towers <mark.m.towers@gmail.com> 2023-03-14 16:31:13 +01:00			`from typing import SupportsFloat, Tuple, Union`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
			`import numpy as np`

Rename to gymnasium 2022-09-08 10:10:07 +01:00			`from gymnasium.core import ObsType`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
Update and rerun `pre-commit` hooks for better code quality (#179) 2022-12-04 22:24:02 +08:00
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`DoneStepType = Tuple[`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Union[ObsType, np.ndarray],`
Use SupportsFloat in step_api_compatibility (#368) Co-authored-by: Mark Towers <mark.m.towers@gmail.com> 2023-03-14 16:31:13 +01:00			`Union[SupportsFloat, np.ndarray],`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Union[bool, np.ndarray],`
			`Union[dict, list],`
			`]`

Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`TerminatedTruncatedStepType = Tuple[`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Union[ObsType, np.ndarray],`
Use SupportsFloat in step_api_compatibility (#368) Co-authored-by: Mark Towers <mark.m.towers@gmail.com> 2023-03-14 16:31:13 +01:00			`Union[SupportsFloat, np.ndarray],`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Union[bool, np.ndarray],`
			`Union[bool, np.ndarray],`
			`Union[dict, list],`
			`]`


Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`def convert_to_terminated_truncated_step_api(`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`step_returns: DoneStepType \| TerminatedTruncatedStepType, is_vector_env=False`
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`) -> TerminatedTruncatedStepType:`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`"""Function to transform step returns to new step API irrespective of input API.`

Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`.. py:currentmodule:: gymnasium.Env`

New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Args:`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
			is_vector_env (bool): Whether the ``step_returns`` are from a vector environment
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`"""`
			`if len(step_returns) == 5:`
			`return step_returns`
			`else:`
			`assert len(step_returns) == 4`
			`observations, rewards, dones, infos = step_returns`

Add testing for step api compatibility functions and wrapper (#3028) * Initial commit * Fixed tests and forced TimeLimit.truncated to always exist when truncated or terminated * Fix CI issues * pre-commit * Revert back to old language * Revert changes to step api wrapper 2022-08-18 15:25:46 +01:00			`# Cases to handle - info single env / info vector env (list) / info vector env (dict)`
			`if is_vector_env is False:`
			`truncated = infos.pop("TimeLimit.truncated", False)`
			`return (`
			`observations,`
			`rewards,`
			`dones and not truncated,`
			`dones and truncated,`
			`infos,`
			`)`
			`elif isinstance(infos, list):`
			`truncated = np.array(`
			`[info.pop("TimeLimit.truncated", False) for info in infos]`
			`)`
			`return (`
			`observations,`
			`rewards,`
			`np.logical_and(dones, np.logical_not(truncated)),`
			`np.logical_and(dones, truncated),`
			`infos,`
			`)`
			`elif isinstance(infos, dict):`
			`num_envs = len(dones)`
			`truncated = infos.pop("TimeLimit.truncated", np.zeros(num_envs, dtype=bool))`
			`return (`
			`observations,`
			`rewards,`
			`np.logical_and(dones, np.logical_not(truncated)),`
			`np.logical_and(dones, truncated),`
			`infos,`
			`)`
			`else:`
			`raise TypeError(`
			f"Unexpected value of infos, as is_vector_envs=False, expects `info` to be a list or dict, actual type: {type(infos)}"
			`)`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30

Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`def convert_to_done_step_api(`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`step_returns: TerminatedTruncatedStepType \| DoneStepType,`
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`is_vector_env: bool = False,`
			`) -> DoneStepType:`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`"""Function to transform step returns to old step API irrespective of input API.`

Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`.. py:currentmodule:: gymnasium.Env`

New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`Args:`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
			is_vector_env (bool): Whether the ``step_returns`` are from a vector environment
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`"""`
			`if len(step_returns) == 4:`
			`return step_returns`
			`else:`
			`assert len(step_returns) == 5`
Add testing for step api compatibility functions and wrapper (#3028) * Initial commit * Fixed tests and forced TimeLimit.truncated to always exist when truncated or terminated * Fix CI issues * pre-commit * Revert back to old language * Revert changes to step api wrapper 2022-08-18 15:25:46 +01:00			`observations, rewards, terminated, truncated, infos = step_returns`

			`# Cases to handle - info single env / info vector env (list) / info vector env (dict)`
			`if is_vector_env is False:`
			`if truncated or terminated:`
			`infos["TimeLimit.truncated"] = truncated and not terminated`
			`return (`
			`observations,`
			`rewards,`
			`terminated or truncated,`
			`infos,`
			`)`
			`elif isinstance(infos, list):`
			`for info, env_truncated, env_terminated in zip(`
			`infos, truncated, terminated`
			`):`
			`if env_truncated or env_terminated:`
			`info["TimeLimit.truncated"] = env_truncated and not env_terminated`
			`return (`
			`observations,`
			`rewards,`
			`np.logical_or(terminated, truncated),`
			`infos,`
			`)`
			`elif isinstance(infos, dict):`
			`if np.logical_or(np.any(truncated), np.any(terminated)):`
			`infos["TimeLimit.truncated"] = np.logical_and(`
			`truncated, np.logical_not(terminated)`
			`)`
			`return (`
			`observations,`
			`rewards,`
			`np.logical_or(terminated, truncated),`
			`infos,`
			`)`
			`else:`
			`raise TypeError(`
			f"Unexpected value of infos, as is_vector_envs=False, expects `info` to be a list or dict, actual type: {type(infos)}"
			`)`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30

			`def step_api_compatibility(`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`step_returns: TerminatedTruncatedStepType \| DoneStepType,`
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`output_truncation_bool: bool = True,`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`is_vector_env: bool = False,`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`) -> TerminatedTruncatedStepType \| DoneStepType:`
			"""Function to transform step returns to the API specified by ``output_truncation_bool``.
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`.. py:currentmodule:: gymnasium.Env`

			Done (old) step API refers to :meth:`step` method returning ``(observation, reward, done, info)``
			Terminated Truncated (new) step API refers to :meth:`step` method returning ``(observation, reward, terminated, truncated, info)``
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`(Refer to docs for details on the API change)`

			`Args:`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			step_returns (tuple): Items returned by :meth:`step`. Can be ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
			output_truncation_bool (bool): Whether the output should return two booleans (new API) or one (old) (``True`` by default)
			is_vector_env (bool): Whether the ``step_returns`` are from a vector environment
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
			`Returns:`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			step_returns (tuple): Depending on ``output_truncation_bool``, it can return ``(obs, rew, done, info)`` or ``(obs, rew, terminated, truncated, info)``
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
Formated doctest and added more consistency (#281) 2023-01-23 11:30:00 +01:00			`Example:`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`This function can be used to ensure compatibility in step interfaces with conflicting API. E.g. if env is written in old API,`
			`wrapper is written in new API, and the final step output is desired to be in old API.`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30
Change import gymnasium to import gymnasium as gym (#20) 2022-09-16 23:41:27 +01:00			`>>> import gymnasium as gym`
Add check doctest to CI and fixed existing errors (#274) 2023-01-20 14:28:09 +01:00			`>>> env = gym.make("CartPole-v0")`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`>>> _, _ = env.reset()`
			`>>> obs, reward, done, info = step_api_compatibility(env.step(0), output_truncation_bool=False)`
			`>>> obs, reward, terminated, truncated, info = step_api_compatibility(env.step(0), output_truncation_bool=True)`
Add check doctest to CI and fixed existing errors (#274) 2023-01-20 14:28:09 +01:00
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`>>> vec_env = gym.make_vec("CartPole-v0", vectorization_mode="sync")`
			`>>> _, _ = vec_env.reset()`
Add check doctest to CI and fixed existing errors (#274) 2023-01-20 14:28:09 +01:00			`>>> obs, rewards, dones, infos = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=False)`
Merge v1.0.0 (#682) Co-authored-by: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Co-authored-by: Jet <38184875+jjshoots@users.noreply.github.com> Co-authored-by: Omar Younis <42100908+younik@users.noreply.github.com> 2023-11-07 13:27:25 +00:00			`>>> obs, rewards, terminations, truncations, infos = step_api_compatibility(vec_env.step([0]), is_vector_env=True, output_truncation_bool=True)`

New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`"""`
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`if output_truncation_bool:`
			`return convert_to_terminated_truncated_step_api(step_returns, is_vector_env)`
New Step API with terminated, truncated bools instead of done (#2752) 2022-07-10 02:18:06 +05:30			`else:`
Support only new step API (while retaining compatibility functions) (#3019) 2022-08-30 19:41:59 +05:30			`return convert_to_done_step_api(step_returns, is_vector_env)`