Gymnasium/tests/vector/test_autoreset_mode.py

from __future__ import annotations

from functools import partial

import numpy as np
import pytest

import gymnasium as gym
from gymnasium import VectorizeMode
from gymnasium.spaces import Discrete
from gymnasium.utils.env_checker import data_equivalence
from gymnasium.vector import AsyncVectorEnv, SyncVectorEnv
from gymnasium.vector.vector_env import AutoresetMode
from tests.spaces.utils import TESTING_SPACES, TESTING_SPACES_IDS
from tests.testing_env import GenericTestEnv


def count_reset(
    self: GenericTestEnv, seed: int | None = None, options: dict | None = None
):
    super(GenericTestEnv, self).reset(seed=seed)

    self.count = seed if seed is not None else 0
    return self.count, {}


def count_step(self: GenericTestEnv, action):
    self.count += 1

    return self.count, action, self.count == self.max_count, False, {}


@pytest.mark.parametrize(
    "vectoriser",
    [
        SyncVectorEnv,
        AsyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=False),
    ],
    ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],
)
def test_autoreset_next_step(vectoriser):
    envs = vectoriser(
        [
            lambda: GenericTestEnv(
                action_space=Discrete(5),
                observation_space=Discrete(5),
                reset_func=count_reset,
                step_func=count_step,
            )
            for _ in range(3)
        ],
        autoreset_mode=AutoresetMode.NEXT_STEP,
    )
    assert envs.metadata["autoreset_mode"] == AutoresetMode.NEXT_STEP
    envs.set_attr("max_count", [2, 3, 3])

    obs, info = envs.reset()
    assert np.all(obs == [0, 0, 0])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 1, 1])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [False, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [2, 2, 2])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [True, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [0, 3, 3])
    assert np.all(rewards == [0, 2, 3])
    assert np.all(terminations == [False, True, True])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 0, 0])
    assert np.all(rewards == [1, 0, 0])
    assert np.all(terminations == [False, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    envs.close()


@pytest.mark.parametrize(
    "vectoriser",
    [
        SyncVectorEnv,
        AsyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=False),
    ],
    ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],
)
def test_autoreset_within_step(vectoriser):
    envs = vectoriser(
        [
            lambda: GenericTestEnv(
                action_space=Discrete(5),
                observation_space=Discrete(5),
                reset_func=count_reset,
                step_func=count_step,
            )
            for _ in range(3)
        ],
        autoreset_mode=AutoresetMode.SAME_STEP,
    )
    assert envs.metadata["autoreset_mode"] == AutoresetMode.SAME_STEP
    envs.set_attr("max_count", [2, 3, 3])

    obs, info = envs.reset()
    assert np.all(obs == [0, 0, 0])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 1, 1])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [False, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [0, 2, 2])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [True, False, False])
    assert np.all(truncations == [False, False, False])
    assert data_equivalence(
        info,
        {
            "final_obs": np.array([2, None, None], dtype=object),
            "final_info": {},
            "_final_obs": np.array([True, False, False]),
            "_final_info": np.array([True, False, False]),
        },
    )

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 0, 0])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [False, True, True])
    assert np.all(truncations == [False, False, False])
    assert data_equivalence(
        info,
        {
            "final_obs": np.array([None, 3, 3], dtype=object),
            "final_info": {},
            "_final_obs": np.array([False, True, True]),
            "_final_info": np.array([False, True, True]),
        },
    )

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [0, 1, 1])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [True, False, False])
    assert np.all(truncations == [False, False, False])
    assert data_equivalence(
        info,
        {
            "final_obs": np.array([2, None, None], dtype=object),
            "final_info": {},
            "_final_obs": np.array([True, False, False]),
            "_final_info": np.array([True, False, False]),
        },
    )

    envs.close()


@pytest.mark.parametrize(
    "vectoriser",
    [
        SyncVectorEnv,
        AsyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=False),
    ],
    ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],
)
def test_autoreset_disabled(vectoriser):
    envs = vectoriser(
        [
            lambda: GenericTestEnv(
                action_space=Discrete(5),
                observation_space=Discrete(5),
                reset_func=count_reset,
                step_func=count_step,
            )
            for _ in range(3)
        ],
        autoreset_mode=AutoresetMode.DISABLED,
    )
    assert envs.metadata["autoreset_mode"] == AutoresetMode.DISABLED
    envs.set_attr("max_count", [2, 3, 3])

    obs, info = envs.reset()
    assert np.all(obs == [0, 0, 0])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 1, 1])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [False, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [2, 2, 2])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [True, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, info = envs.reset(options={"reset_mask": terminations})
    assert np.all(obs == [0, 2, 2])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [1, 3, 3])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [False, True, True])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    obs, info = envs.reset(options={"reset_mask": terminations})
    assert np.all(obs == [1, 0, 0])
    assert info == {}

    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert np.all(obs == [2, 1, 1])
    assert np.all(rewards == [1, 2, 3])
    assert np.all(terminations == [True, False, False])
    assert np.all(truncations == [False, False, False])
    assert info == {}

    envs.close()


@pytest.mark.parametrize(
    "vectoriser",
    [
        SyncVectorEnv,
        AsyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=False),
    ],
    ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],
)
@pytest.mark.parametrize(
    "autoreset_mode",
    [AutoresetMode.NEXT_STEP, AutoresetMode.DISABLED, AutoresetMode.SAME_STEP],
)
def test_autoreset_metadata(vectoriser, autoreset_mode):
    envs = vectoriser(
        [lambda: GenericTestEnv(), lambda: GenericTestEnv()],
        autoreset_mode=autoreset_mode,
    )
    assert envs.metadata["autoreset_mode"] == autoreset_mode
    envs.close()

    envs = vectoriser(
        [lambda: GenericTestEnv(), lambda: GenericTestEnv()],
        autoreset_mode=autoreset_mode.value,
    )
    assert envs.metadata["autoreset_mode"] == autoreset_mode
    envs.close()


@pytest.mark.parametrize(
    "vectorization_mode", [VectorizeMode.SYNC, VectorizeMode.ASYNC]
)
@pytest.mark.parametrize(
    "autoreset_mode",
    [AutoresetMode.NEXT_STEP, AutoresetMode.DISABLED, AutoresetMode.SAME_STEP],
)
def test_make_vec_autoreset(vectorization_mode, autoreset_mode):
    envs = gym.make_vec(
        "CartPole-v1",
        vectorization_mode=vectorization_mode,
        vector_kwargs={"autoreset_mode": autoreset_mode},
    )
    envs.metadata["autoreset_mode"] = autoreset_mode
    envs.close()

    envs = gym.make_vec(
        "CartPole-v1",
        vectorization_mode=vectorization_mode,
        vector_kwargs={"autoreset_mode": autoreset_mode.value},
    )
    envs.metadata["autoreset_mode"] = autoreset_mode
    envs.close()


def count_reset_obs(
    self: GenericTestEnv, seed: int | None = None, options: dict | None = None
):
    super(GenericTestEnv, self).reset(seed=seed)

    self.count = seed if seed is not None else 0
    return self.observation_space.sample(), {}


def count_step_obs(self: GenericTestEnv, action):
    self.count += 1

    return (
        self.observation_space.sample(),
        action,
        self.count == self.max_count,
        False,
        {},
    )


@pytest.mark.parametrize("obs_space", TESTING_SPACES, ids=TESTING_SPACES_IDS)
def test_same_step_final_obs(obs_space):
    envs = SyncVectorEnv(
        [
            lambda: GenericTestEnv(
                action_space=Discrete(5),
                observation_space=obs_space,
                reset_func=count_reset_obs,
                step_func=count_step_obs,
            )
            for _ in range(3)
        ],
        autoreset_mode=AutoresetMode.SAME_STEP,
    )
    assert envs.metadata["autoreset_mode"] == AutoresetMode.SAME_STEP
    envs.set_attr("max_count", [2, 3, 3])

    envs.reset()
    envs.step([1, 2, 3])
    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert info["final_obs"][0] in envs.single_observation_space
    obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])
    assert info["final_obs"][1] in envs.single_observation_space
    assert info["final_obs"][2] in envs.single_observation_space
Add support for different vector autoreset modes (#1227) 2024-11-28 12:18:55 +00:00			`from __future__ import annotations`

			`from functools import partial`

			`import numpy as np`
			`import pytest`

			`import gymnasium as gym`
			`from gymnasium import VectorizeMode`
			`from gymnasium.spaces import Discrete`
			`from gymnasium.utils.env_checker import data_equivalence`
			`from gymnasium.vector import AsyncVectorEnv, SyncVectorEnv`
			`from gymnasium.vector.vector_env import AutoresetMode`
			`from tests.spaces.utils import TESTING_SPACES, TESTING_SPACES_IDS`
			`from tests.testing_env import GenericTestEnv`


			`def count_reset(`
			`self: GenericTestEnv, seed: int \| None = None, options: dict \| None = None`
			`):`
			`super(GenericTestEnv, self).reset(seed=seed)`

			`self.count = seed if seed is not None else 0`
			`return self.count, {}`


			`def count_step(self: GenericTestEnv, action):`
			`self.count += 1`

			`return self.count, action, self.count == self.max_count, False, {}`


			`@pytest.mark.parametrize(`
			`"vectoriser",`
			`[`
			`SyncVectorEnv,`
			`AsyncVectorEnv,`
			`partial(AsyncVectorEnv, shared_memory=False),`
			`],`
			`ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],`
			`)`
			`def test_autoreset_next_step(vectoriser):`
			`envs = vectoriser(`
			`[`
			`lambda: GenericTestEnv(`
			`action_space=Discrete(5),`
			`observation_space=Discrete(5),`
			`reset_func=count_reset,`
			`step_func=count_step,`
			`)`
			`for _ in range(3)`
			`],`
			`autoreset_mode=AutoresetMode.NEXT_STEP,`
			`)`
			`assert envs.metadata["autoreset_mode"] == AutoresetMode.NEXT_STEP`
			`envs.set_attr("max_count", [2, 3, 3])`

			`obs, info = envs.reset()`
			`assert np.all(obs == [0, 0, 0])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 1, 1])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [False, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [2, 2, 2])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [True, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [0, 3, 3])`
			`assert np.all(rewards == [0, 2, 3])`
			`assert np.all(terminations == [False, True, True])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 0, 0])`
			`assert np.all(rewards == [1, 0, 0])`
			`assert np.all(terminations == [False, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`envs.close()`


			`@pytest.mark.parametrize(`
			`"vectoriser",`
			`[`
			`SyncVectorEnv,`
			`AsyncVectorEnv,`
			`partial(AsyncVectorEnv, shared_memory=False),`
			`],`
			`ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],`
			`)`
			`def test_autoreset_within_step(vectoriser):`
			`envs = vectoriser(`
			`[`
			`lambda: GenericTestEnv(`
			`action_space=Discrete(5),`
			`observation_space=Discrete(5),`
			`reset_func=count_reset,`
			`step_func=count_step,`
			`)`
			`for _ in range(3)`
			`],`
			`autoreset_mode=AutoresetMode.SAME_STEP,`
			`)`
			`assert envs.metadata["autoreset_mode"] == AutoresetMode.SAME_STEP`
			`envs.set_attr("max_count", [2, 3, 3])`

			`obs, info = envs.reset()`
			`assert np.all(obs == [0, 0, 0])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 1, 1])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [False, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [0, 2, 2])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [True, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert data_equivalence(`
			`info,`
			`{`
			`"final_obs": np.array([2, None, None], dtype=object),`
			`"final_info": {},`
			`"_final_obs": np.array([True, False, False]),`
			`"_final_info": np.array([True, False, False]),`
			`},`
			`)`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 0, 0])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [False, True, True])`
			`assert np.all(truncations == [False, False, False])`
			`assert data_equivalence(`
			`info,`
			`{`
			`"final_obs": np.array([None, 3, 3], dtype=object),`
			`"final_info": {},`
			`"_final_obs": np.array([False, True, True]),`
			`"_final_info": np.array([False, True, True]),`
			`},`
			`)`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [0, 1, 1])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [True, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert data_equivalence(`
			`info,`
			`{`
			`"final_obs": np.array([2, None, None], dtype=object),`
			`"final_info": {},`
			`"_final_obs": np.array([True, False, False]),`
			`"_final_info": np.array([True, False, False]),`
			`},`
			`)`

			`envs.close()`


			`@pytest.mark.parametrize(`
			`"vectoriser",`
			`[`
			`SyncVectorEnv,`
			`AsyncVectorEnv,`
			`partial(AsyncVectorEnv, shared_memory=False),`
			`],`
			`ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],`
			`)`
			`def test_autoreset_disabled(vectoriser):`
			`envs = vectoriser(`
			`[`
			`lambda: GenericTestEnv(`
			`action_space=Discrete(5),`
			`observation_space=Discrete(5),`
			`reset_func=count_reset,`
			`step_func=count_step,`
			`)`
			`for _ in range(3)`
			`],`
			`autoreset_mode=AutoresetMode.DISABLED,`
			`)`
			`assert envs.metadata["autoreset_mode"] == AutoresetMode.DISABLED`
			`envs.set_attr("max_count", [2, 3, 3])`

			`obs, info = envs.reset()`
			`assert np.all(obs == [0, 0, 0])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 1, 1])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [False, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [2, 2, 2])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [True, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, info = envs.reset(options={"reset_mask": terminations})`
			`assert np.all(obs == [0, 2, 2])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [1, 3, 3])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [False, True, True])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`obs, info = envs.reset(options={"reset_mask": terminations})`
			`assert np.all(obs == [1, 0, 0])`
			`assert info == {}`

			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert np.all(obs == [2, 1, 1])`
			`assert np.all(rewards == [1, 2, 3])`
			`assert np.all(terminations == [True, False, False])`
			`assert np.all(truncations == [False, False, False])`
			`assert info == {}`

			`envs.close()`


			`@pytest.mark.parametrize(`
			`"vectoriser",`
			`[`
			`SyncVectorEnv,`
			`AsyncVectorEnv,`
			`partial(AsyncVectorEnv, shared_memory=False),`
			`],`
			`ids=["Sync", "Async(shared_memory=True)", "Async(shared_memory=False)"],`
			`)`
			`@pytest.mark.parametrize(`
			`"autoreset_mode",`
			`[AutoresetMode.NEXT_STEP, AutoresetMode.DISABLED, AutoresetMode.SAME_STEP],`
			`)`
			`def test_autoreset_metadata(vectoriser, autoreset_mode):`
			`envs = vectoriser(`
			`[lambda: GenericTestEnv(), lambda: GenericTestEnv()],`
			`autoreset_mode=autoreset_mode,`
			`)`
			`assert envs.metadata["autoreset_mode"] == autoreset_mode`
			`envs.close()`

			`envs = vectoriser(`
			`[lambda: GenericTestEnv(), lambda: GenericTestEnv()],`
			`autoreset_mode=autoreset_mode.value,`
			`)`
			`assert envs.metadata["autoreset_mode"] == autoreset_mode`
			`envs.close()`


			`@pytest.mark.parametrize(`
			`"vectorization_mode", [VectorizeMode.SYNC, VectorizeMode.ASYNC]`
			`)`
			`@pytest.mark.parametrize(`
			`"autoreset_mode",`
			`[AutoresetMode.NEXT_STEP, AutoresetMode.DISABLED, AutoresetMode.SAME_STEP],`
			`)`
			`def test_make_vec_autoreset(vectorization_mode, autoreset_mode):`
			`envs = gym.make_vec(`
			`"CartPole-v1",`
			`vectorization_mode=vectorization_mode,`
			`vector_kwargs={"autoreset_mode": autoreset_mode},`
			`)`
			`envs.metadata["autoreset_mode"] = autoreset_mode`
			`envs.close()`

			`envs = gym.make_vec(`
			`"CartPole-v1",`
			`vectorization_mode=vectorization_mode,`
			`vector_kwargs={"autoreset_mode": autoreset_mode.value},`
			`)`
			`envs.metadata["autoreset_mode"] = autoreset_mode`
			`envs.close()`


			`def count_reset_obs(`
			`self: GenericTestEnv, seed: int \| None = None, options: dict \| None = None`
			`):`
			`super(GenericTestEnv, self).reset(seed=seed)`

			`self.count = seed if seed is not None else 0`
			`return self.observation_space.sample(), {}`


			`def count_step_obs(self: GenericTestEnv, action):`
			`self.count += 1`

			`return (`
			`self.observation_space.sample(),`
			`action,`
			`self.count == self.max_count,`
			`False,`
			`{},`
			`)`


			`@pytest.mark.parametrize("obs_space", TESTING_SPACES, ids=TESTING_SPACES_IDS)`
			`def test_same_step_final_obs(obs_space):`
			`envs = SyncVectorEnv(`
			`[`
			`lambda: GenericTestEnv(`
			`action_space=Discrete(5),`
			`observation_space=obs_space,`
			`reset_func=count_reset_obs,`
			`step_func=count_step_obs,`
			`)`
			`for _ in range(3)`
			`],`
			`autoreset_mode=AutoresetMode.SAME_STEP,`
			`)`
			`assert envs.metadata["autoreset_mode"] == AutoresetMode.SAME_STEP`
			`envs.set_attr("max_count", [2, 3, 3])`

			`envs.reset()`
			`envs.step([1, 2, 3])`
			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert info["final_obs"][0] in envs.single_observation_space`
			`obs, rewards, terminations, truncations, info = envs.step([1, 2, 3])`
			`assert info["final_obs"][1] in envs.single_observation_space`
			`assert info["final_obs"][2] in envs.single_observation_space`