Updated gymnasium to be equivalent to gym v26.2 (#36)

2025-08-22 07:02:19 +00:00 · 2022-10-05 17:53:45 +01:00
parent dc60cdc4af
commit f6489c38b3
21 changed files with 190 additions and 13 deletions
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,10 +1,13 @@
+from functools import partial
+
 import numpy as np
 import pytest

-from gymnasium.spaces import Tuple
+from gymnasium.spaces import Discrete, Tuple
 from gymnasium.vector.async_vector_env import AsyncVectorEnv
 from gymnasium.vector.sync_vector_env import SyncVectorEnv
 from gymnasium.vector.vector_env import VectorEnv
+from tests.testing_env import GenericTestEnv
 from tests.vector.utils import CustomSpace, make_env


@@ -58,3 +61,65 @@ def test_custom_space_vector_env():

    assert isinstance(env.single_action_space, CustomSpace)
    assert isinstance(env.action_space, Tuple)
+
+
+@pytest.mark.parametrize(
+    "vectoriser",
+    (
+        SyncVectorEnv,
+        partial(AsyncVectorEnv, shared_memory=True),
+        partial(AsyncVectorEnv, shared_memory=False),
+    ),
+    ids=["Sync", "Async with shared memory", "Async without shared memory"],
+)
+def test_final_obs_info(vectoriser):
+    """Tests that the vector environments correctly return the final observation and info.
+
+    A single sub-environment is stepped with actions 1, 2 and 3; action 3 terminates
+    the episode, after which the step result is expected to carry the reset info
+    together with the ``final_observation`` and ``final_info`` entries.
+    """
+
+    def reset_fn(self, seed=None, options=None):
+        return 0, {"reset": True}
+
+    def step_fn(self, action):
+        # Actions below 3 are echoed back as the observation; action 3 terminates.
+        return (action if action < 3 else 0), 0, action >= 3, False, {"action": action}
+
+    def thunk():
+        return GenericTestEnv(
+            action_space=Discrete(4),
+            observation_space=Discrete(4),
+            reset_fn=reset_fn,
+            step_fn=step_fn,
+        )
+
+    env = vectoriser([thunk])
+    try:
+        # With one sub-env, each `==` below yields a single-element array.
+        obs, info = env.reset()
+        assert obs == np.array([0])
+        assert info == {"reset": np.array([True]), "_reset": np.array([True])}
+
+        # Non-terminating steps: the observation mirrors the action.
+        obs, _, termination, _, info = env.step([1])
+        assert obs == np.array([1]) and termination == np.array([False])
+        assert info == {"action": np.array([1]), "_action": np.array([True])}
+
+        obs, _, termination, _, info = env.step([2])
+        assert obs == np.array([2]) and termination == np.array([False])
+        assert info == {"action": np.array([2]), "_action": np.array([True])}
+
+        # Terminating step: the info is expected to hold the reset info, while the
+        # last step's observation and info are reported under the "final_*" keys.
+        obs, _, termination, _, info = env.step([3])
+        assert obs == np.array([0]) and termination == np.array([True])
+        assert info["reset"] == np.array([True])
+        assert "final_observation" in info and "final_info" in info
+        assert info["final_observation"] == np.array([0])
+        assert info["final_info"] == {"action": 3}
+    finally:
+        # Always release the vector env, even when an assertion above fails,
+        # instead of leaving its cleanup to garbage collection.
+        env.close()