Lunar lander code clarity (#354)

2025-07-31 05:44:31 +00:00 · 2023-03-03 12:13:46 +01:00
parent 046e858a85
commit 4d3a65e765
1 changed files with 82 additions and 17 deletions
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -47,8 +47,11 @@ LEG_DOWN = 18
 LEG_W, LEG_H = 2, 8
 LEG_SPRING_TORQUE = 40

-SIDE_ENGINE_HEIGHT = 14.0
-SIDE_ENGINE_AWAY = 12.0
+SIDE_ENGINE_HEIGHT = 14
+SIDE_ENGINE_AWAY = 12
+MAIN_ENGINE_Y_LOCATION = (
+    4  # The Y location of the main engine on the body of the Lander.
+)

 VIEWPORT_W = 600
 VIEWPORT_H = 400
@@ -183,6 +186,34 @@ class LunarLander(gym.Env, EzPickle):
        renormalized to 200; harder initial random push.
    - v0: Initial version

+
+    ## Notes
+
+    There are several unexpected bugs with the implementation of the environment.
+
+    1. The position of the side thursters on the body of the lander changes, depending on the orientation of the lander.
+    This in turn results in an orientation depentant torque being applied to the lander.
+
+    2. The units of the state are not consistent. I.e.
+    * The angular velocity is in units of 0.4 radians per second. In order to convert to radians per second, the value needs to be multiplied by a factor of 2.5.
+
+    For the default values of VIEWPORT_W, VIEWPORT_H, SCALE, and FPS, the scale factors equal:
+    'x': 10
+    'y': 6.666
+    'vx': 5
+    'vy': 7.5
+    'angle': 1
+    'angular velocity': 2.5
+
+    After the correction has been made, the units of the state are as follows:
+    'x': (units)
+    'y': (units)
+    'vx': (units/second)
+    'vy': (units/second)
+    'angle': (radians)
+    'angular velocity': (radians/second)
+
+
    <!-- ## References -->

    ## Credits
@@ -327,7 +358,7 @@ class LunarLander(gym.Env, EzPickle):
        W = VIEWPORT_W / SCALE
        H = VIEWPORT_H / SCALE

-        # terrain
+        # Create Terrain
        CHUNKS = 11
        height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
        chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
@@ -357,9 +388,11 @@ class LunarLander(gym.Env, EzPickle):
        self.moon.color1 = (0.0, 0.0, 0.0)
        self.moon.color2 = (0.0, 0.0, 0.0)

+        # Create Lander body
        initial_y = VIEWPORT_H / SCALE
+        initial_x = VIEWPORT_W / SCALE / 2
        self.lander: Box2D.b2Body = self.world.CreateDynamicBody(
-            position=(VIEWPORT_W / SCALE / 2, initial_y),
+            position=(initial_x, initial_y),
            angle=0.0,
            fixtures=fixtureDef(
                shape=polygonShape(
@@ -374,6 +407,8 @@ class LunarLander(gym.Env, EzPickle):
        )
        self.lander.color1 = (128, 102, 230)
        self.lander.color2 = (77, 77, 128)
+
+        # Apply the initial random impulse to the lander
        self.lander.ApplyForceToCenter(
            (
                self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
@@ -382,10 +417,11 @@ class LunarLander(gym.Env, EzPickle):
            True,
        )

+        # Create Lander Legs
        self.legs = []
        for i in [-1, +1]:
            leg = self.world.CreateDynamicBody(
-                position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y),
+                position=(initial_x - i * LEG_AWAY / SCALE, initial_y),
                angle=(i * 0.05),
                fixtures=fixtureDef(
                    shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
@@ -450,7 +486,7 @@ class LunarLander(gym.Env, EzPickle):
    def step(self, action):
        assert self.lander is not None

-        # Update wind
+        # Update wind and apply to the lander
        assert self.lander is not None, "You forgot to call reset()"
        if self.enable_wind and not (
            self.legs[0].ground_contact or self.legs[1].ground_contact
@@ -489,9 +525,15 @@ class LunarLander(gym.Env, EzPickle):
                action
            ), f"{action!r} ({type(action)}) invalid "

-        # Engines
+        # Apply Engine Impulses
+
+        # Tip is a the (X and Y) components of the rotation of the lander.
        tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+
+        # Side is the (-Y and X) components of the rotation of the lander.
        side = (-tip[1], tip[0])
+
+        # Generate two random numbers between -1/SCALE and 1/SCALE.
        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

        m_power = 0.0
@@ -504,12 +546,21 @@ class LunarLander(gym.Env, EzPickle):
                assert m_power >= 0.5 and m_power <= 1.0
            else:
                m_power = 1.0
+
            # 4 is move a bit downwards, +-2 for randomness
-            ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
-            oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
+            # The components of the impulse to be applied by the main engine.
+            ox = (
+                tip[0] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
+                + side[0] * dispersion[1]
+            )
+            oy = (
+                -tip[1] * (MAIN_ENGINE_Y_LOCATION / SCALE + 2 * dispersion[0])
+                - side[1] * dispersion[1]
+            )
+
            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
            if self.render_mode is not None:
-                # particles are just a decoration, so don't add them when not rendering
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
                p = self._create_particle(
                    3.5,  # 3.5 is here to make particle speed adequate
                    impulse_pos[0],
@@ -534,30 +585,43 @@ class LunarLander(gym.Env, EzPickle):
        if (self.continuous and np.abs(action[1]) > 0.5) or (
            not self.continuous and action in [1, 3]
        ):
-            # Orientation engines
+            # Orientation/Side engines
            if self.continuous:
                direction = np.sign(action[1])
                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
                assert s_power >= 0.5 and s_power <= 1.0
            else:
+                # action = 1 is left, action = 3 is right
                direction = action - 2
                s_power = 1.0
+
+            # The components of the impulse to be applied by the side engines.
            ox = tip[0] * dispersion[0] + side[0] * (
                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
            )
            oy = -tip[1] * dispersion[0] - side[1] * (
                3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
            )
+
+            # The constant 17 is a constant, that is presumably meant to be SIDE_ENGINE_HEIGHT.
+            # However, SIDE_ENGINE_HEIGHT is defined as 14
+            # This casuses the position of the thurst on the body of the lander to change, depending on the orientation of the lander.
+            # This in turn results in an orientation depentant torque being applied to the lander.
            impulse_pos = (
                self.lander.position[0] + ox - tip[0] * 17 / SCALE,
                self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
            )
-            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
-            p.ApplyLinearImpulse(
-                (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
-                impulse_pos,
-                True,
-            )
+            if self.render_mode is not None:
+                # particles are just a decoration, with no impact on the physics, so don't add them when not rendering
+                p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
+                p.ApplyLinearImpulse(
+                    (
+                        ox * SIDE_ENGINE_POWER * s_power,
+                        oy * SIDE_ENGINE_POWER * s_power,
+                    ),
+                    impulse_pos,
+                    True,
+                )
            self.lander.ApplyLinearImpulse(
                (-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
                impulse_pos,
@@ -568,6 +632,7 @@ class LunarLander(gym.Env, EzPickle):

        pos = self.lander.position
        vel = self.lander.linearVelocity
+
        state = [
            (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
            (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),