diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py index f58781330..9b477aa39 100644 --- a/gymnasium/envs/classic_control/pendulum.py +++ b/gymnasium/envs/classic_control/pendulum.py @@ -59,7 +59,7 @@ class PendulumEnv(gym.Env): *r = -(theta^2 + 0.1 * theta_dt^2 + 0.001 * torque^2)* - where `$\theta$` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position). + where `theta` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position). Based on the above equation, the minimum reward that can be obtained is *-(pi^2 + 0.1 * 8^2 + 0.001 * 2^2) = -16.2736044*, while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied).