diff --git a/gym/envs/box2d/bipedal_walker.py b/gym/envs/box2d/bipedal_walker.py
index 08e4a8d1d..ac033fd4f 100644
--- a/gym/envs/box2d/bipedal_walker.py
+++ b/gym/envs/box2d/bipedal_walker.py
@@ -108,8 +108,6 @@ class BipedalWalker(gym.Env, EzPickle):
     python gym/envs/box2d/bipedal_walker.py
     ```

-    ![BipedalWalker Episode Example](./bipedal_walker.jpg)
-
     ### Action Space
     Actions are motor speed values in the [-1, 1] range for each of the
     4 joints at both hips and knees.
diff --git a/gym/envs/box2d/car_racing.py b/gym/envs/box2d/car_racing.py
index 54a326d6d..df2b1bdc5 100644
--- a/gym/envs/box2d/car_racing.py
+++ b/gym/envs/box2d/car_racing.py
@@ -87,7 +87,7 @@ class FrictionDetector(contactListener):

 class CarRacing(gym.Env, EzPickle):
     """
-    ### Description
+    ## Description
     Easiest continuous control task to learn from pixels, a top-down
     racing environment. Discreet control is reasonable in this environment as
     well, on/off discretisation is fine.
@@ -105,39 +105,37 @@ class CarRacing(gym.Env, EzPickle):
     Remember it's a powerful rear-wheel drive car - don't press the accelerator
     and turn at the same time.

-    ![CarRacing Episode Example](./car_racing.jpg)
-
-    ### Action Space
+    ## Action Space
     There are 3 actions: steering (-1 is full left, +1 is full right), gas,
     and breaking.

-    ### Observation Space
+    ## Observation Space
     State consists of 96x96 pixels.

-    ### Rewards
+    ## Rewards
     The reward is -0.1 every frame and +1000/N for every track tile visited,
     where N is the total number of tiles visited in the track. For example,
     if you have finished in 732 frames, your reward is
     1000 - 0.1*732 = 926.8 points.

-    ### Starting State
+    ## Starting State
     The car starts stopped at the center of the road.

-    ### Episode Termination
+    ## Episode Termination
     The episode finishes when all the tiles are visited. The car also can go
     outside of the playfield - that is far off the track, then it will get -100
     and die.

-    ### Arguments
+    ## Arguments
     There are no arguments supported in constructing the environment.

-    ### Version History
+    ## Version History
     - v0: current version

-    ### References
+    ## References
     - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

-    ### Credits
+    ## Credits
     Created by Oleg Klimov

     """
diff --git a/gym/envs/box2d/lunar_lander.py b/gym/envs/box2d/lunar_lander.py
index 9fa7fb7d0..644f57398 100644
--- a/gym/envs/box2d/lunar_lander.py
+++ b/gym/envs/box2d/lunar_lander.py
@@ -83,18 +83,16 @@ class LunarLander(gym.Env, EzPickle):

-    ![LunarLander Episode Example](./lunar_lander.jpg)
-
-    ### Action Space
+    ## Action Space
     There are four discrete actions available: do nothing, fire left
     orientation engine, fire main engine, fire right orientation engine.

-    ### Observation Space
+    ## Observation Space
     There are 8 states: the coordinates of the lander in `x` & `y`, its linear
     velocities in `x` & `y`, its angle, its angular velocity, and two boleans
     showing if each leg is in contact with the ground or not.

-    ### Rewards
+    ## Rewards
     Reward for moving from the top of the screen to the landing pad and zero
     speed is about 100..140 points.
     If the lander moves away from the landing pad it loses reward.
@@ -104,11 +102,11 @@ class LunarLander(gym.Env, EzPickle):
     Firing the main engine is -0.3 points each frame. Firing the
     side engine is -0.03 points each frame. Solved is 200 points.

-    ### Starting State
+    ## Starting State
     The lander starts at the top center of the viewport with a random initial
     force applied to its center of mass.

-    ### Episode Termination
+    ## Episode Termination
     The episode finishes if:
     1) the lander crashes (the lander body gets in contact with the moon);
     2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
@@ -121,7 +119,7 @@ class LunarLander(gym.Env, EzPickle):
     > wakes up. Bodies will also wake up if a joint or contact attached to
     > them is destroyed.

-    ### Arguments
+    ## Arguments
     To use to the _continuous_ environment, you need to specify the
     `continuous"=True` argument like below:
     ```python
@@ -135,7 +133,7 @@ class LunarLander(gym.Env, EzPickle):

-    ### Credits
+    ## Credits
     Created by Oleg Klimov

     """
diff --git a/gym/envs/classic_control/acrobot.py b/gym/envs/classic_control/acrobot.py
index 31db2a92a..21da054b4 100644
--- a/gym/envs/classic_control/acrobot.py
+++ b/gym/envs/classic_control/acrobot.py
@@ -24,19 +24,16 @@ __author__ = "Christoph Dann "

 class AcrobotEnv(core.Env):
     """
-    ### Description
+    ## Description
     The Acrobot system includes two joints and two links, where the joint between the two links is actuated.
     Initially, the links are hanging downwards, and the goal is to swing the end of the lower link up to
     a given height by applying changes to torque on the actuated joint (middle).
-
-    ![Acrobot Episode Example](./acrobot.png)
-
-    **Image**: two blue pendulum links connected by two green joints. The joint in between the two pendulum links is acted
+    **Gif**: two blue pendulum links connected by two green joints. The joint in between the two pendulum links is acted
     upon by the agent via changes in torque. The goal is to swing the end of the outer-link to reach the target height
     (black horizontal line above system).

-    ### Action Space
+    ## Action Space

     The action is either applying +1, 0 or -1 torque on the joint between
     the two pendulum links.
@@ -46,7 +43,7 @@ class AcrobotEnv(core.Env):
     | 1   | apply 0 torque to the joint |
     | 2   | apply 1 torque to the joint |

-    ### Observation Space
+    ## Observation Space

     The observation space gives information about the two rotational joint angles `theta1` and `theta2`, as well as their
     angular velocities:
@@ -70,24 +67,24 @@ class AcrobotEnv(core.Env):
     or `[cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2]`. As an example, a state of
     `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.

-    ### Rewards
+    ## Rewards

     All steps that do not reach the goal (termination criteria) incur a reward of -1.
     Achieving the target height and terminating incurs a reward of 0. The reward threshold is -100.

-    ### Starting State
+    ## Starting State

     At start, each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized
     uniformly at random between -0.1 and 0.1. This means both links are pointing roughly downwards.

-    ### Episode Termination
+    ## Episode Termination

     The episode terminates of one of the following occurs:
     1. The target height is achieved. As constructed, this occurs when
     `-cos(theta1) - cos(theta2 + theta1) > 1.0`
     2. Episode length is greater than 500 (200 for v0)

-    ### Arguments
+    ## Arguments

     There are no arguments supported in constructing the environment. As an example:

@@ -118,14 +115,14 @@ class AcrobotEnv(core.Env):
     ```

-    ### Version History
+    ## Version History
     - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
     `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
     sin and cosin of each angle instead.
     - v0: Initial versions release (1.0.0) (removed from gym for v1)

-    ### References
+    ## References
     - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
       In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
       MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
     - Sutton, R. S., Barto, A. G. (2018 ). Reinforcement Learning: An Introduction. The MIT Press.
     """
diff --git a/gym/envs/classic_control/continuous_mountain_car.py b/gym/envs/classic_control/continuous_mountain_car.py
index c37230528..21e8c607e 100644
--- a/gym/envs/classic_control/continuous_mountain_car.py
+++ b/gym/envs/classic_control/continuous_mountain_car.py
@@ -38,26 +38,34 @@ class Continuous_MountainCarEnv(gym.Env):
     }
     ```

-    Observation space is a 2-dim vector, where the 1st element represents the "car position" and the 2nd element represents the "car velocity".
+    ## Observation Space

-    Action: The actual driving force is calculated by multiplying the power coef by power (0.0015)
+    The observation space is a 2-dim vector, where the 1st element represents the "car position" and the 2nd element represents the "car velocity".

-    Reward: Reward of 100 is awarded if the agent reached the flag (position = 0.45)
+    ## Action
+
+    The actual driving force is calculated by multiplying the power coef by power (0.0015)
+
+    ## Reward
+
+    Reward of 100 is awarded if the agent reached the flag (position = 0.45)
     on top of the mountain.
     Reward is decrease based on amount of energy consumed each step.

-    Starting State: The position of the car is assigned a uniform random value in [-0.6 , -0.4]. The starting velocity of the car is always assigned to 0.
+    ## Starting State

-    Episode Termination: The car position is more than 0.45. Episode length is greater than 200
+    The position of the car is assigned a uniform random value in [-0.6 , -0.4]. The starting velocity of the car is always assigned to 0.

+    ## Episode Termination
+
+    The car position is more than 0.45. Episode length is greater than 200

-    ### Arguments
+    ## Arguments

     ```
     gym.make('MountainCarContinuous-v0')
     ```

-    ### Version History
+    ## Version History

     * v0: Initial versions release (1.0.0)
diff --git a/gym/envs/classic_control/pendulum.py b/gym/envs/classic_control/pendulum.py
index d031e6f70..44a9750a7 100644
--- a/gym/envs/classic_control/pendulum.py
+++ b/gym/envs/classic_control/pendulum.py
@@ -11,7 +11,7 @@ from os import path


 class PendulumEnv(gym.Env):
     """
-    ### Description
+    ## Description
     The inverted pendulum swingup problem is a classic problem in the control literature. In this
     version of the problem, the pendulum starts in a random position, and the goal is to swing it up so
@@ -26,7 +26,7 @@ class PendulumEnv(gym.Env):
     - `theta`: angle in radians.
     - `tau`: torque in `N * m`. Defined as positive _counter-clockwise_.

-    ### Action Space
+    ## Action Space
     The action is the torque applied to the pendulum.

     | Num | Action | Min | Max |
@@ -34,7 +34,7 @@ class PendulumEnv(gym.Env):
     | 0   | Torque | -2.0 | 2.0 |


-    ### Observation Space
+    ## Observation Space
     The observations correspond to the x-y coordinate of the pendulum's end, and its angular velocity.

     | Num | Observation      | Min  | Max |
@@ -43,7 +43,7 @@ class PendulumEnv(gym.Env):
     | 1   | y = sin(angle)   | -1.0 | 1.0 |
     | 2   | Angular Velocity | -8.0 | 8.0 |

-    ### Rewards
+    ## Rewards
     The reward is defined as:
     ```
     r = -(theta^2 + 0.1*theta_dt^2 + 0.001*torque^2)
@@ -53,13 +53,13 @@ class PendulumEnv(gym.Env):
     0.001*2^2) = -16.2736044`, while the maximum reward is zero (pendulum is
     upright with zero velocity and no torque being applied).

-    ### Starting State
+    ## Starting State
     The starting state is a random angle in `[-pi, pi]` and a random angular velocity in `[-1,1]`.

-    ### Episode Termination
+    ## Episode Termination
     An episode terminates after 200 steps. There's no other criteria for termination.

-    ### Arguments
+    ## Arguments
     - `g`: acceleration of gravity measured in `(m/s^2)` used to calculate the pendulum dynamics.
     The default is `g=10.0`.

@@ -67,7 +67,7 @@ class PendulumEnv(gym.Env):
     ```
     gym.make('CartPole-v1', g=9.81)
     ```

-    ### Version History
+    ## Version History
     * v1: Simplify the math equations, no difference in behavior.
     * v0: Initial versions release (1.0.0)
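A minimal usage sketch of the constructor arguments documented in the docstrings touched above — the `continuous` flag from the LunarLander section and the `g` gravity argument from the Pendulum section. This is not part of the diff: it assumes the standard `LunarLander-v2` and `Pendulum-v1` registrations, assumes a gym version whose `LunarLander` constructor accepts `continuous`, and assumes the Pendulum docstring's `gym.make('CartPole-v1', g=9.81)` line is meant to target `Pendulum-v1`.

```python
import gym

# Continuous-action LunarLander via the `continuous` keyword described in the
# LunarLander docstring (assumption: the installed gym forwards this kwarg to
# the LunarLander constructor).
env = gym.make("LunarLander-v2", continuous=True)

# Pendulum with a non-default gravity constant, per the `g` argument
# documented in the Pendulum docstring.
env = gym.make("Pendulum-v1", g=9.81)
```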