Mirror of https://github.com/Farama-Foundation/Gymnasium.git
Randomize LunarLander wind generation at reset to gain statistical independence between episodes (#959)
@@ -267,7 +267,7 @@ class A2C(nn.Module):
# The simplest way to create vector environments is by calling `gym.vector.make`, which creates multiple instances of the same environment:
#

-envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)
+envs = gym.vector.make("LunarLander-v3", num_envs=3, max_episode_steps=600)


# %%
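For reference, a minimal sketch of how such a vector environment is driven, assuming the standard batched reset/step API of Gymnasium's vector environments (the seed and the printed shapes are illustrative, not taken from the diff):

import gymnasium as gym

envs = gym.vector.make("LunarLander-v3", num_envs=3, max_episode_steps=600)

# reset() returns one observation row per sub-environment
observations, infos = envs.reset(seed=42)
print(observations.shape)  # (3, 8) for LunarLander

# step() takes one action per sub-environment and returns batched results
actions = envs.action_space.sample()
observations, rewards, terminations, truncations, infos = envs.step(actions)
envs.close()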
@@ -277,13 +277,13 @@ envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)
# If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
# and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
#
-# Manually setting up 3 parallel 'LunarLander-v2' envs with different parameters:
+# Manually setting up 3 parallel 'LunarLander-v3' envs with different parameters:


envs = gym.vector.AsyncVectorEnv(
    [
        lambda: gym.make(
-            "LunarLander-v2",
+            "LunarLander-v3",
            gravity=-10.0,
            enable_wind=True,
            wind_power=15.0,
@@ -291,7 +291,7 @@ envs = gym.vector.AsyncVectorEnv(
            max_episode_steps=600,
        ),
        lambda: gym.make(
-            "LunarLander-v2",
+            "LunarLander-v3",
            gravity=-9.8,
            enable_wind=True,
            wind_power=10.0,
@@ -299,7 +299,7 @@ envs = gym.vector.AsyncVectorEnv(
            max_episode_steps=600,
        ),
        lambda: gym.make(
-            "LunarLander-v2", gravity=-7.0, enable_wind=False, max_episode_steps=600
+            "LunarLander-v3", gravity=-7.0, enable_wind=False, max_episode_steps=600
        ),
    ]
)
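One note on this construction: `AsyncVectorEnv` expects a list of argument-less callables that each build a single environment. If such a list is built in a loop, Python's late binding of closure variables can make every lambda see only the last parameter set, so a small factory function is the safer pattern. A sketch using the parameter values from the diff (`make_lander` is a hypothetical helper, not part of the tutorial):

def make_lander(gravity, enable_wind, wind_power):
    # returning an inner function binds the parameters now, avoiding the
    # late-binding pitfall of defining lambdas inside a loop
    def _make():
        return gym.make(
            "LunarLander-v3",
            gravity=gravity,
            enable_wind=enable_wind,
            wind_power=wind_power,
            max_episode_steps=600,
        )

    return _make


params = [(-10.0, True, 15.0), (-9.8, True, 10.0), (-7.0, False, 0.0)]
envs = gym.vector.AsyncVectorEnv([make_lander(g, w, p) for g, w, p in params])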
@@ -309,14 +309,14 @@ envs = gym.vector.AsyncVectorEnv(
#
# ------------------------------
#
-# Randomly generating the parameters for 3 parallel 'LunarLander-v2' envs, using `np.clip` to stay in the recommended parameter space:
+# Randomly generating the parameters for 3 parallel 'LunarLander-v3' envs, using `np.clip` to stay in the recommended parameter space:
#


envs = gym.vector.AsyncVectorEnv(
    [
        lambda: gym.make(
-            "LunarLander-v2",
+            "LunarLander-v3",
            gravity=np.clip(
                np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
            ),
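The same clipping pattern can be applied to the wind parameters. A sketch of one way to draw a full random parameter set; the helper name, the distribution means, and the clipping bounds (gravity in (-12, 0), wind_power in (0, 20), turbulence_power in (0, 2)) are assumptions based on the documented recommended ranges and should be checked against the installed LunarLander version:

import numpy as np

def sample_lander_kwargs(rng=np.random):
    # hypothetical helper: one random parameter set, clipped to the recommended ranges
    return dict(
        gravity=np.clip(rng.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01),
        enable_wind=True,
        wind_power=np.clip(rng.normal(loc=15.0, scale=1.0), a_min=0.01, a_max=19.99),
        turbulence_power=np.clip(rng.normal(loc=1.5, scale=0.5), a_min=0.01, a_max=1.99),
        max_episode_steps=600,
    )

# the default argument freezes one sampled parameter set per lambda at list-build time
envs = gym.vector.AsyncVectorEnv(
    [
        lambda kwargs=sample_lander_kwargs(): gym.make("LunarLander-v3", **kwargs)
        for _ in range(3)
    ]
)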
@@ -374,7 +374,7 @@ if randomize_domain:
    envs = gym.vector.AsyncVectorEnv(
        [
            lambda: gym.make(
-                "LunarLander-v2",
+                "LunarLander-v3",
                gravity=np.clip(
                    np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
                ),
@@ -392,7 +392,7 @@ if randomize_domain:
    )

else:
-    envs = gym.vector.make("LunarLander-v2", num_envs=n_envs, max_episode_steps=600)
+    envs = gym.vector.make("LunarLander-v3", num_envs=n_envs, max_episode_steps=600)


obs_shape = envs.single_observation_space.shape[0]
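As a quick orientation on the attribute used in the last context line: a vector environment exposes both per-sub-environment spaces and batched ones, roughly as below (the printed shapes are for LunarLander and are assumptions, not taken from the diff):

print(envs.single_observation_space.shape)  # (8,)  -> obs_shape == 8
print(envs.single_action_space.n)           # 4 discrete actions
print(envs.observation_space.shape)         # (n_envs, 8), batched over all sub-envs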
@@ -499,7 +499,7 @@ for sample_phase in tqdm(range(n_updates)):
rolling_length = 20
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 5))
fig.suptitle(
-    f"Training plots for {agent.__class__.__name__} in the LunarLander-v2 environment \n \
+    f"Training plots for {agent.__class__.__name__} in the LunarLander-v3 environment \n \
            (n_envs={n_envs}, n_steps_per_update={n_steps_per_update}, randomize_domain={randomize_domain})"
)

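`rolling_length` is the window used to smooth the plotted curves; a minimal sketch of the kind of moving average this implies, assuming the returns are collected in a flat array (the data below is a placeholder, not the tutorial's):

import numpy as np

episode_returns = np.random.randn(500).cumsum()  # placeholder data

# simple moving average over the last `rolling_length` values
returns_moving_average = (
    np.convolve(episode_returns, np.ones(rolling_length), mode="valid") / rolling_length
)
axs[0][0].plot(returns_moving_average)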
@@ -606,7 +606,7 @@ plt.show()
# because the gradients of the environments are good enough after a relatively low number of environments
# (especially if the environment is not very complex). In this case, increasing the number of environments
# does not increase the learning speed, and actually increases the runtime, possibly due to the additional time
-# needed to calculate the gradients. For LunarLander-v2, the best performing configuration used an AsyncVectorEnv
+# needed to calculate the gradients. For LunarLander-v3, the best performing configuration used an AsyncVectorEnv
# with 10 parallel environments, but environments with a higher complexity may require more
# parallel environments to achieve optimal performance.
#
@@ -662,7 +662,7 @@ for episode in range(n_showcase_episodes):
    # create a new sample environment to get new random parameters
    if randomize_domain:
        env = gym.make(
-            "LunarLander-v2",
+            "LunarLander-v3",
            render_mode="human",
            gravity=np.clip(
                np.random.normal(loc=-10.0, scale=2.0), a_min=-11.99, a_max=-0.01
@@ -677,7 +677,7 @@ for episode in range(n_showcase_episodes):
            max_episode_steps=500,
        )
    else:
-        env = gym.make("LunarLander-v2", render_mode="human", max_episode_steps=500)
+        env = gym.make("LunarLander-v3", render_mode="human", max_episode_steps=500)

    # get an initial state
    state, info = env.reset()
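For orientation, the episode loop that follows this reset inside the `for episode ...` block usually looks like the sketch below; `agent.select_action`, `device`, and the exact tensor handling are assumptions standing in for whatever the trained agent from earlier in the tutorial exposes, not the tutorial's literal code:

    done = False
    while not done:
        with torch.no_grad():
            # hypothetical action-selection call on the trained agent
            action = agent.select_action(torch.tensor([state], device=device))[0]

        # step the environment and finish on either termination or truncation
        state, reward, terminated, truncated, info = env.step(action.item())
        done = terminated or truncated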
@@ -705,7 +705,7 @@ env.close()

# from gymnasium.utils.play import play
#
-# play(gym.make('LunarLander-v2', render_mode='rgb_array'),
+# play(gym.make('LunarLander-v3', render_mode='rgb_array'),
#      keys_to_action={'w': 2, 'a': 1, 'd': 3}, noop=0)