# File: Gymnasium/gym/envs/__init__.py (Python, 401 lines, 9.6 KiB)
# Captured from a web file-history view; the import below is annotated
# there with the timestamp 2016-04-27 08:00:58 -07:00.
from gym.envs.registration import registry, register, make, spec
# Algorithmic
# ----------------------------------------
# Every environment in this section is registered with timestep_limit=200;
# only the entry point, kwargs and reward_threshold vary.

register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    timestep_limit=200,
    reward_threshold=75.0,
)

# The two ReversedAddition variants share one entry point and differ only
# in the `rows` kwarg (2 vs. 3).
register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 2},
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='ReversedAddition3-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 3},
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='DuplicatedInput-v0',
    entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
    timestep_limit=200,
    reward_threshold=9.0,
)

register(
    id='Reverse-v0',
    entry_point='gym.envs.algorithmic:ReverseEnv',
    timestep_limit=200,
    reward_threshold=25.0,
)
# Classic
# ----------------------------------------
# NOTE: stray revision timestamps that had been interleaved with the
# argument lists were removed; they made the calls syntactically invalid.

register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    timestep_limit=200,
    reward_threshold=195.0,
)

# Same entry point as CartPole-v0, registered with a longer timestep limit
# and a higher reward threshold.
register(
    id='CartPole-v1',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    timestep_limit=500,
    reward_threshold=475.0,
)

register(
    id='MountainCar-v0',
    entry_point='gym.envs.classic_control:MountainCarEnv',
    timestep_limit=200,
    reward_threshold=-110.0,
)

# Pendulum and Acrobot are registered without a reward_threshold.
register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
    timestep_limit=200,
)

register(
    id='Acrobot-v1',
    entry_point='gym.envs.classic_control:AcrobotEnv',
    timestep_limit=500,
)
# Box2d
# ----------------------------------------
# NOTE: stray revision timestamps that had been interleaved with the
# argument lists were removed; they made the calls syntactically invalid.

register(
    id='LunarLander-v2',
    entry_point='gym.envs.box2d:LunarLander',
    timestep_limit=1000,
    reward_threshold=200,
)

register(
    id='BipedalWalker-v2',
    entry_point='gym.envs.box2d:BipedalWalker',
    timestep_limit=1600,
    reward_threshold=300,
)

# Uses a separate entry point from BipedalWalker-v2 and a longer timestep
# limit; the reward threshold is the same.
register(
    id='BipedalWalkerHardcore-v2',
    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
    timestep_limit=2000,
    reward_threshold=300,
)

register(
    id='CarRacing-v0',
    entry_point='gym.envs.box2d:CarRacing',
    timestep_limit=1000,
    reward_threshold=900,
)
# Toy Text
# ----------------------------------------
# NOTE: stray revision timestamps that had been interleaved with the
# argument lists were removed; they made the calls syntactically invalid.

# Blackjack is registered with neither a timestep limit nor a reward
# threshold.
register(
    id='Blackjack-v0',
    entry_point='gym.envs.toy_text:BlackjackEnv',
)

# The two FrozenLake variants share one entry point and are distinguished
# by the `map_name` kwarg.
register(
    id='FrozenLake-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4'},
    timestep_limit=100,
    reward_threshold=0.78,  # optimum = .8196
)

register(
    id='FrozenLake8x8-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '8x8'},
    timestep_limit=200,
    reward_threshold=0.99,  # optimum = 1
)

register(
    id='NChain-v0',
    entry_point='gym.envs.toy_text:NChainEnv',
    timestep_limit=1000,
)

register(
    id='Roulette-v0',
    entry_point='gym.envs.toy_text:RouletteEnv',
    timestep_limit=100,
)

# Taxi, GuessingGame and HotterColder reference their submodule directly
# in the entry point rather than the `gym.envs.toy_text` package.
register(
    id='Taxi-v1',
    entry_point='gym.envs.toy_text.taxi:TaxiEnv',
    timestep_limit=200,
    reward_threshold=9.7,  # optimum = 10.2
)

register(
    id='GuessingGame-v0',
    entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
    timestep_limit=200,
)

register(
    id='HotterColder-v0',
    entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
    timestep_limit=200,
)
# Mujoco
# ----------------------------------------
# NOTE: stray revision timestamps that had been interleaved with the
# argument lists were removed; they made the calls syntactically invalid.

# 2D

# Reacher is the only entry in this section that sets a timestep_limit.
register(
    id='Reacher-v1',
    entry_point='gym.envs.mujoco:ReacherEnv',
    timestep_limit=50,
    reward_threshold=-3.75,
)

register(
    id='InvertedPendulum-v1',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    reward_threshold=950.0,
)

register(
    id='InvertedDoublePendulum-v1',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    reward_threshold=9100.0,
)

register(
    id='HalfCheetah-v1',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    reward_threshold=4800.0,
)

register(
    id='Hopper-v1',
    entry_point='gym.envs.mujoco:HopperEnv',
    reward_threshold=3800.0,
)

register(
    id='Swimmer-v1',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    reward_threshold=360.0,
)

# Walker2d, Humanoid and HumanoidStandup are registered without a
# reward_threshold.
register(
    id='Walker2d-v1',
    entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
    id='Ant-v1',
    entry_point='gym.envs.mujoco:AntEnv',
    reward_threshold=6000.0,
)

register(
    id='Humanoid-v1',
    entry_point='gym.envs.mujoco:HumanoidEnv',
)

register(
    id='HumanoidStandup-v1',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
)
# Atari
# ----------------------------------------
# Registers two environments per game: an image-observation variant
# ("<Name>-v0") and a RAM-observation variant ("<Name>-ram-v0").
#
# NOTE: pasted commit-message text and revision timestamps that had been
# interleaved with this loop were removed, and the loop's indentation was
# restored; without both fixes the block does not parse.

# The game list below was generated with (Python 2 syntax):
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
    'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
    'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
    'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
    'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
    'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
    'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
    'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
    'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
    for obs_type in ['image', 'ram']:
        # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
        name = ''.join([g.capitalize() for g in game.split('_')])
        if obs_type == 'ram':
            name = '{}-ram'.format(name)

        nondeterministic = False
        if game == 'elevator_action' and obs_type == 'ram':
            # ElevatorAction-ram-v0 seems to yield slightly
            # non-deterministic observations about 10% of the time. We
            # should track this down eventually, but for now we just
            # mark it as nondeterministic.
            nondeterministic = True

        register(
            id='{}-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type},
            timestep_limit=10000,
            nondeterministic=nondeterministic,
        )
# Board games
# ----------------------------------------
# NOTE: pasted commit-message text and revision timestamps that had been
# interleaved with the argument lists were removed; they made the calls
# syntactically invalid.

register(
    id='Go9x9-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
    # The pachi player seems not to be deterministic given a fixed seed.
    # (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
    #
    # This is probably due to a computation time limit.
    nondeterministic=True,
)

# Same configuration as Go9x9-v0 apart from the board size; it uses the
# same pachi opponent and is likewise marked nondeterministic.
register(
    id='Go19x19-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 19,
    },
    nondeterministic=True,
)

# Hex is registered with a 'random' opponent and is not flagged as
# nondeterministic.
register(
    id='Hex9x9-v0',
    entry_point='gym.envs.board_game:HexEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'random',
        'observation_type': 'numpy3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
)
# Debugging
# ----------------------------------------
# All four debugging environments are registered with local_only=True and
# with no timestep limit or reward threshold.
# NOTE(review): the meaning of `local_only` is defined in
# gym.envs.registration, not here - confirm there before relying on it.
#
# NOTE: a stray revision timestamp preceding this section was removed, and
# trailing commas were made consistent across the four calls.

register(
    id='OneRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='OneRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundNondeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundNondeterministicRewardEnv',
    local_only=True,
)
# Parameter tuning
# ----------------------------------------
# Both environments are registered with only an id and an entry point
# (no kwargs, timestep limit or reward threshold).

register(
    id='ConvergenceControl-v0',
    entry_point='gym.envs.parameter_tuning:ConvergenceControl',
)

register(
    id='CNNClassifierTraining-v0',
    entry_point='gym.envs.parameter_tuning:CNNClassifierTraining',
)
# Safety
# ----------------------------------------
# Every environment in this section is registered with timestep_limit=200
# and no reward threshold.
# NOTE: stray revision timestamps that had been interleaved with the
# argument lists were removed; they made the calls syntactically invalid.

# interpretability envs
register(
    id='PredictActionsCartpole-v0',
    entry_point='gym.envs.safety:PredictActionsCartpoleEnv',
    timestep_limit=200,
)

register(
    id='PredictObsCartpole-v0',
    entry_point='gym.envs.safety:PredictObsCartpoleEnv',
    timestep_limit=200,
)

# semi_supervised envs
# probably the easiest:
register(
    id='SemisuperPendulumNoise-v0',
    entry_point='gym.envs.safety:SemisuperPendulumNoiseEnv',
    timestep_limit=200,
)

# somewhat harder because of higher variance:
register(
    id='SemisuperPendulumRandom-v0',
    entry_point='gym.envs.safety:SemisuperPendulumRandomEnv',
    timestep_limit=200,
)

# probably the hardest because you only get a constant number of rewards in total:
register(
    id='SemisuperPendulumDecay-v0',
    entry_point='gym.envs.safety:SemisuperPendulumDecayEnv',
    timestep_limit=200,
)

# off_switch envs
register(
    id='OffSwitchCartpole-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleEnv',
    timestep_limit=200,
)

register(
    id='OffSwitchCartpoleProb-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleProbEnv',
    timestep_limit=200,
)