2016-04-27 08:00:58 -07:00
|
|
|
from gym.envs.registration import registry, register, make, spec
|
|
|
|
|
|
|
|
# Algorithmic
# ----------------------------------------
# Every call below passes an environment id, the 'module:Class' entry point
# string for it, and its timestep_limit / reward_threshold settings.

register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    timestep_limit=200,
    reward_threshold=75.0,
)

# The two ReversedAddition ids share one entry point and differ only in the
# 'rows' keyword argument handed to it.
register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 2},
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='ReversedAddition3-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 3},
    timestep_limit=200,
    reward_threshold=25.0,
)

register(
    id='DuplicatedInput-v0',
    entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
    timestep_limit=200,
    reward_threshold=9.0,
)

register(
    id='Reverse-v0',
    entry_point='gym.envs.algorithmic:ReverseEnv',
    timestep_limit=200,
    reward_threshold=25.0,
)
|
|
|
|
|
|
|
|
# Classic
# ----------------------------------------
# All four classic-control ids use timestep_limit=200; Pendulum-v0 is the
# only one registered without a reward_threshold.

register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    timestep_limit=200,
    reward_threshold=195.0,
)

register(
    id='MountainCar-v0',
    entry_point='gym.envs.classic_control:MountainCarEnv',
    timestep_limit=200,
    reward_threshold=-110.0,
)

register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
    timestep_limit=200,
)

register(
    id='Acrobot-v0',
    entry_point='gym.envs.classic_control:AcrobotEnv',
    timestep_limit=200,
    reward_threshold=-100,
)
|
|
|
|
|
2016-05-03 22:27:42 +03:00
|
|
|
# Box2d
# ----------------------------------------
# Entry points live in gym.envs.box2d; each id carries its own
# timestep_limit and reward_threshold.

register(
    id='LunarLander-v2',
    entry_point='gym.envs.box2d:LunarLander',
    timestep_limit=1000,
    reward_threshold=200,
)

register(
    id='BipedalWalker-v2',
    entry_point='gym.envs.box2d:BipedalWalker',
    timestep_limit=1600,
    reward_threshold=300,
)

register(
    id='BipedalWalkerHardcore-v2',
    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
    timestep_limit=2000,
    reward_threshold=300,
)

register(
    id='CarRacing-v0',
    entry_point='gym.envs.box2d:CarRacing',
    timestep_limit=1000,
    reward_threshold=900,
)
|
|
|
|
|
2016-04-27 08:00:58 -07:00
|
|
|
# Toy Text
# ----------------------------------------
# Blackjack-v0 is registered with only an id and entry point; the remaining
# ids also set a timestep_limit, and some a reward_threshold.

register(
    id='Blackjack-v0',
    entry_point='gym.envs.toy_text:BlackjackEnv',
)

# Both FrozenLake ids use the same entry point; the 'map_name' kwarg selects
# which map is passed to it.
register(
    id='FrozenLake-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4'},
    timestep_limit=100,
    reward_threshold=0.78,  # optimum = .8196
)

register(
    id='FrozenLake8x8-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '8x8'},
    timestep_limit=200,
    reward_threshold=0.99,  # optimum = 1
)

register(
    id='NChain-v0',
    entry_point='gym.envs.toy_text:NChainEnv',
    timestep_limit=1000,
)

register(
    id='Roulette-v0',
    entry_point='gym.envs.toy_text:RouletteEnv',
    timestep_limit=100,
)

register(
    id='Taxi-v1',
    entry_point='gym.envs.toy_text.taxi:TaxiEnv',
    timestep_limit=200,
    reward_threshold=9.7,  # optimum = 10.2
)

register(
    id='GuessingGame-v0',
    entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
    timestep_limit=200,
)

register(
    id='HotterColder-v0',
    entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
    timestep_limit=200,
)
|
|
|
|
|
2016-04-27 08:00:58 -07:00
|
|
|
# Mujoco
# ----------------------------------------
# Reacher-v1 is the only id in this section that sets timestep_limit.
# Walker2d-v1, Humanoid-v1 and HumanoidStandup-v1 are registered without a
# reward_threshold.

# 2D

register(
    id='Reacher-v1',
    entry_point='gym.envs.mujoco:ReacherEnv',
    timestep_limit=50,
    reward_threshold=-3.75,
)

register(
    id='InvertedPendulum-v1',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    reward_threshold=950.0,
)

register(
    id='InvertedDoublePendulum-v1',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    reward_threshold=9100.0,
)

register(
    id='HalfCheetah-v1',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    reward_threshold=4800.0,
)

register(
    id='Hopper-v1',
    entry_point='gym.envs.mujoco:HopperEnv',
    reward_threshold=3800.0,
)

register(
    id='Swimmer-v1',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    reward_threshold=360.0,
)

register(
    id='Walker2d-v1',
    entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
    id='Ant-v1',
    entry_point='gym.envs.mujoco:AntEnv',
    reward_threshold=6000.0,
)

register(
    id='Humanoid-v1',
    entry_point='gym.envs.mujoco:HumanoidEnv',
)

register(
    id='HumanoidStandup-v1',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
)
|
2016-04-27 08:00:58 -07:00
|
|
|
|
|
|
|
# Atari
# ----------------------------------------
# Two ids are registered per game, one per observation type ('image' and
# 'ram'), all backed by the same AtariEnv entry point.

# NOTE(review): the game list below looks like the output of this (Python 2)
# snippet -- confirm before regenerating:
# print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in (
        'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
        'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
        'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
        'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
        'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
        'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
        'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
        'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
        'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon'):
    for obs_type in ('image', 'ram'):
        # Snake-case game name -> CamelCase id stem, with a '-ram' suffix for
        # the RAM variant: space_invaders should yield SpaceInvaders-v0 and
        # SpaceInvaders-ram-v0.
        name = ''.join(word.capitalize() for word in game.split('_'))
        if obs_type == 'ram':
            name = '{}-ram'.format(name)

        # ElevatorAction-ram-v0 seems to yield slightly non-deterministic
        # observations about 10% of the time. We should track this down
        # eventually, but for now it is the one id flagged as
        # nondeterministic; every other game/obs_type pair gets False.
        nondeterministic = (game == 'elevator_action' and obs_type == 'ram')

        register(
            id='{}-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type},
            timestep_limit=10000,
            nondeterministic=nondeterministic,
        )
|
|
|
|
|
|
|
|
# Board games
# ----------------------------------------
# The two Go ids share GoEnv and differ only in 'board_size'; Hex9x9-v0 uses
# HexEnv with a 'random' opponent.

register(
    id='Go9x9-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
    # The pachi player seems not to be deterministic given a fixed seed.
    # (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
    #
    # This is probably due to a computation time limit.
    nondeterministic=True,
)

register(
    id='Go19x19-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 19,
    },
    nondeterministic=True,
)

register(
    id='Hex9x9-v0',
    entry_point='gym.envs.board_game:HexEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'random',
        'observation_type': 'numpy3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
)
|
2016-05-17 00:46:03 -07:00
|
|
|
|
|
|
|
# Doom
# ----------------------------------------
# meta-Doom-v0 is the only id here that passes kwargs to its entry point;
# the individual Doom ids all use timestep_limit=10000 and differ in entry
# point and reward_threshold.

register(
    id='meta-Doom-v0',
    entry_point='gym.envs.doom:MetaDoomEnv',
    kwargs={
        'average_over': 3,
        'passing_grade': 600,
        'min_tries_for_avg': 3,
    },
    timestep_limit=999999,
    reward_threshold=9000.0,
)

register(
    id='DoomBasic-v0',
    entry_point='gym.envs.doom:DoomBasicEnv',
    timestep_limit=10000,
    reward_threshold=10.0,
)

register(
    id='DoomCorridor-v0',
    entry_point='gym.envs.doom:DoomCorridorEnv',
    timestep_limit=10000,
    reward_threshold=1000.0,
)

register(
    id='DoomDefendCenter-v0',
    entry_point='gym.envs.doom:DoomDefendCenterEnv',
    timestep_limit=10000,
    reward_threshold=10.0,
)

register(
    id='DoomDefendLine-v0',
    entry_point='gym.envs.doom:DoomDefendLineEnv',
    timestep_limit=10000,
    reward_threshold=15.0,
)

register(
    id='DoomHealthGathering-v0',
    entry_point='gym.envs.doom:DoomHealthGatheringEnv',
    timestep_limit=10000,
    reward_threshold=1000.0,
)

register(
    id='DoomMyWayHome-v0',
    entry_point='gym.envs.doom:DoomMyWayHomeEnv',
    timestep_limit=10000,
    reward_threshold=0.5,
)

register(
    id='DoomPredictPosition-v0',
    entry_point='gym.envs.doom:DoomPredictPositionEnv',
    timestep_limit=10000,
    reward_threshold=0.5,
)

register(
    id='DoomTakeCover-v0',
    entry_point='gym.envs.doom:DoomTakeCoverEnv',
    timestep_limit=10000,
    reward_threshold=750.0,
)

register(
    id='DoomDeathmatch-v0',
    entry_point='gym.envs.doom:DoomDeathmatchEnv',
    timestep_limit=10000,
    reward_threshold=20.0,
)
|
2016-05-31 00:25:50 -07:00
|
|
|
|
|
|
|
# Debugging
# ----------------------------------------
# All four debugging ids are registered with local_only=True and no
# timestep_limit or reward_threshold.

register(
    id='OneRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='OneRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundNondeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundNondeterministicRewardEnv',
    local_only=True,
)
|
2016-05-16 21:10:27 +02:00
|
|
|
|
2016-05-31 20:34:40 +02:00
|
|
|
# Parameter tuning
# ----------------------------------------
# Both ids are registered with just an id and an entry point.

register(
    id='ConvergenceControl-v0',
    entry_point='gym.envs.parameter_tuning:ConvergenceControl',
)

register(
    id='CNNClassifierTraining-v0',
    entry_point='gym.envs.parameter_tuning:CNNClassifierTraining',
)
|
|
|
|
|
|
|
|
# Safety
# ----------------------------------------
# Every id in this section uses timestep_limit=200 and has no
# reward_threshold.

# interpretability envs
register(
    id='PredictActionsCartpole-v0',
    entry_point='gym.envs.safety:PredictActionsCartpoleEnv',
    timestep_limit=200,
)

register(
    id='PredictObsCartpole-v0',
    entry_point='gym.envs.safety:PredictObsCartpoleEnv',
    timestep_limit=200,
)

# semi_supervised envs
# probably the easiest:
register(
    id='SemisuperPendulumNoise-v0',
    entry_point='gym.envs.safety:SemisuperPendulumNoiseEnv',
    timestep_limit=200,
)
# somewhat harder because of higher variance:
register(
    id='SemisuperPendulumRandom-v0',
    entry_point='gym.envs.safety:SemisuperPendulumRandomEnv',
    timestep_limit=200,
)
# probably the hardest because you only get a constant number of rewards in total:
register(
    id='SemisuperPendulumDecay-v0',
    entry_point='gym.envs.safety:SemisuperPendulumDecayEnv',
    timestep_limit=200,
)

# off_switch envs
register(
    id='OffSwitchCartpole-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleEnv',
    timestep_limit=200,
)

register(
    id='OffSwitchCartpoleProb-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleProbEnv',
    timestep_limit=200,
)
|