mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-24 07:22:43 +00:00
38 lines
899 B
Python
38 lines
899 B
Python
![]() |
"""
|
||
|
Simple environment with known optimal policy and value function.
|
||
|
|
||
|
This environment has just two actions.
|
||
|
Action 0 yields 0 reward and then terminates the session.
|
||
|
Action 1 yields 1 reward and then terminates the session.
|
||
|
|
||
|
Optimal policy: action 1.
|
||
|
|
||
|
Optimal value function: v(0)=1 (there is only one state, state 0)
|
||
|
"""
|
||
|
|
||
|
import gym
|
||
|
import random
|
||
|
from gym import spaces
|
||
|
|
||
|
class OneRoundDeterministicRewardEnv(gym.Env):
    """Single-step environment whose reward depends only on the action.

    The action space is ``Discrete(2)`` and the observation space is
    ``Discrete(1)``, so the only observation ever produced is ``0``.
    A truthy action earns a reward of 1, a falsy action earns 0, and the
    episode is reported as finished after every step.
    """

    def __init__(self):
        """Declare the action/observation spaces and perform an initial reset."""
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Discrete(1)
        self._reset()

    def _step(self, action):
        """Apply ``action`` and end the episode.

        NOTE(review): presumably called through the base class's public
        ``step`` wrapper -- confirm against the gym version in use.

        Args:
            action: A member of ``self.action_space``.

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``reward`` is
            1 for a truthy action and 0 otherwise, ``done`` is always
            ``True`` and ``info`` is an empty dict.

        Raises:
            AssertionError: If ``action`` is not contained in the action space.
        """
        assert self.action_space.contains(action)

        reward = 1 if action else 0

        # Every episode lasts exactly one step, so it is always finished here.
        return self._get_obs(), reward, True, {}

    def _get_obs(self):
        """Return the environment's single observation, ``0``."""
        return 0

    def _reset(self):
        """Return the initial observation; there is no state to clear."""
        return self._get_obs()
|