# Mirror of https://github.com/Farama-Foundation/Gymnasium.git
# Synced 2025-08-02 06:16:32 +00:00 (62 lines, 1.5 KiB, Python)
import numpy as np
|
|
|
|
from gym import Env, spaces
|
|
from gym.utils import seeding
|
|
|
|
|
|
def categorical_sample(prob_n, np_random):
    """
    Sample a category index from a categorical distribution.

    Args:
        prob_n: class probabilities (list or array). The cumulative sum is
            taken without an axis argument, so multi-dimensional input is
            treated as one flattened distribution.
        np_random: random source exposing a ``rand()`` method that returns
            a single uniform draw.

    Returns:
        Index (NumPy integer) of the first category whose cumulative
        probability exceeds the draw.
    """
    prob_n = np.asarray(prob_n)
    csprob_n = np.cumsum(prob_n)
    exceeds_draw = csprob_n > np_random.rand()
    if not exceeds_draw.any():
        # The draw landed at or beyond the total mass (float rounding can
        # leave the final cumulative sum just below 1). ``argmax`` on an
        # all-False mask would wrongly report category 0, so fall back to
        # the first index where the cumulative sum peaks, i.e. the last
        # category that carries probability mass.
        return csprob_n.argmax()
    return exceeds_draw.argmax()
|
|
|
|
|
|
class DiscreteEnv(Env):
    """
    Environment over a finite set of states and actions, driven by an
    explicit transition table.

    Has the following members
    - nS: number of states
    - nA: number of actions
    - P: transitions (*)
    - isd: initial state distribution (**)

    (*) dictionary of lists, where
      P[s][a] == [(probability, nextstate, reward, done), ...]
    (**) list or array of length nS

    Further attributes set by this class:
    - s: current state
    - lastaction: most recent action passed to step() (None after reset)
    - np_random: random source created by seed()
    """

    def __init__(self, nS, nA, P, isd):
        """Store the model, build the spaces, seed, and draw a start state."""
        self.nS = nS
        self.nA = nA
        self.P = P
        self.isd = isd
        self.lastaction = None  # for rendering

        self.observation_space = spaces.Discrete(self.nS)
        self.action_space = spaces.Discrete(self.nA)

        # The random source must exist before the first state is sampled.
        self.seed()
        self.s = categorical_sample(self.isd, self.np_random)

    def seed(self, seed=None):
        """Install a new random source and return the seed used, in a list."""
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def reset(self):
        """Draw a fresh state from ``isd``, clear ``lastaction``, return the state."""
        start_state = categorical_sample(self.isd, self.np_random)
        self.s = start_state
        self.lastaction = None
        return int(start_state)

    def step(self, a):
        """
        Apply action ``a`` in the current state.

        One entry of ``P[s][a]`` is chosen at random, weighted by each
        entry's probability.

        Returns:
            (next state as int, reward, done flag, {"prob": probability of
            the chosen entry})
        """
        outcomes = self.P[self.s][a]
        weights = [outcome[0] for outcome in outcomes]
        chosen = categorical_sample(weights, self.np_random)
        prob, next_state, reward, done = outcomes[chosen]

        self.s = next_state
        self.lastaction = a
        return (int(next_state), reward, done, {"prob": prob})
|