mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-02 14:26:33 +00:00
128 lines
3.9 KiB
Python
128 lines
3.9 KiB
Python
import numpy as np
|
|
import StringIO, sys
|
|
|
|
from gym import utils
|
|
from gym.envs.toy_text import discrete
|
|
|
|
# Action indices. The numbering follows the movement rules FrozenLakeEnv uses
# when it builds its transition table (0 moves one column left, 1 one row
# down, 2 one column right, 3 one row up) and the order of the labels its
# renderer prints ("Left", "Down", "Right", "Up").
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3
|
|
|
|
# Built-in lake layouts, keyed by name. Each layout is a list of equal-length
# row strings made of the tile letters S (start), F (frozen), H (hole) and
# G (goal). The grids are written as text blocks and split on whitespace, so
# every non-blank line becomes one row.
MAPS = {
    "4x4": """
        SFFF
        FHFH
        FFFH
        HFFG
    """.split(),
    "8x8": """
        SFFFFFFF
        FFFFFFFF
        FFFHFFFF
        FFFFFHFF
        FFFHFFFF
        FHHFFFHF
        FHFFHFHF
        FFFHFFFG
    """.split(),
}
|
|
|
|
class FrozenLakeEnv(discrete.DiscreteEnv):
    """
    Winter is here. You and your friends were tossing around a frisbee at the park
    when you made a wild throw that left the frisbee out in the middle of the lake.
    The water is mostly frozen, but there are a few holes where the ice has melted.
    If you step into one of those holes, you'll fall into the freezing water.
    At this time, there's an international frisbee shortage, so it's absolutely imperative that
    you navigate across the lake and retrieve the disc.
    However, the ice is slippery, so you won't always move in the direction you intend.
    The surface is described using a grid like the following

        SFFF
        FHFH
        FFFH
        HFFG

    S : starting point, safe
    F : frozen surface, safe
    H : hole, fall to your doom
    G : goal, where the frisbee is located

    The episode ends when you reach the goal or fall in a hole.
    You receive a reward of 1 if you reach the goal, and zero otherwise.

    Actions are integers: 0 = left, 1 = down, 2 = right, 3 = up. A move that
    would leave the grid keeps the agent on the border tile.

    Constructor arguments:
        desc: optional custom map given as a sequence of equal-length row
            strings using the letters above. Takes precedence over map_name.
        map_name: key into MAPS, used only when desc is None.
        is_slippery: when True, each action moves in the requested direction
            or in one of the two perpendicular directions, each with
            probability 1/3. When False, the requested move always happens.

    Raises:
        ValueError: if both desc and map_name are None.
    """

    metadata = {'render.modes': ['human', 'ansi']}

    def __init__(self, desc=None, map_name="4x4", is_slippery=True):
        if desc is None and map_name is None:
            raise ValueError('Must provide either desc or map_name')
        elif desc is None:
            desc = MAPS[map_name]
        # dtype 'c' splits every row string into single characters, giving a
        # 2-D (nrow, ncol) array of tile letters.
        self.desc = desc = np.asarray(desc, dtype='c')
        self.nrow, self.ncol = nrow, ncol = desc.shape

        nA = 4
        nS = nrow * ncol

        # Initial state distribution: uniform over all tiles marked 'S'.
        isd = (desc == 'S').ravel().astype('float64')
        isd /= isd.sum()

        # P[state][action] is a list of (probability, next_state, reward, done).
        P = {s: {a: [] for a in range(nA)} for s in range(nS)}

        def to_s(row, col):
            # Flatten grid coordinates into a row-major state index.
            return row * ncol + col

        def inc(row, col, a):
            # Apply move `a` to (row, col), clamping at the grid border.
            if a == 0:    # left
                col = max(col - 1, 0)
            elif a == 1:  # down
                row = min(row + 1, nrow - 1)
            elif a == 2:  # right
                col = min(col + 1, ncol - 1)
            elif a == 3:  # up
                row = max(row - 1, 0)
            return (row, col)

        for row in range(nrow):
            for col in range(ncol):
                s = to_s(row, col)
                for a in range(nA):
                    if is_slippery:
                        # The action codes are cyclic (left, down, right, up),
                        # so a-1 and a+1 are the two perpendicular directions.
                        outcomes = [(1.0 / 3.0, b)
                                    for b in [(a - 1) % 4, a, (a + 1) % 4]]
                    else:
                        # Deterministic ice: the requested move is certain, so
                        # its probability must be 1.0 for the distribution over
                        # next states to sum to one.
                        outcomes = [(1.0, a)]

                    li = P[s][a]
                    for prob, b in outcomes:
                        newrow, newcol = inc(row, col, b)
                        newstate = to_s(newrow, newcol)
                        letter = desc[newrow, newcol]
                        # The episode ends on arrival at a goal or hole tile;
                        # only reaching the goal is rewarded.
                        done = letter in 'GH'
                        rew = float(letter == 'G')
                        li.append((prob, newstate, rew, done))

        super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)

    def _render(self, mode='human', close=False):
        """
        Write the map, with the agent's current tile colorized, to a stream.

        In 'ansi' mode the text goes to a new StringIO buffer; in any other
        mode it goes to sys.stdout. The stream that was written to is
        returned. When close is True nothing is written and None is returned.

        Reads self.s (current state index) and self.lastaction; both are
        presumably maintained by the DiscreteEnv base class.
        """
        if close:
            return

        outfile = StringIO.StringIO() if mode == 'ansi' else sys.stdout

        # Recover grid coordinates from the row-major state index.
        row, col = self.s // self.ncol, self.s % self.ncol
        desc = self.desc.tolist()
        desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)

        outfile.write("\n".join("".join(line) for line in desc) + "\n")
        if self.lastaction is not None:
            outfile.write(" ({})\n".format(["Left","Down","Right","Up"][self.lastaction]))
        else:
            outfile.write("\n")

        return outfile
|