Return output from render method in a right way (#1248)

* Close output StringIO after returning value

* Test render output is immutable
This commit is contained in:
Misha Behersky
2019-02-09 02:58:51 +02:00
committed by pzhokhov
parent 5f73c5dff5
commit 0659520f8c
6 changed files with 87 additions and 63 deletions

View File

@@ -33,10 +33,11 @@ leveling up many times to reach their reward threshold.
from gym import Env, logger
from gym.spaces import Discrete, Tuple
from gym.utils import colorize, seeding
import sys
from contextlib import closing
import numpy as np
from six import StringIO
import sys
import math
class AlgorithmicEnv(Env):
@@ -112,7 +113,6 @@ class AlgorithmicEnv(Env):
raise NotImplementedError
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
outfile.write(inp)
@@ -149,7 +149,10 @@ class AlgorithmicEnv(Env):
outfile.write(" prediction: %s)\n" % pred_str)
else:
outfile.write("\n" * 5)
return outfile
if mode != 'human':
with closing(outfile):
return outfile.getvalue()
@property
def input_width(self):
@@ -234,6 +237,7 @@ class AlgorithmicEnv(Env):
def _move(self, movement):
raise NotImplemented
class TapeAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 1-d input tape."""
MOVEMENTS = ['left', 'right']
@@ -269,6 +273,7 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
x_str += "\n"
return x_str
class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']

View File

@@ -48,3 +48,17 @@ def test_random_rollout():
(ob, _reward, done, _info) = env.step(a)
if done: break
env.close()
def test_env_render_result_is_immutable():
environs = [
envs.make('Taxi-v2'),
envs.make('FrozenLake-v0'),
envs.make('Reverse-v0'),
]
for env in environs:
env.reset()
output = env.render(mode='ansi')
assert isinstance(output, str)
env.close()

View File

@@ -31,7 +31,7 @@ class DiscreteEnv(Env):
def __init__(self, nS, nA, P, isd):
self.P = P
self.isd = isd
self.lastaction=None # for rendering
self.lastaction = None # for rendering
self.nS = nS
self.nA = nA
@@ -47,7 +47,7 @@ class DiscreteEnv(Env):
def reset(self):
self.s = categorical_sample(self.isd, self.np_random)
self.lastaction=None
self.lastaction = None
return self.s
def step(self, a):
@@ -55,5 +55,5 @@ class DiscreteEnv(Env):
i = categorical_sample([t[0] for t in transitions], self.np_random)
p, s, r, d= transitions[i]
self.s = s
self.lastaction=a
self.lastaction = a
return (s, r, d, {"prob" : p})

View File

@@ -1,5 +1,7 @@
import numpy as np
import sys
from contextlib import closing
import numpy as np
from six import StringIO, b
from gym import utils
@@ -129,4 +131,5 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
outfile.write("\n".join(''.join(line) for line in desc)+"\n")
if mode != 'human':
return outfile
with closing(outfile):
return outfile.getvalue()

View File

@@ -1,4 +1,5 @@
import sys
from contextlib import closing
from six import StringIO
from gym import utils
from gym.envs.toy_text import discrete
@@ -149,4 +150,5 @@ class TaxiEnv(discrete.DiscreteEnv):
# No need to return anything for human
if mode != 'human':
return outfile
with closing(outfile):
return outfile.getvalue()