Minor fixes to FixedSequenceEnv (#333)

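Minor changes to FixedSequenceEnv to allow a full score: the target sequence is now generated once at construction from a fixed seed (with the upper bound of randint changed from n-1 to n), and the reward on the final step of an episode is no longer forced to 0.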
This commit is contained in:
John Schulman
2019-04-20 10:08:09 -07:00
committed by Peter Zhokhov
parent a93dde3b2b
commit 967fc8c37f

View File

@@ -9,18 +9,16 @@ class FixedSequenceEnv(Env):
n_actions=10, n_actions=10,
episode_len=100 episode_len=100
): ):
self.np_random = np.random.RandomState()
self.sequence = None
self.action_space = Discrete(n_actions) self.action_space = Discrete(n_actions)
self.observation_space = Discrete(1) self.observation_space = Discrete(1)
self.np_random = np.random.RandomState(0)
self.episode_len = episode_len self.episode_len = episode_len
self.sequence = [self.np_random.randint(0, self.action_space.n)
for _ in range(self.episode_len)]
self.time = 0 self.time = 0
def reset(self): def reset(self):
if self.sequence is None:
self.sequence = [self.np_random.randint(0, self.action_space.n-1) for _ in range(self.episode_len)]
self.time = 0 self.time = 0
return 0 return 0
@@ -29,7 +27,6 @@ class FixedSequenceEnv(Env):
self._choose_next_state() self._choose_next_state()
done = False done = False
if self.episode_len and self.time >= self.episode_len: if self.episode_len and self.time >= self.episode_len:
rew = 0
done = True done = True
return 0, rew, done, {} return 0, rew, done, {}