diff --git a/baselines/common/tests/envs/fixed_sequence_env.py b/baselines/common/tests/envs/fixed_sequence_env.py
index f5460d5..b3fe396 100644
--- a/baselines/common/tests/envs/fixed_sequence_env.py
+++ b/baselines/common/tests/envs/fixed_sequence_env.py
@@ -9,18 +9,16 @@ class FixedSequenceEnv(Env):
             n_actions=10,
             episode_len=100
     ):
-        self.np_random = np.random.RandomState()
-        self.sequence = None
-
         self.action_space = Discrete(n_actions)
         self.observation_space = Discrete(1)
-
+        self.np_random = np.random.RandomState(0)
         self.episode_len = episode_len
+        self.sequence = [self.np_random.randint(0, self.action_space.n)
+                         for _ in range(self.episode_len)]
         self.time = 0
 
+
     def reset(self):
-        if self.sequence is None:
-            self.sequence = [self.np_random.randint(0, self.action_space.n-1) for _ in range(self.episode_len)]
         self.time = 0
         return 0
 
@@ -29,7 +27,6 @@ class FixedSequenceEnv(Env):
         self._choose_next_state()
         done = False
         if self.episode_len and self.time >= self.episode_len:
-            rew = 0
             done = True
 
         return 0, rew, done, {}