import gym from gym.spaces import Tuple from gym.vector.utils.spaces import batch_space __all__ = ['VectorEnv'] class VectorEnv(gym.Env): """Base class for vectorized environments. Parameters ---------- num_envs : int Number of environments in the vectorized environment. observation_space : `gym.spaces.Space` instance Observation space of a single environment. action_space : `gym.spaces.Space` instance Action space of a single environment. """ def __init__(self, num_envs, observation_space, action_space): super(VectorEnv, self).__init__() self.num_envs = num_envs self.observation_space = batch_space(observation_space, n=num_envs) self.action_space = Tuple((action_space,) * num_envs) self.closed = False self.viewer = None # The observation and action spaces of a single environment are # kept in separate properties self.single_observation_space = observation_space self.single_action_space = action_space def reset_async(self): pass def reset_wait(self, **kwargs): raise NotImplementedError() def reset(self): """ Returns ------- observations : sample from `observation_space` A batch of observations from the vectorized environment. """ self.reset_async() return self.reset_wait() def step_async(self, actions): pass def step_wait(self, **kwargs): raise NotImplementedError() def step(self, actions): """ Parameters ---------- actions : iterable of samples from `action_space` List of actions. Returns ------- observations : sample from `observation_space` A batch of observations from the vectorized environment. rewards : `np.ndarray` instance (dtype `np.float_`) A vector of rewards from the vectorized environment. dones : `np.ndarray` instance (dtype `np.bool_`) A vector whose entries indicate whether the episode has ended. infos : list of dict A list of auxiliary diagnostic informations. """ self.step_async(actions) return self.step_wait() def seed(self, seeds=None): """ Parameters ---------- seeds : list of int, or int, optional Random seed for each individual environment. If `seeds` is a list of length `num_envs`, then the items of the list are chosen as random seeds. If `seeds` is an int, then each environment uses the random seed `seeds + n`, where `n` is the index of the environment (between `0` and `num_envs - 1`). """ pass def __del__(self): if hasattr(self, 'closed'): if not self.closed: self.close()