2023-11-07 13:27:25 +00:00
""" Implementation of a synchronous (for loop) vectorization method of any environment. """
2024-06-10 17:07:47 +01:00
2023-11-07 13:27:25 +00:00
from __future__ import annotations
2025-06-07 17:57:58 +01:00
from collections . abc import Callable , Iterator , Sequence
2022-03-31 12:50:38 -07:00
from copy import deepcopy
2025-06-07 17:57:58 +01:00
from typing import Any
2021-12-08 22:14:15 +01:00
2019-06-21 17:29:44 -04:00
import numpy as np
2024-08-29 16:52:43 +01:00
from gymnasium import Env , Space
2023-11-07 13:27:25 +00:00
from gymnasium . core import ActType , ObsType , RenderFrame
2024-08-29 16:52:43 +01:00
from gymnasium . spaces . utils import is_space_dtype_shape_equiv
from gymnasium . vector . utils import (
batch_differing_spaces ,
batch_space ,
concatenate ,
create_empty_array ,
iterate ,
)
2024-11-28 12:18:55 +00:00
from gymnasium . vector . vector_env import ArrayType , AutoresetMode , VectorEnv
2019-06-21 17:29:44 -04:00
2022-12-04 22:24:02 +08:00
2021-07-29 02:26:34 +02:00
# Public API of this module: only the synchronous vector environment is exported.
__all__ = ["SyncVectorEnv"]
2019-06-21 17:29:44 -04:00
class SyncVectorEnv(VectorEnv):
    """Vectorized environment that serially runs multiple environments.

    Every sub-environment is created, reset and stepped in a plain ``for`` loop inside the
    calling process, and the per-environment results are batched together.

    Example:
        >>> import gymnasium as gym
        >>> envs = gym.make_vec("Pendulum-v1", num_envs=2, vectorization_mode="sync")
        >>> envs
        SyncVectorEnv(Pendulum-v1, num_envs=2)
        >>> envs = gym.vector.SyncVectorEnv([
        ...     lambda: gym.make("Pendulum-v1", g=9.81),
        ...     lambda: gym.make("Pendulum-v1", g=1.62)
        ... ])
        >>> envs
        SyncVectorEnv(num_envs=2)
        >>> obs, infos = envs.reset(seed=42)
        >>> obs
        array([[-0.14995256,  0.9886932 , -0.12224312],
               [ 0.5760367 ,  0.8174238 , -0.91244936]], dtype=float32)
        >>> infos
        {}
        >>> _ = envs.action_space.seed(42)
        >>> actions = envs.action_space.sample()
        >>> obs, rewards, terminates, truncates, infos = envs.step(actions)
        >>> obs
        array([[-0.1878752 ,  0.98219293,  0.7695615 ],
               [ 0.6102389 ,  0.79221743, -0.8498053 ]], dtype=float32)
        >>> rewards
        array([-2.96562607, -0.99902063])
        >>> terminates
        array([False, False])
        >>> truncates
        array([False, False])
        >>> infos
        {}
        >>> envs.close()
    """

    def __init__(
        self,
        env_fns: Iterator[Callable[[], Env]] | Sequence[Callable[[], Env]],
        copy: bool = True,
        observation_mode: str | tuple[Space, Space] = "same",
        autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP,
    ):
        """Vectorized environment that serially runs multiple environments.

        Args:
            env_fns: iterable of callable functions that create the environments.
            copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations.
            observation_mode: Defines how environment observation spaces should be batched. 'same' defines that there should be ``n`` copies of identical spaces.
                'different' defines that there can be multiple observation spaces with the same length but different high/low values batched together.
                Passing a 2-tuple ``(batched_observation_space, single_observation_space)`` of ``Space`` objects
                allows the user to set some custom observation space mode not covered by 'same' or 'different'.
            autoreset_mode: The Autoreset Mode used, see https://farama.org/Vector-Autoreset-Mode for more information.

        Raises:
            ValueError: If ``observation_mode`` is neither 'same', 'different' nor a 2-tuple.
            AssertionError: If a custom ``observation_mode`` tuple does not contain two ``Space`` objects, if the
                observation space of some sub-environment is not compatible with ``single_observation_space``
                (by default, the observation space of the first sub-environment), or if a sub-environment's
                action space differs from that of the first sub-environment.
        """
        super().__init__()

        self.env_fns = env_fns
        self.copy = copy
        self.observation_mode = observation_mode
        self.autoreset_mode = (
            autoreset_mode
            if isinstance(autoreset_mode, AutoresetMode)
            else AutoresetMode(autoreset_mode)
        )

        # Initialise all sub-environments
        self.envs = [env_fn() for env_fn in env_fns]

        # Define core attributes using the sub-environments
        # As we support `make_vec(spec)` then we can't include a `spec = self.envs[0].spec` as this doesn't guarantee we can actual recreate the vector env.
        self.num_envs = len(self.envs)
        # Shallow-copy the metadata before adding the autoreset mode: writing into the
        # sub-environment's own dict would leak the key into every object sharing that dict.
        self.metadata = dict(self.envs[0].metadata)
        self.metadata["autoreset_mode"] = self.autoreset_mode
        self.render_mode = self.envs[0].render_mode

        self.single_action_space = self.envs[0].action_space
        self.action_space = batch_space(self.single_action_space, self.num_envs)

        if isinstance(observation_mode, tuple) and len(observation_mode) == 2:
            # Custom mode: the tuple is unpacked as (batched space, single space)
            assert isinstance(observation_mode[0], Space)
            assert isinstance(observation_mode[1], Space)
            self.observation_space, self.single_observation_space = observation_mode
        else:
            if observation_mode == "same":
                self.single_observation_space = self.envs[0].observation_space
                self.observation_space = batch_space(
                    self.single_observation_space, self.num_envs
                )
            elif observation_mode == "different":
                self.single_observation_space = self.envs[0].observation_space
                self.observation_space = batch_differing_spaces(
                    [env.observation_space for env in self.envs]
                )
            else:
                raise ValueError(
                    f"Invalid `observation_mode`, expected: 'same' or 'different' or tuple of single and batch observation space, actual got {observation_mode}"
                )

        # check sub-environment obs and action spaces
        for env in self.envs:
            if observation_mode == "same":
                assert (
                    env.observation_space == self.single_observation_space
                ), f"SyncVectorEnv(..., observation_mode='same') however the sub-environments observation spaces are not equivalent. single_observation_space={self.single_observation_space}, sub-environment observation_space={env.observation_space}. If this is intentional, use `observation_mode='different'` instead."
            else:
                assert is_space_dtype_shape_equiv(
                    env.observation_space, self.single_observation_space
                ), f"SyncVectorEnv(..., observation_mode='different' or custom space) however the sub-environments observation spaces do not share a common shape and dtype, single_observation_space={self.single_observation_space}, sub-environment observation space={env.observation_space}"

            assert (
                env.action_space == self.single_action_space
            ), f"Sub-environment action space doesn't make the `single_action_space`, action_space={env.action_space}, single_action_space={self.single_action_space}"

        # Initialise attributes used in `step` and `reset`
        self._env_obs = [None for _ in range(self.num_envs)]
        self._observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros
        )
        self._rewards = np.zeros((self.num_envs,), dtype=np.float64)
        self._terminations = np.zeros((self.num_envs,), dtype=np.bool_)
        self._truncations = np.zeros((self.num_envs,), dtype=np.bool_)

        # Marks the sub-environments whose last step ended an episode
        self._autoreset_envs = np.zeros((self.num_envs,), dtype=np.bool_)

    @property
    def np_random_seed(self) -> tuple[int, ...]:
        """Returns a tuple of np random seeds for the wrapped envs."""
        return self.get_attr("np_random_seed")

    @property
    def np_random(self) -> tuple[np.random.Generator, ...]:
        """Returns a tuple of the numpy random number generators for the wrapped envs."""
        return self.get_attr("np_random")

    def reset(
        self,
        *,
        seed: int | list[int | None] | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[ObsType, dict[str, Any]]:
        """Resets each of the sub-environments and concatenate the results together.

        Args:
            seed: Seeds used to reset the sub-environments, either
                * ``None`` - random seeds for all environment
                * ``int`` - ``[seed, seed + 1, ..., seed + n - 1]``
                * List of ints - ``[1, 2, 3, ..., n]``
            options: Option information used for each sub-environment. If it contains the key
                ``"reset_mask"`` (a boolean numpy array of shape ``(num_envs,)`` with at least one
                ``True`` entry), only the sub-environments selected by the mask are reset. The
                mask itself is not forwarded to the sub-environments and the passed dictionary
                is left unmodified.

        Returns:
            Concatenated observations and info from each sub-environment

        Raises:
            AssertionError: If the number of seeds does not match ``num_envs`` or if
                ``options["reset_mask"]`` is not a valid mask.
        """
        if seed is None:
            seed = [None for _ in range(self.num_envs)]
        elif isinstance(seed, int):
            seed = [seed + i for i in range(self.num_envs)]
        assert (
            len(seed) == self.num_envs
        ), f"If seeds are passed as a list the length must match num_envs={self.num_envs} but got length={len(seed)}."

        if options is not None and "reset_mask" in options:
            reset_mask = options["reset_mask"]
            # Forward every other option through a fresh dict so the caller's `options`
            # keeps its mask and can be reused for a later call.
            options = {
                key: value for key, value in options.items() if key != "reset_mask"
            }

            assert isinstance(
                reset_mask, np.ndarray
            ), f"`options['reset_mask': mask]` must be a numpy array, got {type(reset_mask)}"
            assert reset_mask.shape == (
                self.num_envs,
            ), f"`options['reset_mask': mask]` must have shape `({self.num_envs},)`, got {reset_mask.shape}"
            assert (
                reset_mask.dtype == np.bool_
            ), f"`options['reset_mask': mask]` must have `dtype=np.bool_`, got {reset_mask.dtype}"
            assert np.any(
                reset_mask
            ), f"`options['reset_mask': mask]` must contain a boolean array, got reset_mask={reset_mask}"

            # Only the selected sub-environments have their episode state cleared
            self._terminations[reset_mask] = False
            self._truncations[reset_mask] = False
            self._autoreset_envs[reset_mask] = False

            infos = {}
            for i, (env, single_seed, env_mask) in enumerate(
                zip(self.envs, seed, reset_mask)
            ):
                if env_mask:
                    self._env_obs[i], env_info = env.reset(
                        seed=single_seed, options=options
                    )

                    infos = self._add_info(infos, env_info, i)
        else:
            self._terminations = np.zeros((self.num_envs,), dtype=np.bool_)
            self._truncations = np.zeros((self.num_envs,), dtype=np.bool_)
            self._autoreset_envs = np.zeros((self.num_envs,), dtype=np.bool_)

            infos = {}
            for i, (env, single_seed) in enumerate(zip(self.envs, seed)):
                self._env_obs[i], env_info = env.reset(
                    seed=single_seed, options=options
                )

                infos = self._add_info(infos, env_info, i)

        # Concatenate the observations
        self._observations = concatenate(
            self.single_observation_space, self._env_obs, self._observations
        )

        return deepcopy(self._observations) if self.copy else self._observations, infos

    def step(
        self, actions: ActType
    ) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """Steps through each of the environments returning the batched results.

        Args:
            actions: Batched actions, split into one action per sub-environment using the
                batched ``action_space``.

        Returns:
            The batched environment step results

        Raises:
            ValueError: If ``autoreset_mode`` is not a known mode, or if the number of actions
                differs from the number of sub-environments.
            AssertionError: In ``AutoresetMode.DISABLED``, if a sub-environment whose previous step
                terminated or truncated is stepped again before being reset.
        """
        actions = iterate(self.action_space, actions)

        infos = {}
        # `strict=True` makes a mismatch between the number of actions and sub-environments an error
        for i, (action, env) in enumerate(zip(actions, self.envs, strict=True)):
            if self.autoreset_mode == AutoresetMode.NEXT_STEP:
                if self._autoreset_envs[i]:
                    # The previous step ended the episode: reset instead of stepping,
                    # the action for this sub-environment is not used.
                    self._env_obs[i], env_info = env.reset()

                    self._rewards[i] = 0.0
                    self._terminations[i] = False
                    self._truncations[i] = False
                else:
                    (
                        self._env_obs[i],
                        self._rewards[i],
                        self._terminations[i],
                        self._truncations[i],
                        env_info,
                    ) = env.step(action)
            elif self.autoreset_mode == AutoresetMode.DISABLED:
                # assumes that the user has correctly autoreset
                assert not self._autoreset_envs[i], f"{self._autoreset_envs=}"
                (
                    self._env_obs[i],
                    self._rewards[i],
                    self._terminations[i],
                    self._truncations[i],
                    env_info,
                ) = env.step(action)
            elif self.autoreset_mode == AutoresetMode.SAME_STEP:
                (
                    self._env_obs[i],
                    self._rewards[i],
                    self._terminations[i],
                    self._truncations[i],
                    env_info,
                ) = env.step(action)

                if self._terminations[i] or self._truncations[i]:
                    # Keep the last observation and info of the finished episode in `infos`
                    # before they are replaced by the values returned from the reset.
                    infos = self._add_info(
                        infos,
                        {"final_obs": self._env_obs[i], "final_info": env_info},
                        i,
                    )

                    self._env_obs[i], env_info = env.reset()
            else:
                raise ValueError(f"Unexpected autoreset mode, {self.autoreset_mode}")

            infos = self._add_info(infos, env_info, i)

        # Concatenate the observations
        self._observations = concatenate(
            self.single_observation_space, self._env_obs, self._observations
        )
        self._autoreset_envs = np.logical_or(self._terminations, self._truncations)

        return (
            deepcopy(self._observations) if self.copy else self._observations,
            np.copy(self._rewards),
            np.copy(self._terminations),
            np.copy(self._truncations),
            infos,
        )

    def render(self) -> tuple[RenderFrame, ...] | None:
        """Returns the rendered frames from the environments."""
        return tuple(env.render() for env in self.envs)

    def call(self, name: str, *args: Any, **kwargs: Any) -> tuple[Any, ...]:
        """Calls a sub-environment method with name and applies args and kwargs.

        If the attribute found under ``name`` is not callable, its value is returned as is.

        Args:
            name: The method name
            *args: The method args
            **kwargs: The method kwargs

        Returns:
            Tuple of results
        """
        results = []
        for env in self.envs:
            function = env.get_wrapper_attr(name)

            if callable(function):
                results.append(function(*args, **kwargs))
            else:
                results.append(function)

        return tuple(results)

    def get_attr(self, name: str) -> tuple[Any, ...]:
        """Get a property from each parallel environment.

        Args:
            name (str): Name of the property to get from each individual environment.

        Returns:
            The property with name
        """
        return self.call(name)

    def set_attr(self, name: str, values: list[Any] | tuple[Any, ...] | Any):
        """Sets an attribute of the sub-environments.

        Args:
            name: The property name to change
            values: Values of the property to be set to. If ``values`` is a list or
                tuple, then it corresponds to the values for each individual
                environment, otherwise, a single value is set for all environments.

        Raises:
            ValueError: Values must be a list or tuple with length equal to the number of environments.
        """
        if not isinstance(values, (list, tuple)):
            values = [values for _ in range(self.num_envs)]

        if len(values) != self.num_envs:
            raise ValueError(
                "Values must be a list or tuple with length equal to the number of environments. "
                f"Got `{len(values)}` values for {self.num_envs} environments."
            )

        for env, value in zip(self.envs, values):
            env.set_wrapper_attr(name, value)

    def close_extras(self, **kwargs: Any):
        """Close the environments."""
        # `envs` is missing if `__init__` failed before the sub-environments were created
        if hasattr(self, "envs"):
            for env in self.envs:
                env.close()