Symshapes - gives codegen ability to evaluate same algo on envs with different ob/ac shapes (#262)
* finish cherry-pick td3 test commit
* removed graph simplification error ignore
* merge delayed logger config
* merge updated baselines logger
* lazy_mpi load
* cleanups
* use lazy mpi imports in codegen
* more lazy mpi
* don't pretend that class is a module, just use it as a class
* mass-replace mpi4py imports
* flake8
* fix previous lazy_mpi imports
* removed extra printouts from TdLayer op
* silly recursion
* running codegen cc experiment
* wip
* more wip
* use actor as input for critic targets, instead of the action taken
* batch size 100
* tweak update parameters
* tweaking td3 runs
* wip
* use nenvs=2 for contcontrol (to be comparable with ppo_metal)
* wip. Doubts about usefulness of actor in critic target
* delayed actor in ActorLoss
* score is average of last 100
* skip lack of losses or too many action distributions
* 16 envs for contcontrol, replay buffer size equal to horizon (no point in making it longer)
* syntax
* microfixes
* minifixes
* run in process logic to bypass tensorflow freezes/failures (per Oleg's suggestion)
* random physics for mujoco
* random parts sizes with range 0.4
* add notebook with results into x/peterz
* variations of ant
* roboschool use gym.make kwargs
* use float as lowest score after rank transform
* rcall from master
* wip
* re-enable dynamic routing
* wip
* squash-merge master, resolve conflicts
* remove erroneous file
* restore normal MPI imports
* move wrappers around a little bit
* autopep8
* cleanups
* cleanup mpi_eda, autopep8
* make activation function of action distribution customizable
* cleanups; preparation for a pr
* syntax
* merge latest master, resolve conflicts
* wrap MPI import with try/except
* allow import of modules through env id in baselines cmd_util
* flake8 complaints
* only wrap box action spaces with ClipActionsWrapper
* flake8
* fixes to algo_prob according to Oleg's suggestions
* use apply_without_scope flag in ActorLoss
* remove extra line in algo/core.py
* multi-task support
* autopep8
* symbolic suffix-shapes (not B,T yet)
* test_with_mpi -> with_mpi rename
* remove extra blank lines in algo/core
* remove extra blank lines in algo/core
* remove more blank lines
* symbolify shapes in existing algorithms
* minor output changes
* cleaning up merge conflicts
* cleaning up merge conflicts
* cleaning up more merge conflicts
* restore mpi_map.py from master
@@ -21,6 +21,7 @@ from baselines.common.wrappers import ClipActionsWrapper
 
 def make_vec_env(env_id, env_type, num_env, seed,
                  wrapper_kwargs=None,
+                 env_kwargs=None,
                  start_index=0,
                  reward_scale=1.0,
                  flatten_dict_observations=True,
@@ -29,6 +30,7 @@ def make_vec_env(env_id, env_type, num_env, seed,
     Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
     """
     wrapper_kwargs = wrapper_kwargs or {}
+    env_kwargs = env_kwargs or {}
     mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
     seed = seed + 10000 * mpi_rank if seed is not None else None
     logger_dir = logger.get_dir()
@@ -43,6 +45,7 @@ def make_vec_env(env_id, env_type, num_env, seed,
             gamestate=gamestate,
             flatten_dict_observations=flatten_dict_observations,
             wrapper_kwargs=wrapper_kwargs,
+            env_kwargs=env_kwargs,
             logger_dir=logger_dir
         )
 
@@ -53,15 +56,15 @@ def make_vec_env(env_id, env_type, num_env, seed,
         return DummyVecEnv([make_thunk(start_index)])
 
 
-def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, logger_dir=None):
+def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None):
     wrapper_kwargs = wrapper_kwargs or {}
+    env_kwargs = env_kwargs or {}
     if ':' in env_id:
         import re
         import importlib
         module_name = re.sub(':.*','',env_id)
         env_id = re.sub('.*:', '', env_id)
         importlib.import_module(module_name)
-
     if env_type == 'atari':
         env = make_atari(env_id)
     elif env_type == 'retro':
@@ -69,7 +72,7 @@ def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.
         gamestate = gamestate or retro.State.DEFAULT
         env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
     else:
-        env = gym.make(env_id)
+        env = gym.make(env_id, **env_kwargs)
 
     if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
         keys = env.observation_space.spaces.keys()
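The cmd_util hunks above thread a new env_kwargs dictionary through make_vec_env and make_env into gym.make, and the ':' handling lets an env id name the module that registers it. A minimal usage sketch, not part of the commit: 'my_envs', 'MyEnv-v0' and 'frame_skip' are assumed names for an importable module, the environment it registers with gym, and one of that environment's constructor kwargs.

from baselines.common.cmd_util import make_vec_env

venv = make_vec_env(
    'my_envs:MyEnv-v0',            # 'module:env_id' form: importlib.import_module('my_envs') runs first
    env_type='mujoco',             # anything other than 'atari'/'retro' falls through to gym.make
    num_env=4,
    seed=0,
    env_kwargs={'frame_skip': 2},  # forwarded as gym.make(env_id, **env_kwargs)
)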
@@ -1,10 +1,10 @@
from baselines.common import mpi_util
from mpi4py import MPI
from baselines import logger
from baselines.common.tests.test_with_mpi import with_mpi
from baselines.common import mpi_util

@with_mpi()
def test_mpi_weighted_mean():
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    with logger.scoped_configure(comm=comm):
        if comm.rank == 0:
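The import shuffle in this test reflects the branch's broader MPI handling ("lazy_mpi load", "wrap MPI import with try/except", "restore normal MPI imports" in the commit message): mpi4py is imported only where it is actually used, and other code guards against it being absent. A small sketch of that guard pattern, with mpi_rank_or_zero as an illustrative helper name rather than code from this commit:

try:
    from mpi4py import MPI
except ImportError:
    MPI = None  # run single-process when mpi4py is not installed

def mpi_rank_or_zero():
    # Mirrors the `MPI.COMM_WORLD.Get_rank() if MPI else 0` check used in cmd_util above.
    return MPI.COMM_WORLD.Get_rank() if MPI else 0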
@@ -217,7 +217,9 @@ def learn(network, env,
         stats = agent.get_stats()
         combined_stats = stats.copy()
         combined_stats['rollout/return'] = np.mean(epoch_episode_rewards)
+        combined_stats['rollout/return_std'] = np.std(epoch_episode_rewards)
         combined_stats['rollout/return_history'] = np.mean(episode_rewards_history)
+        combined_stats['rollout/return_history_std'] = np.std(episode_rewards_history)
         combined_stats['rollout/episode_steps'] = np.mean(epoch_episode_steps)
         combined_stats['rollout/actions_mean'] = np.mean(epoch_actions)
         combined_stats['rollout/Q_mean'] = np.mean(epoch_qs)
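The two added keys report the spread of returns alongside the existing means (the commit message notes "score is average of last 100" for the return history). Illustrative only, with made-up reward values:

import numpy as np

epoch_episode_rewards = [120.0, 95.0, 143.0]            # returns from the current epoch (example values)
episode_rewards_history = [120.0, 95.0, 143.0, 88.0]    # recent-episode return buffer (example values)

combined_stats = {
    'rollout/return': np.mean(epoch_episode_rewards),
    'rollout/return_std': np.std(epoch_episode_rewards),
    'rollout/return_history': np.mean(episode_rewards_history),
    'rollout/return_history_std': np.std(episode_rewards_history),
}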