* exported rl-algs
* more stuff from rl-algs
* run slow tests
* re-exported rl_algs
* re-exported rl_algs - fixed problems with serialization test and test_cartpole
* replaced atari_arg_parser with common_arg_parser
* run.py can run algos from both baselines and rl_algs
* added approximate humanoid reward with ppo2 into the README for reference
* dummy commit to RUN BENCHMARKS
* dummy commit to RUN BENCHMARKS
* dummy commit to RUN BENCHMARKS
* dummy commit to RUN BENCHMARKS
* very dummy commit to RUN BENCHMARKS
* serialize variables as a dict, not as a list
* running_mean_std uses tensorflow variables
* fixed import in vec_normalize
* dummy commit to RUN BENCHMARKS
* dummy commit to RUN BENCHMARKS
* flake8 complaints
* save all variables to make sure we save the vec_normalize normalization
* benchmarks on ppo2 only RUN BENCHMARKS
* make_atari_env compatible with mpi
* run ppo_mpi benchmarks only RUN BENCHMARKS
* hardcode names of retro environments
* add defaults
* changed default ppo2 lr schedule to linear RUN BENCHMARKS
* non-tf normalization benchmark RUN BENCHMARKS
* use ncpu=1 for mujoco sessions - gives a bit of a performance speedup
* reverted running_mean_std to use property decorators for mean, var, count
* reverted VecNormalize to use RunningMeanStd (no tf)
* reverted VecNormalize to use RunningMeanStd (no tf)
* profiling wip
* use VecNormalize with regular RunningMeanStd
* added acer runner (missing import)
* flake8 complaints
* added a note in README about TfRunningMeanStd and serialization of VecNormalize
* dummy commit to RUN BENCHMARKS
* merged benchmarks branch

import pytest

from baselines.common.tests.envs.fixed_sequence_env import FixedSequenceEnv
from baselines.common.tests.util import simple_test
from baselines.run import get_learn_function

# kwargs shared by every algorithm under test
common_kwargs = dict(
    seed=0,
    total_timesteps=50000,
)

# per-algorithm overrides passed to the learn function
learn_kwargs = {
    'a2c': {},
    'ppo2': dict(nsteps=10, ent_coef=0.0, nminibatches=1),
    # TODO enable sequential models for trpo_mpi (proper handling of nbatch and nsteps)
    # github issue: https://github.com/openai/baselines/issues/188
    # 'trpo_mpi': lambda e, p: trpo_mpi.learn(policy_fn=p(env=e), env=e, max_timesteps=30000, timesteps_per_batch=100, cg_iters=10, gamma=0.9, lam=1.0, max_kl=0.001)
}

alg_list = learn_kwargs.keys()
rnn_list = ['lstm']

@pytest.mark.slow
@pytest.mark.parametrize("alg", alg_list)
@pytest.mark.parametrize("rnn", rnn_list)
def test_fixed_sequence(alg, rnn):
    '''
    Test if the algorithm (with a recurrent policy)
    can learn to reproduce a fixed sequence of actions
    in FixedSequenceEnv
    '''

    kwargs = learn_kwargs[alg]
    kwargs.update(common_kwargs)

    episode_len = 5
    env_fn = lambda: FixedSequenceEnv(10, episode_len=episode_len)
    learn = lambda e: get_learn_function(alg)(
        env=e,
        network=rnn,
        **kwargs
    )

    simple_test(env_fn, learn, 0.7)

if __name__ == '__main__':
    test_fixed_sequence('ppo2', 'lstm')
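For context on what this test exercises, below is a minimal sketch of a fixed-sequence environment, under the assumption that FixedSequenceEnv behaves along these lines: the observation carries no information about which step of the episode the agent is on, and reward is given only for emitting a fixed, randomly drawn action sequence. The class name MinimalFixedSequenceEnv, its constructor arguments, and the reward details are illustrative assumptions, not the actual code in baselines.common.tests.envs.fixed_sequence_env.

# Illustrative sketch only -- not the environment imported by the test above.
import numpy as np
import gym
from gym.spaces import Discrete


class MinimalFixedSequenceEnv(gym.Env):
    def __init__(self, n_actions=10, episode_len=5, seed=0):
        self.action_space = Discrete(n_actions)
        self.observation_space = Discrete(1)  # constant observation, carries no signal
        self.episode_len = episode_len
        # fixed target sequence that the policy has to memorize during training
        self.sequence = np.random.RandomState(seed).randint(n_actions, size=episode_len)
        self.time = 0

    def reset(self):
        self.time = 0
        return 0

    def step(self, action):
        # reward 1 only when the action matches the fixed sequence at the current step
        reward = float(action == self.sequence[self.time])
        self.time += 1
        done = self.time >= self.episode_len
        return 0, reward, done, {}

Under this assumption, a feed-forward policy sees the same observation at every step and cannot tell which element of the sequence comes next, which is why the test is parametrized over recurrent networks ('lstm') only. simple_test is presumed to train with the given learn function and then check the resulting policy against the 0.7 threshold; the exact evaluation logic lives in baselines.common.tests.util.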