add comparison for expert/BC/gail
@@ -15,6 +15,7 @@ import tensorflow as tf
|
||||
import run_mujoco
|
||||
import mlp_policy
|
||||
from baselines.common import set_global_seeds, tf_util as U
|
||||
from baselines.common.misc_util import boolean_flag
|
||||
from dataset.mujoco_dset import Mujoco_Dset
|
||||
|
||||
|
||||
@@ -35,14 +36,15 @@ def argsparser():
|
||||
parser.add_argument('--policy_hidden_size', type=int, default=100)
|
||||
parser.add_argument('--env', type=str, choices=['Hopper', 'Walker2d', 'HalfCheetah',
|
||||
'Humanoid', 'HumanoidStandup'])
|
||||
boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def evaluate_env(env_name, seed, policy_hidden_size):
|
||||
def evaluate_env(env_name, seed, policy_hidden_size, stochastic, reuse, prefix):
|
||||
|
||||
def get_checkpoint_dir(checkpoint_list, limit):
|
||||
def get_checkpoint_dir(checkpoint_list, limit, prefix):
|
||||
for checkpoint in checkpoint_list:
|
||||
if 'transition_limitation_'+str(limit) in checkpoint:
|
||||
if ('limitation_'+str(limit) in checkpoint) and (prefix in checkpoint):
|
||||
return checkpoint
|
||||
return None
|
||||
|
||||
@@ -63,7 +65,7 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
||||
for i, limit in enumerate(CONFIG['traj_limitation']):
|
||||
# Do one evaluation
|
||||
upper_bound = sum(dataset.rets[:limit])/limit
|
||||
checkpoint_dir = get_checkpoint_dir(checkpoint_list, limit)
|
||||
checkpoint_dir = get_checkpoint_dir(checkpoint_list, limit, prefix=prefix)
|
||||
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
|
||||
env = gym.make(env_name + '-v1')
|
||||
env.seed(seed)
|
||||
@@ -72,9 +74,9 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
||||
policy_fn,
|
||||
checkpoint_path,
|
||||
timesteps_per_batch=1024,
|
||||
number_trajs=100,
|
||||
stochastic_policy=False,
|
||||
reuse=(i != 0))
|
||||
number_trajs=10,
|
||||
stochastic_policy=stochastic,
|
||||
reuse=((i != 0) or reuse))
|
||||
normalized_ret = avg_ret/upper_bound
|
||||
print('Upper bound: {}, evaluation returns: {}, normalized scores: {}'.format(
|
||||
upper_bound, avg_ret, normalized_ret))
|
||||
@@ -87,25 +89,37 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
||||
return log
|
||||
|
||||
|
||||
def plot(env_name, log):
|
||||
upper_bound = log['upper_bound']
|
||||
avg_ret = log['avg_ret']
|
||||
def plot(env_name, bc_log, gail_log, stochastic):
|
||||
upper_bound = bc_log['upper_bound']
|
||||
bc_avg_ret = bc_log['avg_ret']
|
||||
gail_avg_ret = gail_log['avg_ret']
|
||||
plt.plot(CONFIG['traj_limitation'], upper_bound)
|
||||
plt.plot(CONFIG['traj_limitation'], avg_ret)
|
||||
plt.plot(CONFIG['traj_limitation'], bc_avg_ret)
|
||||
plt.plot(CONFIG['traj_limitation'], gail_avg_ret)
|
||||
plt.title('{} unnormalized scores'.format(env_name))
|
||||
plt.legend(['expert', 'imitator'], loc='lower left')
|
||||
plt.legend(['expert', 'bc-imitator', 'gail-imitator'], loc='lower right')
|
||||
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
||||
plt.savefig('result/{}-unnormalized-scores.png'.format(env_name))
|
||||
if stochastic:
|
||||
title_name = 'result/{}-unnormalized-stochastic-scores.png'.format(env_name)
|
||||
else:
|
||||
title_name = 'result/{}-unnormalized-deterministic-scores.png'.format(env_name)
|
||||
plt.savefig(title_name)
|
||||
plt.close()
|
||||
|
||||
normalized_ret = log['normalized_ret']
|
||||
bc_normalized_ret = bc_log['normalized_ret']
|
||||
gail_normalized_ret = gail_log['normalized_ret']
|
||||
plt.plot(CONFIG['traj_limitation'], np.ones(len(CONFIG['traj_limitation'])))
|
||||
plt.plot(CONFIG['traj_limitation'], normalized_ret)
|
||||
plt.plot(CONFIG['traj_limitation'], bc_normalized_ret)
|
||||
plt.plot(CONFIG['traj_limitation'], gail_normalized_ret)
|
||||
plt.title('{} normalized scores'.format(env_name))
|
||||
plt.legend(['expert', 'imitator'], loc='lower left')
|
||||
plt.legend(['expert', 'bc-imitator', 'gail-imitator'], loc='lower right')
|
||||
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
||||
if stochastic:
|
||||
title_name = 'result/{}-normalized-stochastic-scores.png'.format(env_name)
|
||||
else:
|
||||
title_name = 'result/{}-normalized-deterministic-scores.png'.format(env_name)
|
||||
plt.ylim(0, 1.6)
|
||||
plt.savefig('result/{}-normalized-scores.png'.format(env_name))
|
||||
plt.savefig(title_name)
|
||||
plt.close()
|
||||
|
||||
|
||||
@@ -113,10 +127,15 @@ def main(args):
|
||||
U.make_session(num_cpu=1).__enter__()
|
||||
set_global_seeds(args.seed)
|
||||
print('Evaluating {}'.format(args.env))
|
||||
log = evaluate_env(args.env, args.seed, args.policy_hidden_size)
|
||||
bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
|
||||
args.stochastic_policy, False, 'BC')
|
||||
print('Evaluation for {}'.format(args.env))
|
||||
print(log)
|
||||
plot(args.env, log)
|
||||
print(bc_log)
|
||||
gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
|
||||
args.stochastic_policy, True, 'gail')
|
||||
print('Evaluation for {}'.format(args.env))
|
||||
print(gail_log)
|
||||
plot(args.env, bc_log, gail_log, args.stochastic_policy)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 44 KiB |
BIN
baselines/gail/result/Hopper-normalized-deterministic-scores.png
Normal file
After Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 25 KiB |
BIN
baselines/gail/result/Hopper-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 40 KiB |
BIN
baselines/gail/result/Hopper-unnormalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 30 KiB |
BIN
baselines/gail/result/Humanoid-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 45 KiB |
Before Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 31 KiB |
Before Width: | Height: | Size: 27 KiB |
BIN
baselines/gail/result/Walker2d-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 38 KiB |
Before Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 38 KiB |
@@ -1,19 +1,35 @@
|
||||
# Results for GAIL on Mujoco
|
||||
|
||||
Here are the extensive results for applying GAIL on Mujoco environments, including
|
||||
Hopper, Walker2d, HalfCheetah, Humanoid, HumanoidStandup. For all environments, the
|
||||
Here are the extensive results for applying GAIL/BC on Mujoco environments, including
|
||||
Hopper, Walker2d, HalfCheetah, Humanoid, HumanoidStandup. Every imitator is evaluated with seed 0.
|
||||
|
||||
## Details about the GAIL imitator
|
||||
|
||||
For all environments, the
|
||||
imitator is trained with 1, 5, 10, 50 trajectories, where each trajectory contains at most
|
||||
1024 transitions, and seed 0, 1, 2, 3, respectively.
|
||||
|
||||
## Details about the BC imitators
|
||||
|
||||
All BC imitators are trained with seed 0.
|
||||
|
||||
## Results
|
||||
|
||||
### Deterministic Policy (Set std=0)
|
||||
| | Un-normalized | Normalized |
|
||||
|---|---|---|
|
||||
| Hopper-v1 | <img src='Hopper-unnormalized-scores.png'> | <img src='Hopper-normalized-scores.png'> |
|
||||
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-scores.png'> | <img src='HalfCheetah-normalized-scores.png'> |
|
||||
| Walker2d-v1 | <img src='Walker2d-unnormalized-scores.png'> | <img src='Walker2d-normalized-scores.png'> |
|
||||
| Humanoid-v1 | <img src='Humanoid-unnormalized-scores.png'> | <img src='Humanoid-normalized-scores.png'> |
|
||||
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-scores.png'> | <img src='HumanoidStandup-normalized-scores.png'> |
|
||||
| Hopper-v1 | <img src='Hopper-unnormalized-deterministic-scores.png'> | <img src='Hopper-normalized-deterministic-scores.png'> |
|
||||
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-deterministic-scores.png'> | <img src='HalfCheetah-normalized-deterministic-scores.png'> |
|
||||
| Walker2d-v1 | <img src='Walker2d-unnormalized-deterministic-scores.png'> | <img src='Walker2d-normalized-deterministic-scores.png'> |
|
||||
| Humanoid-v1 | <img src='Humanoid-unnormalized-deterministic-scores.png'> | <img src='Humanoid-normalized-deterministic-scores.png'> |
|
||||
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-deterministic-scores.png'> | <img src='HumanoidStandup-normalized-deterministic-scores.png'> |
|
||||
|
||||
### Stochastic Policy
|
||||
| | Un-normalized | Normalized |
|
||||
|---|---|---|
|
||||
| Hopper-v1 | <img src='Hopper-unnormalized-stochastic-scores.png'> | <img src='Hopper-normalized-stochastic-scores.png'> |
|
||||
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-stochastic-scores.png'> | <img src='HalfCheetah-normalized-stochastic-scores.png'> |
|
||||
| Walker2d-v1 | <img src='Walker2d-unnormalized-stochastic-scores.png'> | <img src='Walker2d-normalized-stochastic-scores.png'> |
|
||||
| Humanoid-v1 | <img src='Humanoid-unnormalized-stochastic-scores.png'> | <img src='Humanoid-normalized-stochastic-scores.png'> |
|
||||
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-stochastic-scores.png'> | <img src='HumanoidStandup-normalized-stochastic-scores.png'> |
|
||||
|
||||
### Details
|
||||
Each imitator is evaluated with a random seed equal to 0.
|
||||
|