add comparison for expert/BC/gail
@@ -15,6 +15,7 @@ import tensorflow as tf
|
|||||||
import run_mujoco
|
import run_mujoco
|
||||||
import mlp_policy
|
import mlp_policy
|
||||||
from baselines.common import set_global_seeds, tf_util as U
|
from baselines.common import set_global_seeds, tf_util as U
|
||||||
|
from baselines.common.misc_util import boolean_flag
|
||||||
from dataset.mujoco_dset import Mujoco_Dset
|
from dataset.mujoco_dset import Mujoco_Dset
|
||||||
|
|
||||||
|
|
||||||
@@ -35,14 +36,15 @@ def argsparser():
|
|||||||
parser.add_argument('--policy_hidden_size', type=int, default=100)
|
parser.add_argument('--policy_hidden_size', type=int, default=100)
|
||||||
parser.add_argument('--env', type=str, choices=['Hopper', 'Walker2d', 'HalfCheetah',
|
parser.add_argument('--env', type=str, choices=['Hopper', 'Walker2d', 'HalfCheetah',
|
||||||
'Humanoid', 'HumanoidStandup'])
|
'Humanoid', 'HumanoidStandup'])
|
||||||
|
boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate')
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def evaluate_env(env_name, seed, policy_hidden_size):
|
def evaluate_env(env_name, seed, policy_hidden_size, stochastic, reuse, prefix):
|
||||||
|
|
||||||
def get_checkpoint_dir(checkpoint_list, limit):
|
def get_checkpoint_dir(checkpoint_list, limit, prefix):
|
||||||
for checkpoint in checkpoint_list:
|
for checkpoint in checkpoint_list:
|
||||||
if 'transition_limitation_'+str(limit) in checkpoint:
|
if ('limitation_'+str(limit) in checkpoint) and (prefix in checkpoint):
|
||||||
return checkpoint
|
return checkpoint
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -63,7 +65,7 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
|||||||
for i, limit in enumerate(CONFIG['traj_limitation']):
|
for i, limit in enumerate(CONFIG['traj_limitation']):
|
||||||
# Do one evaluation
|
# Do one evaluation
|
||||||
upper_bound = sum(dataset.rets[:limit])/limit
|
upper_bound = sum(dataset.rets[:limit])/limit
|
||||||
checkpoint_dir = get_checkpoint_dir(checkpoint_list, limit)
|
checkpoint_dir = get_checkpoint_dir(checkpoint_list, limit, prefix=prefix)
|
||||||
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
|
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
|
||||||
env = gym.make(env_name + '-v1')
|
env = gym.make(env_name + '-v1')
|
||||||
env.seed(seed)
|
env.seed(seed)
|
||||||
@@ -72,9 +74,9 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
|||||||
policy_fn,
|
policy_fn,
|
||||||
checkpoint_path,
|
checkpoint_path,
|
||||||
timesteps_per_batch=1024,
|
timesteps_per_batch=1024,
|
||||||
number_trajs=100,
|
number_trajs=10,
|
||||||
stochastic_policy=False,
|
stochastic_policy=stochastic,
|
||||||
reuse=(i != 0))
|
reuse=((i != 0) or reuse))
|
||||||
normalized_ret = avg_ret/upper_bound
|
normalized_ret = avg_ret/upper_bound
|
||||||
print('Upper bound: {}, evaluation returns: {}, normalized scores: {}'.format(
|
print('Upper bound: {}, evaluation returns: {}, normalized scores: {}'.format(
|
||||||
upper_bound, avg_ret, normalized_ret))
|
upper_bound, avg_ret, normalized_ret))
|
||||||
@@ -87,25 +89,37 @@ def evaluate_env(env_name, seed, policy_hidden_size):
|
|||||||
return log
|
return log
|
||||||
|
|
||||||
|
|
||||||
def plot(env_name, log):
|
def plot(env_name, bc_log, gail_log, stochastic):
|
||||||
upper_bound = log['upper_bound']
|
upper_bound = bc_log['upper_bound']
|
||||||
avg_ret = log['avg_ret']
|
bc_avg_ret = bc_log['avg_ret']
|
||||||
|
gail_avg_ret = gail_log['avg_ret']
|
||||||
plt.plot(CONFIG['traj_limitation'], upper_bound)
|
plt.plot(CONFIG['traj_limitation'], upper_bound)
|
||||||
plt.plot(CONFIG['traj_limitation'], avg_ret)
|
plt.plot(CONFIG['traj_limitation'], bc_avg_ret)
|
||||||
|
plt.plot(CONFIG['traj_limitation'], gail_avg_ret)
|
||||||
plt.title('{} unnormalized scores'.format(env_name))
|
plt.title('{} unnormalized scores'.format(env_name))
|
||||||
plt.legend(['expert', 'imitator'], loc='lower left')
|
plt.legend(['expert', 'bc-imitator', 'gail-imitator'], loc='lower right')
|
||||||
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
||||||
plt.savefig('result/{}-unnormalized-scores.png'.format(env_name))
|
if stochastic:
|
||||||
|
title_name = 'result/{}-unnormalized-stochastic-scores.png'.format(env_name)
|
||||||
|
else:
|
||||||
|
title_name = 'result/{}-unnormalized-deterministic-scores.png'.format(env_name)
|
||||||
|
plt.savefig(title_name)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
normalized_ret = log['normalized_ret']
|
bc_normalized_ret = bc_log['normalized_ret']
|
||||||
|
gail_normalized_ret = gail_log['normalized_ret']
|
||||||
plt.plot(CONFIG['traj_limitation'], np.ones(len(CONFIG['traj_limitation'])))
|
plt.plot(CONFIG['traj_limitation'], np.ones(len(CONFIG['traj_limitation'])))
|
||||||
plt.plot(CONFIG['traj_limitation'], normalized_ret)
|
plt.plot(CONFIG['traj_limitation'], bc_normalized_ret)
|
||||||
|
plt.plot(CONFIG['traj_limitation'], gail_normalized_ret)
|
||||||
plt.title('{} normalized scores'.format(env_name))
|
plt.title('{} normalized scores'.format(env_name))
|
||||||
plt.legend(['expert', 'imitator'], loc='lower left')
|
plt.legend(['expert', 'bc-imitator', 'gail-imitator'], loc='lower right')
|
||||||
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
plt.grid(b=True, which='major', color='gray', linestyle='--')
|
||||||
|
if stochastic:
|
||||||
|
title_name = 'result/{}-normalized-stochastic-scores.png'.format(env_name)
|
||||||
|
else:
|
||||||
|
title_name = 'result/{}-normalized-deterministic-scores.png'.format(env_name)
|
||||||
plt.ylim(0, 1.6)
|
plt.ylim(0, 1.6)
|
||||||
plt.savefig('result/{}-normalized-scores.png'.format(env_name))
|
plt.savefig(title_name)
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
@@ -113,10 +127,15 @@ def main(args):
|
|||||||
U.make_session(num_cpu=1).__enter__()
|
U.make_session(num_cpu=1).__enter__()
|
||||||
set_global_seeds(args.seed)
|
set_global_seeds(args.seed)
|
||||||
print('Evaluating {}'.format(args.env))
|
print('Evaluating {}'.format(args.env))
|
||||||
log = evaluate_env(args.env, args.seed, args.policy_hidden_size)
|
bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
|
||||||
|
args.stochastic_policy, False, 'BC')
|
||||||
print('Evaluation for {}'.format(args.env))
|
print('Evaluation for {}'.format(args.env))
|
||||||
print(log)
|
print(bc_log)
|
||||||
plot(args.env, log)
|
gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
|
||||||
|
args.stochastic_policy, True, 'gail')
|
||||||
|
print('Evaluation for {}'.format(args.env))
|
||||||
|
print(gail_log)
|
||||||
|
plot(args.env, bc_log, gail_log, args.stochastic_policy)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 44 KiB |
BIN
baselines/gail/result/Hopper-normalized-deterministic-scores.png
Normal file
After Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 25 KiB |
BIN
baselines/gail/result/Hopper-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 40 KiB |
BIN
baselines/gail/result/Hopper-unnormalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 40 KiB |
After Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 30 KiB |
BIN
baselines/gail/result/Humanoid-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 43 KiB |
Before Width: | Height: | Size: 37 KiB |
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 45 KiB |
Before Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 31 KiB |
Before Width: | Height: | Size: 27 KiB |
BIN
baselines/gail/result/Walker2d-normalized-stochastic-scores.png
Normal file
After Width: | Height: | Size: 32 KiB |
After Width: | Height: | Size: 38 KiB |
Before Width: | Height: | Size: 43 KiB |
After Width: | Height: | Size: 38 KiB |
@@ -1,19 +1,35 @@
|
|||||||
# Results for GAIL on Mujoco
|
# Results for GAIL on Mujoco
|
||||||
|
|
||||||
Here's the extensive results for applying GAIL on Mujoco environment, including
|
Here are the extensive results for applying GAIL/BC on Mujoco environments, including
|
||||||
Hopper, Walker2d, HalfCheetah, Humanoid, HumanoidStandup. For all environments, the
|
Hopper, Walker2d, HalfCheetah, Humanoid, HumanoidStandup. Every imitator is evaluated with seed 0.
|
||||||
|
|
||||||
|
## Details about the GAIL imitator
|
||||||
|
|
||||||
|
For all environments, the
|
||||||
imitator is trained with 1, 5, 10, 50 trajectories, where each trajectory contains at most
|
imitator is trained with 1, 5, 10, 50 trajectories, where each trajectory contains at most
|
||||||
1024 transitions, and seed 0, 1, 2, 3, respectively.
|
1024 transitions, and seed 0, 1, 2, 3, respectively.
|
||||||
|
|
||||||
|
## Details about the BC imitators
|
||||||
|
|
||||||
|
All BC imitators are trained with seed 0.
|
||||||
|
|
||||||
## Results
|
## Results
|
||||||
|
|
||||||
|
### Deterministic Policy (Set std=0)
|
||||||
| | Un-normalized | Normalized |
|
| | Un-normalized | Normalized |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| Hopper-v1 | <img src='Hopper-unnormalized-scores.png'> | <img src='Hopper-normalized-scores.png'> |
|
| Hopper-v1 | <img src='Hopper-unnormalized-deterministic-scores.png'> | <img src='Hopper-normalized-deterministic-scores.png'> |
|
||||||
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-scores.png'> | <img src='HalfCheetah-normalized-scores.png'> |
|
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-deterministic-scores.png'> | <img src='HalfCheetah-normalized-deterministic-scores.png'> |
|
||||||
| Walker2d-v1 | <img src='Walker2d-unnormalized-scores.png'> | <img src='Walker2d-normalized-scores.png'> |
|
| Walker2d-v1 | <img src='Walker2d-unnormalized-deterministic-scores.png'> | <img src='Walker2d-normalized-deterministic-scores.png'> |
|
||||||
| Humanoid-v1 | <img src='Humanoid-unnormalized-scores.png'> | <img src='Humanoid-normalized-scores.png'> |
|
| Humanoid-v1 | <img src='Humanoid-unnormalized-deterministic-scores.png'> | <img src='Humanoid-normalized-deterministic-scores.png'> |
|
||||||
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-scores.png'> | <img src='HumanoidStandup-normalized-scores.png'> |
|
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-deterministic-scores.png'> | <img src='HumanoidStandup-normalized-deterministic-scores.png'> |
|
||||||
|
|
||||||
|
### Stochastic Policy
|
||||||
|
| | Un-normalized | Normalized |
|
||||||
|
|---|---|---|
|
||||||
|
| Hopper-v1 | <img src='Hopper-unnormalized-stochastic-scores.png'> | <img src='Hopper-normalized-stochastic-scores.png'> |
|
||||||
|
| HalfCheetah-v1 | <img src='HalfCheetah-unnormalized-stochastic-scores.png'> | <img src='HalfCheetah-normalized-stochastic-scores.png'> |
|
||||||
|
| Walker2d-v1 | <img src='Walker2d-unnormalized-stochastic-scores.png'> | <img src='Walker2d-normalized-stochastic-scores.png'> |
|
||||||
|
| Humanoid-v1 | <img src='Humanoid-unnormalized-stochastic-scores.png'> | <img src='Humanoid-normalized-stochastic-scores.png'> |
|
||||||
|
| HumanoidStandup-v1 | <img src='HumanoidStandup-unnormalized-stochastic-scores.png'> | <img src='HumanoidStandup-normalized-stochastic-scores.png'> |
|
||||||
|
|
||||||
### details
|
|
||||||
Each imitator is evaluated with random seed equals to 0.
|
|
||||||
|