add download link to readme and add description to python file
@@ -1,14 +1,20 @@
# GAIL
# Generative Adversarial Imitation Learning (GAIL)

- Original paper: https://arxiv.org/abs/1606.03476

For benchmarking results on MuJoCo, please navigate to [here](result/gail-result.md)

## If you want to train an imitation learning agent

### Step 1: Download expert data

Download the expert data into `./data`

### Step 2: Imitation learning

Download [Link](https://drive.google.com/drive/folders/1h-bK09Emrteu7vUXllZLRqTx7iNOaxGI?usp=sharing)

### Step 2: Run GAIL

Run with a single thread:

```bash
python -m baselines.gail.run_mujoco
@@ -22,4 +28,14 @@ mpirun -np 16 python -m baselines.gail.run_mujoco

See help (`-h`) for more options.

#### In case you want to run Behavior Cloning (BC)

```bash
python -m baselines.gail.behavior_clone
```

See help (`-h`) for more options.

## Reference repositories

- @openai/imitation
- @carpedm20/deep-rl-tensorflow

@@ -1,4 +1,7 @@
# Reference: https://github.com/openai/imitation
'''
Reference: https://github.com/openai/imitation
I follow the architecture from the official repository
'''

import tensorflow as tf
import numpy as np

@@ -1,3 +1,7 @@
'''
This code is used to train a BC imitator, or to pretrain the GAIL imitator.
'''

import argparse
import tempfile
import os.path as osp
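
Behavior cloning itself is just supervised learning on the expert's (observation, action) pairs. A minimal sketch of that idea, independent of this repository's actual model and options (the linear policy, synthetic data, and learning rate below are illustrative assumptions, not what `baselines.gail.behavior_clone` does):

```python
# Behavior cloning in miniature: regress a policy onto expert (obs, action) pairs.
# Everything here (linear policy, synthetic data, learning rate) is illustrative.
import numpy as np

rng = np.random.default_rng(0)

# Fake "expert" data: 1000 transitions, 11-dim observations, 3-dim actions.
obs = rng.normal(size=(1000, 11)).astype(np.float32)
true_W = rng.normal(size=(11, 3)).astype(np.float32)
acs = obs @ true_W + 0.01 * rng.normal(size=(1000, 3)).astype(np.float32)

# Linear policy a = s @ W, fit by gradient descent on mean-squared error.
W = np.zeros((11, 3), dtype=np.float32)
lr = 0.01
for _ in range(500):
    pred = obs @ W
    W -= lr * (obs.T @ (pred - acs)) / len(obs)   # gradient of 0.5 * MSE

print('final BC loss:', float(np.mean((obs @ W - acs) ** 2)))
```

The real script fits the repository's neural-network policy to the expert `.npz` data described below rather than a toy linear model.
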
@@ -1,3 +1,10 @@
'''
Data structure of the input .npz:
the data is saved in python dictionary format with keys: 'acs', 'ep_rets', 'rews', 'obs'
the value of each key is a list storing the expert trajectory sequentially
a transition can be: (data['obs'][t], data['acs'][t], data['obs'][t+1]) with reward data['rews'][t]
'''

from baselines import logger
import numpy as np
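
Given that layout, here is a small sketch of loading and inspecting such a file; the path is a placeholder for whatever was downloaded into `./data`, and the exact array shapes depend on that file:

```python
# Inspect an expert .npz with the keys described in the docstring above.
# 'data/expert_data.npz' is a placeholder path, not a file shipped with the repo.
import numpy as np

data = np.load('data/expert_data.npz', allow_pickle=True)
print(data.files)                        # expected keys: 'acs', 'ep_rets', 'rews', 'obs'
print('episodes:', len(data['ep_rets']))

# Following the docstring's indexing convention, one transition and its reward:
obs, acs, rews = data['obs'], data['acs'], data['rews']
t = 0
transition = (obs[t], acs[t], obs[t + 1])
reward_t = rews[t]
print('reward at t=0:', reward_t)
```
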
@@ -1,5 +1,5 @@
'''
This code is used to evaluate the imitators trained with different trajectories
This code is used to evaluate the imitators trained with different numbers of trajectories
and plot the results in the same figure for easy comparison.
'''
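
The comparison the docstring describes amounts to plotting one curve per method over the number of expert trajectories. A sketch of that figure with matplotlib, using made-up numbers purely as placeholders (the real script reads its values from trained checkpoints):

```python
# Illustrative sketch of "plot the results in the same figure": returns of imitators
# trained with different numbers of expert trajectories, one curve per method.
# The numbers below are placeholders, not measured results.
import matplotlib.pyplot as plt

num_trajs = [4, 11, 18, 25]
bc_returns = [1200.0, 1600.0, 1900.0, 2100.0]    # placeholder values
gail_returns = [2800.0, 3100.0, 3300.0, 3400.0]  # placeholder values

plt.plot(num_trajs, bc_returns, marker='o', label='BC')
plt.plot(num_trajs, gail_returns, marker='o', label='GAIL')
plt.xlabel('number of expert trajectories')
plt.ylabel('average return')
plt.legend()
plt.savefig('gail_vs_bc.png')
```
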
@@ -1,5 +1,5 @@
'''
from baselines/ppo1/mlp_policy.py
from baselines/ppo1/mlp_policy.py with two simple modifications:
(1) add reuse argument
(2) cache the `stochastic` placeholder
'''
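
A rough TF1-style sketch of what those two modifications mean in practice; the function and variable names here are invented for illustration and are not the file's actual code:

```python
# Illustration of (1) a reuse flag and (2) a cached `stochastic` placeholder,
# in TensorFlow 1.x style. Names are hypothetical, not from mlp_policy.py.
import tensorflow as tf  # assumes TensorFlow 1.x, as used by baselines at the time

def build_policy(obs, name, reuse=False):
    # (1) reuse argument: a second call with reuse=True shares the existing
    # variables (e.g. for an "old policy" copy) instead of creating new ones.
    with tf.variable_scope(name, reuse=reuse):
        hidden = tf.layers.dense(obs, 64, activation=tf.tanh, name='fc1')
        return tf.layers.dense(hidden, 3, name='out')

_stochastic_ph = None

def get_stochastic_placeholder():
    # (2) caching: create the boolean `stochastic` placeholder once and return
    # the same tensor afterwards, so rebuilding the policy does not add
    # duplicate placeholders to the graph.
    global _stochastic_ph
    if _stochastic_ph is None:
        _stochastic_ph = tf.placeholder(tf.bool, shape=(), name='stochastic')
    return _stochastic_ph

obs_ph = tf.placeholder(tf.float32, shape=(None, 11), name='obs')
pi = build_policy(obs_ph, 'pi')                   # creates pi/fc1, pi/out
pi_old = build_policy(obs_ph, 'pi', reuse=True)   # reuses the same weights
stochastic = get_stochastic_placeholder()
```
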
@@ -1,5 +1,5 @@
'''
Disclaimer: this code is heavily based on trpo_mpi at openai/baselines and openai/imitation
Disclaimer: this code is heavily based on trpo_mpi at @openai/baselines and @openai/imitation
'''

import argparse
@@ -1,3 +1,7 @@
'''
Disclaimer: The TRPO part relies heavily on trpo_mpi at @openai/baselines
'''

import time
import os
from contextlib import contextmanager