add download link to readme and add description to python file

This commit is contained in:
andrew
2017-12-07 12:08:20 -08:00
parent 000033973b
commit 11604f7cc9
8 changed files with 40 additions and 6 deletions

View File

@@ -1,14 +1,20 @@
# Generative Adversarial Imitation Learning (GAIL)

- Original paper: https://arxiv.org/abs/1606.03476

For benchmark results on MuJoCo, see [here](result/gail-result.md).

## If you want to train an imitation learning agent

### Step 1: Download expert data

Download the expert data into `./data`: [download link](https://drive.google.com/drive/folders/1h-bK09Emrteu7vUXllZLRqTx7iNOaxGI?usp=sharing)

### Step 2: Run GAIL

Run with a single thread:
```bash
python -m baselines.gail.run_mujoco
```
@@ -22,4 +28,14 @@ mpirun -np 16 python -m baselines.gail.run_mujoco
See help (`-h`) for more options.
#### In case you want to run Behavior Cloning (BC)
```bash
python -m baselines.gail.behavior_clone
```
See help (`-h`) for more options.
## Reference repositories
- @openai/imitation
- @carpedm20/deep-rl-tensorflow
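Behavior cloning itself reduces to supervised learning on expert (observation, action) pairs. A minimal sketch with a linear policy and synthetic data (all shapes and values here are illustrative, not the repo's setup):

```python
import numpy as np

rng = np.random.default_rng(0)
# Synthetic "expert": actions are a fixed linear function of observations.
W_true = rng.normal(size=(3, 2))
obs = rng.normal(size=(100, 3))   # 100 observations, 3-dim
acs = obs @ W_true                # matching 2-dim expert actions

# Behavior cloning: fit a policy to imitate the expert, here via least squares.
W_bc, *_ = np.linalg.lstsq(obs, acs, rcond=None)
mse = np.mean((obs @ W_bc - acs) ** 2)
```

With noiseless linear data the cloned policy recovers the expert exactly; real BC replaces the least-squares fit with gradient descent on a neural-network policy.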

View File

@@ -1,4 +1,7 @@
'''
Reference: https://github.com/openai/imitation
The architecture follows the official repository.
'''
import tensorflow as tf
import numpy as np
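The adversary is GAIL's discriminator: a binary classifier over (state, action) pairs whose output is turned into a surrogate reward for the policy. A toy NumPy sketch of that reward (the linear "discriminator" and random features here are illustrative, not the repo's architecture):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
w = rng.normal(size=5)            # toy linear discriminator weights
sa = rng.normal(size=(4, 5))      # batch of 4 (state, action) feature vectors

d = sigmoid(sa @ w)               # D(s, a): probability the pair is "expert"
# One common form of GAIL's surrogate reward for the policy:
reward = -np.log(1.0 - d + 1e-8)  # higher when the pair fools the discriminator
```

The policy is then trained (with TRPO in this repo) to maximize this reward while the discriminator is trained to separate expert from policy pairs.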

View File

@@ -1,3 +1,7 @@
'''
This code is used to train a BC imitator or to pretrain a GAIL imitator.
'''
import argparse
import tempfile
import os.path as osp

View File

@@ -1,3 +1,10 @@
'''
Data structure of the input .npz:
the data is saved as a Python dictionary with keys 'acs', 'ep_rets', 'rews', 'obs';
the value of each key is a list storing the expert trajectory sequentially;
a transition is (data['obs'][t], data['acs'][t], data['obs'][t+1]) with reward data['rews'][t].
'''
from baselines import logger
import numpy as np
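The .npz layout described in the docstring can be exercised with a small round-trip sketch (the file name, shapes, and values here are toy placeholders, not the real expert data):

```python
import numpy as np

# Toy expert data in the layout described above.
obs = np.zeros((5, 3))             # 5 timesteps of 3-dim observations
acs = np.zeros((5, 1))             # matching actions
rews = np.ones(5)                  # per-step rewards
ep_rets = np.array([rews.sum()])   # per-episode returns

np.savez('expert_toy.npz', obs=obs, acs=acs, rews=rews, ep_rets=ep_rets)

data = np.load('expert_toy.npz')
# A transition and its reward, as described in the docstring:
t = 0
transition = (data['obs'][t], data['acs'][t], data['obs'][t + 1])
reward = data['rews'][t]
```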

View File

@@ -1,5 +1,5 @@
'''
This code evaluates imitators trained with different numbers of trajectories
and plots the results in the same figure for easy comparison.
'''

View File

@@ -1,5 +1,5 @@
'''
From baselines/ppo1/mlp_policy.py, with simple modifications:
(1) add a `reuse` argument
(2) cache the `stochastic` placeholder
'''

View File

@@ -1,5 +1,5 @@
'''
Disclaimer: this code is based heavily on trpo_mpi at @openai/baselines and @openai/imitation.
'''
import argparse

View File

@@ -1,3 +1,7 @@
'''
Disclaimer: the TRPO part relies heavily on trpo_mpi at @openai/baselines.
'''
import time
import os
from contextlib import contextmanager