Merge pull request #22 from ngc92/doc_fixes

docstring and comment fixes
2017-06-04 00:41:34 -07:00
parent fba0ac30ca 02919483f2
commit 184440ffd3
2 changed files with 5 additions and 5 deletions
--- a/baselines/deepq/build_graph.py
+++ b/baselines/deepq/build_graph.py
@@ -27,7 +27,7 @@ The functions in this file are used to create the following functions:
    Function that takes a transition (s,a,r,s') and optimizes Bellman equation's error:

        td_error = Q(s,a) - (r + gamma * max_a' Q(s', a'))
-        loss = hauber_loss[td_error]
+        loss = huber_loss[td_error]

    Parameters
    ----------
@@ -127,12 +127,12 @@ def build_act(make_obs_ph, q_func, num_actions, scope="deepq", reuse=None):


 def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0, double_q=True, scope="deepq", reuse=None):
-    """Creates the act function:
+    """Creates the train function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
-        a function that take a name and creates a placeholder of input with that name
+        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
@@ -150,7 +150,7 @@ def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
-        clip graident norms to this value. If None no clipping is performed.
+        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    double_q: bool
--- a/baselines/deepq/simple.py
+++ b/baselines/deepq/simple.py
@@ -123,11 +123,11 @@ def learn(env,
        final value of random action probability
    train_freq: int
        update the model every `train_freq` steps.
-        set to None to disable printing
    batch_size: int
        size of a batched sampled from replay buffer for training
    print_freq: int
        how often to print out training progress
+        set to None to disable printing
    checkpoint_freq: int
        how often to save the model. This is so that the best version is restored
        at the end of the training. If you do not wish to restore the best version at