Merge pull request #22 from ngc92/doc_fixes

docstring and comment fixes
This commit is contained in:
Szymon Sidor
2017-06-04 00:41:34 -07:00
committed by GitHub
2 changed files with 5 additions and 5 deletions

View File

@@ -27,7 +27,7 @@ The functions in this file are used to create the following functions:
Function that takes a transition (s,a,r,s') and optimizes Bellman equation's error:
td_error = Q(s,a) - (r + gamma * max_a' Q(s', a'))
loss = hauber_loss[td_error]
loss = huber_loss[td_error]
Parameters
----------
@@ -127,12 +127,12 @@ def build_act(make_obs_ph, q_func, num_actions, scope="deepq", reuse=None):
def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0, double_q=True, scope="deepq", reuse=None):
"""Creates the act function:
"""Creates the train function:
Parameters
----------
make_obs_ph: str -> tf.placeholder or TfInput
a function that take a name and creates a placeholder of input with that name
a function that takes a name and creates a placeholder of input with that name
q_func: (tf.Variable, int, str, bool) -> tf.Variable
the model that takes the following inputs:
observation_in: object
@@ -150,7 +150,7 @@ def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=
optimizer: tf.train.Optimizer
optimizer to use for the Q-learning objective.
grad_norm_clipping: float or None
clip graident norms to this value. If None no clipping is performed.
clip gradient norms to this value. If None no clipping is performed.
gamma: float
discount rate.
double_q: bool

View File

@@ -123,11 +123,11 @@ def learn(env,
final value of random action probability
train_freq: int
update the model every `train_freq` steps.
set to None to disable printing
batch_size: int
size of a batch sampled from replay buffer for training
print_freq: int
how often to print out training progress
set to None to disable printing
checkpoint_freq: int
how often to save the model. This is so that the best version is restored
at the end of the training. If you do not wish to restore the best version at