updated docstring for deepq

2018-10-19 17:50:54 -07:00
parent d0cc325e14
commit bd390c2ade
1 changed files with 6 additions and 10 deletions
--- a/baselines/deepq/deepq.py
+++ b/baselines/deepq/deepq.py
@@ -124,16 +124,12 @@ def learn(env,
    -------
    env: gym.Env
        environment to train on
-    q_func: (tf.Variable, int, str, bool) -> tf.Variable
-        the model that takes the following inputs:
-            observation_in: object
-                the output of observation placeholder
-            num_actions: int
-                number of actions
-            scope: str
-            reuse: bool
-                should be passed to outer variable scope
-        and returns a tensor of shape (batch_size, num_actions) with values of every action.
+    network: string or a function
+        neural network to use as a q function approximator. If string, has to be one of the names of registered models in baselines.common.models
+        (mlp, cnn, conv_only). If a function, should take an observation tensor and return a latent variable tensor, which
+        will be mapped to the Q function heads (see build_q_func in baselines.deepq.models for details on that)
+    seed: int or None
+        prng seed. The runs with the same seed "should" give the same results. If None, no seeding is used.
    lr: float
        learning rate for adam optimizer
    total_timesteps: int