mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 22:11:25 +00:00
Fixes according to feedback to pull #104
@@ -1,23 +0,0 @@
'''
Created on May 14, 2016

@author: iaroslav
'''

import gym
import time

env = gym.make('ConvergenceControl-v0')

for i_episode in xrange(20):

    observation = env.reset()

    for t in xrange(100):

        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        env.render()

        if done:
            print "Final result:"
            env.render()
            break
@@ -27,6 +27,11 @@ def test_env(spec):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
    if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
        logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
        return

    env = spec.make()
    ob_space = env.observation_space
    act_space = env.action_space
@@ -51,6 +51,12 @@ add_group(
    description='Continuous control tasks, running in a fast physics simulator.'
)

add_group(
    id='parameter_tuning',
    name='Parameter tuning',
    description='Tune parameters of costly experiments to obtain better outcomes.'
)

add_group(
    id='toy_text',
    name='Toy text',
@@ -340,6 +346,44 @@ The robot model was originally created by Tassa et al. [Tassa12]_.
""",
)

# parameter tuning

add_task(
    id='ConvergenceControl-v0',
    group='parameter_tuning',
    experimental=True,
    summary="Set and adjust training parameters of a deep CNN to improve the end result.",
    description="""\
The agent adjusts parameters such as the step size and momentum during the
training of a deep convolutional neural network in order to improve its
convergence and the quality of the end result. One episode in this environment
is the training of one neural network for 20 epochs; the agent can adjust the
parameters at the beginning of every epoch.
""",
    background="""\
The parameters the agent can adjust are the learning rate and momentum coefficients for SGD,
the batch size, and the L1 and L2 penalties. As feedback, the agent receives the number of
instances and labels in the dataset, a description of the network architecture, and the
validation accuracy for every epoch.

The network architecture and the dataset are selected randomly at the beginning of an
episode. The datasets used are MNIST, CIFAR10, and CIFAR100. The network architectures are
multilayer convnets 66% of the time and classic feedforward nets otherwise.

The number of instances in a dataset is chosen at random, ranging from roughly 100% down to
10% of the full dataset, so that adjusting the L1 and L2 penalty coefficients makes more of
a difference.

Let a denote the best accuracy achieved so far at every epoch. The reward at every step is
then a + a*a. On the one hand, this encourages fast convergence, since it improves the
cumulative reward over the episode. On the other hand, improving the best achieved accuracy
is expected to improve the cumulative reward quadratically, encouraging the agent to
converge quickly while also reaching a high best validation accuracy.

As the number of labels increases, the learning problem becomes more difficult for a fixed
dataset size. To keep the agent from ignoring the more complex datasets, on which accuracy
is low, and concentrating on simple cases that bring the bulk of the reward, the accuracy
is normalized by the number of labels in the dataset.
""",
)

# toy text

add_task(
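To make the reward rule above concrete, here is a minimal Python sketch (not taken from the environment's code; the function name, its arguments, and the exact normalization are illustrative assumptions):

# Minimal sketch of the reward rule described above: a is the best
# label-normalized validation accuracy achieved so far, and each epoch
# yields reward a + a*a.
def epoch_reward(val_accuracies, n_labels):
    # Normalize accuracy by the number of labels so that harder,
    # many-label datasets are not ignored in favour of easy ones.
    normalized = [acc / float(n_labels) for acc in val_accuracies]
    a = max(normalized)   # best normalized accuracy achieved so far
    return a + a * a      # linear term rewards progress, quadratic term rewards the best value

# Example: three epochs of validation accuracy on a 10-label dataset.
print(epoch_reward([0.52, 0.61, 0.58], n_labels=10))  # a = 0.061, reward ~ 0.0647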
setup.py
@@ -22,7 +22,8 @@ setup(name='gym',
          'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL',
                  'pachi-py>=0.0.19',
                  'box2d-py',
                  'mujoco_py>=0.4.3', 'imageio'],
                  'mujoco_py>=0.4.3', 'imageio',
                  'keras', 'theano'],

          # Environment-specific dependencies. Keep these in sync with
          # 'all'!
@@ -31,6 +32,7 @@ setup(name='gym',
          'box2d': ['box2d-py'],
          'classic_control': ['pyglet', 'PyOpenGL'],
          'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
          'parameter_tuning': ['keras', 'theano'],
      },
      package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
      tests_require=['nose2', 'mock'],
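As a usage note on the new extra: the 'parameter_tuning' entry makes Keras and Theano optional dependencies. A small, hypothetical check (not part of the repository) that they are installed could look like this:

# Hypothetical helper, not part of gym: verify that the optional
# dependencies declared under the 'parameter_tuning' extra are importable.
import importlib

for module_name in ("keras", "theano"):
    try:
        importlib.import_module(module_name)
        print("{} is available".format(module_name))
    except ImportError:
        # The extra itself would typically be installed with:
        #     pip install gym[parameter_tuning]
        print("{} is missing".format(module_name))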