mirror of https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 22:11:25 +00:00
Fixes according to feedback to pull #104
@@ -1,23 +0,0 @@
-'''
-Created on May 14, 2016
-
-@author: iaroslav
-'''
-import gym
-import time
-
-env = gym.make('ConvergenceControl-v0')
-for i_episode in xrange(20):
-
-    observation = env.reset()
-
-    for t in xrange(100):
-
-        action = env.action_space.sample()
-        observation, reward, done, info = env.step(action)
-        env.render()
-
-        if done:
-            print "Final result:"
-            env.render()
-            break
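Note: the removed example targets the Python 2-era gym API (xrange, print statements, a four-value step return). A minimal sketch of the same random-rollout loop against the current Gymnasium API might look as follows; ConvergenceControl-v0 is no longer registered, so a stand-in environment id is assumed here.

import gymnasium as gym

# 'ConvergenceControl-v0' was removed from the registry long ago;
# any registered id serves as a stand-in for this sketch.
env = gym.make("CartPole-v1")

for i_episode in range(20):
    observation, info = env.reset()
    for t in range(100):
        action = env.action_space.sample()  # random policy, as in the removed script
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            print("Final result:")
            break
env.close()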
@@ -27,6 +27,11 @@ def test_env(spec):
         logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
         return
 
+    # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
+    if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
+        logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
+        return
+
     env = spec.make()
     ob_space = env.observation_space
     act_space = env.action_space
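The skip above keys off the private spec._entry_point string sharing the 'gym.envs.parameter_tuning:' module prefix. As a rough illustration of the same prefix-matching idea against today's Gymnasium registry (a dict mapping env ids to EnvSpec objects, whose entry_point may also be a callable rather than a string), one could collect matching specs as below; the prefix used is only an example, since parameter_tuning no longer exists.

import gymnasium as gym

# List env ids whose entry point lives under a given package prefix.
prefix = "gymnasium.envs.classic_control"  # example prefix for illustration
matching = [
    env_id
    for env_id, spec in gym.envs.registry.items()
    if isinstance(spec.entry_point, str) and spec.entry_point.startswith(prefix)
]
print(matching)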
@@ -51,6 +51,12 @@ add_group(
     description='Continuous control tasks, running in a fast physics simulator.'
 )
 
+add_group(
+    id='parameter_tuning',
+    name='Parameter tuning',
+    description='Tune parameters of costly experiments to obtain better outcomes.'
+)
+
 add_group(
     id='toy_text',
     name='Toy text',
@@ -340,6 +346,44 @@ The robot model was originally created by Tassa et al. [Tassa12]_.
 """,
 )
 
+# parameter tuning
+
+add_task(
+    id='ConvergenceControl-v0',
+    group='parameter_tuning',
+    experimental=True,
+    summary="Set and adjust training parameters of Deep CNN to improve the end result.",
+    description ="""\
+Agent can adjust parameters like step size, momentum etc during
+training of deep convolutional neural net to improve its convergence / quality
+of end - result. One episode in this environment is a training of one neural net
+for 20 epochs. Agent can adjust parameters in the beginning of every epoch.
+""",
+    background="""\
+Parameters that agent can adjust are learning rate and momentum coefficients for SGD,
+batch size, l1 and l2 penalty. As a feedback, agent receives # of instances / labels
+in dataset, description of network architecture, and validation accuracy for every epoch.
+
+Architecture of neural network and dataset used are selected randomly at the beginning
+of an episode. Datasets used are MNIST, CIFAR10, CIFAR100. Network architectures contain
+multilayer convnets 66 % of the time, and are [classic] feedforward nets otherwise.
+
+Number of instances in datasets are chosen at random in range from around 100% to 10%
+such that adjustment of l1, l2 penalty coefficients makes more difference.
+
+Let the best accuracy achieved so far at every epoch be denoted as a; Then reward at
+every step is a + a*a. On the one hand side, this encourages fast convergence, as it
+improves cumulative reward over the episode. On the other hand side, improving best
+achieved accuracy is expected to quadratically improve cumulative reward, thus
+encouraging agent to converge fast while achieving high best validation accuracy value.
+
+As the number of labels increases, learning problem becomes more difficult for a fixed
+dataset size. In order to avoid for the agent to ignore more complex datasets, on which
+accuracy is low and concentrate on simple cases which bring bulk of reward, accuracy is
+normalized by the number of labels in a dataset.
+""",
+)
+
 # toy text
 
 add_task(
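A small numeric sketch of the reward described above: per epoch the reward is a + a*a, where a is the best validation accuracy seen so far, so both converging early and reaching a high final accuracy increase the episode return. The accuracy trajectories below are invented purely for illustration.

# Illustrative only: reward per epoch is a + a*a, with a the best accuracy so far.
def episode_return(accuracies):
    best, total = 0.0, 0.0
    for acc in accuracies:
        best = max(best, acc)
        total += best + best * best
    return total

fast = [0.5, 0.7, 0.8, 0.8, 0.8]  # reaches 0.8 early
slow = [0.1, 0.2, 0.4, 0.6, 0.8]  # same final accuracy, reached late
print(episode_return(fast))  # ~6.26
print(episode_return(slow))  # ~3.31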
setup.py
@@ -22,7 +22,8 @@ setup(name='gym',
       'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL',
               'pachi-py>=0.0.19',
               'box2d-py',
-              'mujoco_py>=0.4.3', 'imageio'],
+              'mujoco_py>=0.4.3', 'imageio',
+              'keras', 'theano'],
 
       # Environment-specific dependencies. Keep these in sync with
       # 'all'!
@@ -31,6 +32,7 @@ setup(name='gym',
       'box2d': ['box2d-py'],
       'classic_control': ['pyglet', 'PyOpenGL'],
       'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
+      'parameter_tuning': ['keras', 'theano'],
   },
   package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
   tests_require=['nose2', 'mock'],
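With the two setup.py hunks above, the Keras and Theano requirements become an optional extra rather than a hard dependency: assuming an install from a source checkout, they could be pulled in on demand with pip's extras syntax, e.g. pip install -e '.[parameter_tuning]', or together with everything else via the 'all' extra.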