Fixes according to feedback to pull #104

This commit is contained in:
Iaroslav Shcherbatyi
2016-05-28 11:30:40 +02:00
parent 7371debe38
commit 5c8e34029d
4 changed files with 52 additions and 24 deletions

View File

@@ -1,23 +0,0 @@
'''
Created on May 14, 2016
@author: iaroslav
'''
import gym
import time
# Example driver: run 20 episodes of ConvergenceControl-v0, choosing every
# action by sampling from the environment's action space.
env = gym.make('ConvergenceControl-v0')
for i_episode in range(20):
    observation = env.reset()
    # At most 100 steps per episode; the environment is rendered after each one.
    for t in range(100):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        env.render()
        if done:
            # print() with a single argument behaves identically on
            # Python 2 and Python 3, unlike the print statement.
            print("Final result:")
            env.render()
            break

View File

@@ -27,6 +27,11 @@ def test_env(spec):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point)) logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return return
# Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
return
env = spec.make() env = spec.make()
ob_space = env.observation_space ob_space = env.observation_space
act_space = env.action_space act_space = env.action_space

View File

@@ -51,6 +51,12 @@ add_group(
description='Continuous control tasks, running in a fast physics simulator.' description='Continuous control tasks, running in a fast physics simulator.'
) )
add_group(
id='parameter_tuning',
name='Parameter tuning',
description='Tune parameters of costly experiments to obtain better outcomes.'
)
add_group( add_group(
id='toy_text', id='toy_text',
name='Toy text', name='Toy text',
@@ -340,6 +346,44 @@ The robot model was originally created by Tassa et al. [Tassa12]_.
""", """,
) )
# parameter tuning
add_task(
id='ConvergenceControl-v0',
group='parameter_tuning',
experimental=True,
summary="Set and adjust training parameters of Deep CNN to improve the end result.",
description ="""\
The agent can adjust parameters such as step size, momentum, etc. during the
training of a deep convolutional neural net to improve its convergence and the quality
of the end result. One episode in this environment is the training of one neural net
for 20 epochs. The agent can adjust parameters at the beginning of every epoch.
""",
background="""\
The parameters that the agent can adjust are the learning rate and momentum coefficients for SGD,
the batch size, and the l1 and l2 penalties. As feedback, the agent receives the number of instances
and labels in the dataset, a description of the network architecture, and the validation accuracy for every epoch.
The neural network architecture and the dataset are selected randomly at the beginning
of an episode. The datasets used are MNIST, CIFAR10, and CIFAR100. Network architectures are
multilayer convnets 66% of the time, and [classic] feedforward nets otherwise.
The number of instances in a dataset is chosen at random in the range from around 100% down to 10%,
so that adjusting the l1 and l2 penalty coefficients makes more of a difference.
Let a denote the best accuracy achieved so far at a given epoch; then the reward at
every step is a + a*a. On the one hand, this encourages fast convergence, as it
improves the cumulative reward over the episode. On the other hand, improving the best
achieved accuracy is expected to improve the cumulative reward quadratically, thus
encouraging the agent to converge fast while achieving a high best validation accuracy.
As the number of labels increases, the learning problem becomes more difficult for a fixed
dataset size. To keep the agent from ignoring more complex datasets, on which
accuracy is low, and concentrating on simple cases that bring the bulk of the reward, accuracy is
normalized by the number of labels in a dataset.
""",
)
# toy text # toy text
add_task( add_task(

View File

@@ -22,7 +22,8 @@ setup(name='gym',
'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL', 'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL',
'pachi-py>=0.0.19', 'pachi-py>=0.0.19',
'box2d-py', 'box2d-py',
'mujoco_py>=0.4.3', 'imageio'], 'mujoco_py>=0.4.3', 'imageio',
'keras', 'theano'],
# Environment-specific dependencies. Keep these in sync with # Environment-specific dependencies. Keep these in sync with
# 'all'! # 'all'!
@@ -31,6 +32,7 @@ setup(name='gym',
'box2d': ['box2d-py'], 'box2d': ['box2d-py'],
'classic_control': ['pyglet', 'PyOpenGL'], 'classic_control': ['pyglet', 'PyOpenGL'],
'mujoco': ['mujoco_py>=0.4.3', 'imageio'], 'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
'parameter_tuning': ['keras', 'theano'],
}, },
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']}, package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
tests_require=['nose2', 'mock'], tests_require=['nose2', 'mock'],