mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 22:11:25 +00:00
Fixes according to feedback to pull #104
@@ -1,23 +0,0 @@
'''
Created on May 14, 2016

@author: iaroslav
'''

import gym
import time

env = gym.make('ConvergenceControl-v0')

for i_episode in xrange(20):

    observation = env.reset()

    for t in xrange(100):

        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        env.render()

        if done:
            print "Final result:"
            env.render()
            break
@@ -27,6 +27,11 @@ def test_env(spec):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
    if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
        logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
        return

    env = spec.make()
    ob_space = env.observation_space
    act_space = env.action_space
@@ -51,6 +51,12 @@ add_group(
    description='Continuous control tasks, running in a fast physics simulator.'
)

add_group(
    id='parameter_tuning',
    name='Parameter tuning',
    description='Tune parameters of costly experiments to obtain better outcomes.'
)

add_group(
    id='toy_text',
    name='Toy text',
@@ -340,6 +346,44 @@ The robot model was originally created by Tassa et al. [Tassa12]_.
""",
)

# parameter tuning

add_task(
    id='ConvergenceControl-v0',
    group='parameter_tuning',
    experimental=True,
    summary="Set and adjust training parameters of a deep CNN to improve the end result.",
    description="""\
The agent adjusts parameters such as the step size and momentum during the
training of a deep convolutional neural network in order to improve its
convergence and the quality of the end result. One episode in this environment
is the training of one neural network for 20 epochs; the agent can adjust the
parameters at the beginning of every epoch.
""",
    background="""\
The parameters the agent can adjust are the learning rate and momentum coefficients for SGD,
the batch size, and the L1 and L2 penalties. As feedback, the agent receives the number of
instances and labels in the dataset, a description of the network architecture, and the
validation accuracy for every epoch.

The network architecture and the dataset are selected randomly at the beginning of an
episode. The datasets used are MNIST, CIFAR10, and CIFAR100. The network architectures are
multilayer convnets 66% of the time and classic feedforward nets otherwise.

The number of instances in a dataset is chosen at random, ranging from roughly 100% down to
10% of the full dataset, so that adjusting the L1 and L2 penalty coefficients makes more of
a difference.

Let a denote the best accuracy achieved so far at every epoch. The reward at every step is
then a + a*a. On the one hand, this encourages fast convergence, since it improves the
cumulative reward over the episode. On the other hand, improving the best achieved accuracy
is expected to improve the cumulative reward quadratically, encouraging the agent to
converge quickly while also reaching a high best validation accuracy.

As the number of labels increases, the learning problem becomes more difficult for a fixed
dataset size. To keep the agent from ignoring the more complex datasets, on which accuracy
is low, and concentrating on simple cases that bring the bulk of the reward, the accuracy
is normalized by the number of labels in the dataset.
""",
)

# toy text

add_task(
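To make the reward rule above concrete, here is a minimal Python sketch (not taken from the environment's code; the function name, its arguments, and the exact normalization are illustrative assumptions):

# Minimal sketch of the reward rule described above: a is the best
# label-normalized validation accuracy achieved so far, and each epoch
# yields reward a + a*a.
def epoch_reward(val_accuracies, n_labels):
    # Normalize accuracy by the number of labels so that harder,
    # many-label datasets are not ignored in favour of easy ones.
    normalized = [acc / float(n_labels) for acc in val_accuracies]
    a = max(normalized)   # best normalized accuracy achieved so far
    return a + a * a      # linear term rewards progress, quadratic term rewards the best value

# Example: three epochs of validation accuracy on a 10-label dataset.
print(epoch_reward([0.52, 0.61, 0.58], n_labels=10))  # a = 0.061, reward ~ 0.0647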
setup.py
@@ -22,7 +22,8 @@ setup(name='gym',
          'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL',
                  'pachi-py>=0.0.19',
                  'box2d-py',
                  'mujoco_py>=0.4.3', 'imageio'],
                  'mujoco_py>=0.4.3', 'imageio',
                  'keras', 'theano'],

          # Environment-specific dependencies. Keep these in sync with
          # 'all'!
@@ -31,6 +32,7 @@ setup(name='gym',
          'box2d': ['box2d-py'],
          'classic_control': ['pyglet', 'PyOpenGL'],
          'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
          'parameter_tuning': ['keras', 'theano'],
      },
      package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
      tests_require=['nose2', 'mock'],
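As a usage note on the new extra: the 'parameter_tuning' entry makes Keras and Theano optional dependencies. A small, hypothetical check (not part of the repository) that they are installed could look like this:

# Hypothetical helper, not part of gym: verify that the optional
# dependencies declared under the 'parameter_tuning' extra are importable.
import importlib

for module_name in ("keras", "theano"):
    try:
        importlib.import_module(module_name)
        print("{} is available".format(module_name))
    except ImportError:
        # The extra itself would typically be installed with:
        #     pip install gym[parameter_tuning]
        print("{} is missing".format(module_name))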