mirror of https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 22:11:25 +00:00
Fixes according to feedback to pull #104
@@ -1,23 +0,0 @@
-'''
-Created on May 14, 2016
-
-@author: iaroslav
-'''
-import gym
-import time
-
-env = gym.make('ConvergenceControl-v0')
-for i_episode in xrange(20):
-
-    observation = env.reset()
-
-    for t in xrange(100):
-
-        action = env.action_space.sample()
-        observation, reward, done, info = env.step(action)
-        env.render()
-
-        if done:
-            print "Final result:"
-            env.render()
-            break
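Note: the removed example targets the Python 2-era gym API (xrange, print statements, a four-value step return). A minimal sketch of the same random-rollout loop against the current Gymnasium API might look as follows; ConvergenceControl-v0 is no longer registered, so a stand-in environment id is assumed here.

import gymnasium as gym

# 'ConvergenceControl-v0' was removed from the registry long ago;
# any registered id serves as a stand-in for this sketch.
env = gym.make("CartPole-v1")

for i_episode in range(20):
    observation, info = env.reset()
    for t in range(100):
        action = env.action_space.sample()  # random policy, as in the removed script
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            print("Final result:")
            break
env.close()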
@@ -27,6 +27,11 @@ def test_env(spec):
         logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
         return
 
+    # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
+    if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
+        logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
+        return
+
     env = spec.make()
     ob_space = env.observation_space
     act_space = env.action_space
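The skip above keys off the private spec._entry_point string sharing the 'gym.envs.parameter_tuning:' module prefix. As a rough illustration of the same prefix-matching idea against today's Gymnasium registry (a dict mapping env ids to EnvSpec objects, whose entry_point may also be a callable rather than a string), one could collect matching specs as below; the prefix used is only an example, since parameter_tuning no longer exists.

import gymnasium as gym

# List env ids whose entry point lives under a given package prefix.
prefix = "gymnasium.envs.classic_control"  # example prefix for illustration
matching = [
    env_id
    for env_id, spec in gym.envs.registry.items()
    if isinstance(spec.entry_point, str) and spec.entry_point.startswith(prefix)
]
print(matching)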
@@ -51,6 +51,12 @@ add_group(
     description='Continuous control tasks, running in a fast physics simulator.'
 )
 
+add_group(
+    id='parameter_tuning',
+    name='Parameter tuning',
+    description='Tune parameters of costly experiments to obtain better outcomes.'
+)
+
 add_group(
     id='toy_text',
     name='Toy text',
@@ -340,6 +346,44 @@ The robot model was originally created by Tassa et al. [Tassa12]_.
 """,
 )
 
+# parameter tuning
+
+add_task(
+    id='ConvergenceControl-v0',
+    group='parameter_tuning',
+    experimental=True,
+    summary="Set and adjust training parameters of Deep CNN to improve the end result.",
+    description ="""\
+Agent can adjust parameters like step size, momentum etc during
+training of deep convolutional neural net to improve its convergence / quality
+of end - result. One episode in this environment is a training of one neural net
+for 20 epochs. Agent can adjust parameters in the beginning of every epoch.
+""",
+    background="""\
+Parameters that agent can adjust are learning rate and momentum coefficients for SGD,
+batch size, l1 and l2 penalty. As a feedback, agent receives # of instances / labels
+in dataset, description of network architecture, and validation accuracy for every epoch.
+
+Architecture of neural network and dataset used are selected randomly at the beginning
+of an episode. Datasets used are MNIST, CIFAR10, CIFAR100. Network architectures contain
+multilayer convnets 66 % of the time, and are [classic] feedforward nets otherwise.
+
+Number of instances in datasets are chosen at random in range from around 100% to 10%
+such that adjustment of l1, l2 penalty coefficients makes more difference.
+
+Let the best accuracy achieved so far at every epoch be denoted as a; Then reward at
+every step is a + a*a. On the one hand side, this encourages fast convergence, as it
+improves cumulative reward over the episode. On the other hand side, improving best
+achieved accuracy is expected to quadratically improve cumulative reward, thus
+encouraging agent to converge fast while achieving high best validation accuracy value.
+
+As the number of labels increases, learning problem becomes more difficult for a fixed
+dataset size. In order to avoid for the agent to ignore more complex datasets, on which
+accuracy is low and concentrate on simple cases which bring bulk of reward, accuracy is
+normalized by the number of labels in a dataset.
+""",
+)
+
 # toy text
 
 add_task(
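A small numeric sketch of the reward described above: per epoch the reward is a + a*a, where a is the best validation accuracy seen so far, so both converging early and reaching a high final accuracy increase the episode return. The accuracy trajectories below are invented purely for illustration.

# Illustrative only: reward per epoch is a + a*a, with a the best accuracy so far.
def episode_return(accuracies):
    best, total = 0.0, 0.0
    for acc in accuracies:
        best = max(best, acc)
        total += best + best * best
    return total

fast = [0.5, 0.7, 0.8, 0.8, 0.8]  # reaches 0.8 early
slow = [0.1, 0.2, 0.4, 0.6, 0.8]  # same final accuracy, reached late
print(episode_return(fast))  # ~6.26
print(episode_return(slow))  # ~3.31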
setup.py
@@ -22,7 +22,8 @@ setup(name='gym',
       'all': ['atari_py>=0.0.17', 'Pillow', 'pyglet', 'PyOpenGL',
               'pachi-py>=0.0.19',
               'box2d-py',
-              'mujoco_py>=0.4.3', 'imageio'],
+              'mujoco_py>=0.4.3', 'imageio',
+              'keras', 'theano'],
 
       # Environment-specific dependencies. Keep these in sync with
       # 'all'!
@@ -31,6 +32,7 @@ setup(name='gym',
       'box2d': ['box2d-py'],
       'classic_control': ['pyglet', 'PyOpenGL'],
       'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
+      'parameter_tuning': ['keras', 'theano'],
   },
   package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
   tests_require=['nose2', 'mock'],
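With the two setup.py hunks above, the Keras and Theano requirements become an optional extra rather than a hard dependency: assuming an install from a source checkout, they could be pulled in on demand with pip's extras syntax, e.g. pip install -e '.[parameter_tuning]', or together with everything else via the 'all' extra.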