Files
baselines/baselines/common/models.py
Karl Cobbe ddcab1606d Procgen Benchmark Updates (#328)
* directory cleanup

* logging, num_experiments

* fixes

* cleanup

* gin fixes

* fix local max gpu

* resid nx

* tweak

* num machines and download params

* rename

* cleanup

* create workbench

* more reorg

* fix

* more logging wrappers

* lint fix

* restore train procgen

* restore train procgen

* pylint fix

* better wrapping

* whackamole walls

* config sweep

* tweak

* args sweep

* tweak

* test workers

* mpi_weight

* train test comm and high difficulty fix

* enjoy show returns

* better joint training

* tweak

* Add --update to args and add gin-config to requirements.txt

* add username to download_file

* removing gin, procgen_parser

* removing gin

* procgen args

* config fixes

* cleanup

* cleanup

* procgen args fix

* fix

* rcall syncing

* lint

* rename mpi_weight

* begin composable game

* more composable game

* tweak

* background alpha

* use username for sync

* fixes

* microbatch fix

* lure composable game

* merge

* proc trans update

* proc trans update (#307)

* finetuning experiment

* Change is_local to use `use_rcall` and fix error of `enjoy.py` with multiple ends

* graphing help

* add --local

* change args_dict['env_name'] to ENV_NAME

* finetune experiments

* tweak

* tweak

* reorg wrappers, remove is_local

* workdir/local fixes

* move finetune experiments

* default dir and graphing

* more graphing

* fix

* pooled syncing

* tweaks

* dir fix

* tweak

* wrapper mpi fix

* wind and turrets

* composability cleanup

* radius cleanup

* composable reorg

* laser gates

* composable tweaks

* soft walls

* tweak

* begin swamp

* more swamp

* more swamp

* fix

* hidden mines

* use maze layout

* tweak

* laser gate tweaks

* tweaks

* tweaks

* lure/propel updates

* composable midnight

* composable coinmaze

* composability difficulty

* tweak

* add step to save_params

* composable offsets

* composable boxpush

* composable combiner

* tweak

* tweak

* always choose correct number of mechanics

* fix

* rcall local fix

* add steps when dump and save params

* loading rank 1,2,3.. error fix

* add experiments.py

* fix loading latest weight with no -rest

* support more complex run_id and add more examples

* fix typo

* move post_run_id into experiments.py

* add hp_search example

* error fix

* joint experiments in progress

* joint hp finished

* typo

* error fix

* edit experiments

* Save experiments set up in code and  save weights per step (#319)

* add step to save_params

* add steps when dump and save params

* loading rank 1,2,3.. error fix

* add experiments.py

* fix loading latest weight with no -rest

* support more complex run_id and add more examples

* fix typo

* move post_run_id into experiments.py

* add hp_search example

* error fix

* joint experiments in progress

* joint hp finished

* typo

* error fix

* edit experiments

* tweaks

* graph exp WIP

* depth tweaks

* move save_all

* fix

* restore_dir name

* restore depth

* choose max mechanics

* use override mode

* tweak frogger

* lstm default

* fix

* patience is composable

* hunter is composable

* fixed asset seed cleanup

* minesweeper is composable

* eggcatch is composable

* tweak

* applesort is composable

* chaser game

* begin lighter

* lighter game

* tractor game

* boxgather game

* plumber game

* hitcher game

* doorbell game

* lawnmower game

* connecter game

* cannonaim

* outrun game

* encircle game

* spinner game

* tweak

* tweak

* detonator game

* driller

* driller

* mixer

* conveyor

* conveyor game

* joint pcg experiments

* fixes

* pcg sweep experiment

* cannonaim fix

* combiner fix

* store save time

* laseraim fix

* lightup fix

* detonator tweaks

* detonator fixes

* driller fix

* lawnmower calibration

* spinner calibration

* propel fix

* train experiment

* print load time

* system independent hashing

* remove gin configurable

* task ids fix

* test_pcg experiment

* connecter dense reward

* hard_pcg

* num train comms

* mpi splits envs

* tweaks

* tweaks

* graph tweaks

* graph tweaks

* lint fix

* fix tests

* load bugfix

* difficulty timeout tweak

* tweaks

* more graphing

* graph tweaks

* tweak

* download file fix

* pcg train envs list

* cleanup

* tweak

* manually name impala layers

* tweak

* expect fps

* backend arg

* args tweak

* workbench cleanup

* move graph files

* workbench cleanup

* split env name by comma

* workbench cleanup

* ema graph

* remove Dict

* use tf.io.gfile

* comments for auto-killing jobs

* lint fix

* write latest file when not saving all and load it when step=None
2019-05-03 15:54:24 -07:00

276 lines
8.4 KiB
Python

import numpy as np
import tensorflow as tf
from baselines.a2c import utils
from baselines.a2c.utils import conv, fc, conv_to_fc, batch_to_seq, seq_to_batch
from baselines.common.mpi_running_mean_std import RunningMeanStd
# Registry of network-builder factories, keyed by the name passed to `register`.
mapping = {}


def register(name):
    """
    Decorator factory that records a network builder in `mapping`.

    Parameters:
    ----------
    name: key under which the decorated function is stored in `mapping`

    Returns:
    -------
    decorator that stores the function under `name` and returns it unchanged
    """
    def _decorate(builder):
        # A later registration under the same name overwrites the earlier one.
        mapping[name] = builder
        return builder

    return _decorate
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from Nature paper.

    The input is cast to float32 and divided by 255, passed through three
    `conv` layers (scopes 'c1', 'c2', 'c3'), each followed by ReLU, then
    reshaped with `conv_to_fc` and fed to an `fc` layer (scope 'fc1', nh=512)
    followed by ReLU.

    Parameters:
    ----------
    unscaled_images: input tensor; assumed to hold 0-255 pixel values (TODO confirm with callers)
    conv_kwargs: extra keyword arguments forwarded to every `conv` call

    Returns:
    -------
    output tensor of the final ReLU
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)

    # (scope, nf, rf, stride) for each conv layer, in application order.
    conv_specs = (
        ('c1', 32, 8, 4),
        ('c2', 64, 4, 2),
        ('c3', 64, 3, 1),
    )

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, nf, rf, stride in conv_specs:
        features = relu(conv(features, scope, nf=nf, rf=rf, stride=stride,
                             init_scale=gain, **conv_kwargs))

    flat = conv_to_fc(features)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
def build_impala_cnn(unscaled_images, depths=[16,32,32], **conv_kwargs):
    """
    Model used in the paper "IMPALA: Scalable Distributed Deep-RL with
    Importance Weighted Actor-Learner Architectures" https://arxiv.org/abs/1802.01561

    For every entry of `depths` the network applies a 3x3 'same' convolution,
    a 3x3 stride-2 max-pool and two residual blocks. The result is flattened,
    passed through ReLU and a 256-unit dense layer with ReLU activation.
    Every conv / dense layer is explicitly named 'layer_<k>' with k counting
    up from 0 in creation order.

    Parameters:
    ----------
    unscaled_images: input tensor; cast to float32 and divided by 255
    depths: iterable of output channel counts, one per conv sequence
    conv_kwargs: accepted but not used anywhere in this function

    Returns:
    -------
    output tensor of the final dense layer
    """
    next_layer_id = 0

    def fresh_layer_name():
        # Hands out 'layer_0', 'layer_1', ... so that variable names do not
        # depend on TensorFlow's automatic name uniquification.
        nonlocal next_layer_id
        layer_name = 'layer_' + str(next_layer_id)
        next_layer_id += 1
        return layer_name

    def conv3x3(tensor, channels):
        return tf.layers.conv2d(tensor, channels, 3, padding='same', name=fresh_layer_name())

    def residual_block(block_in):
        # Two (ReLU -> conv) stages with an identity skip connection; the
        # channel count is taken from the input so the sum is well-defined.
        channels = block_in.get_shape()[-1].value
        branch = block_in
        for _ in range(2):
            branch = conv3x3(tf.nn.relu(branch), channels)
        return branch + block_in

    def conv_sequence(seq_in, channels):
        convolved = conv3x3(seq_in, channels)
        pooled = tf.layers.max_pooling2d(convolved, pool_size=3, strides=2, padding='same')
        return residual_block(residual_block(pooled))

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for depth in depths:
        features = conv_sequence(features, depth)

    features = tf.nn.relu(tf.layers.flatten(features))
    return tf.layers.dense(features, 256, activation=tf.nn.relu, name=fresh_layer_name())
@register("mlp")
def mlp(num_layers=2, num_hidden=64, activation=tf.tanh, layer_norm=False):
    """
    Stack of fully-connected layers to be used in a policy / q-function approximator

    Parameters:
    ----------
    num_layers: int                 number of fully-connected layers (default: 2)
    num_hidden: int                 size of fully-connected layers (default: 64)
    activation:                     activation function (default: tf.tanh)
    layer_norm: bool                if True, tf.contrib.layers.layer_norm is applied to each
                                    layer's output before the activation (default: False)

    Returns:
    -------
    function that builds fully connected network with a given input tensor / placeholder
    """
    def network_fn(X):
        hidden = tf.layers.flatten(X)
        for layer_idx in range(num_layers):
            scope = 'mlp_fc{}'.format(layer_idx)
            hidden = fc(hidden, scope, nh=num_hidden, init_scale=np.sqrt(2))
            if layer_norm:
                hidden = tf.contrib.layers.layer_norm(hidden, center=True, scale=True)
            hidden = activation(hidden)
        return hidden

    return network_fn
@register("cnn")
def cnn(**conv_kwargs):
    """
    Network builder wrapping `nature_cnn`.

    Parameters:
    ----------
    conv_kwargs: keyword arguments forwarded to `nature_cnn` on every call

    Returns:
    -------
    function that applies `nature_cnn` to a given input tensor / placeholder
    """
    def network_fn(X):
        latent = nature_cnn(X, **conv_kwargs)
        return latent

    return network_fn
@register("impala_cnn")
def impala_cnn(**conv_kwargs):
    """
    Network builder wrapping `build_impala_cnn`.

    Parameters:
    ----------
    conv_kwargs: keyword arguments forwarded to `build_impala_cnn`
                 (e.g. `depths` to change the channel count of each conv sequence)

    Returns:
    -------
    function that applies `build_impala_cnn` to a given input tensor / placeholder
    """
    def network_fn(X):
        # Forward the kwargs instead of dropping them, so that options such as
        # `depths` take effect; this matches how `cnn` and `cnn_lstm` treat
        # their own conv_kwargs.
        return build_impala_cnn(X, **conv_kwargs)

    return network_fn
@register("cnn_small")
def cnn_small(**conv_kwargs):
    """
    Smaller variant of the conv + fc network: two `conv` layers (scopes 'c1',
    'c2') and one `fc` layer (scope 'fc1', nh=128), each followed by ReLU.

    Parameters:
    ----------
    conv_kwargs: keyword arguments forwarded to every `conv` call

    Returns:
    -------
    function that builds the network with a given input tensor / placeholder
    """
    # (scope, nf, rf, stride) for each conv layer, in application order.
    conv_specs = (
        ('c1', 8, 8, 4),
        ('c2', 16, 4, 2),
    )

    def network_fn(X):
        relu = tf.nn.relu
        gain = np.sqrt(2)

        # Input is cast to float32 and divided by 255 before the first conv.
        features = tf.cast(X, tf.float32) / 255.
        for scope, nf, rf, stride in conv_specs:
            features = relu(conv(features, scope, nf=nf, rf=rf, stride=stride,
                                 init_scale=gain, **conv_kwargs))

        flat = conv_to_fc(features)
        return relu(fc(flat, 'fc1', nh=128, init_scale=gain))

    return network_fn
@register("lstm")
def lstm(nlstm=128, layer_norm=False):
    """
    Builds LSTM (Long-Short Term Memory) network to be used in a policy.

    The returned function yields a pair: the LSTM output for every step of the
    sequence, and a dictionary of auxiliary tensors to be set as policy
    attributes:

        S               placeholder to feed the current state (the LSTM state has to be
                        managed outside the policy)
        M               placeholder for the mask (used to mask out observations after the
                        end of the episode, but can be used for other purposes too)
        initial_state   numpy array of zeros with the shape of S
        state           output LSTM state (to be fed into S at the next call)

    An example of usage of lstm-based policy can be found here:
    common/tests/test_doc_examples.py/test_lstm_example

    Parameters:
    ----------
    nlstm: int          LSTM hidden state size
    layer_norm: bool    if True, layer-normalized version of LSTM is used

    Returns:
    -------
    function that builds LSTM with a given input tensor / placeholder
    """
    def network_fn(X, nenv=1):
        batch_size = X.shape[0]
        steps = batch_size // nenv

        flat = tf.layers.flatten(X)

        M = tf.placeholder(tf.float32, [batch_size])      # mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, 2*nlstm])   # states

        input_seq = batch_to_seq(flat, nenv, steps)
        mask_seq = batch_to_seq(M, nenv, steps)

        # Pick the recurrent implementation and its variable scope together.
        if layer_norm:
            cell, scope = utils.lnlstm, 'lnlstm'
        else:
            cell, scope = utils.lstm, 'lstm'
        output_seq, snew = cell(input_seq, mask_seq, S, scope=scope, nh=nlstm)

        latent = seq_to_batch(output_seq)
        aux = {
            'S': S,
            'M': M,
            'state': snew,
            'initial_state': np.zeros(S.shape.as_list(), dtype=float),
        }
        return latent, aux

    return network_fn
@register("cnn_lstm")
def cnn_lstm(nlstm=128, layer_norm=False, conv_fn=nature_cnn, **conv_kwargs):
    """
    Builds a network that applies `conv_fn` to the input and feeds the result
    into an LSTM.

    Parameters:
    ----------
    nlstm: int          LSTM hidden state size
    layer_norm: bool    if True, utils.lnlstm is used instead of utils.lstm
    conv_fn:            function called as conv_fn(X, **conv_kwargs) to produce the LSTM input
                        (default: nature_cnn)
    conv_kwargs:        keyword arguments forwarded to `conv_fn`

    Returns:
    -------
    function taking (X, nenv=1) and returning the LSTM output together with a
    dictionary holding the placeholders 'S' (state) and 'M' (mask), the new
    'state' tensor and a zero-filled numpy 'initial_state'
    """
    def network_fn(X, nenv=1):
        batch_size = X.shape[0]
        steps = batch_size // nenv

        conv_out = conv_fn(X, **conv_kwargs)

        M = tf.placeholder(tf.float32, [batch_size])      # mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, 2*nlstm])   # states

        input_seq = batch_to_seq(conv_out, nenv, steps)
        mask_seq = batch_to_seq(M, nenv, steps)

        # Pick the recurrent implementation and its variable scope together.
        if layer_norm:
            cell, scope = utils.lnlstm, 'lnlstm'
        else:
            cell, scope = utils.lstm, 'lstm'
        output_seq, snew = cell(input_seq, mask_seq, S, scope=scope, nh=nlstm)

        latent = seq_to_batch(output_seq)
        aux = {
            'S': S,
            'M': M,
            'state': snew,
            'initial_state': np.zeros(S.shape.as_list(), dtype=float),
        }
        return latent, aux

    return network_fn
@register("impala_cnn_lstm")
def impala_cnn_lstm():
    """
    Builds the `cnn_lstm` network with `build_impala_cnn` as its convolutional
    part and an LSTM hidden state size of 256.

    Returns:
    -------
    the network-building function produced by `cnn_lstm`
    """
    network_fn = cnn_lstm(nlstm=256, conv_fn=build_impala_cnn)
    return network_fn
@register("cnn_lnlstm")
def cnn_lnlstm(nlstm=128, **conv_kwargs):
    """
    Builds the `cnn_lstm` network with layer normalization switched on.

    Parameters:
    ----------
    nlstm: int      LSTM hidden state size
    conv_kwargs:    remaining keyword arguments, passed through to `cnn_lstm`

    Returns:
    -------
    the network-building function produced by `cnn_lstm`
    """
    network_fn = cnn_lstm(nlstm=nlstm, layer_norm=True, **conv_kwargs)
    return network_fn
@register("conv_only")
def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
    '''
    convolutions-only net

    Parameters:
    ----------
    convs: list of triples (filter_number, filter_size, stride) specifying parameters for each layer.
    conv_kwargs: extra keyword arguments forwarded to every tf.contrib.layers.convolution2d call

    Returns:
    -------
    function that takes tensorflow tensor as input and returns the output of the last convolutional layer
    '''
    def network_fn(X):
        # Input is cast to float32 and divided by 255 before the first layer.
        features = tf.cast(X, tf.float32) / 255.
        with tf.variable_scope("convnet"):
            for filter_number, filter_size, stride in convs:
                features = tf.contrib.layers.convolution2d(
                    features,
                    num_outputs=filter_number,
                    kernel_size=filter_size,
                    stride=stride,
                    activation_fn=tf.nn.relu,
                    **conv_kwargs)
        return features

    return network_fn
def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]):
    """
    Standardize `x` with a newly created RunningMeanStd and clip the result.

    Parameters:
    ----------
    x: input tensor; the RunningMeanStd is created with shape x.shape[1:]
    clip_range: sequence of bounds; its min and max are used as the clipping limits

    Returns:
    -------
    (clipped standardized tensor, the RunningMeanStd instance)
    """
    rms = RunningMeanStd(shape=x.shape[1:])
    standardized = (x - rms.mean) / rms.std
    lower, upper = min(clip_range), max(clip_range)
    return tf.clip_by_value(standardized, lower, upper), rms
def get_network_builder(name):
    """
    Resolve `name` to a network builder.

    A callable is returned as-is; any other value is looked up in `mapping`
    and a ValueError is raised when it has not been registered.

    If you want to register your own network outside models.py, you just need:

    Usage Example:
    -------------
    from baselines.common.models import register
    @register("your_network_name")
    def your_network_define(**net_kwargs):
        ...
        return network_fn

    """
    # Callables bypass the registry entirely.
    if callable(name):
        return name

    if name not in mapping:
        raise ValueError('Unknown network type: {}'.format(name))

    return mapping[name]