# Gymnasium/gym/envs/parameter_tuning/train_deep_cnn.py
from __future__ import print_function
import gym
import random
from gym import spaces
import numpy as np
from keras.datasets import cifar10, mnist, cifar100
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.regularizers import WeightRegularizer
from keras import backend as K
from itertools import cycle
import math
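# NOTE (added context, not in the original file): this module targets the
# Keras 1.x API (Convolution2D, WeightRegularizer, np_utils, nb_epoch) and
# uses Theano-style channel-first image dimension ordering throughout.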
class CNNClassifierTraining(gym.Env):
"""Environment where agent learns to select training parameters and
architecture of a deep convolutional neural network
Training parameters that the agent can adjust are learning
rate, learning rate decay, momentum, batch size, L1 / L2 regularization.
Agent can select up to 5 cnn layers and up to 2 fc layers.
Agent is provided with feedback on validation accuracy, as well as on
the size of a dataset.
"""
2016-05-31 20:53:37 +02:00
metadata = {"render.modes": ["human"]}
2016-05-31 20:53:37 +02:00
    def __init__(self, natural=False):
        """
        Initialize the environment.
        """
        # Scalar constants are stored in Boxes of length 1
        # (plain scalars caused some errors).
        self.action_space = spaces.Tuple((
            spaces.Box(-5.0, 0.0, 1),      # learning rate (log10)
            spaces.Box(-7.0, -2.0, 1),     # learning rate decay (log10)
            spaces.Box(-5.0, 0.0, 1),      # momentum (log10)
            spaces.Box(2, 8, 1),           # batch size (log2)
            spaces.Box(-6.0, 1.0, 1),      # L1 regularization (log10)
            spaces.Box(-6.0, 1.0, 1),      # L2 regularization (log10)
            spaces.Box(0.0, 1.0, (5, 2)),  # convolutional layer parameters
            spaces.Box(0.0, 1.0, (2, 2)),  # fully connected layer parameters
        ))

        # Observation features, in order: number of training instances,
        # validation accuracy after training with the given parameters.
        self.observation_space = spaces.Box(-1e5, 1e5, 2)

        # Start the first game
        self._reset()
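    # For illustration, a minimal sketch (not part of the original
    # environment) of how one sampled action decodes into concrete
    # hyperparameters; this mirrors the mapping done in _step below:
    #
    #   lr = [-2.0]        ->  10 ** -2.0  = 0.01
    #   decay = [-4.0]     ->  10 ** -4.0  = 0.0001
    #   momentum = [-0.05] ->  10 ** -0.05 ~ 0.891
    #   batch_size = [5.0] ->  int(2 ** 5.0) = 32
    #   l1 = [-6.0]        ->  1e-6
    #   l2 = [-3.0]        ->  1e-3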
    def _step(self, action):
        """
        Perform an action in the environment.
        """
        assert self.action_space.contains(action)

        lr, decay, momentum, batch_size, l1, l2, convs, fcs = action

        # map the ranges of the inputs
        lr = (10.0 ** lr[0]).astype('float32')
        decay = (10.0 ** decay[0]).astype('float32')
        momentum = (10.0 ** momentum[0]).astype('float32')

        batch_size = int(2 ** batch_size[0])

        l1 = (10.0 ** l1[0]).astype('float32')
        l2 = (10.0 ** l2[0]).astype('float32')

        """
        # debug output of the decoded parameters
        names = ["lr", "decay", "mom", "batch", "l1", "l2"]
        values = [lr, decay, momentum, batch_size, l1, l2]

        for n, v in zip(names, values):
            print(n, v)
        """

        diverged, acc = self.train_blueprint(lr, decay, momentum, batch_size, l1, l2, convs, fcs)

        # save the best validation accuracy; if training diverged, acc is zero
        if acc > self.best_val:
            self.best_val = acc

        self.previous_acc = acc

        self.epoch_idx += 1
        done = self.epoch_idx == 10

        reward = self.best_val

        # As the number of labels increases, the learning problem becomes
        # more difficult for a fixed dataset size. To keep the agent from
        # ignoring the more complex datasets, on which accuracy is low, and
        # concentrating on simple cases that bring the bulk of the reward,
        # the reward is scaled by the number of labels in the dataset.
        reward *= self.nb_classes

        # the formula below encourages a higher best validation accuracy
        reward += reward ** 2

        return self._get_obs(), reward, done, {}
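    # Worked example of the reward shaping above (a sketch): with
    # best_val = 0.5 on cifar100 (nb_classes = 100), the scaled reward is
    # 0.5 * 100 = 50, and 50 + 50 ** 2 = 2550, so the quadratic term
    # strongly rewards further gains in the best validation accuracy.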
    def _render(self, mode="human", close=False):
        if close:
            return

        print(">> Step ", self.epoch_idx, "best validation:", self.best_val)
    def _get_obs(self):
        """
        Observe the environment; usually called after a step is taken.
        """
        # observation as per the observation space
        return np.array([self.nb_inst,
                         self.previous_acc])
    def data_mix(self):
        # randomly choose a dataset
        dataset = random.choice(['mnist', 'cifar10', 'cifar100'])
        n_labels = 10

        if dataset == "mnist":
            data = mnist.load_data()

        if dataset == "cifar10":
            data = cifar10.load_data()

        if dataset == "cifar100":
            data = cifar100.load_data()
            n_labels = 100

        # Choose the dataset size; this affects how much regularization is needed.
        r = np.random.rand()

        # Not using the full dataset makes regularization more important
        # and speeds up testing a little bit.
        data_size = int(2000 * (1 - r) + 40000 * r)

        # The test data is not used for validation; the last 10000 instances
        # of the dataset are used instead, so that trained models can be
        # compared to results in the literature.
        (CX, CY), (CXt, CYt) = data

        if dataset == "mnist":
            CX = np.expand_dims(CX, axis=1)

        data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:]

        return data, n_labels
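    # For example (a sketch): r = 0.0 gives data_size = 2000 and r = 1.0
    # gives data_size = 40000, so the training set size is drawn uniformly
    # between those extremes while the validation set stays fixed at 10000.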
    def _reset(self):
        self.generate_data()

        # initial accuracy values
        self.best_val = 0.0
        self.previous_acc = 0.0

        self.epoch_idx = 0

        return self._get_obs()

    def generate_data(self):
        self.data, self.nb_classes = self.data_mix()

        # index zero corresponds to the training inputs
        self.nb_inst = len(self.data[0])
    def train_blueprint(self, lr, decay, momentum, batch_size, l1, l2, convs, fcs):
        X, Y, Xv, Yv = self.data
        nb_classes = self.nb_classes

        reg = WeightRegularizer()

        # a hack to make the regularization strength adjustable after compilation
        reg.l1 = K.variable(0.0)
        reg.l2 = K.variable(0.0)

        # input image dimensions (images are assumed to be square)
        img_rows, img_cols = X.shape[-1], X.shape[-1]
        img_channels = X.shape[1]

        # convert class vectors to binary class matrices
        Y = np_utils.to_categorical(Y, nb_classes)
        Yv = np_utils.to_categorical(Yv, nb_classes)

        # here the definition of the model happens
        model = Sequential()

        has_convs = False

        # create all convolutional layers
        for val, use in convs:

            # size of the convolutional layer
            cnvSz = int(val * 127) + 1

            if use < 0.5:
                continue
            has_convs = True

            model.add(Convolution2D(cnvSz, 3, 3, border_mode='same',
                                    input_shape=(img_channels, img_rows, img_cols),
                                    W_regularizer=reg,
                                    b_regularizer=reg))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2)))
            # model.add(Dropout(0.25))
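        # For illustration (a sketch): a convs row of [0.5, 0.9] decodes to
        # a used convolutional layer with int(0.5 * 127) + 1 = 64 filters,
        # while a row of [0.2, 0.3] is skipped entirely because its "use"
        # entry is below 0.5.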
        if has_convs:
            model.add(Flatten())
        else:
            model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))  # avoid exceptions when there are no convs

        # create all fully connected layers
        for val, use in fcs:

            if use < 0.5:
                continue

            # choose the fully connected layer size
            densesz = int(1023 * val) + 1

            model.add(Dense(densesz,
                            W_regularizer=reg,
                            b_regularizer=reg))
            model.add(Activation('relu'))
            # model.add(Dropout(0.5))

        model.add(Dense(nb_classes,
                        W_regularizer=reg,
                        b_regularizer=reg))
        model.add(Activation('softmax'))

        # let's train the model using SGD + momentum (how original).
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['accuracy'])

        X = X.astype('float32')
        Xv = Xv.astype('float32')
        X /= 255
        Xv /= 255

        # set the parameters of the training step
        sgd.lr.set_value(lr)
        sgd.decay.set_value(decay)
        sgd.momentum.set_value(momentum)

        reg.l1.set_value(l1)
        reg.l2.set_value(l2)

        # train the model for one environment step (10 epochs of SGD)
        H = model.fit(X, Y,
                      batch_size=int(batch_size),
                      nb_epoch=10,
                      shuffle=True)

        diverged = math.isnan(H.history['loss'][-1])

        acc = 0.0
        if not diverged:
            _, acc = model.evaluate(Xv, Yv)

        return diverged, acc
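

# A minimal usage sketch (an assumption, not part of the original file):
# run ten random hyperparameter-tuning steps. It calls the old underscore
# API (_reset / _step / _render) directly, as this file predates the
# registered entry points, and assumes gym and Keras 1.x are installed.
if __name__ == "__main__":
    env = CNNClassifierTraining()
    obs = env._reset()
    for _ in range(10):
        action = env.action_space.sample()  # random training parameters
        obs, reward, done, info = env._step(action)
        env._render()
        if done:
            break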