# Source code for chainerrl.policies.deterministic_policy

from logging import getLogger

import chainer
from chainer import functions as F
from chainer.initializers import LeCunNormal
from chainer import links as L

from chainerrl import distribution
from chainerrl.functions.bound_by_tanh import bound_by_tanh
from chainerrl.links.mlp import MLP
from chainerrl.links.mlp_bn import MLPBN
from chainerrl.policy import Policy
from chainerrl.recurrent import RecurrentChainMixin


# Module-level logger, named after this module's import path.
logger = getLogger(__name__)


class ContinuousDeterministicPolicy(
        chainer.Chain, Policy, RecurrentChainMixin):
    """Continuous deterministic policy.

    Runs a model on the input, optionally post-processes its output with
    an action filter, and returns the result wrapped in a
    ``ContinuousDeterministicDistribution``.

    Args:
        model (chainer.Link):
            Link that is callable and outputs action values.
        model_call (callable or None):
            If not None, it is called as ``model_call(model, x)`` instead
            of ``model(x)`` to compute the model output.
        action_filter (callable or None):
            If not None, it is applied to the model output before the
            output is wrapped in a distribution.
    """

    def __init__(self, model, model_call=None, action_filter=None):
        # The model is handed to the Chain initializer by keyword so that
        # it is available afterwards as ``self.model``.
        super().__init__(model=model)
        self.model_call = model_call
        self.action_filter = action_filter

    def __call__(self, x):
        """Compute the action distribution for the input ``x``.

        Args:
            x: Input passed to the model (or to ``model_call`` together
                with the model).

        Returns:
            distribution.ContinuousDeterministicDistribution: Distribution
            wrapping the (optionally filtered) model output.
        """
        # Model: use the custom call convention only when one was given.
        if self.model_call is None:
            h = self.model(x)
        else:
            h = self.model_call(self.model, x)
        # Action filter
        if self.action_filter is not None:
            h = self.action_filter(h)
        # Wrap by Distribution
        return distribution.ContinuousDeterministicDistribution(h)


class FCDeterministicPolicy(ContinuousDeterministicPolicy):
    """Fully-connected deterministic policy.

    Args:
        n_input_channels (int): Number of input channels.
        n_hidden_layers (int): Number of hidden layers.
        n_hidden_channels (int): Number of hidden channels.
        action_size (int): Size of actions.
        min_action (ndarray or None): Minimum action. Used only if bound_action
            is set to True.
        max_action (ndarray or None): Maximum action. Used only if bound_action
            is set to True.
        bound_action (bool): If set to True, actions are bounded to
            [min_action, max_action] by tanh.
        nonlinearity (callable): Nonlinearity between layers. It must accept a
            Variable as an argument and return a Variable with the same shape.
            Nonlinearities with learnable parameters such as PReLU are not
            supported. It is not used if n_hidden_layers is zero.
        last_wscale (float): Scale of weight initialization of the last layer.
    """

    def __init__(self, n_input_channels, n_hidden_layers,
                 n_hidden_channels, action_size,
                 min_action=None, max_action=None, bound_action=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action

        if self.bound_action:
            def action_filter(x):
                # The bounds are read from the instance on every call, so
                # they reflect the current min_action / max_action
                # attribute values.
                return bound_by_tanh(
                    x, self.min_action, self.max_action)
        else:
            action_filter = None

        # One entry per hidden layer, all of the same width.
        hidden_sizes = (n_hidden_channels,) * n_hidden_layers

        super().__init__(
            model=MLP(n_input_channels,
                      action_size,
                      hidden_sizes,
                      nonlinearity=nonlinearity,
                      last_wscale=last_wscale,
                      ),
            action_filter=action_filter)


class FCBNDeterministicPolicy(ContinuousDeterministicPolicy):
    """Fully-connected deterministic policy with Batch Normalization.

    Args:
        n_input_channels (int): Number of input channels.
        n_hidden_layers (int): Number of hidden layers.
        n_hidden_channels (int): Number of hidden channels.
        action_size (int): Size of actions.
        min_action (ndarray or None): Minimum action. Used only if bound_action
            is set to True.
        max_action (ndarray or None): Maximum action. Used only if bound_action
            is set to True.
        bound_action (bool): If set to True, actions are bounded to
            [min_action, max_action] by tanh.
        normalize_input (bool): If set to True, Batch Normalization is applied
            to inputs as well as hidden activations.
        nonlinearity (callable): Nonlinearity between layers. It must accept a
            Variable as an argument and return a Variable with the same shape.
            Nonlinearities with learnable parameters such as PReLU are not
            supported. It is not used if n_hidden_layers is zero.
        last_wscale (float): Scale of weight initialization of the last layer.
    """

    def __init__(self, n_input_channels, n_hidden_layers,
                 n_hidden_channels, action_size,
                 min_action=None, max_action=None, bound_action=True,
                 normalize_input=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action
        self.normalize_input = normalize_input

        if self.bound_action:
            def action_filter(x):
                # The bounds are read from the instance on every call, so
                # they reflect the current min_action / max_action
                # attribute values.
                return bound_by_tanh(
                    x, self.min_action, self.max_action)
        else:
            action_filter = None

        # One entry per hidden layer, all of the same width.
        hidden_sizes = (n_hidden_channels,) * n_hidden_layers

        super().__init__(
            model=MLPBN(n_input_channels,
                        action_size,
                        hidden_sizes,
                        normalize_input=self.normalize_input,
                        nonlinearity=nonlinearity,
                        last_wscale=last_wscale,
                        ),
            action_filter=action_filter)


class FCLSTMDeterministicPolicy(ContinuousDeterministicPolicy):
    """Fully-connected deterministic policy with LSTM.

    The model is a fully-connected stack followed by the nonlinearity, an
    LSTM layer, and a final linear layer that outputs the action.

    Args:
        n_input_channels (int): Number of input channels.
        n_hidden_layers (int): Number of hidden layers.
        n_hidden_channels (int): Number of hidden channels.
        action_size (int): Size of actions.
        min_action (ndarray or None): Minimum action. Used only if bound_action
            is set to True.
        max_action (ndarray or None): Maximum action. Used only if bound_action
            is set to True.
        bound_action (bool): If set to True, actions are bounded to
            [min_action, max_action] by tanh.
        nonlinearity (callable): Nonlinearity between layers. It must accept a
            Variable as an argument and return a Variable with the same shape.
            Nonlinearities with learnable parameters such as PReLU are not
            supported. It is also applied to the output of the
            fully-connected stack before the LSTM.
        last_wscale (float): Scale of weight initialization of the last layer.
    """

    def __init__(self, n_input_channels, n_hidden_layers,
                 n_hidden_channels, action_size,
                 min_action=None, max_action=None, bound_action=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action

        if self.bound_action:
            def action_filter(x):
                # The bounds are read from the instance on every call, so
                # they reflect the current min_action / max_action
                # attribute values.
                return bound_by_tanh(
                    x, self.min_action, self.max_action)
        else:
            action_filter = None

        # One entry per hidden layer, all of the same width.
        hidden_sizes = (self.n_hidden_channels,) * self.n_hidden_layers

        model = chainer.Chain(
            # The fully-connected stack outputs n_hidden_channels units so
            # that it can feed the LSTM directly.
            fc=MLP(self.n_input_channels,
                   n_hidden_channels,
                   hidden_sizes,
                   nonlinearity=nonlinearity,
                   ),
            lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
            # Only this final layer uses last_wscale for its initial weights.
            out=L.Linear(n_hidden_channels, action_size,
                         initialW=LeCunNormal(last_wscale)),
        )

        def model_call(model, x):
            # fc -> nonlinearity -> lstm -> out
            h = nonlinearity(model.fc(x))
            h = model.lstm(h)
            return model.out(h)

        super().__init__(
            model=model,
            model_call=model_call,
            action_filter=action_filter)