Source code for chainerrl.q_functions.state_action_q_functions

import chainer
from chainer import functions as F
from chainer.initializers import LeCunNormal
from chainer import links as L

from chainerrl.links.mlp import MLP
from chainerrl.links.mlp_bn import MLPBN
from chainerrl.q_function import StateActionQFunction
from chainerrl.recurrent import RecurrentChainMixin


class SingleModelStateActionQFunction(
        chainer.Chain, StateActionQFunction, RecurrentChainMixin):
    """(s,a)-input Q-function that wraps a single model.

    Args:
        model (chainer.Link): Link that is callable with a batch of states
            and a batch of actions and outputs Q-values.
    """

    def __init__(self, model):
        super().__init__(model=model)

    def __call__(self, x, a):
        h = self.model(x, a)
        return h
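
# A minimal usage sketch, assuming 3-dim observations and 1-dim actions.
# MyModel and all sizes below are illustrative, not part of this module:
#
#   class MyModel(chainer.Chain):
#       def __init__(self):
#           super().__init__()
#           with self.init_scope():
#               self.l = L.Linear(3 + 1, 1)
#
#       def __call__(self, x, a):
#           # Concatenate state and action, then map to a scalar Q-value
#           return self.l(F.concat((x, a), axis=1))
#
#   q_func = SingleModelStateActionQFunction(MyModel())
#   # q_func(x, a) returns whatever MyModel returns: here, shape (batch, 1).
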
class FCSAQFunction(MLP, StateActionQFunction):
    """Fully-connected (s,a)-input Q-function.

    Args:
        n_dim_obs (int): Number of dimensions of observation space.
        n_dim_action (int): Number of dimensions of action space.
        n_hidden_channels (int): Number of hidden channels.
        n_hidden_layers (int): Number of hidden layers.
        nonlinearity (callable): Nonlinearity between layers. It must accept
            a Variable as an argument and return a Variable with the same
            shape. Nonlinearities with learnable parameters such as PReLU
            are not supported. It is not used if n_hidden_layers is zero.
        last_wscale (float): Scale of weight initialization of the last
            layer.
    """

    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.nonlinearity = nonlinearity
        super().__init__(
            in_size=self.n_input_channels,
            out_size=1,
            hidden_sizes=[self.n_hidden_channels] * self.n_hidden_layers,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale,
        )

    def __call__(self, state, action):
        h = F.concat((state, action), axis=1)
        return super().__call__(h)
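
# A minimal usage sketch, assuming a continuous-control task with 3-dim
# observations and 1-dim actions (all sizes here are illustrative):
#
#   import numpy as np
#   q_func = FCSAQFunction(n_dim_obs=3, n_dim_action=1,
#                          n_hidden_channels=64, n_hidden_layers=2)
#   s = np.zeros((4, 3), dtype=np.float32)  # batch of 4 observations
#   a = np.zeros((4, 1), dtype=np.float32)  # batch of 4 actions
#   q = q_func(s, a)  # chainer.Variable of shape (4, 1)
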
class FCLSTMSAQFunction(chainer.Chain, StateActionQFunction,
                        RecurrentChainMixin):
    """Fully-connected + LSTM (s,a)-input Q-function.

    Args:
        n_dim_obs (int): Number of dimensions of observation space.
        n_dim_action (int): Number of dimensions of action space.
        n_hidden_channels (int): Number of hidden channels.
        n_hidden_layers (int): Number of hidden layers.
        nonlinearity (callable): Nonlinearity between layers. It must accept
            a Variable as an argument and return a Variable with the same
            shape. Nonlinearities with learnable parameters such as PReLU
            are not supported.
        last_wscale (float): Scale of weight initialization of the last
            layer.
    """

    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.nonlinearity = nonlinearity
        super().__init__()
        with self.init_scope():
            self.fc = MLP(self.n_input_channels, n_hidden_channels,
                          [self.n_hidden_channels] * self.n_hidden_layers,
                          nonlinearity=nonlinearity)
            self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
            self.out = L.Linear(n_hidden_channels, 1,
                                initialW=LeCunNormal(last_wscale))

    def __call__(self, x, a):
        h = F.concat((x, a), axis=1)
        h = self.nonlinearity(self.fc(h))
        h = self.lstm(h)
        return self.out(h)
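
# A sketch of recurrent usage, assuming an episode is fed step by step
# with batch size 1 (all sizes illustrative). The LSTM carries hidden
# state across calls, so it should be reset between episodes:
#
#   import numpy as np
#   q_func = FCLSTMSAQFunction(n_dim_obs=3, n_dim_action=1,
#                              n_hidden_channels=32, n_hidden_layers=1)
#   for t in range(episode_len):
#       s_t = np.zeros((1, 3), dtype=np.float32)
#       a_t = np.zeros((1, 1), dtype=np.float32)
#       q_t = q_func(s_t, a_t)  # depends on earlier steps via the LSTM
#   q_func.reset_state()  # via RecurrentChainMixin; clears the LSTM state
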
class FCBNSAQFunction(MLPBN, StateActionQFunction):
    """Fully-connected + BN (s,a)-input Q-function.

    Args:
        n_dim_obs (int): Number of dimensions of observation space.
        n_dim_action (int): Number of dimensions of action space.
        n_hidden_channels (int): Number of hidden channels.
        n_hidden_layers (int): Number of hidden layers.
        normalize_input (bool): If set to True, Batch Normalization is
            applied to both observations and actions.
        nonlinearity (callable): Nonlinearity between layers. It must accept
            a Variable as an argument and return a Variable with the same
            shape. Nonlinearities with learnable parameters such as PReLU
            are not supported. It is not used if n_hidden_layers is zero.
        last_wscale (float): Scale of weight initialization of the last
            layer.
    """

    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, normalize_input=True, nonlinearity=F.relu,
                 last_wscale=1.):
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.normalize_input = normalize_input
        self.nonlinearity = nonlinearity
        super().__init__(
            in_size=self.n_input_channels,
            out_size=1,
            hidden_sizes=[self.n_hidden_channels] * self.n_hidden_layers,
            normalize_input=self.normalize_input,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale,
        )

    def __call__(self, state, action):
        h = F.concat((state, action), axis=1)
        return super().__call__(h)
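
# A minimal usage sketch (sizes illustrative). Because the network uses
# Batch Normalization, evaluation should run under Chainer's test-mode
# config so BN uses accumulated statistics rather than batch statistics:
#
#   import numpy as np
#   q_func = FCBNSAQFunction(n_dim_obs=3, n_dim_action=1,
#                            n_hidden_channels=64, n_hidden_layers=2,
#                            normalize_input=True)
#   s = np.zeros((4, 3), dtype=np.float32)
#   a = np.zeros((4, 1), dtype=np.float32)
#   q_train = q_func(s, a)  # train mode: BN uses batch statistics
#   with chainer.using_config('train', False):
#       q_eval = q_func(s, a)  # test mode: BN uses running averages
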
class FCBNLateActionSAQFunction(chainer.Chain, StateActionQFunction):
    """Fully-connected + BN (s,a)-input Q-function with late action input.

    Actions are not included until the second hidden layer and are not
    normalized. This architecture is used in the DDPG paper:
    http://arxiv.org/abs/1509.02971

    Args:
        n_dim_obs (int): Number of dimensions of observation space.
        n_dim_action (int): Number of dimensions of action space.
        n_hidden_channels (int): Number of hidden channels.
        n_hidden_layers (int): Number of hidden layers. It must be greater
            than or equal to 1.
        normalize_input (bool): If set to True, Batch Normalization is
            applied to observations.
        nonlinearity (callable): Nonlinearity between layers. It must accept
            a Variable as an argument and return a Variable with the same
            shape. Nonlinearities with learnable parameters such as PReLU
            are not supported.
        last_wscale (float): Scale of weight initialization of the last
            layer.
    """

    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, normalize_input=True, nonlinearity=F.relu,
                 last_wscale=1.):
        assert n_hidden_layers >= 1
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.normalize_input = normalize_input
        self.nonlinearity = nonlinearity
        super().__init__()
        with self.init_scope():
            # No need to pass nonlinearity to obs_mlp because it has no
            # hidden layers
            self.obs_mlp = MLPBN(in_size=n_dim_obs,
                                 out_size=n_hidden_channels,
                                 hidden_sizes=[],
                                 normalize_input=normalize_input,
                                 normalize_output=True)
            self.mlp = MLP(in_size=n_hidden_channels + n_dim_action,
                           out_size=1,
                           hidden_sizes=([self.n_hidden_channels] *
                                         (self.n_hidden_layers - 1)),
                           nonlinearity=nonlinearity,
                           last_wscale=last_wscale)
        self.output = self.mlp.output

    def __call__(self, state, action):
        h = self.nonlinearity(self.obs_mlp(state))
        h = F.concat((h, action), axis=1)
        return self.mlp(h)
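
# A minimal sketch of the late-action data flow (sizes illustrative):
# the observation is normalized and embedded to n_hidden_channels units
# first, and the action joins only at the second hidden layer, mirroring
# the DDPG critic:
#
#   import numpy as np
#   q_func = FCBNLateActionSAQFunction(n_dim_obs=3, n_dim_action=1,
#                                      n_hidden_channels=64,
#                                      n_hidden_layers=2)
#   s = np.zeros((4, 3), dtype=np.float32)
#   a = np.zeros((4, 1), dtype=np.float32)
#   q = q_func(s, a)  # obs -> BN embedding -> concat(a) -> MLP -> (4, 1)
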
class FCLateActionSAQFunction(chainer.Chain, StateActionQFunction):
    """Fully-connected (s,a)-input Q-function with late action input.

    Actions are not included until the second hidden layer and are not
    normalized. This architecture is used in the DDPG paper:
    http://arxiv.org/abs/1509.02971

    Args:
        n_dim_obs (int): Number of dimensions of observation space.
        n_dim_action (int): Number of dimensions of action space.
        n_hidden_channels (int): Number of hidden channels.
        n_hidden_layers (int): Number of hidden layers. It must be greater
            than or equal to 1.
        nonlinearity (callable): Nonlinearity between layers. It must accept
            a Variable as an argument and return a Variable with the same
            shape. Nonlinearities with learnable parameters such as PReLU
            are not supported.
        last_wscale (float): Scale of weight initialization of the last
            layer.
    """

    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
        assert n_hidden_layers >= 1
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.nonlinearity = nonlinearity
        super().__init__()
        with self.init_scope():
            # No need to pass nonlinearity to obs_mlp because it has no
            # hidden layers
            self.obs_mlp = MLP(in_size=n_dim_obs,
                               out_size=n_hidden_channels,
                               hidden_sizes=[])
            self.mlp = MLP(in_size=n_hidden_channels + n_dim_action,
                           out_size=1,
                           hidden_sizes=([self.n_hidden_channels] *
                                         (self.n_hidden_layers - 1)),
                           nonlinearity=nonlinearity,
                           last_wscale=last_wscale)
        self.output = self.mlp.output

    def __call__(self, state, action):
        h = self.nonlinearity(self.obs_mlp(state))
        h = F.concat((h, action), axis=1)
        return self.mlp(h)
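
# The BN-free variant follows the same late-action data flow; a minimal
# sketch (sizes illustrative):
#
#   import numpy as np
#   q_func = FCLateActionSAQFunction(n_dim_obs=3, n_dim_action=1,
#                                    n_hidden_channels=64,
#                                    n_hidden_layers=2)
#   s = np.zeros((4, 3), dtype=np.float32)
#   a = np.zeros((4, 1), dtype=np.float32)
#   q = q_func(s, a)  # obs -> Linear(64) -> concat(a) -> MLP -> (4, 1)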