Source code for chainerrl.links.stateless_recurrent

from cached_property import cached_property
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np


def split_one_step_batch_input(xs):
    """Split one-step batch input.

    Args:
        xs (chainer.Variable, ndarray or tuple): One-step batched input. It
            should be either:
                - a variable whose first axis is the batch axis.
                - a tuple of such variables.

    Returns:
        list: Either a list of variables or a list of tuples of variables.
            The length of the list is the batch size of the input.
    """
    if isinstance(xs, tuple):
        return list(zip(*[split_one_step_batch_input(x) for x in xs]))
    else:
        return list(F.split_axis(xs, len(xs), axis=0))
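
# Example (an illustrative sketch, not part of the original module):
# splitting a one-step batch of shape (batch_size, obs_size) yields
# batch_size chunks, each keeping a leading batch axis of size 1.
#
#     xs = np.arange(12, dtype=np.float32).reshape(3, 4)
#     chunks = split_one_step_batch_input(xs)
#     assert len(chunks) == 3
#     assert chunks[0].shape == (1, 4)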


class StatelessRecurrent(object):
    """Stateless recurrent link interface.

    This class defines the interface of a recurrent link ChainerRL can
    handle. In most cases, you can just use ChainerRL's existing containers
    like `chainerrl.links.StatelessRecurrentChainList`,
    `chainerrl.links.StatelessRecurrentSequential`, and
    `chainerrl.links.StatelessRecurrentBranched` to define a recurrent link.
    You can use Chainer's recurrent links such as L.NStepLSTM inside the
    containers.

    To write your own recurrent link, you need to implement the interface.
    """

    def n_step_forward(self, x, recurrent_state, output_mode):
        """Multi-step batch forward computation.

        This method sequentially applies layers as chainer.Sequential does.

        Args:
            x (list): Input sequences. Each sequence should be a variable
                whose first axis corresponds to time or a tuple of such
                variables.
            recurrent_state (object): Batched recurrent state. If set to
                None, it is initialized.
            output_mode (str): If set to 'concat', the output value is
                concatenated into a single large batch, which can be
                suitable for loss computation. If set to 'split', the output
                value is a list of output sequences.

        Returns:
            object: Output sequences. See the description of the
                `output_mode` argument.
            object: New batched recurrent state.
        """
        raise NotImplementedError

    def __call__(self, x, recurrent_state):
        """One-step batch forward computation.

        Args:
            x (chainer.Variable, ndarray, or tuple): One-step batched input.
            recurrent_state (object): Batched recurrent state.

        Returns:
            chainer.Variable, ndarray, or tuple: One-step batched output.
            object: New batched recurrent state.
        """
        assert isinstance(x, (chainer.Variable, self.xp.ndarray))
        return self.n_step_forward(
            split_one_step_batch_input(x),
            recurrent_state,
            output_mode='concat',
        )

    def mask_recurrent_state_at(self, recurrent_state, indices):
        """Return a recurrent state masked at given indices.

        This method can be used to initialize a recurrent state only for a
        certain sequence, not all the sequences.

        Args:
            recurrent_state (object): Batched recurrent state.
            indices (int or array-like of ints): Which recurrent state to
                mask.

        Returns:
            object: New batched recurrent state.
        """
        raise NotImplementedError

    def get_recurrent_state_at(
            self, recurrent_state, indices, unwrap_variable):
        """Get a recurrent state at given indices.

        This method can be used to save a recurrent state so that you can
        reuse it when you replay past sequences.

        Args:
            recurrent_state (object): Batched recurrent state.
            indices (int or array-like of ints): Which recurrent state to
                get.
            unwrap_variable (bool): If set to True, return data arrays
                instead of chainer.Variables.

        Returns:
            object: Recurrent state of given indices.
        """
        raise NotImplementedError

    def concatenate_recurrent_states(self, split_recurrent_states):
        """Concatenate recurrent states into a batch.

        This method can be used to make a batched recurrent state from
        separate recurrent states obtained via the `get_recurrent_state_at`
        method.

        Args:
            split_recurrent_states (object): Recurrent states to concatenate.

        Returns:
            object: Batched recurrent_state.
        """
        raise NotImplementedError
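

# Usage sketch (illustrative; assumes `link` is some implementation of the
# interface above, e.g. one of ChainerRL's containers wrapping L.NStepLSTM;
# the input variable names are made up for this example):
#
#     # One-step rollout: pass None to start from the initial recurrent
#     # state, then feed the returned state back in at the next step.
#     out, rs = link(batch_obs_t0, None)
#     out, rs = link(batch_obs_t1, rs)
#
#     # Save the state of the 0th sequence for replay, then rebuild a
#     # batched state from per-sequence states (None means initial state).
#     rs0 = link.get_recurrent_state_at(rs, 0, unwrap_variable=True)
#     batched_rs = link.concatenate_recurrent_states([rs0, None])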


def is_recurrent_link(layer):
    """Return True iff a given layer is recurrent and supported by ChainerRL.

    Args:
        layer (callable): Any callable object.

    Returns:
        bool: True iff a given layer is recurrent and supported by ChainerRL.
    """
    return isinstance(layer, (
        L.NStepLSTM,
        L.NStepGRU,
        L.NStepRNNReLU,
        L.NStepRNNTanh,
        StatelessRecurrent,
    ))


def split_batched_sequences(xs, sections):
    """Split concatenated sequences.

    Args:
        xs (chainer.Variable, ndarray or tuple): Concatenated sequences.
        sections (array-like): Sections as indices indicating start positions
            of sequences.

    Returns:
        list: List of sequences.
    """
    if isinstance(xs, tuple):
        return list(zip(*[split_batched_sequences(x, sections) for x in xs]))
    else:
        return list(F.split_axis(xs, sections, axis=0))


def concatenate_sequences(sequences):
    """Concatenate sequences.

    Args:
        sequences (list): List of sequences. The following two cases are
            supported:
                - (a) Each sequence is a Variable or ndarray.
                - (b) Each sequence is a tuple of a Variable or ndarray.

    Returns:
        chainer.Variable, ndarray or tuple: Concatenated sequences.
    """
    if isinstance(sequences[0], tuple):
        tuple_size = len(sequences[0])
        return tuple(
            F.concat([seq[i] for seq in sequences], axis=0)
            for i in range(tuple_size))
    else:
        return F.concat(sequences, axis=0)


def call_recurrent_link(link, sequences, recurrent_state, output_mode):
    """Call a recurrent link following the interface of `StatelessRecurrent`.

    Args:
        link (chainer.Link): Recurrent link.
        sequences, recurrent_state, output_mode: See the docstring of
            `StatelessRecurrent.n_step_forward`.

    Returns:
        object: Output sequences. See the docstring of
            `StatelessRecurrent.n_step_forward`.
        object: New batched recurrent state.
    """
    assert isinstance(link, chainer.Link)
    assert isinstance(sequences, list)
    if isinstance(link, L.NStepLSTM):
        if recurrent_state is None:
            h = None
            c = None
        else:
            h, c = recurrent_state
        h, c, sequences = link(h, c, sequences)
        if output_mode == 'concat':
            sequences = concatenate_sequences(sequences)
        return sequences, (h, c)
    if isinstance(link, (L.NStepGRU, L.NStepRNNReLU, L.NStepRNNTanh)):
        h = recurrent_state
        h, sequences = link(h, sequences)
        if output_mode == 'concat':
            sequences = concatenate_sequences(sequences)
        return sequences, h
    if isinstance(link, StatelessRecurrent):
        return link.n_step_forward(
            sequences, recurrent_state, output_mode=output_mode)
    else:
        raise ValueError('{} is not a recurrent link'.format(link))


def mask_recurrent_states_of_links_at(links, recurrent_states, indices):
    if recurrent_states is None:
        return None
    assert len(links) == len(recurrent_states)
    return [mask_recurrent_state_at(link, rs, indices)
            for link, rs in zip(links, recurrent_states)]


def get_recurrent_states_of_links_at(
        links, recurrent_states, indices, unwrap_variable):
    if recurrent_states is None:
        return [None] * len(links)
    assert len(links) == len(recurrent_states)
    return [get_recurrent_state_at(link, rs, indices, unwrap_variable)
            for link, rs in zip(links, recurrent_states)]


def concatenate_recurrent_states_of_links(links, split_recurrent_states):
    assert split_recurrent_states is not None
    # Replace None with a list of None
    split_recurrent_states = list(split_recurrent_states)
    for i, srs in enumerate(split_recurrent_states):
        if srs is None:
            split_recurrent_states[i] = [None] * len(links)
        else:
            assert len(srs) == len(links)
    # Transpose first two axes of (batch_size, n_recurrent_links, ...)
    transposed = list(zip(*split_recurrent_states))
    assert len(links) == len(transposed)
    return [concatenate_recurrent_states(link, srs)
            for link, srs in zip(links, transposed)]


def mask_recurrent_state_at(link, recurrent_state, indices):
    if recurrent_state is None:
        return None
    if isinstance(link, L.NStepLSTM):
        h, c = recurrent_state
        # shape: (n_layers, batch_size, out_size)
        assert h.ndim == 3
        assert c.ndim == 3
        mask = link.xp.ones_like(h.array)
        mask[:, indices] = 0
        c = c * mask
        h = h * mask
        return (h, c)
    if isinstance(link, (L.NStepGRU, L.NStepRNNReLU, L.NStepRNNTanh)):
        h = recurrent_state
        # shape: (n_layers, batch_size, out_size)
        assert h.ndim == 3
        mask = link.xp.ones_like(h.array)
        mask[:, indices] = 0
        h = h * mask
        return h
    if isinstance(link, StatelessRecurrent):
        return link.mask_recurrent_state_at(recurrent_state, indices)
    else:
        raise ValueError('{} is not a recurrent link'.format(link))


def get_recurrent_state_at(link, recurrent_state, indices, unwrap_variable):
    if recurrent_state is None:
        return None
    if isinstance(link, L.NStepLSTM):
        h, c = recurrent_state
        if unwrap_variable:
            h = h.array
            c = c.array
        # shape: (n_layers, batch_size, out_size)
        assert h.ndim == 3
        assert c.ndim == 3
        return (h[:, indices], c[:, indices])
    if isinstance(link, (L.NStepGRU, L.NStepRNNReLU, L.NStepRNNTanh)):
        h = recurrent_state
        if unwrap_variable:
            h = h.array
        # shape: (n_layers, batch_size, out_size)
        assert h.ndim == 3
        return h[:, indices]
    if isinstance(link, StatelessRecurrent):
        return link.get_recurrent_state_at(
            recurrent_state, indices, unwrap_variable)
    else:
        raise ValueError('{} is not a recurrent link'.format(link))


def concatenate_recurrent_states(link, split_recurrent_states):
    if isinstance(link, L.NStepLSTM):
        # shape: (n_layers, batch_size, out_size)
        n_layers = link.n_layers
        out_size = link.out_size
        xp = link.xp
        hs = []
        cs = []
        for srs in split_recurrent_states:
            if srs is None:
                h = xp.zeros((n_layers, 1, out_size), dtype=np.float32)
                c = xp.zeros((n_layers, 1, out_size), dtype=np.float32)
            else:
                h, c = srs
                if h.ndim == 2:
                    assert h.shape == (n_layers, out_size)
                    assert c.shape == (n_layers, out_size)
                    # add batch axis
                    h = h[:, None]
                    c = c[:, None]
            hs.append(h)
            cs.append(c)
        h = F.concat(hs, axis=1)
        c = F.concat(cs, axis=1)
        return (h, c)
    if isinstance(link, (L.NStepGRU, L.NStepRNNReLU, L.NStepRNNTanh)):
        n_layers = link.n_layers
        out_size = link.out_size
        xp = link.xp
        hs = []
        for srs in split_recurrent_states:
            if srs is None:
                h = xp.zeros((n_layers, 1, out_size), dtype=np.float32)
            else:
                h = srs
                if h.ndim == 2:
                    assert h.shape == (n_layers, out_size)
                    # add batch axis
                    h = h[:, None]
            hs.append(h)
        h = F.concat(hs, axis=1)
        return h
    if isinstance(link, StatelessRecurrent):
        return link.concatenate_recurrent_states(split_recurrent_states)
    else:
        raise ValueError('{} is not a recurrent link'.format(link))
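

# Round-trip sketch (illustrative, not part of the original module): with
# sequences of lengths 2 and 3, `sections` lists the start positions of all
# sequences after the first, i.e. [2].
#
#     a = np.zeros((2, 4), dtype=np.float32)
#     b = np.ones((3, 4), dtype=np.float32)
#     concat = concatenate_sequences([a, b])  # shape: (5, 4)
#     a2, b2 = split_batched_sequences(concat, [2])
#     assert a2.shape == (2, 4)
#     assert b2.shape == (3, 4)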


class StatelessRecurrentChainList(StatelessRecurrent, chainer.ChainList):
    """ChainList that automatically handles recurrent states.

    This link extends chainer.ChainList by adding the `recurrent_children`
    property that returns all the recurrent child links and implementing
    recurrent state manipulation methods required for the StatelessRecurrent
    interface.

    A recurrent state for this link is defined as a tuple of recurrent states
    of child recurrent links.
    """

    @cached_property
    def recurrent_children(self):
        """Return recurrent child links.

        Returns:
            tuple: Tuple of `chainer.Link`s that are recurrent.
        """
        return tuple(child for child in self.children()
                     if is_recurrent_link(child))

    def mask_recurrent_state_at(self, recurrent_states, indices):
        return mask_recurrent_states_of_links_at(
            self.recurrent_children, recurrent_states, indices)

    def get_recurrent_state_at(
            self, recurrent_states, indices, unwrap_variable):
        return get_recurrent_states_of_links_at(
            self.recurrent_children, recurrent_states, indices,
            unwrap_variable)

    def concatenate_recurrent_states(self, split_recurrent_states):
        return concatenate_recurrent_states_of_links(
            self.recurrent_children, split_recurrent_states)
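

# Construction sketch (illustrative; assumes direct construction with child
# links, which chainer.ChainList supports): only recurrent children appear
# in `recurrent_children`.
#
#     chain = StatelessRecurrentChainList(
#         L.Linear(4, 8),          # non-recurrent, skipped by the property
#         L.NStepGRU(1, 8, 8, 0),  # recurrent child
#     )
#     assert len(chain.recurrent_children) == 1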