From 4e8050d7d8209a3f0a73a4b8a02c32d193249834 Mon Sep 17 00:00:00 2001 From: Nicholas Leonard Date: Wed, 3 May 2017 18:41:54 -0400 Subject: [PATCH] update README.md --- AbstractRecurrent.lua | 2 +- README.md | 373 +++++++++++++----- doc/image/zeroMask.png | Bin 0 -> 44827 bytes examples/README.md | 34 +- examples/simple-bisequencer-network.lua | 6 +- ...ment_rnn.lua => twitter-sentiment-rnn.lua} | 0 6 files changed, 294 insertions(+), 121 deletions(-) create mode 100644 doc/image/zeroMask.png rename examples/{twitter_sentiment_rnn.lua => twitter-sentiment-rnn.lua} (100%) diff --git a/AbstractRecurrent.lua b/AbstractRecurrent.lua index cb87ff4..d379661 100644 --- a/AbstractRecurrent.lua +++ b/AbstractRecurrent.lua @@ -282,7 +282,7 @@ function AbstractRecurrent:getGradHiddenState(step, input) end -- set stored grad hidden state -function AbstractRecurrent:setGradHiddenState(step, hiddenState) +function AbstractRecurrent:setGradHiddenState(step, gradHiddenState) error"Not Implemented" end diff --git a/README.md b/README.md index 971e584..6a71561 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,19 @@ Modules that `forward` entire sequences through a decorated `AbstractRecurrent` * [RecurrentAttention](#rnn.RecurrentAttention) : a generalized attention model for [REINFORCE modules](https://github.com/nicholas-leonard/dpnn#nn.Reinforce); Miscellaneous modules and criterions : - * [MaskZero](#rnn.MaskZero) : zeroes the `output` and `gradOutput` rows of the decorated module for commensurate `input` rows which are tensors of zeros; + * [MaskZero](#rnn.MaskZero) : zeroes the `output` and `gradOutput` rows of the decorated module for commensurate + * `input` rows which are tensors of zeros (version 1); + * `zeroMask` elements which are 1 (version 2); * [LookupTableMaskZero](#rnn.LookupTableMaskZero) : extends `nn.LookupTable` to support zero indexes for padding. 
Zero indexes are forwarded as tensors of zeros; - * [MaskZeroCriterion](#rnn.MaskZeroCriterion) : zeros the `gradInput` and `loss` rows of the decorated criterion for commensurate `zeroMask` elements which are 1; + * [MaskZeroCriterion](#rnn.MaskZeroCriterion) : zeros the `gradInput` and `loss` rows of the decorated criterion for commensurate + * `input` rows which are tensors of zeros (version 1); + * `zeroMask` elements which are 1 (version 2); * [SeqReverseSequence](#rnn.SeqReverseSequence) : reverses an input sequence on a specific dimension; * [VariableLength](#rnn.VariableLength): decorates a `Sequencer` to accept and produce a table of variable length inputs and outputs; Criterions used for handling sequential inputs and targets : - * [SequencerCriterion](#rnn.SequencerCriterion) : sequentially applies the same criterion to a sequence of inputs and targets (Tensor or Table). + * [AbstractSequencerCriterion](#rnn.AbstractSequencerCriterion) : abstract class for criterions that handle sequences (tensor or table); + * [SequencerCriterion](#rnn.SequencerCriterion) : sequentially applies the same criterion to a sequence of inputs and targets; * [RepeaterCriterion](#rnn.RepeaterCriterion) : repeatedly applies the same criterion with the same target on a sequence. @@ -95,30 +100,7 @@ Additional differentiable criterions ## Examples ## -The following are example training scripts using this package : - - * [RNN/LSTM/GRU](examples/recurrent-language-model.lua) for Penn Tree Bank dataset; - * [Noise Contrastive Estimate](examples/noise-contrastive-estimate.lua) for training multi-layer [SeqLSTM](#rnn.SeqLSTM) language models on the [Google Billion Words dataset](https://github.com/Element-Research/dataload#dl.loadGBW). The example uses [MaskZero](#rnn.MaskZero) to train independent variable length sequences using the [NCEModule](https://github.com/Element-Research/dpnn#nn.NCEModule) and [NCECriterion](https://github.com/Element-Research/dpnn#nn.NCECriterion). 
This script is our fastest yet boasting speeds of 20,000 words/second (on NVIDIA Titan X) with a 2-layer LSTM having 250 hidden units, a batchsize of 128 and sequence length of a 100. Note that you will need to have [Torch installed with Lua instead of LuaJIT](http://torch.ch/docs/getting-started.html#_); - * [Recurrent Model for Visual Attention](examples/recurrent-visual-attention.lua) for the MNIST dataset; - * [Encoder-Decoder LSTM](examples/encoder-decoder-coupling.lua) shows you how to couple encoder and decoder `LSTMs` for sequence-to-sequence networks; - * [Simple Recurrent Network](examples/simple-recurrent-network.lua) shows a simple example for building and training a simple recurrent neural network; - * [Simple Sequencer Network](examples/simple-sequencer-network.lua) is a version of the above script that uses the Sequencer to decorate the `rnn` instead; - * [Sequence to One](examples/sequence-to-one.lua) demonstrates how to do many to one sequence learning as is the case for sentiment analysis; - * [Multivariate Time Series](examples/recurrent-time-series.lua) demonstrates how train a simple RNN to do multi-variate time-series predication. - -### External Resources - - * [rnn-benchmarks](https://github.com/glample/rnn-benchmarks) : benchmarks comparing Torch (using this library), Theano and TensorFlow. - * [Harvard Jupyter Notebook Tutorial](http://nbviewer.jupyter.org/github/CS287/Lectures/blob/gh-pages/notebooks/ElementRNNTutorial.ipynb) : an in-depth tutorial for how to use the Element-Research rnn package by Harvard University; - * [dpnn](https://github.com/Element-Research/dpnn) : this is a dependency of the __rnn__ package. 
It contains useful nn extensions, modules and criterions; - * [dataload](https://github.com/Element-Research/dataload) : a collection of torch dataset loaders; - * [RNN/LSTM/BRNN/BLSTM training script ](https://github.com/nicholas-leonard/dp/blob/master/examples/recurrentlanguagemodel.lua) for Penn Tree Bank or Google Billion Words datasets; - * A brief (1 hours) overview of Torch7, which includes some details about the __rnn__ packages (at the end), is available via this [NVIDIA GTC Webinar video](http://on-demand.gputechconf.com/gtc/2015/webinar/torch7-applied-deep-learning-for-vision-natural-language.mp4). In any case, this presentation gives a nice overview of Logistic Regression, Multi-Layer Perceptrons, Convolutional Neural Networks and Recurrent Neural Networks using Torch7; - * [Sequence to Sequence mapping using encoder-decoder RNNs](https://github.com/rahul-iisc/seq2seq-mapping) : a complete training example using synthetic data. - * [ConvLSTM](https://github.com/viorik/ConvLSTM) is a repository for training a [Spatio-temporal video autoencoder with differentiable memory](http://arxiv.org/abs/1511.06309). - * An [time series example](https://github.com/rracinskij/rnntest01/blob/master/rnntest01.lua) for univariate timeseries prediction. - * [Sagar Waghmare](https://github.com/sagarwaghmare69) wrote a nice [tutorial](tutorials/ladder.md) on how to use rnn with nngraph to reproduce the [Lateral Connections in Denoising Autoencoders Support Supervised Learning](http://arxiv.org/pdf/1504.08215.pdf). - +A complete list of examples is available in the [examples directory](examples/README.md) ## Citation ## @@ -127,7 +109,7 @@ If you use __rnn__ in your work, we'd really appreciate it if you could cite the Léonard, Nicholas, Sagar Waghmare, Yang Wang, and Jin-Hwa Kim. [rnn: Recurrent Library for Torch.](http://arxiv.org/abs/1511.07889) arXiv preprint arXiv:1511.07889 (2015). 
Any significant contributor to the library will also get added as an author to the paper. -A [significant contributor](https://github.com/Element-Research/rnn/graphs/contributors) +A [significant contributor](https://github.com/torch/rnn/graphs/contributors) is anyone who added at least 300 lines of code to the library. ## Troubleshooting ## @@ -136,8 +118,8 @@ Most issues can be resolved by updating the various dependencies: ```bash luarocks install torch luarocks install nn -luarocks install dpnn luarocks install torchx +luarocks install dataload ``` If you are using CUDA : @@ -156,24 +138,40 @@ If that doesn't fix it, open and issue on github. ## AbstractRecurrent ## -An abstract class inherited by [Recurrent](#rnn.Recurrent), [RecLSTM](#rnn.RecLSTM) and [GRU](#rnn.GRU). +An abstract class inherited by [Recurrence](#rnn.Recurrence), [RecLSTM](#rnn.RecLSTM) and [GRU](#rnn.GRU). The constructor takes a single argument : ```lua -rnn = nn.AbstractRecurrent([rho]) -``` -Argument `rho` is the maximum number of steps to backpropagate through time (BPTT). -Sub-classes can set this to a large number like 99999 (the default) if they want to backpropagate through -the entire sequence whatever its length. Setting lower values of rho are -useful when long sequences are forward propagated, but we only whish to -backpropagate through the last `rho` steps, which means that the remainder -of the sequence doesn't need to be stored (so no additional cost). - -### [recurrentModule] getStepModule(step) ### +rnn = nn.AbstractRecurrent(stepmodule) +``` +The `stepmodule` argument is an `nn.Module` instance that is [cloned with shared parameters](#nn.Module.sharedClone) at each time-step. +Sub-classes can call the [getStepModule(step)](#rnn.AbstractRecurrent.getStepModule) to automatically clone the `stepmodule` +and share its parameters for each time-`step`. +Each call to `forward/updateOutput` calls `self:getStepModule(self.step)` and increments the `self.step` attribute. 
+That is, each `forward` call to an `AbstractRecurrent` instance memorizes a new `step` by memorizing the previous `stepmodule` clones. +Although they share parameters and their gradients, each `stepmodule` clone has its own `output` and `gradInput` states. + +A good example of a `stepmodule` is the [StepLSTM](#rnn.StepLSTM) used internally by the `RecLSTM`, an `AbstractRecurrent` instance. +The `StepLSTM` implements a single time-step for an LSTM. +The `RecLSTM` calls `getStepModule(step)` to clone the `StepLSTM` for each time-step. +The `RecLSTM` handles the feeding back of previous `StepLSTM.output` states and current `input` state into the `StepLSTM`. + +Many libraries implement RNNs as modules that forward entire sequences. +This library also supports this use case by wrapping `AbstractRecurrent` modules into [Sequencer](#rnn.Sequencer) modules +or more directly via the stand-alone [SeqLSTM](#rnn.SeqLSTM) and [SeqGRU](#rnn.SeqGRU) modules. +The `rnn` library also provides the `AbstractRecurrent` interface to support real-time RNNs. +These are RNNs for which the entire `input` sequence is not known in advance. +Typically, this is because `input[t+1]` is dependent on `output[t] = RNN(input[t])`. +The `AbstractRecurrent` interface makes it easy to build these real-time RNNs. +A good example is the [RecurrentAttention](#rnn.RecurrentAttention) module which implements an attention model using real-time RNNs. + + +### [stepmodule] getStepModule(step) ### Returns a module for time-step `step`. This is used internally by sub-classes -to obtain copies of the internal `recurrentModule`. These copies share +to obtain copies of the internal `stepmodule`. These copies share `parameters` and `gradParameters` but each have their own `output`, `gradInput` and any other intermediate states. + ### setOutputStep(step) ### This is a method reserved for internal use by [Recursor](#rnn.Recursor) when doing backward propagation. 
It sets the object's `output` attribute @@ -181,25 +179,86 @@ to point to the output at time-step `step`. This method was introduced to solve a very annoying bug. -### maskZero(nInputDim) ### -Decorates the internal `recurrentModule` with [MaskZero](#rnn.MaskZero). -The `output` Tensor (or table thereof) of the `recurrentModule` -will have each row (i.e. samples) zeroed when the commensurate row of the `input` -is a tensor of zeros. +### [self] maskZero(v1) ### + +Decorates the internal `stepmodule` with [MaskZero](#rnn.MaskZero). +The `stepmodule` is the module that is [cloned with shared parameters](#nn.Module.sharedClone) at each time-step. +The `output` and `gradOutput` Tensor (or table thereof) of the `stepmodule` +will have each row (that is, samples) zeroed where + * the commensurate row of the `input` is a tensor of zeros (version 1 with `v1=true`); or + * the commensurate element of the `zeroMask` tensor is 1 (version 2; the default). + +Version 2 (the default), requires that [`setZeroMask(zeroMask)`](#rnn.AbstractRecurrent.setZeroMask) +be called beforehand. The `zeroMask` must be a `seqlen x batchsize` ByteTensor or CudaByteTensor. + +![zeroMask](doc/image/zeroMask.png) +In the above figure, we can see an `input` and commensurate `zeroMask` of size `seqlen=4 x batchsize=3`. +The `input` could have additional dimensions like `seqlen x batchsize x inputsize`. +The dark blocks in the `input` separate different sequences in each sample/row. +The same elements in the `zeroMask` are set to 1, while the remainder are set to 0. +For version 1, the dark blocks in the `input` would have a norm of 0, by which a `zeroMask` is automatically inferred. +For version 2, the `zeroMask` is provided before calling `forward(input)`, +thereby alleviating the need to call `norm` at each zero-masked module. + +The zero-masking implemented by `maskZero()` and `setZeroMask()` makes it possible to pad sequences with different lengths in the same batch with zero vectors. 
+ +At a given time-step `t`, a sample `i` is masked when: + * the `input[i]` is a row of zeros (version 1) where `input` is a batched time-step; or + * the `zeroMask[{t,i}] = 1` (version 2). + +When a sample time-step is masked, the hidden state is effectively reset (that is, forgotten) for the next non-mask time-step. +In other words, it is possible to separate unrelated sequences with a masked element. -The `nInputDim` argument must specify the number of non-batch dims -in the first Tensor of the `input`. In the case of an `input` table, -the first Tensor is the first one encountered when doing a depth-first search. +The `maskZero()` method returns `self`. +The `maskZero()` method can be called on any `nn.Module`. +Zero-masking only supports batch mode. -Calling this method makes it possible to pad sequences with different lengths in the same batch with zero vectors. +See the [noise-contrastive-estimate.lua](examples/noise-contrastive-estimate.lua) script for an example implementation of version 2 zero-masking. +See the [simple-bisequencer-network-variable.lua](examples/simple-bisequencer-network-variable.lua) script for an example implementation of version 1 zero-masking. -When a sample time-step is masked (i.e. `input` is a row of zeros), then -the hidden state is effectively reset (i.e. forgotten) for the next non-mask time-step. -In other words, it is possible seperate unrelated sequences with a masked element. + +### setZeroMask(zeroMask) ## -### trimZero(nInputDim) ### -Decorates the internal `recurrentModule` with [TrimZero](#rnn.TrimZero). +Sets the `zeroMask` of the RNN. 
+ +For example, +```lua +seqlen, batchsize = 2, 4 +inputsize, outputsize = 3, 1 +-- an AbstractRecurrent instance encapsulated by a Sequencer +lstm = nn.Sequencer(nn.RecLSTM(inputsize, outputsize)) +lstm:maskZero() -- enable version 2 zero-masking +-- zero-mask the sequence +zeroMask = torch.ByteTensor(seqlen, batchsize):zero() +zeroMask[{1,3}] = 1 +zeroMask[{2,4}] = 1 +lstm:setZeroMask(zeroMask) +-- forward sequence +input = torch.randn(seqlen, batchsize, inputsize) +output = lstm:forward(input) +print(output) +(1,.,.) = + -0.1715 + 0.0212 + 0.0000 + 0.3301 + +(2,.,.) = + 0.1695 + -0.2507 + -0.1700 + 0.0000 +[torch.DoubleTensor of size 2x4x1] +``` +The `output` is indeed zeroed for the 3rd sample in the first time-step (`zeroMask[{1,3}] = 1`) +and for the fourth sample in the second time-step (`zeroMask[{2,4}] = 1`). +The `gradOutput` would also be zeroed in the same way. +The `setZeroMask()` method can be called on any `nn.Module`. + +When `zeroMask=false`, the zero-masking is disabled. + + ### [output] updateOutput(input) ### Forward propagates the input for the current step. The outputs or intermediate states of the previous steps are used recurrently. This is transparent to the @@ -207,7 +266,7 @@ caller as the previous outputs and intermediate states are memorized. This method also increments the `step` attribute by 1. -### updateGradInput(input, gradOutput) ### +### [gradInput] updateGradInput(input, gradOutput) ### Like `backward`, this method should be called in the reverse order of `forward` calls used to propagate a sequence. So for example : @@ -233,13 +292,13 @@ Like `updateGradInput`, but for accumulating gradients w.r.t. parameters. ### recycle(offset) ### This method goes hand in hand with `forget`. It is useful when the current time-step is greater than `rho`, at which point it starts recycling -the oldest `recurrentModule` `sharedClones`, +the oldest `stepmodule` `sharedClones`, such that they can be reused for storing the next step. 
This `offset` is used for modules like `nn.Recurrent` that use a different module for the first step. Default offset is 0. -### forget(offset) ### +### forget() ### This method brings back all states to the start of the sequence buffers, i.e. it forgets the current sequence. It also resets the `step` attribute to 1. It is highly recommended to call `forget` after each parameter update. @@ -249,23 +308,15 @@ the result of now changed parameters. It is also good practice to call `forget` at the start of each new sequence. -### maxBPTTstep(rho) ### +### maxBPTTstep(seqlen) ### This method sets the maximum number of time-steps for which to perform -backpropagation through time (BPTT). So say you set this to `rho = 3` time-steps, +backpropagation through time (BPTT). So say you set this to `seqlen = 3` time-steps, feed-forward for 4 steps, and then backpropgate, only the last 3 steps will be used for the backpropagation. If your AbstractRecurrent instance is wrapped -by a [Sequencer](#rnn.Sequencer), this will be handled auto-magically by the Sequencer. -Otherwise, setting this value to a large value (i.e. 9999999), is good for most, if not all, cases. - - -### backwardOnline() ### -This method was deprecated Jan 6, 2016. -Since then, by default, `AbstractRecurrent` instances use the -backwardOnline behaviour. -See [updateGradInput](#rnn.AbstractRecurrent.updateGradInput) for details. +by a [Sequencer](#rnn.Sequencer), this will be handled auto-magically by the `Sequencer`. ### training() ### -In training mode, the network remembers all previous `rho` (number of time-steps) +In training mode, the network remembers all previous `seqlen` (number of time-steps) states. This is necessary for BPTT. ### evaluate() ### @@ -274,6 +325,31 @@ only the previous step is remembered. This is very efficient memory-wise, such that evaluation can be performed using potentially infinite-length sequence. 
+ +### [hiddenState] getHiddenState(step, [input]) ### +Returns the stored hidden state. +For example, the hidden state `h[step]` would be returned where `h[step] = f(x[step], h[step-1])`. +The `input` is only required for `step=0` as it is used to initialize `h[0] = 0`. +See [encoder-decoder-coupling.lua](examples/encoder-decoder-coupling.lua) for an example. + + +### setHiddenState(step, hiddenState) +Set the hidden state of the RNN. +This is useful to implement encoder-decoder coupling to form sequence to sequence networks. +See [encoder-decoder-coupling.lua](examples/encoder-decoder-coupling.lua) for an example. + + +### getGradHiddenState(step, [input]) +Return stored gradient of the hidden state: `grad(h[t])` +The `input` is used to initialize the last step of the RNN with zeros. +See [encoder-decoder-coupling.lua](examples/encoder-decoder-coupling.lua) for an example. + + +### setGradHiddenState(step, gradHiddenState) +Set the stored grad hidden state for a specific time-`step`. +This is useful to implement encoder-decoder coupling to form sequence to sequence networks. +See [encoder-decoder-coupling.lua](examples/encoder-decoder-coupling.lua) for an example. + ### Decorate it with a Sequencer ### @@ -478,7 +554,6 @@ The `nn.GRU(inputSize, outputSize [,rho [,p [, mono]]])` constructor takes 3 arg * `outputSize` : a number specifying the size of the output; * `rho` : the maximum amount of backpropagation steps to take back in time. Limits the number of previous steps kept in memory. Defaults to 9999; * `p` : dropout probability for inner connections of GRUs. - * `mono` : Monotonic sample for dropouts inside GRUs. Only needed in a `TrimZero` + `BGRU`(p>0) situation. 
![GRU](http://d3kbpzbmcynnmx.cloudfront.net/wp-content/uploads/2015/10/Screen-Shot-2015-10-23-at-10.36.51-AM.png) @@ -489,7 +564,7 @@ r[t] = σ(W[x->r]x[t] + W[s->r]s[t−1] + b[1->r]) (2) h[t] = tanh(W[x->h]x[t] + W[hr->c](s[t−1]r[t]) + b[1->h]) (3) s[t] = (1-z[t])h[t] + z[t]s[t-1] (4) ``` -where `W[s->q]` is the weight matrix from `s` to `q`, `t` indexes the time-step, `b[1->q]` are the biases leading into `q`, `σ()` is `Sigmoid`, `x[t]` is the input and `s[t]` is the output of the module (eq. 4). Note that unlike the [LSTM](#rnn.LSTM), the GRU has no cells. +where `W[s->q]` is the weight matrix from `s` to `q`, `t` indexes the time-step, `b[1->q]` are the biases leading into `q`, `σ()` is `Sigmoid`, `x[t]` is the input and `s[t]` is the output of the module (eq. 4). Note that unlike the [RecLSTM](#rnn.RecLSTM), the GRU has no cells. The GRU was benchmark on `PennTreeBank` dataset using [recurrent-language-model.lua](examples/recurrent-language-model.lua) script. It slightly outperfomed [FastLSTM](https://github.com/torch/rnn/blob/master/deprecated/README.md#rnn.FastLSTM) (deprecated), however, since LSTMs have more parameters than GRUs, @@ -666,10 +741,10 @@ training script for an example of its use. A extremely general container for implementing pretty much any type of recurrence. ```lua -rnn = nn.Recurrence(recurrentModule, outputSize, nInputDim, [rho]) +rnn = nn.Recurrence(stepmodule, outputSize, nInputDim, [rho]) ``` -`Recurrence` manages a single `recurrentModule`, which should +`Recurrence` manages a single `stepmodule`, which should output a Tensor or table : `output(t)` given an input table : `{input(t), output(t-1)}`. Using a mix of `Recursor` (say, via `Sequencer`) with `Recurrence`, one can implement @@ -760,6 +835,32 @@ local rnn = nn.Sequential() This abstract class implements a light interface shared by subclasses like : `Sequencer`, `Repeater`, `RecurrentAttention`, `BiSequencer` and so on. 
+ +### remember([mode]) ### +When `mode='neither'` (the default behavior of the class), the Sequencer will additionally call [forget](#nn.AbstractRecurrent.forget) before each call to `forward`. +When `mode='both'` (the default when calling this function), the Sequencer will never call [forget](#nn.AbstractRecurrent.forget). +In which case, it is up to the user to call `forget` between independent sequences. +This behavior is only applicable to decorated AbstractRecurrent `modules`. +Accepted values for argument `mode` are as follows : + + * 'eval' only affects evaluation (recommended for RNNs) + * 'train' only affects training + * 'neither' affects neither training nor evaluation (default behavior of the class) + * 'both' affects both training and evaluation (recommended for LSTMs) + + +### [bool] hasMemory() + +Returns true if the instance has memory. +See [remember()](#rnn.AbstractSequencer.remember) for details. + + +### setZeroMask(zeroMask) + +Expects a `seqlen x batchsize` `zeroMask`. +The `zeroMask` is then passed to `seqlen` criterions by indexing `zeroMask[step]`. +When `zeroMask=false`, the zero-masking is disabled. + ## Sequencer ## @@ -770,11 +871,13 @@ to be applied from left to right, on each element of the input sequence. seq = nn.Sequencer(module) ``` -This Module is a kind of [decorator](http://en.wikipedia.org/wiki/Decorator_pattern) +The `Sequencer` is a kind of [decorator](http://en.wikipedia.org/wiki/Decorator_pattern) used to abstract away the intricacies of `AbstractRecurrent` modules. While an `AbstractRecurrent` instance requires that a sequence to be presented one input at a time, each with its own call to `forward` (and `backward`), the `Sequencer` forwards an `input` sequence (a table) into an `output` sequence (a table of the same length). -It also takes care of calling `forget` on AbstractRecurrent instances. +It also takes care of calling `forget` on `AbstractRecurrent` instances. 
+ +The `Sequencer` inherits [AbstractSequencer](#rnn.AbstractSequencer) ### Input/Output Format @@ -866,7 +969,7 @@ Accepted values for argument `mode` are as follows : * 'both' affects both training and evaluation (recommended for LSTMs) ### forget() ### -Calls the decorated AbstractRecurrent module's `forget` method. +Calls the decorated `AbstractRecurrent` module's `forget` method. ## SeqLSTM ## @@ -1129,53 +1232,85 @@ A complete implementation of Ref. A is available [here](examples/recurrent-visua ## MaskZero ## -This module zeroes the `output` rows of the decorated module -for commensurate `input` rows which are tensors of zeros. + +This module implements *zero-masking*. +Zero-masking implements the zeroing specific rows/samples of a module's `output` and `gradInput` states. +Zero-masking is used for efficiently processing variable length sequences. ```lua -mz = nn.MaskZero(module, nInputDim) +mz = nn.MaskZero(module, [v1, maskinput, maskoutput]) ``` -The `output` Tensor (or table thereof) of the decorated `module` -will have each row (samples) zeroed when the commensurate row of the `input` -is a tensor of zeros. +This module zeroes the `output` and `gradOutput` rows of the decorated `module` where + * the commensurate row of the `input` is a tensor of zeros (version 1 with `v1=true`); or + * the commensurate element of the `zeroMask` tensor is 1 (version 2 with `v1=false`, the default). -The `nInputDim` argument must specify the number of non-batch dims -in the first Tensor of the `input`. In the case of an `input` table, -the first Tensor is the first one encountered when doing a depth-first search. +Version 2 (the default), requires that [`setZeroMask(zeroMask)`](#rnn.MaskZero.setZeroMask) +be called beforehand. The `zeroMask` must be a `torch.ByteTensor` or `torch.CudaByteTensor` of size `batchsize`. -This decorator makes it possible to pad sequences with different lengths in the same batch with zero vectors. 
+At a given time-step `t`, a sample `i` is masked when: + * the `input[i]` is a row of zeros (version 1) where `input` is a batched time-step; or + * the `zeroMask[{t,i}] = 1` (version 2). + +When a sample time-step is masked, the hidden state is effectively reset (that is, forgotten) for the next non-mask time-step. +In other words, it is possible to separate unrelated sequences with a masked element. + +When `maskoutput=true` (the default), `output` and `gradOutput` are zero-masked. +When `maskinput=true` (not the default), `input` and `gradInput` are zero-masked. -Caveat: `MaskZero` not guarantee that the `output` and `gradInput` tensors of the internal modules -of the decorated `module` will be zeroed as well when the `input` is zero as well. -`MaskZero` only affects the immediate `gradInput` and `output` of the module that it encapsulates. +Zero-masking only supports batch mode. + +Caveat: `MaskZero` does not guarantee that the `output` and `gradOutput` tensors of the internal modules +of the decorated `module` will be zeroed. +`MaskZero` only affects the immediate `gradOutput` and `output` of the module that it encapsulates. However, for most modules, the gradient update for that time-step will be zero because backpropagating a gradient of zeros will typically yield zeros all the way to the input. -In this respect, modules to avoid in encapsulating inside a `MaskZero` are `AbsractRecurrent` +In this respect, modules that shouldn't be encapsulated inside a `MaskZero` are `AbstractRecurrent` instances as the flow of gradients between different time-steps internally. Instead, call the [AbstractRecurrent.maskZero](#rnn.AbstractRecurrent.maskZero) method -to encapsulate the internal `recurrentModule`. - - -## TrimZero ## - -WARNING : only use this module if your input contains lots of zeros. -In almost all cases, [`MaskZero`](#rnn.MaskZero) will be faster, especially with CUDA. +to encapsulate the internal `stepmodule`. -Ref. 
A : [TrimZero: A Torch Recurrent Module for Efficient Natural Language Processing](https://bi.snu.ac.kr/Publications/Conferences/Domestic/KIIS2016S_JHKim.pdf) +See the [noise-contrastive-estimate.lua](examples/noise-contrastive-estimate.lua) script for an example implementation of version 2 zero-masking. +See the [simple-bisequencer-network-variable.lua](examples/simple-bisequencer-network-variable.lua) script for an example implementation of version 1 zero-masking. -The usage is the same with `MaskZero`. + +### setZeroMask(zeroMask) ## +Set the `zeroMask` of the `MaskZero` module (required for version 2 forwards). +For example, ```lua -mz = nn.TrimZero(module, nInputDim) +batchsize = 3 +inputsize, outputsize = 2, 1 +-- an nn.Linear module decorated with MaskZero (version 2) +module = nn.MaskZero(nn.Linear(inputsize, outputsize)) +-- zero-mask the second sample/row +zeroMask = torch.ByteTensor(batchsize):zero() +zeroMask[2] = 1 +module:setZeroMask(zeroMask) +-- forward +input = torch.randn(batchsize, inputsize) +output = module:forward(input) +print(output) + 0.6597 + 0.0000 + 0.8170 +[torch.DoubleTensor of size 3x1] +``` +The `output` is indeed zeroed for the second sample (`zeroMask[2] = 1`). +The `gradInput` would also be zeroed in the same way because the `gradOutput` would be zeroed: +```lua +gradOutput = torch.randn(batchsize, outputsize) +gradInput = module:backward(input, gradOutput) +print(gradInput) + 0.8187 0.0534 + 0.0000 0.0000 + 0.1742 0.0114 +[torch.DoubleTensor of size 3x2] ``` -The only difference from `MaskZero` is that it reduces computational costs by varying a batch size, if any, for the case that varying lengths are provided in the input. -Notice that when the lengths are consistent, `MaskZero` will be faster, because `TrimZero` has an operational cost. - -In short, the result is the same with `MaskZero`'s, however, `TrimZero` is faster than `MaskZero` only when sentence lengths is costly vary. 
+For `Container` modules, a call to `setZeroMask()` is propagated to all component modules that expect a `zeroMask`. -In practice, e.g. language model, `TrimZero` is expected to be faster than `MaskZero` about 30%. (You can test with it using `test/test_trimzero.lua`.) +When `zeroMask=false`, the zero-masking is disabled. ## LookupTableMaskZero ## @@ -1189,6 +1324,8 @@ The `output` Tensor will have each row zeroed when the commensurate row of the ` This lookup table makes it possible to pad sequences with different lengths in the same batch with zero vectors. +Note that this module ignores version 2 zero-masking, and therefore expects inputs to be zeros where needed. + ## MaskZeroCriterion ## @@ -1285,6 +1422,28 @@ print(output) The module doesn't support CUDA. + +## AbstractSequencerCriterion ## + +```lua +asc = nn.AbstractSequencerCriterion(stepcriterion, [sizeAverage]) +``` + +Similar to the `stepmodule` passed to the [AbstractRecurrent](#rnn.AbstractRecurrent) constructor, +the `stepcriterion` is internally cloned for each time-step. +Unlike the `stepmodule` the `stepcriterion` never has any parameters to share. + + +### [criterion] getStepCriterion(step) + +Returns a `criterion` clone of the `stepcriterion` (stored in `self.clones[1]`) for a specific time-`step`. + + +### setZeroMask(zeroMask) + +Expects a `seqlen x batchsize` `zeroMask`. +The `zeroMask` is then passed to `seqlen` criterions by indexing `zeroMask[step]`. +When `zeroMask=false`, the zero-masking is disabled. ## SequencerCriterion ## @@ -1322,7 +1481,7 @@ which are repeatedly presented with the same target. ## Module ## The Module interface has been further extended with methods that facilitate -stochastic gradient descent like [updateGradParameters](#nn.Module.updageGradParameters) (i.e. 
momentum learning), +stochastic gradient descent like [updateGradParameters](#nn.Module.updageGradParameters) (for momentum learning), [weightDecay](#nn.Module.weightDecay), [maxParamNorm](#nn.Module.maxParamNorm) (for regularization), and so on. diff --git a/doc/image/zeroMask.png b/doc/image/zeroMask.png new file mode 100644 index 0000000000000000000000000000000000000000..7ef8d7014d5e86dc95cb2459c02e476454734966 GIT binary patch literal 44827 zcmeFad03L^+c&OJ(JD<%S(8dlQ%;#nV`jP0GVQ6A8kq`}sinE)f`~v)nKf26ZIo+e zNwS;o{XPBR?E&|7-`9C<=XqY| z=X0H0Iq0@~{+u7?7#JAL-@E76!v+SkVBp`eS>P82^(~Xb;J@iX+g!I97(7p%J9%OT z_}MV(@b2vfg=~{S@WTrKJ%?Ql4C2=q7@WUoV4wlNIzM1w5MyItFm&9&z~P>OfpJ7e z)gdSF#70zb`;*y9~#U|_Xs>d!DP7ZD3SYOwd$ZJu!*Qht4;_`_YR zV@q;={8Taj&6%Qk6)T}((_J^Ov^ePY#Aiv<-22-WQFfYtHl$ZgtAXyhvVXAO`0N1_n)FBQ=HwTM-5Z15g8l zZ#Tbn!nc|5KWRDGOU^;hMrx*4^s#B;H9PZ-NUG6>l#>}#U-~1u9)6#c&nBsQy!O{55)8ry#vsS0 z?K|w}+i`5#F^yqGcM<%F&(w2I3azEJ8fVO!(NcMXc`Tu+;EGUBCOS0>hEV}N} z_wN_hZux%1jf&yQMR%9t-?y8a{L6)yDj*S7M0Q7+uiv1#UF^R3UoLtx2JM-4t8Cs5 zkEE*tYtHL>GmnvgM$_oi>jK~_aTLSA%P7^4t@H(zjjxA`7ukBf9mosr%6Awk5*ayc zyi=5V!QqXk3ql7ok1tZ+ppYTcPiS$ZrC$3u00BBnzpZf_uH)C^UEHQ7;nd{ zewEp7n3TLeu=tm%{2$^dZq<^?3r4QFl2xbN?P}&V*P~9w8~>=jzmyPrGH=c@zUO== zFLRWtb6axWVbym<Ml0Wm_%J}w_1M=;kT^7A+bo=1B zGAEsqNFI7!pXE5vL)jYmw+x9<7YJ>O=} zx5f2M_xwMonl%U8k?2_OdY3maqSF`qRw&U*7NUUKFJ?t{aQ2SX%X~9ZC!vG!jtlQaVidT zzslrPMiHzYtPwu|l^vb3jQocSFQuvMY*8o7-hDNy&R%jW061GVr0abIB&Hf7dfJh| zkNho{533SEah|2Fr_HFloj0ptL96My6HiX<_z3I#W1aNQ9h>-e52dUKNz8}G!G~96 zC6vk+WnK6=0DkY^E&AXNM8xiO9(MIpVp3TjVbw><2j z)t&sb0E)m*)s7y59)S>E-=eA3b^j&RzglRQHLWm-Zlo+9uijAxNLBs^LE~%UbT#$F ztDgxJfscwWsUviW-V;As7a3I?e)6jO|aF#8mD?cQrHmQ5%N$WcP4YMnk`~SMhxrRpp45 zG3M`npRXAy3vRXI8sCxHUM-i|^jqVPeCQz$`5}R@(qnD8j)IiF2z>Yw=`t+C$%7_E zH!cy=<-A`yIJ!P*?iM|X)Tom#h%M{Pfbx8UmB;NdCJ1~xY*VrYvFyopU;tSXZ&Odx zBbFdg?U-cmag@1Fb+w&B*1LHw?_qWHt|kgo7l=oL-5`kzb=Rr-o|RRj=MDLGsY$O! 
zixcRqy0wJArqa6h1>`2!Q+iOWvUy9>Q+!p4(#~8}w>WAOE9&9i&0c7FBuSL7ZalF* z8J07iAJ645r6%zJRsuz_%^$a(WHdGk9a%vL233MFQ(LqRjx{>ePvrTO)T(&LsX&pr z70#cVFFLD>b@>pvF?poc0w~;aHb@Xk&+*8U= zF$mjMV&0`_d4zt$>!}G3N@)&CY3(j80(xWl)C69;7cDR>&66>+qUtKHqgiivH8tgg z#PQWVz01|FU7f1kTCPfIEoY);Vx?MJnFWAPW>g(?BQ^H_BAM)se>P#CmlE4m4-DEs zsTBd?MnKCIWn`F<5%+-h1-soxB-}{;oRt}qQDm>(s@}I5HHTlS^~W9>n_%oG=p_gf0yiUV6@SzFICM9G~z|7Eq&VIy zzT%_&ZQ~_Ns#+ZboQ>fv*rQ86W!*K1i z(+yibnT0J}Tr6q0`ZRl=&wvJV8O1qy|>dkftoeToXE|VOIdo*A>YfcJ$O?VW( zinqU-US{9WP)*sC+|8%=8?&ao@2VZE=iNDbc?3w@X?}v&Cu>B=D$-2ghcV1!ol1|J zuADWpk!}3?i4^&(0h6(ZC%#8`$r&Bgb`CJx9p_g5s6iEzknyolgKd61HKqt z^;zAg=-J6j&aq^^`*|xvUi=&o-eM*el&qy?t}^A`dk`Q;t)aiNg-8oKoW#!^<`0|1 z5_MTR#7`Up$0Pg_X;Mp861zk2E20{9Qs3fy2=kE~z8F#!{!DLj@=t-gfkb|7Eg-nU z%NXz{GADE#M*V>_!`L?ItL27m!sO?Bw@ghM1`=mSmqFc9dmI)QSQSp@?WBv><{m0k z#nPRePogq=Hqi%0N^?@#JC~5*ag{$06h$qp(yt6`=hc5cs#Yz&MghFE$^D%fXu%U& z?gZnDh#iEkkN2Lsa}NZC(zp+62XpJdSf~HQ-@5xr$Qu^P4!bI@$oq=gC}+? zIE4VmFK3^f7s86%A=3_HbEzjj&@Md}>lCdl{eay+iR)r@o#Q!nt~b>Dr5_de83}mJ z{p8ZB^Jc9gmLAJcH?VCvk($Kd(qmchm{?3bJHVtwLL%)2PEIlbFGcoHfod|r55G`` zQ@jGZ9!OM(m(Y9dTD^Jpe7k;|8-Jkkt_&7F*WoS?8r`w+ELwR<4lR7jz7-L>!BI@R zYmCN{1`N5$F-~z;Tc%&yU5~tA8-qORy`)P8*jm+Tlrc|oe$;j%S z#tHU|T-jqGtx9Rm-6@;otbySN;+1g6=t*wK3!HTDY;_&P_6Ye(23hrJZ`bNqcC#@ zRRM!>VIMJO2{eifXXZ$}>a7yb9|e%o^=DDjw?x~jU7k1FbNB&hKW)m;xDp%6^7VJ& z&Ft1*&pxD^+HwCmT5gZvak$FyQ&B;yoYAe+l*rFws2d!PE)-gYuPI?UU#;)L7)>d5 zIS17crq}(JLg-4DEN^Ay^#(G=A4WXXB=W_5b)7^RwC9eNY;+tm;^jCu9fK%w-Kw|^ zWU42YlO}-Z)T_G@RuUQu^+yg-&aV0{`Ou^% zLmf;DM1n)i%3RVo;qF}&e!!mG%iAD-#7tNsf}4@yo9(I_b;HPCnNBrLDcBxY^&oCwG+@ohzqVINzp&h(MY-*}oVK&Rg$#Yw(8v?s?14<@QdH9cteT=;l=3D@b2 zM)S#r;1~0IHbs5+>rxlRxEsG2666?J0e-vvCgZv{Wyi>!Osrz5idRuDkj2rhYki-9PcAKCjHuLv z6{p()H6+IT*=f|XO7X}py(cESF1)yZJzLCt_AU*d@u86SVq>{5afOKu6V3C ztDzRTMU#kE7G+?@skFu5TzU|oFNJSr-95;%8f7jW4tseBNTdqUO}^VirP?u^ zjZqmD$&$w{)o$(aRPPKob2rIe8%lH^9vNvptC9nY0Iv#Q(|9yV)O3D9i44W+`a7v;X!*!TV23&Y*t~_4{v}z zo50jRbs1qKV~nHZ=Z7nP20jHdV^{+L;3QEDcOK#`NZ4rHIuSNoYBOY?zlqeyn*dgI 
zE)_)=l$-8fH1bTeJ}}eFJh0u``j#A>$%-OYgvq5ZG# z&3{3_+O3UmBMUJ}(fZ1&Er0o{Ici8URU6PMD2QqO>{lpM>2=p;w1XQ5mF=Z^uCy%P zJ1eMLdg6nb>8`J~((gHv%IR02wKYKZfzUEudEl)oxvCiKne5~l_%e#XsPRWTz>O1# z$AyQ@stzNJ_vB7IB`(5O9^)5NuOCvW^6XQ%sxZCeqI8G5YJOPQZ7L%c%N4%D z&8|XnG-LOknY|lP2Xta@vBm=A{U{mC^1z~ysK~+{ceSIxOz6Bs?dQuzmW^h;jVx64 z-xS3B9-m;=buyG@6~sRubo*bd{4nT+Gxf?H5BP&?5JGe?OOd3ednkYCV&cks+J`DS zVlEx>o7X!!-Is9iFrtZg-n1@DLLzjHdNMlo^=ICI=71;S_C;M4Oekk4jd(77ysMwb zfnSQ$kW4hXsoIm_Qb)03$Go1JpLz`?)Lw)!0hpd(Go`EXO!JY+lFz==ns$d{$++B#)K>^OfboD*Cbuhl5TJ4cCljy1SfClE}shB7VN`19;9 zJ&%u`vJU{b{fzfQWTR+FNUmDk9dPl2T7{X(MsoPp(A1bx;zV31dV1WT%Pc(cj37_kj#w(@VoB{re=v{vSr(gJVA_rmC*o zpbHccND954a5@q}sT!~6sy!~ml$H_wekqody!3y@#J}iz)1D}g`M>L$f`V{?U`)L& z7I<~o{{7T&`=g~0!LnHQ_CQ zo-$Bc($)O&e}+>WV|mg|&8C5H*E={%?D79ilfEG<-+-rYxZF2f z?i+0R4b}YydjDVM1nlK7Bq3`sFMs@Ba4M|9E!j(v+tHI_YlE^Q{^g=~jz?<}UnezY zK6^dj!FK8Xmx~TJ#@w!g7}R(S{!b_PtvH3SnH%!*-}x+G=q>*(spY>JfD=Gs=dw#6 z!-RpRbRw0jxWJz(bY%{`>$o5_C>Fat7{A~SC$?a?Ya!r%2%7S5E87Xs?`5_iK zfR6Ezw_RU2As|D>kkM`(068ZfhMsDq{)~PUL zv5zJB#0Yw{ZgW9$q_N{mk;O%t#Pl%;#%%o* zmNuvI!&O4VyaA}nc`5>tVotlENpw8i$IL56NjV1)5kyLKvl4>2Zgae_nRee$TZ3x^ z0}vm2-dQ{*bY1tXqzyg=G1OLAXZAY3Xi$b#CK=+BTRK+*2Y1UoVZS2O5IP#MDK^gd z1X#%FnCgP&RlL104IAmeWVM)I&or_lKG!(8Bc1AXs zGXe$Nlr{VWWo%UuOj}ddz=+$TO;+%C7!f-@>f->rA*}YAv&`~jQnjC|tS~vd(d5!;Q56({X0n1@aHC-F7z9pJ>?lAx9Dwg+9!GI47jm1fbse`0i~(@C)uE0mS2xs2#z^1ehe zsjbysRwWx+Z^CJ77+=RvW_pr8(=$7qysT7Lb(Kq|r9qpN7SiIB&|;lx!jy}984_52 zFUEu>=5T)QuTvH|_&H0nQR(_>zf9N&uQXt)F*H^-Mi)=$7^Lk$aj3L87+*>v~#+S1K>7BU2}ON6{Yu zwEJwDs+@Y$WchCmedz2TYT3H@4e4BPF8`B&DCDZWJCw=7%bGqqgD|Jf9F9Y~Kzz5&u)IsP_XSy`D^}~B5HZGI!K2eBfx1|y(Z$K)9ga;tRTJ^ZNRuihO zruNd!4D{TzG;S~X)jAHfj;|JE(3w-m@7Vqoo^6>d*@eVLFNp)6R#%I## z#(0IznN8r#R2A^Oap;4Cg{stkD62XYQSJpiA>&VdeA=#{t(~mIg%_sub;F}oT$P-~ z4DeyaS4;d3YPUu``@$n@`Xx%T1eW@XQRc8FdN0};evruf5l=A_e%x4ToopQ|vOdD^ z3>{X`XEthU9x0qcgrzNWuyjd-5N~IB&g_qCf?07= zowmvr+6aW$Lao{kYHOVTUI83$3J)_UL&Mh&E~GfrHwMu+D&svq;q&7YJnY2G$ZqMz 
zz3GJGk&`euxcJgyKoUb<9(>OsRbaED^ihi9Kxhg>JegciBM_gj?LPK#_c-A=<~gex z(}VKmrpwWMO`a=gh6x6+c%Nd2uzAFkuHf814-JN-D>h6)NDPj^q6I1ltNlEUQ$aK)F~7!v^uS6HusUsOOVT_4CY|BRkhwk zOWZ?uv&4iL?h{`r`8RHzBkdedy~8wpKaikhW?N}o%SW??Yh2Tc^cq(Z=43m|)Ma5M z(XU{Mh$a8gBKFJ|tLq#26W=c-pdCO)VR<=HD8QuS+7hg9ASn>z_!q zzR8A~u_LJ|h?p$1CfH9u1G(%u9p3{Vv+d9f3`?WkE4dhQ2oBO}ER-|!@w!M+d>h)* znpvg3Urm5qU3;RPMWj$-uxBt<8V)DM4{>>DWY>M*y-sXz*EnlZbG*P7>JEgcw~#xk zG24`oh8HbU>_S}fQNPKQp`IUHh%qCdxhG>sWG_!mjj`h6>mHyAAM=N2@pA3fpwOov zi_qzaYJaA4sYAg{GR=jk8|p4nZ%%SP@Em?Llh}zBrlEb?NN8ofApHD>-1WdUjQ=K* zeB#U-MUn?slM7^Gd=G;|+VA~48Zq?g{spo{zK$NIFOvOZaeV2dKCO<^eL6+a`2=?X z5Inv;oOVW7PG^Sgc7TzrE@VSJ{ahQfgMeqAwX_@d?+FKmV`n|QMCxpmbV4NZ4AHvO z-Hxyv^8ZD<6|bO|Q--RMy`$Sv+FmN59hh&EcRL_l=*#Hzl@^dlv7(oiPOxn0U z@7?Bp7+F=M(Ci2kv6#oy>=%u%=*;RM=j0PpcA)W9=!gSbTQfoFAOQOiHm5A(ag`PP z9U%E3+!5r43_vfDw9)#+zAD}#UsQ)^ZV=%AAgk6rt`sh}ZAvH`emP>4L#hR3;Vuv5 z&>w)9_+Q5NX`@D5i{#WIlXL7sgLqMGAuXAhUG8xWXRGWhw=k}f;Y+bela-QefGf@h zKZGQ8S{3f&4??XR4YXTt7RenkmX5-!z-Y^LE-X5H&qjGCw@N$xv-;#9^swm{@1zQI znIox!j6a8;&T>#j79!@ z2~-eo?1^YH`A5}4ZO!vBGWWcgG?&22{BXmQkpmB3y2+7or))PO0*_T^794q-9uSVE zW+0P;QVIp`PvCJ0=YzcHRdDpuig?6B!iouwpo7KA%358)%-P#p1dq!vbkb@s7VvrX zgBasXe|CR5Tf4&<@c=$vn5G1&I@#;)srBDCWai6$*EI{SDR0P=3CFQ^7sa8c+oJMI zwhPwdaOlKVV73GNXqE3qC1eLQuBCHXJYixJ=$?H7n+bFW;>o}wfdnc`Ky$PKtloQc zZrSS7sQF>x>lLl$gfU3I(cgCaDVlj19a|RI!6*y8~h+{v|~)g9F?6Rc~H;DxlT^_`Y?G+0>ug6n5KT^r@$%(V znH7*(=d4>p>Yw>zX6!kvOIxiLT2-Fur?PrNc(CQ@6fZDFzQEdAPQ-g*rlo89mfYCL z;Rgi~tN6bYx~@!BN?LtZ==uP@lm_2SY=5e71`O2}W++wt?~SQJJyr120+bIel_gv@ z{Gv#R?d-kQpMG2PCW5!Ntxr3fcFgUc>_SgppH@qas`Eix*EdLXm_>Q?Kmi)5V5ckw zTKzhe5D*$Ie~*IeCm-bC|L{WCpb!yNbqDB2Fd&-)Z*Ps>(h8er3G7xD`NqboHTmkk zkpsSJhzJoAcD~Bbi$OF`D5ByORYQf1@Z@^UIv&O2sX}i3gg2?!NED<0Hus=LL!ksh z*3jAeN@bDBM+mtpkZrwSBg#Ex{5%Mh4-tRj1PxkzE-2zQ{+v~K=~Z2p&@BJ~SB{#3 z5Ty>{bjGaO)S9c}n!@)xq-z1)MNQ($00e7m(`H0B%Fp)$KOlEv#!OL`l&4McxUDUh zpUHZ&xqM~A8|cyNynrIXnnGSBXn{8pr?=Uj;-9X}?sh;>d*` 
znf02A8YvqoIS)EY@!`!mrO&5kxnfXKCQ-j8S_XF}-_^s)S5(rJEaz`9K>mQ<2HV$Pm_Rm(Lx8T&O2hu$s7i)dd|eMY%N`RJX=l)zFxAnKQ&hu z(AlO?XB@}7#RegsIhys5lwZ6zK7BE<;BK<~*dVlpb_32w`#n38U36v@h?S$I1^mg; zPYO0|)P=~cEydfpY7n>5WWhojZQr>#`)wjYy;9=X8TY*E6WX2i{2{p2lr4G$-~101 z{ZuZRYvhjLE5#q>`#9qCO?-MqW2%J^oMB@rh^dxO5ilB}+Qq%+qOg4*bJ3`TTzCb} za%fWXo92Y*M7hHlgcQD*J_%Z=XA(HyIa+lt&g^;B4YpOacEd`hB{8kBd6fjT6N#Bf&$ z?T~fnkuNWis}LF=#o>n0h!+G;YTuADhoNeGr&oHhB-)uvJ$j0WCd4y!qt*L-Eq_NE zvvmc049OaUpJO5AFtn~Wk?8qu z%}{^a!-G2S> z5ZwdZu4hM=!q1kg1;kLI`piCJshp5OVeC35jM!b>v75Bf>$Tx^`gSv7(u+O$YZ4kh zqoCRJ?U&YlIRyD{5$fR^!J8!fi^Q(9cMPLrD%D$DF={eO5okwOVZ;B1He5^B-5mE3I>r&bUj zNj5A-e>t={6(S$5ITWJSugSrI5u5+;66ke@>D`vpAb;}D(%(-i~b#P{{p~I!R{bseQvhn zi7Upek`#I_XldHIQ)7g`b2wBL1zkZAK#!$L!hqeyZM5ID)CM`iz4}*L z3Z!cN7uaVfiJ^XRF17y+h{CMXr5*Y!mBjn3)$tRcJ2X(Fb~!3*?p%7J#us#B{!0Jg z*IxXqH4AiR{8R#Hy>r^~RrBsq$3u4gmC?cicXyzVibn2)15;2$^_=&1cgb^6x$ZzKP0+J9T~|AQ6Yy`Q+^;L>>fIm+%F?tOc` z8w=3DsPtm;4_(F1k(8{EY%B2828#C+qbxaZ4z?ByeGaw7{Gt14YKOUz(0+?%z4x%6 z*?MgC##IMyhA3k)9PQc39?m~7*uGjvtxBgEfNC~#UHrh4_|->N|Klx^FK=({n%GG-*^%qvBk^(Pk!0@eeK4{dy5IL)~hHzXT-!Yd9KT|XX4fc}2 zGmmk9meK7(3_s_a8Oyf$G{^FQhh4Vs3A;1sVBJsp`Fxuf^=6tx^gM60iYE>0^dxfP4k*Q7*3)JhLRFQI*r_G=QEErs$Eh0C-xHHNx12?!hFQ~0MhAj9BF6p5^aD zY#|%X74sOX>*yHs z>wHsnY`Vj`Em8^V8je(SClB$dpp+I(Bq1WTx96E9Z?*Tb4gWC~GB?_Z7+ zEDDe;9!=iNBfWAIh{Scre)~g{C_OQUnQ?bip}o9Gy4huua_L=KdRZi5@`9t;BA0yl z6Err>&-a%Uk4D>1*)r(R+zDS+{5D^_kIqP3uj-31^(ztA-Jj%;O;Z&yhb+^tiBCo? 
z_f!c)Hd92Bp$)I&QxWzR4X+*D>6#uFj$Rx6MwH%4^ova`O;_HZm*dJQSJ+JudKH&| zzCoY6K&N&qW3eCe(^2$0Y1OBR0}3R=-Ga?oB`^2Zo73z3R@#e7((7;YWOdI`1ia^F zC$GjL8vD#pi&ukQ&R*%L<>W2Lj}UYlpm-HIvR>zrLBaJojg2L=fXDtm!7^x)iO9%8 zH*O^7sz2|3E+5O+-yeToPZ&m_R~mE2f@3)=I%$}8|M;`#>r>W#a=Q5`THk#Z<2$2q zKfAhE(_W72b;mNI0 zhqF_5-t^qTc2n=WCnrdtR&SoM`LKg{ ztxsG()A^*Uw<;Sv1zO%o)`VH*z+-9IOj~3-@p7>bDh~w{*W4s&#|wBV6tk*sMU1ch zjJElH9+lmD!%;v-#I}bYW1A6Df0Y;2jeRl@&pnQvJjq49tg{x+pbMyJ%Dx^42k9s> zTj!f~eM0J(e21ala$t%n6hH6z8N=%-V?f_BzR9|d>b-$3>8!9ap6ZOGZ!kMH=(O`S zBEItY)BKK8*))4NT+TtZ#V6_8U>igFiaH2kLguCC64GaxCj;g~D&ipu+>sH*2uq4u z8`kQgjrkP+n+hWBt%L^?KY4J|`2-(3$B~4Z?pt^9<)JNwFzaXb;zsVBX9S?CES+VveOo913scQ3JtxY?4(sWg7`gjO^uNm%%^5KJIlH7=d z^-xGhSQu5Y;uWvXgHMc2yOGSE9CoUSK{WY>)N8R382ybBi_ZY3X9ME;@z(hr^8zEy zPJy}{PgS}4N5!{P0il?^)f-|)P0bul(0^d%lVq-Jd+Rj$;ixd2^xbJIE(1XRqW5^*^ zSk{u$OR^Oy0i^JqaqB@$jEo)Cj(vVzSZtmNB<2yf%bD-q1tH98G!BswNy}N!;8@)?Kqk4%fSX`7YMjTH2T`o7 z-N$(_?bhO>u>$vB8U--l1mD3)x9c3q=$)4G6#YUP{HWjA{7^@a%+VONo~nC5dk!Q< zy*q5aBpt0YZq0($5rWdfyGdi%^rMdqm^-=BE*(c zo6Sak=)1}gl}>=`!S7bAZe05+9^72-hQ<+~JPPw(IeEfcpWK@Bh#lj}*W_o)TVV;R z%|%wq4GJPw-1I{n>v#No*=9*z@g%3d zm*=Gtzj!Ty7Amrsx+C3&tL2##{X;sIwh%9e4&gFGDMNje_(UKv9LZK{6|$R}VAehi zD#uzM?b0~{2^2!p)_A?QJT7#qKI*JFxlQO;lG8Yk6}lJV7Z*XELs*+`DCy!8FcHS1it6hEE=ip@kkDw_9HoMA zxs!e%0Epx&E5}I0W2kwdF(Y0&s`iq0)JK{;(AT^`%bLtDA5xWAltNKzCfH3u0&@r2Lsv^1t)V3a8_z-&Nt4? zMXgNdzP{U#&3121AYP5HI)i*m=<*SGybq^0_v>z|-8lLt{v9Cr;lT9({V(*Ei7dxQ zVyJ_#qtcBnemn6KXuSr6k@vNy1dW!C;br} zDE?blM8uYtE|ruW54qB|Xizf754v&SOqG(R{@8z(0}BTj?LoKmHKFbf#Fn8z#4thM ze>*fU(UDIy$1G} zW_FsUSu{DkVr+QkZxbxpZ&6ONLDvyca`~CVd~9rn2ud4m&ZbXwHDv~+a*>vg$ZO!8>_|YugMk&~#-EhYvEcnvaSCFUWH_Kx~ zu#O2po^~|Cva>Ta4UtJSzpq|ISbl=$s|ZB3g>LhBry+Heqz)`+2V7IKqJn(Gx^VpbII>f2%B`jYJ;}5F+D0i?+I3quuH!3=pyG-1(C_ zi#KL_V??VzVa?9$K`a>4U4$nKuitr{Er(J9IY-S%Rnn3A-V2pJtlHeF>Kd0Zh;Y{@ z3+`?Y;l!C8nQ*3)%&8m^K8~H#LPhqtpZ3Mb+r3eR7sYUJ7XTye*AX6E-@uZP)94>! 
znSIKV0+fsl;AGD>_ssD|Rw!QAi_e&$bce6JQf=(&9=ZkQ(h`-Y1n?@4PE$j|$ribs zgsfE`wX=!G>c~RaPf8Jxs2OSO?=6Xzq@vA`*NG~Kh=mqInb)473uz6}rOq@Wn$8a9 zdcV_S5l;deoQEgI_lBbu;xctUOLE7&c!1Pzm}tuUY*vKawyau6bOsy<<< zxLr0qbFigzE!m1;vg0z?r>un}2Ho1v@_TYa=@zKyNFZlwc)O|$muu!e28r%Wcttg& zd9=p@WDcI+uzG1%wQliXajO_o|BA#{&jxu>ZsLPLoI4_7!%ISCn&Xaab!$0zOK6Mb zz}7dSH#Hs-JZGocdRb=+Ly7lCz!XwC>Bt$zD|sJ-{wRNDT+x<~G(~V-UwFP)^2j*# zn0)yQZ~_wA7+c#iv2hb~=c($2L4CuX+K^^#0g<%)U{F+MOnJ*I(LD$Am2!?C<~uSr zcyR2464*H*zpDw(ZirVxM6W_w_+0qFJe$1fkjbjwjm3)Ugiw}~Hpd5~H&nt&%~Ld- z)I$w~(5tsl^S+}|bsnYoO=BhR%304!p1)HYsDgPAC=7j*?*PX+dY`ejo+aWw1clwP{sXSrhK*g%k@`&Z5l9-+% z`_wcR@%pn^L1NmoP_dV_WhS~Kj!QNB;-#jQjelBjR+dJBc<`~aQ3bu6LHAoSD8-R- zV~v;0PhU12a@Hj}hQQEhN9)r;rzPC7Bc_Cj+hd!AA+OT4EHihy;30<^`Lu|yS?l>Q z6T*J19AkNHqgT4Ej>#jEPk;1dIb4(KsZWx^yv{Q zAE8N9^*yfT;W$iu-o*OW{V~1Bj6*56ub!M< zK5eM4Z2YP!7`q=vl8#_>G-(NY308|8GgAk01aVo=1Wb}KHZO}!kg z8wDPx(Ey?*_Gy0Apzp1EEtsJYn58^kOeZcRqgF1b!`r0F+V6cb%sl0e_M$AgY}tz^ z7N@`@hW++*6B>zM%YW?%A7Jh7DUx45Q>Gt)8l}{C$2+jE^rt!SXINs|edDcSRONWW{S3V)S3fzYPAv?7@@49O4>>!8by2uN2+Z|i zDhMBqK8Wy%jk7rn9wd21Y=(`E91_YVCT|WD>P9DZW!w~+i#?S}-Y33Oq~C`Sc=Agd z_gah1FK;4?E?Kq6dFEXrvE~PFCGNdi;EAElms-v$Oz@P>EIEp1_ArT|@_IihxP0GQ ze15J{E*eUwiUas=^&DOke=W8hW3AjB+$JpEgw%~hzAMuew)f;Z8v^CZb1|Ejae_Cb zWAywB`DVT;6x#lig!px1z0%<$6eWbVc+4$kcRP~Q$j#bM)V0cbqQSH*-{;>9HB1Lc zH=!PV0%e^PTDa*!sJ3rn#vs>++e|7CjPWhxnJ;poaLy?^Cb2A3rf9V5Z)dw@r69V! 
z)qJ1b#`tWK)r#JL_N@2>aN~2W4Mb*3I9z~rD59c#~ zR3fSxjSrN=`zhY0rvnypC!rWy{ za0V~P*W%o#iv3rV%LKt?Dtz)5<)sbVMFq8sF&7%(@eYje|4`bQTZWalIMa?SAvSb+ zsZ@-iUWD@lN&BP!ijU>=cX)cK-_-1#V)6|7sY1ONo5C4r7xO7F>%Zb**VDw~??wTF zzuyS=fV>Eqtg-Z{UgqRVVI5Qj?f$RmUN)HBYGUr$6jq>23ccIQ!al(Kr=q*%(0&@- zQSUXv{eaij)UU$aW?28PrTB)*eS;*Yew^SN9{UZ9{)WVVId1!nhVb=AME;-SU_ALh zM9z^w4bGEW{(tcsZ8^Nqavol`AR>ML_f0UT7r~p-L!*+<^L!tH1o+wO%chGK&KQHZ zKhqsMD?A5&-;X8@?3OLq8mLKZ_ClwG-f^7XsftNDHmj?#c-~bv#MTInA-G$co=&Tc zow`?|t=eu}xx=7x5V|Xs<7*qM$>6;-4Hg+O#1~3j%rnjq|TM+B@z$!g9tc~ zJ4Ze*A0k8BKmiOvflf+iPum`xWfo!kBiOW` z8Wv4QJzkxWlpbJcsjZ26I)tmi-nn~x2>0DY-w#_|5yK`8$<>Sl``5#x%HJ3mfxS06 zl(@9}BP%#WhWd4?z)wIDa^9>wovRN)xu&TF=CnWR)^>U4kJtNmu7~3ck7*Lg`G}~^ zxQ5JWMrq`y32h5DgQ7JM1`B_E-Hb|gnLgJ96f4M{yI{etR-^H=h7ph9agX{@5kE|u z_6a>BP-W1W%Pk*{PE=8F9Y5 zcJV4>4Jz{BaYd@t7m{-Co$bhr9%NqJ3Z|oM@3?gQ2=@2$>tm_+)kX%wL1A1gNVI|;RJvbyB9T~+~vg86@=~rPf|_uZH0Xv-o*WGI2ss=;9D~A zGsZz5zP7G=?QiXmMhmU7dhqy){(jk^dEgIg-FO!pc{DSq{(bTEt78A+EI7?xZel=q z41XI~oRRqyMJ*^iKc?cjDvLIw&xADYHbk_-PL<&AH33k!yuT3j>P5N z{15}Fan&R~brDF8B91bdh~dYoI$r(c$hrUZ`_e z-(}qd`}G-))|SO%Zt7~6%5cA&etLu!?!zz`w$K`!kd*VJKAh02GWxbxd4M&VqApOM zo;H>XUy+g8peZS)#S0xolSi)JagUhRxgW8053d#44`}MRh%^3x=v%b<|JObm2a&Bkj-I zM7O3}E4ks7{)V@=A!WsOV)zkOvK-5G( zb%2{cDMUFNbbY5W%ni2_^U?D`d!eD3f|JUA?Mm3dX z>l=aV&=N%jL`4aT3?hS|GLr--r4$v*2^0kc1qm|A5M@dz z1uPK|kui`&kT8UV#1Ima+~l1QtnOa#yK||Q^A{_ZGw!?3Is5zexqD*`jj~H{ zf6IGlAS2QDduCko-Nd0jX?JH;~^-kz*ff$YZ$tbeBn-3hwQ~tt3c4s!~;-s{3+#a z?F`h1fA5*1e`T+uBMxk0iUTrK+TnVSLSOzYh;*AsUIp@ujt^jd{p$sA7pLVoq}Jjq z-9csDE6tfsq~F4KLAOp+KX%xNKvw>TY`-sV#zLc>)sOeQwrM|AkW!V~MJoT6 z(lC~;N?!YkMssT6aw5{BWqEj7r-_Q1%SwJBPTod#8Ab01kVR}oVv+u!EoU5KRW6%V zk%eBWpn^fgg$t&;c%cLn=u|9|DX~!b}uvkal{T@53dM9j{ zVvuAqb9jEHr&edJLd3~~?_YN%DY*U!-BT2>-O0p?pq>pNJ?iD?nbZ|wFpt~vQdc@O zcPL@5aV@PC3z@&UvU?s#eApq4_g|oRmNHE$Rkn$pvU_psj zt+09J^x#@V84kZEf(z0wV*YY#)p}!PLL4pkI;p~(Bv%N&(*i=&0^PnCz)<37AWb26 zgq4efcF{pc3qVNaYJ%G1D5rvH&&fYI2TUWsLV5;)=x?4S?s$ZQ-qeDEpxX%WQy33! 
z6-nEiMpkx3x=7G?@ObIuJ3H>dsPk*e*4JRbn3?02y(INQU*#WciHt zxng};L~RWDR@EY*l;RvDJhV`dD9p99iO9D@R~w9W&c@wWHCoLU+)*jUM-F40x; zdcN?!gA_pXR{;jNmK9uXuDTv0)9EB!R~V<%+o^)Uxk*-n_urI>lXeO{;}XOtTSIxS zjEI6`EiCC5qKuk!B)ilELnk~2`ezA(oJ8(S4NYU8-_t7==6Td1W262s2h547ZX_Eg zjUbeC)9d&OJZmP89;OQAV=N_Lc2)$imU=>*qr1b!C1nabIWHrOK@2aX`#Jko#6YQ7 zun3#VN&3N4$=kr9z4G>!3Q7k{{V$+t;Q%J_dmx|?u$f#WLayeZsckT1s0Zo*VK0ma z%d;I&ul2pLC^qI>Qr-!xSD716p_G7~<(E_(d!)0$qi_w7?JOX|-4!v79wTj}id?9O;TR)DXuX<7|0*T!!H|HDz#XESso}i*; zQJS++Ws)KH3juNDFbu1~vyjS8XHa}TZDQdvH zU_XtRKQf9=;=>PcG+|^D!mS_la_nCv>Rw65JpseW%P#DD334##ZBog~68J6z3A$zc zT2#FR??M6JSibDD0%;NOv&xf+Z-_3cwsT;sk)9nt>R7+@E{{drWOF2!<&x0vyck5P zapFM&>L4cM3gEd){LUU6*iDB*}5Oxe=jA7=KMQm&nR`5g1_*Qvs z-*#cOus}(XUwXw6$_B$N_u0)VLadMqZUZTt2cj0?vs(t07jc>EPjy5#Rs}T^KYVJ( z4ijJ0&Ze`{aO`QNcC}80aq?-t*PVslt>f+V@kLQJU!;4%g&Czytc9w$d%(Yol*$}P zg3A@7(ey{S_WEKp9Rzi7jaZ<==l1O-?`!PHD>a}$7agneWNa_jqlGgnV+Ci+cQSS= zTCwHzM6aoxDLJ|zE4M&fUj0lIMGj+;%vfmyJSnHrG1w^?T%*`G0zFd^BfFF=?8ruN zrDjM6>zJ3_0vr!8!I0oz8oUsw5Qle>e{ibef(&I%Ef2Zqx<$_Uh<$LB+-%A01iLq* zSLgfISf`n#2%S-#^WDtaP`Dd4C{7P==ZIqC?8qSBExmonAUw3vUbt>22s~xGA2xW; zj9?csuo1^tQwBq>iU*0Y+4bZ32@*3cX*O6S!?JXwJ~Haig8OZA_*{$&10zoA?bo&g zdu@k!fn!Vg;(&k$2qU&*PmAjP9uc^72U~kh6PQ;XY4-d zF%?jC?8dyYhqzW`sT^c>I{CZow(poMOli1`P5|obfa`bG)?4RSo}>atV!aX}{?758 znpa;ROX-EZDj&UctQzd+<;kOtiVnmyBfnNY-!!u77$G{-q}oKGsufx3H98ME{LQp6v*iZy=DC zrHYQy!;TdD8zOJlS@Nd=@j{cDo)XCLZy3Z_^X}<-568gNZTIe)_ist18&?g=61#}-b6UA zc-Jj7xYkched+#TLvRK31#E}+bxgGUlJyg*DRbH%zzXnT_@W+nr*-;`o|xYZ6JIf(lgW2xJ`XG*>>mz<)STgIN{V>&cBJ5f z7`$2V;-4hwl#y6cYk6mW_?F5e>|U|-%nS}U9(TR9{A$6w;~UAgqv%u584e5{K6px( zKg$@c_ncJjG{O-18`lA3IJOKYY5((~UcP)j!Psh1> z&nF;c?tFa%?izc0D!TAEe;(ux8Z#=(z_oL(qxR1SaH$ZiSrdI|*UvEh^|K@*9rFWp z6JR}~dd9ShN3-+|>ee)WX(~Z8t)Lsg_&*(LHj3tUZH2C+;ard2B~(};H>=EpXOYu@ zDRi6De5q`_P3044gIaZ&NZuXl>v!lFsfN8gcpmvCC~z}U{*E4wtgEu&8$wgmr@45< zG&jyw{pA342?HvH^csuH&_OUh z-E3JmnwPCDmraY_$+v>4)i`mao8TkmQ?=faJCmdG!CW6%npjb~ zakd(v??QJBT3(2fXSYUvncaNff 
z`BIS68rR!k337J{v!eF)vvRU|>m~c4j2-Z-S7Q~F)@&Mobe1YVTN3HVcBpP?K?P!0 zm0fW3EwQ{jZ8R14hTqF`x9shVu=tEp)^dAykY8kO>2rFP%9&0^m_*((ci7 zk6P%Znv06%8zHd2zZppX@IyplSQZl(868f`Xb1nBwT+10p zUsd_*+`KFOeBo+~`ya>K;{iK*s<*c0`^OS`od+xa{I3 zNV*Re982<8?GjdUH)4!uGo$t8Ip`5!*(ko4R%ZRqL;j1k*!PL-d&YN9Z1c){P3*}{{!#7qfE(+7HOF=#gR@90`0aPs0k_z!aJkpkwK}z_{FI^O(0n%F z4>#p8&8(K~-JbvUq1Lb4V8ObPdd!S-ndDo=j8j^kfUoRmGl{xawt8v6aFSF0v0GN# z)tuvqVOZlsEz|p0^<8te-N54+rGi4v>rQnN;pB9Bsef0B86tKucuH=+e&2DNn zFe0N7y8ScrAAWpXymqb^7~iK?>4qo2Xrs=8Qr}^J|JA#%WTCo)AI`;BP4(vLX{&!| z>)_VwKf9PTPir$mpIiPy0-dM`HJbi(5K}{y#JOM24lU%a5B^@?453Sz7SNCRPr}_G zfLT35#DS=&783*FACpY&6T1a~sht)R3K-)cS(I?xYQdvWG)e%nutw)lv9mhg;1ClAXBw|XcPa!rbU5U^O>}7++a8PrZ`rWZ%em1N1>|!0rOxa}yz*C502dlXB(sOG z3qLnNpFI{WdvvM;cjK=E^FCqM@n6oJe-C^`oijr~O%z@3b9se3+yyewZvfv;JL|Chjr!-ty}UZc zObi4W;@3!8$LYzYeb3QTG|%VLjQW^kVl@5ZJ<;XrW7cu3#6W+!4COim+CRBIz1rXQ zB{dR(3@>mZP8uaN^_XOIF)E2rqUkxyg2Xy6p^iP@mjFYy54WToe&=2!y|!9Xl81vb z8XnlK<7_ypsJR`^0MNwVvti}@bBhd!Zf`4iTnssDJLwah82KkqgmNlicV->ZkO|eNX z@QqAOlLyzOFW_0-yu=RGY32l)MMQXpfyPKmszujlS(L3i=KGTkEAP54@?}J|F!-Wg z-#ZygLpp0d4)2K*%ax+Yie1k)T7XMxwAxpDb8v4QgHrXua-DX!=|aVPr&v652;XYA z`;O~$>C<>4xCk%z@C+N}`8IMf%EHh$VxcA6EB16gkjy&8J1Y zZ}X~BqQ3W^Wh#+&ta-@v&XvBlq7>tdzE>cW^MN|eWIwr&%H7I3hCju}Tby>I#Ge(~ zTHoO$B9+|6G4!}bB0j$m*^K6@AD=8f;nrz+#_a?4)M~T+v(8x>fy`|9QM7x4?y6t6 zq$Gk6WD8_b&owRE!taskh*gJXR>s@l{0DxpAQ!i)RHG)=0~n8H9%c<<)igQr$KbxM zML@+SSSlY*d0}+CYb90hU60UhOr8sOV?S%UEQn1wAX&NO=8&BPLbmeCtrEBb!~v8FG^O$+(D$B&=vQD4#{rZ8b>bO;$gnrnHYC_ zZ=Dyy<`;{^b>ehd!9k14DVyxvhZ_kdwCn&AMF?V#4E{~zpvCyHyZv~Hs1`SDk&Szy zY}q5;KgYcI5$@Nc8eNUYr{ly2BZ%ScrOGc=RIGUp?&+%ryLgA-3u{GeZo4e%&E%9_ z7v&HPf3oOibRFe1RRg8561@`-R$p^ZJ7f*dD%MD9&bsvaI$E4WCS(%r;npO$^aY%Q z^I{GghWp;XcU<1D{kQkIu6jf1Y3fbK7V{5D^|**LhglA*OURKrp#?6DNwB%%U5!W< zl`>PFG8Y8l(@g*HP51N7-3R6rp}`uuZq{7YE5Aw957Y?o$q{R_8C$Oeu`eU^>W15{ zgb&yq>nGlQ!H&8CWlpHmC!gXDn3$glz-bCYoM!p)T(lOQ@kcm-#z8(n+YgI!Toyt+d#fa1oWG9qu3i2u5Kdn%3$*Mdb_wV^b(t!GG3?Tl2*R%hMx=~fXUAFB}3E!~k&#H!&r 
zEInsAr|LK8?*As+3c6n>0PT2tMC5X4c+kzv?=qGGGM zq6^pUf#uu+p0P1@2i2I)0*DYmHCfQhxx9V?foAceV;Yf7QZ=ff)O)Y#J)>TIde#itimwN0% zAjKB2Ib!kY4F9!40JjcN)Bwdm?gnQtm3St6__)c`n6K|BJY-R%JiskXFuzE>b&`f_ zk5xR(2j7`sZN5%jutA0tbo-16YHLTqW~vm#zl>Os{`0wawbvort6Ngqr|L8hUDg3a z@E-7PjerN>u@8X8cN=Ha$v?O55yj9N zUDSsKR+U)CnA;X}#GLr&=w8)51xe!vF6zGkLJDZ?$6?4!oLEsyGQ}f-lifXG}ZDk%YSMYP(T2~i->0>o#V1TSrhAU zmNE#wQLKIyoUE(JIF}Q=nSTj~->d0QtqwNfb)p3McrSKr6kS!Plr|13hn|G3nwH#| z|2GXgfUoYS#^!MGxCmWmcvkr^R{6kmSTU+ymFwrO7a<2WhEfkGu{MB&0ErDUIjtMw z*YjmjYYhpm$mNDD^9y%k6X@tFzfd_Wsk8!u0XpArjS z4HuvG2;!y8{i@=3PcG^%Dq~Z;^#` zEX(Q!GRhXMXiu1!m%z?+lQt$Ey3!_}f*m!4FlMr{M#|;DWLM39c5L)vf=J_3qYAhpnf%wsU?!1RUO(rINyKdmsh-VpI4b7HS zCs?##yH=idJD91pqUzhJt!8|`*)iWNyKlDczhwR24d+Jq*v;@(=VSeP9`iRVqN*)4 z8E{LBQOfyX!|ggFMaPBnf7$8r6&RZTKZADvBfhmdS@?9tlE}CJvh*e#Tuk85O5G0f zzJg%46NItGDYnfTM5mpg|20mrrRfO-4NIqSiroOgZ#5jB#wqp&M5~=(N++FSr$Bhw z3HDgy6q~mJ1jC(R^);*=$d0S{d!O95?!O8Dt3UStjOyJqNN0dM{r-%V89$dyiSVJN zIgQ}5(%?ee{;}6tLUIxe{ z+P$bd;gEk>V?5RI{=>o3I`)NlgGng%N5g0l(W=89m|xZZ9gofNLE-ApjKoHsXARrN zS61*&gJxHPW&z*<4vm4z(8MrVV9SxA+P%P67LpSm5Oz*kQufMn7P5MB0Fxydefx@1 zNRi0cRflOMa`V>NE%a4uk;*X-}XQ zAKPx)d+g)IY=0ZTGhzXhDm1#F+6Le7Cew7c!r%jj2X^mjKC0bX6qGVZ8-WD+KsvL5 zOYOW#hGd!q{Z`pNo2>3%WG&%V*%pHVO+vWCH?IwTv3x21pkR*_l6q>+2c&d`8OGJd z-}54?(uT9_QFGPUNU&kZI&}NN)!`C+@*&NheA+mpTTHO+wU&u%Mgx%r%~E8_g!-w$ zA8<9bHQJL%E5aYEg~dwFRF2t>^;{5PybK)uU{lgz5p~Td5bK+X_-jj9O0g zXWCH8#(VSJfM04t^G|C?hul&-i81wV==lH=aPXR+iwwIPdC$u(^6%PGJAz%5$IIK| zC1Xjl0;WRk2Gg+@QRKtL9ge|j^hDaH>H3!1Zww&RyT#d*(8nf8u(UP5GV7oZN^bn+ zWJPl0P4m}8m%l4|YLUgTZJ~qwgUC3ex8b}%S~>b!v!F77n|EFPhSJ?-c(7_q+waX8 z-qSd&(XMR(Q_V3*mxFwopT!<3X;pHmQ6$l?EpF@&?mXuqdpTGZ$=@gb0ABo`cnL&w zOF0GeQhx5xX`J+qCDsizn`At0O6kHbt55bOFG|DMeq}$wMUro4<~NJs>m{OZ_R@cZ zwRpr~?sdz);knoSe91KRi7i>Ir1vD7o%rpa8t;1l?|oW-0v{FpYsSLs$O(}b1VPKs z?>v0|)QR)%rfz}m;19IH(8$EVaI=BoCI_RROgC;ZHQKz^(9qP-@Q*-&^FLJZ^*?pm ZBlLf-@B+W9A5?&L@7%W|cbjYUe*q{Iu}c5| literal 0 HcmV?d00001 diff --git a/examples/README.md b/examples/README.md index c9ca2f0..e68be30 
100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,15 +1,33 @@ # Examples -This directory contains various training scripts. +This document outlines the variety of training scripts and external resources. -Torch blog posts - * The torch.ch blog contains detailed posts about the *rnn* package. - 1. [recurrent-visual-attention.lua](recurrent-visual-attention.lua): training script used in [Recurrent Model for Visual Attention](http://torch.ch/blog/2015/09/21/rmva.html). Implements the REINFORCE learning rule to learn an attention mechanism for classifying MNIST digits, sometimes translated. - 2. [noise-contrastive-esimate.lua](noise-contrastive-estimate.lua): one of two training scripts used in [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). Single-GPU script for training recurrent language models on the Google billion words dataset. - 3. [multigpu-nce-rnnlm.lua](multigpu-nce-rnnlm.lua) : 4-GPU version of `noise-contrastive-estimate.lua` for training larger multi-GPU models. Two of two training scripts used in the [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). +## Advanced training scripts -Simple training scripts. - * Showcases the fundamental principles of the package. In chronological order of introduction date. +This section lists advanced training scripts that train RNNs on real-world datasets. + 1. [recurrent-language-model.lua](recurrent-language-model.lua): trains a stack of LSTM, GRU, MuFuRu, or Simple RNN on the Penn Tree Bank dataset with or without dropout. + 2. [recurrent-visual-attention.lua](recurrent-visual-attention.lua): training script used in [Recurrent Model for Visual Attention](http://torch.ch/blog/2015/09/21/rmva.html). Implements the REINFORCE learning rule to learn an attention mechanism for classifying MNIST digits, sometimes translated. Showcases `nn.RecurrentAttention`, `nn.SpatialGlimpse` and `nn.Reinforce`. + 3.
[noise-contrastive-estimate.lua](noise-contrastive-estimate.lua): one of two training scripts used in [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). Single-GPU script for training recurrent language models on the Google billion words dataset. This example showcases version 2 zero-masking. Version 2 is more efficient than version 1 as the `zeroMask` is interpolated only once. + 4. [multigpu-nce-rnnlm.lua](multigpu-nce-rnnlm.lua) : 4-GPU version of `noise-contrastive-estimate.lua` for training larger multi-GPU models. The second of two training scripts used in the [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). This script is for training multi-layer [SeqLSTM](/README.md#rnn.SeqLSTM) language models on the [Google Billion Words dataset](https://github.com/Element-Research/dataload#dl.loadGBW). The example uses [MaskZero](/README.md#rnn.MaskZero) to train independent variable length sequences using the [NCEModule](/README.md#nn.NCEModule) and [NCECriterion](/README.md#nn.NCECriterion). This script is our fastest yet, boasting speeds of 20,000 words/second (on NVIDIA Titan X) with a 2-layer LSTM having 250 hidden units, a batchsize of 128 and sequence length of 100. Note that you will need to have [Torch installed with Lua instead of LuaJIT](http://torch.ch/docs/getting-started.html#_). + 5. [twitter-sentiment-rnn.lua](twitter-sentiment-rnn.lua) : trains a stack of RNNs on a Twitter sentiment analysis task. The problem is a text classification problem that uses a sequence-to-one architecture. In this architecture, only the last RNN's last time-step is used for classification. + +## Simple training scripts + +This section lists simple training scripts that train RNNs on dummy datasets. +These scripts showcase the fundamental principles of the package. 1. [simple-recurrent-network.lua](simple-recurrent-network.lua): uses the `nn.LookupRNN` module to instantiate a Simple RNN.
Illustrates the first AbstractRecurrent instance in action. It has since been surpassed by the more flexible `nn.Recursor` and `nn.Recurrence`. The `nn.Recursor` class decorates any module to make it conform to the nn.AbstractRecurrent interface. The `nn.Recurrence` implements the recursive `h[t] <- forward(h[t-1], x[t])`. Together, `nn.Recursor` and `nn.Recurrence` can be used to implement a wide range of experimental recurrent architectures. 2. [simple-sequencer-network.lua](simple-sequencer-network.lua): uses the `nn.Sequencer` module to accept a batch of sequences as `input` of size `seqlen x batchsize x ...`. Both tables and tensors are accepted as input and produce the same type of output (table->table, tensor->tensor). The `Sequencer` class abstract away the implementation of back-propagation through time. It also provides a `remember(['neither','both'])` method for triggering what the `Sequencer` remembers between iterations (forward,backward,update). 3. [simple-recurrence-network.lua](simple-recurrence-network.lua): uses the `nn.Recurrence` module to define the h[t] <- sigmoid(h[t-1], x[t]) Simple RNN. Decorates it using `nn.Sequencer` so that an entire batch of sequences (`input`) can forward and backward propagated per update. + 4. [simple-bisequencer-network.lua](simple-bisequencer-network.lua): uses a `nn.BiSequencerLM` and two `nn.LookupRNN` to implement a simple bi-directional language model. + 5. [simple-bisequencer-network-variable.lua](simple-bisequencer-network-variable.lua): uses `nn.RecLSTM`, `nn.LookupTableMaskZero`, `nn.ZipTable`, `nn.MaskZero` and `nn.MaskZeroCriterion` to implement a simple bi-directional LSTM language model. This example uses version 1 zero-masking where the `zeroMask` is automatically interpolated from the `input`. + 6. [sequence-to-one.lua](sequence-to-one.lua): a simple sequence-to-one example that uses `Recurrence` to build an RNN and `SelectTable(-1)` to select the last time-step for discriminating the sequence. + 7. 
[encoder-decoder-coupling.lua](encoder-decoder-coupling.lua): uses two stacks of `nn.SeqLSTM` to implement an encoder and decoder. The final hidden state of the encoder initializes the hidden state of the decoder. Example of sequence-to-sequence learning. + 8. [nested-recurrence-lstm.lua](nested-recurrence-lstm.lua): demonstrates how RNNs can be nested to form complex RNNs. + 9. [recurrent-time-series.lua](recurrent-time-series.lua): demonstrates how to train a simple RNN to do multi-variate time-series prediction. + + ## External resources + + * [rnn-benchmarks](https://github.com/glample/rnn-benchmarks) : benchmarks comparing Torch (using this library), Theano and TensorFlow. + * [dataload](https://github.com/Element-Research/dataload) : a collection of torch dataset loaders; + * A brief (1 hour) overview of Torch7, which includes some details about the __rnn__ package (at the end), is available via this [NVIDIA GTC Webinar video](http://on-demand.gputechconf.com/gtc/2015/webinar/torch7-applied-deep-learning-for-vision-natural-language.mp4). In any case, this presentation gives a nice overview of Logistic Regression, Multi-Layer Perceptrons, Convolutional Neural Networks and Recurrent Neural Networks using Torch7; + * [Sagar Waghmare](https://github.com/sagarwaghmare69) wrote a nice [tutorial](tutorials/ladder.md) on how to use rnn with nngraph to reproduce the [Lateral Connections in Denoising Autoencoders Support Supervised Learning](http://arxiv.org/pdf/1504.08215.pdf) paper.
diff --git a/examples/simple-bisequencer-network.lua b/examples/simple-bisequencer-network.lua index 2d87004..cd14ead 100644 --- a/examples/simple-bisequencer-network.lua +++ b/examples/simple-bisequencer-network.lua @@ -10,11 +10,7 @@ lr = 0.1 -- forward rnn -- build simple recurrent neural network -local fwd = nn.Recurrent( - hiddenSize, nn.LookupTable(nIndex, hiddenSize), - nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), - seqlen -) +local fwd = nn.LookupRNN(nIndex, hiddenSize) -- backward rnn (will be applied in reverse order of input sequence) local bwd = fwd:clone() diff --git a/examples/twitter_sentiment_rnn.lua b/examples/twitter-sentiment-rnn.lua similarity index 100% rename from examples/twitter_sentiment_rnn.lua rename to examples/twitter-sentiment-rnn.lua