Robust likelihood
mwshinn committed May 27, 2020
1 parent 9ef36fd commit 2df6c64
Showing 5 changed files with 80 additions and 14 deletions.
4 changes: 2 additions & 2 deletions ddm/__init__.py
@@ -4,15 +4,15 @@
 # This file is part of PyDDM, and is available under the MIT license.
 # Please see LICENSE.txt in the root directory for more information.

-__all__ = ["Sample", "Solution", "Model", "Fittable", "Fitted"]
+__all__ = ["Sample", "Solution", "Model", "Fittable", "Fitted", "FitResult"]

 # Check that Python3 is running
 import sys
 if sys.version_info.major != 3:
     raise ImportError("PyDDM only supports Python 3")

 from .models import *
-from .model import Model, Fittable, Fitted
+from .model import Model, Fittable, Fitted, FitResult
 from .sample import Sample
 from .solution import Solution
 from .functions import *
39 changes: 32 additions & 7 deletions ddm/models/loss.py
@@ -4,7 +4,7 @@
 # This file is part of PyDDM, and is available under the MIT license.
 # Please see LICENSE.txt in the root directory for more information.

-__all__ = ['LossFunction', 'LossSquaredError', 'LossLikelihood', 'LossBIC']
+__all__ = ['LossFunction', 'LossSquaredError', 'LossLikelihood', 'LossBIC', 'LossRobustLikelihood', 'LossRobustBIC']

 import numpy as np
@@ -131,6 +131,7 @@ def loss(self, model):
 class LossLikelihood(LossFunction):
     """Likelihood loss function"""
     name = "Negative log likelihood"
+    _robustness_param = 0
     @staticmethod
     def _test(v):
         assert v.dt in Positive0()
@@ -166,7 +167,6 @@ def setup(self, dt, T_dur, **kwargs):
     @requires("model.dt == self.dt and model.T_dur == self.T_dur")
     def loss(self, model):
         assert model.dt == self.dt and model.T_dur == self.T_dur
-        MIN_LIKELIHOOD = 1e-8 # Avoid log(0)
         sols = self.cache_by_conditions(model)
         loglikelihood = 0
         for k in sols.keys():
@@ -178,15 +178,22 @@ def loss(self, model):
             # 0. We will issue a warning now, but throwing an
             # exception may be the better way to handle this to make
             # sure it doesn't go unnoticed.
-            if np.any(sols[k].pdf_corr()<0) or np.any(sols[k].pdf_err()<0):
-                print("Warning: invalid likelihood function.")
-                return np.inf
-            loglikelihood += np.sum(np.log(sols[k].pdf_corr()[self.hist_indexes[k][0]]+MIN_LIKELIHOOD))
-            loglikelihood += np.sum(np.log(sols[k].pdf_err()[self.hist_indexes[k][1]]+MIN_LIKELIHOOD))
+            with np.errstate(all='raise'):
+                try:
+                    loglikelihood += np.sum(np.log(sols[k].pdf_corr()[self.hist_indexes[k][0]]+self._robustness_param))
+                    loglikelihood += np.sum(np.log(sols[k].pdf_err()[self.hist_indexes[k][1]]+self._robustness_param))
+                except FloatingPointError:
+                    minlike = min(np.min(sols[k].pdf_corr()), np.min(sols[k].pdf_err()))
+                    if minlike == 0:
+                        print("Warning: infinite likelihood encountered. Please either use a robust likelihood method (e.g. LossRobustLikelihood or LossRobustBIC) or, even better, use a mixture model (via an Overlay) which covers the full range of simulated times, to avoid an infinite negative log likelihood. See the FAQs in the documentation for more information.")
+                    elif minlike < 0:
+                        print("Warning: infinite likelihood encountered. The simulated histogram is less than zero in the likelihood calculation. Try decreasing dt.")
+                    return np.inf
             if sols[k].prob_undecided() > 0:
                 loglikelihood += np.log(sols[k].prob_undecided())*self.hist_indexes[k][2]
         return -loglikelihood


 @paranoidclass
 class LossBIC(LossLikelihood):
     """BIC loss function, functionally equivalent to LossLikelihood"""
@@ -209,3 +216,21 @@ def setup(self, nparams, samplesize, **kwargs):
     def loss(self, model):
         loglikelihood = -LossLikelihood.loss(self, model)
         return np.log(self.samplesize)*self.nparams - 2*loglikelihood

+class LossRobustLikelihood(LossLikelihood):
+    """Likelihood loss function which will not fail for infinite likelihoods.
+    Usually you will want to use LossLikelihood instead.  See the FAQs
+    in the documentation for more information on how this differs from
+    LossLikelihood.
+    """
+    _robustness_param = 1e-20
+
+class LossRobustBIC(LossBIC):
+    """BIC loss function which will not fail for infinite likelihoods.
+    Usually you will want to use LossBIC instead.  See the FAQs in the
+    documentation for more information on how this differs from
+    LossBIC.
+    """
+    _robustness_param = 1e-20
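
The entire behavioral difference between the robust and standard loss classes is the _robustness_param floor added inside the logarithm. A minimal standalone sketch of the effect (illustrative values only, not PyDDM internals verbatim):

import numpy as np

# A simulated histogram with an empty bin, as happens when a response
# falls before the model's non-decision time.
pdf = np.array([0.0, 0.3, 0.7])

for eps in (0, 1e-20):  # LossLikelihood vs. LossRobustLikelihood
    with np.errstate(divide='ignore'):
        # eps = 0 gives log(0) = -inf; eps = 1e-20 keeps the sum finite.
        print(eps, np.sum(np.log(pdf + eps)))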
8 changes: 6 additions & 2 deletions ddm/models/overlay.py
@@ -4,7 +4,7 @@
 # This file is part of PyDDM, and is available under the MIT license.
 # Please see LICENSE.txt in the root directory for more information.

-__all__ = ["Overlay", "OverlayNone", "OverlayChain", "OverlayUniformMixture", "OverlayPoissonMixture", "OverlayNonDecision", "OverlayNonDecisionGamma", "OverlayNonDecisionUniform", "OverlaySimplePause", "OverlayBlurredPause"]
+__all__ = ["Overlay", "OverlayNone", "OverlayChain", "OverlayUniformMixture", "OverlayPoissonMixture", "OverlayExponentialMixture", "OverlayNonDecision", "OverlayNonDecisionGamma", "OverlayNonDecisionUniform", "OverlaySimplePause", "OverlayBlurredPause"]

 import numpy as np
 from scipy.special import gamma as sp_gamma
@@ -218,7 +218,7 @@ def apply(self, solution):
         return Solution(corr, err, m, cond, undec, evolution)

 @paranoidclass
-class OverlayPoissonMixture(Overlay):
+class OverlayExponentialMixture(Overlay):
     """An exponential mixture distribution.
     The output distribution should be pmixturecoef*100 percent exponential
@@ -273,6 +273,10 @@ def apply(self, solution):
         #print(err)
         return Solution(corr, err, m, cond, undec, evolution)

+# Backward compatibility
+class OverlayPoissonMixture(OverlayExponentialMixture):
+    pass

 @paranoidclass
 class OverlayNonDecision(Overlay):
     """Add a non-decision time
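Since OverlayPoissonMixture now simply subclasses OverlayExponentialMixture, existing user code keeps working. A quick sketch (assuming the overlay's existing pmixturecoef and rate parameters are unchanged by the rename):

from ddm.models import OverlayExponentialMixture, OverlayPoissonMixture

# The old name is a thin alias for the new class, so constructing it
# yields an instance of the renamed class.
old = OverlayPoissonMixture(pmixturecoef=.02, rate=1)
assert isinstance(old, OverlayExponentialMixture)
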
6 changes: 3 additions & 3 deletions doc/downloads/simple.py
@@ -24,7 +24,7 @@
 # Fit a model identical to the one described above on the newly
 # generated data to show that parameters can be recovered.
 from ddm import Fittable
-from ddm.models import LossBIC
+from ddm.models import LossRobustBIC
 from ddm.functions import fit_adjust_model
 model_fit = Model(name='Simple model (fitted)',
                   drift=DriftConstant(drift=Fittable(minval=0, maxval=4)),
@@ -34,8 +34,8 @@
                   dx=.001, dt=.01, T_dur=2)

 fit_adjust_model(samp, model_fit,
-                 method="differential_evolution",
-                 lossfunction=LossBIC)
+                 fitting_method="differential_evolution",
+                 lossfunction=LossRobustBIC)

 display_model(model_fit)

37 changes: 37 additions & 0 deletions doc/faqs.rst
@@ -136,3 +136,40 @@ hierarchical models, but you are welcome to implement the feature
yourself and submit a pull request on Github. If you plan to
implement this feature, please let us know so we can help you get
familiar with the code.

What is the difference between LossLikelihood and LossRobustLikelihood, or LossBIC and LossRobustBIC?
------------------------------------------------------------------------------------------------------

Maximum likelihood is in general not good at handling probabilities of
zero. When fitting with maximum likelihood (or equivalently, BIC), the
fit will fail if there are any times at which the likelihood is zero.
If even one trial in the experimental data falls into a region where
the simulated probability distribution is zero, then the likelihood of
the model given those data is zero, and hence the negative log
likelihood is infinite. (See Ratcliff and Tuerlinckx (2002) for a
more complete discussion.) In practice, there can be several
locations where the likelihood is theoretically zero. For example, by
definition, no responses can occur before the non-decision time has
elapsed. However, data are noisy, and some responses may be spurious.
This means that, when fitting with likelihood, the non-decision time
cannot be longer than the shortest response in the data. Clearly this
is not acceptable.
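
Concretely, for response times :math:`t_1, \ldots, t_n` with model
density :math:`f`, the negative log likelihood is

.. math::

    -\log \mathcal{L} = -\sum_{i=1}^{n} \log f(t_i),

so a single trial with :math:`f(t_i) = 0` makes the sum infinite, no
matter how well the model fits the other :math:`n - 1` trials.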

PyDDM has two ways of circumventing this problem. The most robust way
is to fit the data with a mixture model. Here, the DDM process is
mixed with another distribution (called a "lapse", "contaminant", or
"outlier" distribution) which represents responses generated by a
non-DDM process. Traditionally :class:`a uniform distribution
<.OverlayUniformMixture>` has been used, but PyDDM also offers the
option of using :class:`an exponential distribution
<.OverlayExponentialMixture>`, which has the benefit of a flat
lapse-rate hazard function. If you would also like to have a
non-decision time, you may need to :class:`chain together multiple
overlays <.OverlayChain>`.
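
As a sketch, a mixture model with a non-decision time might be
constructed as follows (the parameter ranges here are placeholders,
not recommendations):

.. code-block:: python

    from ddm import Model, Fittable
    from ddm.models import (OverlayChain, OverlayNonDecision,
                            OverlayExponentialMixture)

    model = Model(overlay=OverlayChain(overlays=[
        OverlayNonDecision(nondectime=Fittable(minval=0, maxval=.4)),
        OverlayExponentialMixture(pmixturecoef=Fittable(minval=0, maxval=.1),
                                  rate=Fittable(minval=.1, maxval=2))]))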

The easier option is to use the LossRobustLikelihood function. This
imposes a minimum value on the likelihood. In theory, this is similar
to mixing in a uniform distribution, but with an unspecified mixture
probability. It will give nearly identical results to LossLikelihood
when no likelihoods are zero, but because of the minimum it imposes,
it is an approximation rather than the true likelihood.
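
For example, swapping in the robust loss requires changing only the
``lossfunction`` argument (``samp`` and ``model`` are assumed to be an
existing :class:`.Sample` and :class:`.Model`):

.. code-block:: python

    from ddm.models import LossRobustLikelihood
    from ddm.functions import fit_adjust_model

    fit_adjust_model(samp, model,
                     fitting_method="differential_evolution",
                     lossfunction=LossRobustLikelihood)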
