Fixed p/pd/pdd error estimates for prepfold folding correlations

paulray · Jun 10, 2021 · e08b026 · e08b026
1 parent c6bff50
commit e08b026
Show file tree

Hide file tree

Showing 10 changed files with 159 additions and 56 deletions.
diff --git a/FAQ.md b/FAQ.md
@@ -673,10 +673,11 @@ significance, as `prepfold` does.
 
 There is a semi-analytic correction for this effect that has been tested using
 large numbers of simulations with fake data. You can see it in the `DOF_corr`
-function/method in `src/prepfold_utils.c` or `python/presto/prepfold.py`,
-respectfully. That correction can also be used to correct for the noise level in
-the pulse profile, for more accurate flux densities estimates via the radiometer
-equation, for example (`sum_profiles.py` uses the correction).
+function/method in `src/fold.c` or `python/presto/prepfold.py`, respectfully.
+That correction can also be used to correct for the noise level in the pulse
+profile, for more accurate flux densities estimates via the radiometer equation,
+for example (`sum_profiles.py` uses the correction; newrms = oldrms /
+sqrt(DOF_corr)).
 
 The effect and the correction are described in detail in Appendix E of the
 recent paper [Bachetti et al.,

diff --git a/include/prepfold.h b/include/prepfold.h
@@ -167,24 +167,3 @@ float estimate_offpulse_redchi2(double *inprofs, foldstats *stats,
 // inverse of the average of the off-pulse reduced-chi^2 (i.e. the
 // correction factor).  dofeff is the effective number of DOF as
 // returned by DOF_corr().
-
-double DOF_corr(double dt_per_bin);
-// Return a multiplicative correction for the effective number of
-// degrees of freedom in the chi^2 measurement resulting from a pulse
-// profile folded by PRESTO's fold() function (i.e. prepfold).  This
-// is required because there are correlations between the bins caused
-// by the way that prepfold folds data (i.e. treating a sample as
-// finite duration and smearing it over potenitally several bins in
-// the profile as opposed to instantaneous and going into just one
-// profile bin).  The correction is semi-analytic (thanks to Paul
-// Demorest and Walter Brisken) but the values for 'power' and
-// 'factor' have been determined from Monte Carlos.  The correction is
-// good to a fractional error of less than a few percent as long as
-// dt_per_bin is > 0.5 or so (which it usually is for pulsar
-// candidates).  There is a very minimal number-of-bins dependence,
-// which is apparent when dt_per_bin < 0.7 or so.  dt_per_bin is the
-// width of a profile bin in samples (a float), and so for prepfold is
-// pulse period / nbins / sample time.  Note that the sqrt of this
-// factor can be used to 'inflate' the RMS of the profile as well, for
-// radiometer eqn flux density estimates, for instance.
-
diff --git a/include/presto.h b/include/presto.h
@@ -1171,6 +1171,24 @@ double max_rzw_file(FILE * fftfile, double rin, double zin, double win, \
 
 /* In fold.c */
 
+double DOF_corr(double dt_per_bin);
+// Return a multiplicative correction for the effective number of degrees of
+// freedom in the chi^2 measurement resulting from a pulse profile folded by
+// PRESTO's fold() function (i.e. prepfold).  This is required because there are
+// correlations between the bins caused by the way that prepfold folds data
+// (i.e. treating a sample as finite duration and smearing it over potentially
+// several bins in the profile as opposed to instantaneous and going into just
+// one profile bin).  The correction is semi-analytic (thanks to Paul Demorest
+// and Walter Brisken) but the values for 'power' and 'factor' have been
+// determined from Monte Carlos.  The correction is good to a fractional error
+// of less than a few percent as long as dt_per_bin is > 0.5 or so (which it
+// usually is for pulsar candidates).  There is a very minimal number-of-bins
+// dependence, which is apparent when dt_per_bin < 0.7 or so.  dt_per_bin is the
+// width of a profile bin in samples (a float), and so for prepfold is pulse
+// period / nbins / sample time.  Note that the sqrt of this factor can be used
+// to 'inflate' the RMS of the profile as well, for radiometer eqn flux density
+// estimates, for instance (newrms = oldrms / sqrt(DOF_corr)).
+
 void fold_errors(double *prof, int proflen, double dt, double N, 
 		 double datavar, double p, double pd, double pdd, 
 		 double *perr, double *pderr, double *pdderr);

diff --git a/python/presto_src/presto_wrap.c b/python/presto_src/presto_wrap.c
@@ -13551,6 +13551,45 @@ SWIGINTERN PyObject *_wrap_barycenter(PyObject *SWIGUNUSEDPARM(self), PyObject *
 }
 
 
+SWIGINTERN PyObject *_wrap_DOF_corr(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  double arg1 ;
+  double val1 ;
+  int ecode1 = 0 ;
+  PyObject *swig_obj[1] ;
+  double result;
+
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  ecode1 = SWIG_AsVal_double(swig_obj[0], &val1);
+  if (!SWIG_IsOK(ecode1)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "DOF_corr" "', argument " "1"" of type '" "double""'");
+  } 
+  arg1 = (double)(val1);
+  {
+    errno = 0;
+    result = (double)DOF_corr(arg1);
+
+    if (errno != 0)
+    {
+      switch(errno)
+      {
+      case ENOMEM:
+        PyErr_Format(PyExc_MemoryError, "Failed malloc()");
+        break;
+      default:
+        PyErr_Format(PyExc_Exception, "Unknown exception");
+      }
+      SWIG_fail;
+    }
+  }
+  resultobj = SWIG_From_double((double)(result));
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
 SWIGINTERN PyObject *_wrap_simplefold(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   float *arg1 = (float *) 0 ;
@@ -14064,6 +14103,7 @@ static PyMethodDef SwigMethods[] = {
 	 { "max_rzw_arr_harmonics", _wrap_max_rzw_arr_harmonics, METH_VARARGS, NULL},
 	 { "max_rzw_arr", _wrap_max_rzw_arr, METH_VARARGS, NULL},
 	 { "barycenter", _wrap_barycenter, METH_VARARGS, NULL},
+	 { "DOF_corr", _wrap_DOF_corr, METH_O, NULL},
 	 { "simplefold", _wrap_simplefold, METH_VARARGS, NULL},
 	 { "nice_output_1", _wrap_nice_output_1, METH_VARARGS, NULL},
 	 { "nice_output_2", _wrap_nice_output_2, METH_VARARGS, NULL},

diff --git a/python/presto_src/prestoswig.py b/python/presto_src/prestoswig.py
@@ -468,6 +468,9 @@ def max_rzw_arr(data: "fcomplex", rin: "double", zin: "double", win: "double", d
 def barycenter(topotimes: "double *", barytimes: "double *", voverc: "double *", ra: "char *", dec: "char *", obs: "char *", ephem: "char *") -> "void":
     return _presto.barycenter(topotimes, barytimes, voverc, ra, dec, obs, ephem)
 
+def DOF_corr(dt_per_bin: "double") -> "double":
+    return _presto.DOF_corr(dt_per_bin)
+
 def simplefold(data: "float *", dt: "double", tlo: "double", prof: "double *", startphs: "double", f0: "double", fdot: "double", fdotdot: "double", standard: "int") -> "double":
     return _presto.simplefold(data, dt, tlo, prof, startphs, f0, fdot, fdotdot, standard)
 

diff --git a/python/wrappers/presto.i b/python/wrappers/presto.i
@@ -904,6 +904,24 @@ double sphere_ang_diff(double ra1, double dec1, double ra2, double dec2);
 %clear (double *barytimes, long N2);
 %clear (double *voverc, long N3);
 
+double DOF_corr(double dt_per_bin);
+// Return a multiplicative correction for the effective number of degrees of
+// freedom in the chi^2 measurement resulting from a pulse profile folded by
+// PRESTO's fold() function (i.e. prepfold).  This is required because there are
+// correlations between the bins caused by the way that prepfold folds data
+// (i.e. treating a sample as finite duration and smearing it over potentially
+// several bins in the profile as opposed to instantaneous and going into just
+// one profile bin).  The correction is semi-analytic (thanks to Paul Demorest
+// and Walter Brisken) but the values for 'power' and 'factor' have been
+// determined from Monte Carlos.  The correction is good to a fractional error
+// of less than a few percent as long as dt_per_bin is > 0.5 or so (which it
+// usually is for pulsar candidates).  There is a very minimal number-of-bins
+// dependence, which is apparent when dt_per_bin < 0.7 or so.  dt_per_bin is the
+// width of a profile bin in samples (a float), and so for prepfold is pulse
+// period / nbins / sample time.  Note that the sqrt of this factor can be used
+// to 'inflate' the RMS of the profile as well, for radiometer eqn flux density
+// estimates, for instance (newrms = oldrms / sqrt(DOF_corr)).
+
 %apply (float* INPLACE_ARRAY1, long DIM1) {(float *data, long N1)};
 %apply (double* INPLACE_ARRAY1, long DIM1) {(double *prof, long N2)};
 %rename (simplefold) wrap_simplefold;

diff --git a/python/wrappers/presto.py b/python/wrappers/presto.py
@@ -468,6 +468,9 @@ def max_rzw_arr(data: "fcomplex", rin: "double", zin: "double", win: "double", d
 def barycenter(topotimes: "double *", barytimes: "double *", voverc: "double *", ra: "char *", dec: "char *", obs: "char *", ephem: "char *") -> "void":
     return _presto.barycenter(topotimes, barytimes, voverc, ra, dec, obs, ephem)
 
+def DOF_corr(dt_per_bin: "double") -> "double":
+    return _presto.DOF_corr(dt_per_bin)
+
 def simplefold(data: "float *", dt: "double", tlo: "double", prof: "double *", startphs: "double", f0: "double", fdot: "double", fdotdot: "double", standard: "int") -> "double":
     return _presto.simplefold(data, dt, tlo, prof, startphs, f0, fdot, fdotdot, standard)
 

diff --git a/python/wrappers/presto_wrap.c b/python/wrappers/presto_wrap.c
@@ -13551,6 +13551,45 @@ SWIGINTERN PyObject *_wrap_barycenter(PyObject *SWIGUNUSEDPARM(self), PyObject *
 }
 
 
+SWIGINTERN PyObject *_wrap_DOF_corr(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
+  PyObject *resultobj = 0;
+  double arg1 ;
+  double val1 ;
+  int ecode1 = 0 ;
+  PyObject *swig_obj[1] ;
+  double result;
+
+  if (!args) SWIG_fail;
+  swig_obj[0] = args;
+  ecode1 = SWIG_AsVal_double(swig_obj[0], &val1);
+  if (!SWIG_IsOK(ecode1)) {
+    SWIG_exception_fail(SWIG_ArgError(ecode1), "in method '" "DOF_corr" "', argument " "1"" of type '" "double""'");
+  } 
+  arg1 = (double)(val1);
+  {
+    errno = 0;
+    result = (double)DOF_corr(arg1);
+
+    if (errno != 0)
+    {
+      switch(errno)
+      {
+      case ENOMEM:
+        PyErr_Format(PyExc_MemoryError, "Failed malloc()");
+        break;
+      default:
+        PyErr_Format(PyExc_Exception, "Unknown exception");
+      }
+      SWIG_fail;
+    }
+  }
+  resultobj = SWIG_From_double((double)(result));
+  return resultobj;
+fail:
+  return NULL;
+}
+
+
 SWIGINTERN PyObject *_wrap_simplefold(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
   PyObject *resultobj = 0;
   float *arg1 = (float *) 0 ;
@@ -14064,6 +14103,7 @@ static PyMethodDef SwigMethods[] = {
 	 { "max_rzw_arr_harmonics", _wrap_max_rzw_arr_harmonics, METH_VARARGS, NULL},
 	 { "max_rzw_arr", _wrap_max_rzw_arr, METH_VARARGS, NULL},
 	 { "barycenter", _wrap_barycenter, METH_VARARGS, NULL},
+	 { "DOF_corr", _wrap_DOF_corr, METH_O, NULL},
 	 { "simplefold", _wrap_simplefold, METH_VARARGS, NULL},
 	 { "nice_output_1", _wrap_nice_output_1, METH_VARARGS, NULL},
 	 { "nice_output_2", _wrap_nice_output_2, METH_VARARGS, NULL},

diff --git a/src/fold.c b/src/fold.c
@@ -19,6 +19,30 @@
 /* Some helper functions */
 void hunt(double *xx, int n, double x, int *jlo);
 
+double DOF_corr(double dt_per_bin)
+// Return a multiplicative correction for the effective number of degrees of
+// freedom in the chi^2 measurement resulting from a pulse profile folded by
+// PRESTO's fold() function (i.e. prepfold).  This is required because there are
+// correlations between the bins caused by the way that prepfold folds data
+// (i.e. treating a sample as finite duration and smearing it over potentially
+// several bins in the profile as opposed to instantaneous and going into just
+// one profile bin).  The correction is semi-analytic (thanks to Paul Demorest
+// and Walter Brisken) but the values for 'power' and 'factor' have been
+// determined from Monte Carlos.  The correction is good to a fractional error
+// of less than a few percent as long as dt_per_bin is > 0.5 or so (which it
+// usually is for pulsar candidates).  There is a very minimal number-of-bins
+// dependence, which is apparent when dt_per_bin < 0.7 or so.  dt_per_bin is the
+// width of a profile bin in samples (a float), and so for prepfold is pulse
+// period / nbins / sample time.  Note that the sqrt of this factor can be used
+// to 'inflate' the RMS of the profile as well, for radiometer eqn flux density
+// estimates, for instance (newrms = oldrms / sqrt(DOF_corr)).
+{
+    double power = 1.806;       // From Monte Carlos
+    double factor = 0.96;       // From Monte Carlos
+    return dt_per_bin * factor * pow(1.0 + pow(dt_per_bin, power), -1.0 / power);
+}
+
+
 static void add_to_prof(double *prof, double *buffer, int N,
                         double lophase, double deltaphase,
                         double dataval, double *phaseadded)
@@ -118,7 +142,7 @@ void fold_errors(double *prof, int proflen, double dt, double N,
 /*      'datavar' is the variance of the original data       */
 /*      'p' is the folding period                            */
 /*      'pd' is the folding period derivative                */
-/*      'pdd' is the folding period 2nd dervivative          */
+/*      'pdd' is the folding period 2nd derivative          */
 /*      'perr' is the returned period standard deviation     */
 /*      'pderr' is the returned p-dot standard deviation     */
 /*      'pdderr' is the returned p-dotdot standard deviation */
@@ -128,6 +152,7 @@ void fold_errors(double *prof, int proflen, double dt, double N,
     double err, r, z, w, pwrfact = 0.0, pwrerr = 0.0, rerr, zerr, werr, dtmp;
     double rerrn = 0.0, zerrn = 0.0, werrn = 0.0, rerrd = 0.0, zerrd = 0.0, werrd =
         0.0;
+    double dt_per_bin = p / proflen / dt;
     float powargr, powargi;
     fcomplex *fftprof;
 
@@ -144,11 +169,13 @@ void fold_errors(double *prof, int proflen, double dt, double N,
     else
         w = 2.0 * z * z / r - pdd * r * r * T;
 
-    /* Calculate the normalization constant which converts the raw */
-    /* powers into normalized powers -- just as if we had FFTd the */
-    /* full data set.                                              */
+    // Calculate the normalization constant which converts the raw powers into
+    // normalized powers -- just as if we had FFTd the full data set.  This
+    // needs to be corrected for the fact that prepfold's folding algorithm
+    // causes neighboring bins to be slightly correlated (and therefore have
+    // less variance than it should).
 
-    norm = 1.0 / (N * datavar);
+    norm = 1.0 / (N * datavar / DOF_corr(dt_per_bin));
 
     /* Place the profile into a complex array */
 

diff --git a/src/prepfold_utils.c b/src/prepfold_utils.c
@@ -755,32 +755,6 @@ void correct_subbands_for_DM(double dm, prepfoldinfo * search,
 }
 
 
-double DOF_corr(double dt_per_bin)
-// Return a multiplicative correction for the effective number of
-// degrees of freedom in the chi^2 measurement resulting from a pulse
-// profile folded by PRESTO's fold() function (i.e. prepfold).  This
-// is required because there are correlations between the bins caused
-// by the way that prepfold folds data (i.e. treating a sample as
-// finite duration and smearing it over potenitally several bins in
-// the profile as opposed to instantaneous and going into just one
-// profile bin).  The correction is semi-analytic (thanks to Paul
-// Demorest and Walter Brisken) but the values for 'power' and
-// 'factor' have been determined from Monte Carlos.  The correction is
-// good to a fractional error of less than a few percent as long as
-// dt_per_bin is > 0.5 or so (which it usually is for pulsar
-// candidates).  There is a very minimal number-of-bins dependence,
-// which is apparent when dt_per_bin < 0.7 or so.  dt_per_bin is the
-// width of a profile bin in samples (a float), and so for prepfold is
-// pulse period / nbins / sample time.  Note that the sqrt of this
-// factor can be used to 'inflate' the RMS of the profile as well, for
-// radiometer eqn flux density estimates, for instance.
-{
-    double power = 1.806;       // From Monte Carlos
-    double factor = 0.96;       // From Monte Carlos
-    return dt_per_bin * factor * pow(1.0 + pow(dt_per_bin, power), -1.0 / power);
-}
-
-
 float estimate_offpulse_redchi2(double *inprofs, foldstats * stats,
                                 int numparts, int numsubbands,
                                 int proflen, int numtrials, double dofeff)