Merge pull request #14 from cosmodesi/analytic_counts_rpcut

rp cut implementation for analytic counts
cosmodesi · Feb 16, 2025 · 37554fb · 37554fb
2 parents 868ba4e + 41d67bc
commit 37554fb
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -56,11 +56,12 @@ See [pycorr docs](https://py2pcf.readthedocs.io/en/latest/user/building.html).
 
 ## Acknowledgments
 
-- Lehman Garrison and Manodeep Sinha for advice when implementing linear binning, and PIP and angular weights into Corrfunc.
-- Davide Bianchi for cross-checks of two-point counts with PIP weights.
-- Svyatoslav Trusov for script to compute jackknife covariance estimates based on https://arxiv.org/pdf/2109.07071.pdf: https://github.com/theonefromnowhere/JK_pycorr/blob/main/CF_JK_ST_conf.py.
+- Lehman Garrison and Manodeep Sinha for advice when implementing linear binning, and PIP and angular weights into Corrfunc
+- Davide Bianchi for cross-checks of two-point counts with PIP weights
+- Svyatoslav Trusov for script to compute jackknife covariance estimates based on https://arxiv.org/pdf/2109.07071.pdf: https://github.com/theonefromnowhere/JK_pycorr/blob/main/CF_JK_ST_conf.py
 - Enrique Paillas and Seshadri Nadathur for suggestions about reconstructed 2pcf measurements
 - Craig Warner for GPU-izing Corrfunc 'smu' counts
+- Misha Rashkovetskyi for rp-cut analytical pair counts
 - Craig Warner, James Lasker, Misha Rashkovetskyi and Edmond Chaussidon for spotting typos / bug reports
 
 # Citations

diff --git a/pycorr/correlation_function.py b/pycorr/correlation_function.py
@@ -364,7 +364,7 @@ def is_same(array1, array2):
                 size2 = counts['D1D2'].size2
             if boxsize is None:
                 raise ValueError('boxsize must be provided for analytic two-point counts {}.'.format(label12))
-            counts[label12] = AnalyticTwoPointCounter(mode, edges, boxsize, size1=size1, size2=size2)
+            counts[label12] = AnalyticTwoPointCounter(mode, edges, boxsize, size1=size1, size2=size2, selection_attrs=selection_attrs[label12])
             continue
         if log: logger.info('Computing two-point counts {}.'.format(label12))
         twopoint_kwargs = {'twopoint_weights': twopoint_weights[label12], 'weight_type': weight_type[label12], 'selection_attrs': selection_attrs[label12]}

diff --git a/pycorr/tests/test_twopoint_counter.py b/pycorr/tests/test_twopoint_counter.py
@@ -981,7 +981,7 @@ def test_pip_counts_correction():
 
 def test_analytic_twopoint_counter(mode='s'):
     edges = np.linspace(50, 100, 5)
-    size = 10000
+    size = 40000
     boxsize = (1000,) * 3
     if mode == 'smu':
         edges = (edges, np.linspace(-1, 1, 5))
@@ -991,21 +991,27 @@ def test_analytic_twopoint_counter(mode='s'):
     list_options = []
     list_options.append({})
     list_options.append({'autocorr': True})
+    if mode in ['smu']:
+        list_options.append({'autocorr': True, 'selection_attrs': {'rp': (70., 90.)}})
 
     for options in list_options:
         autocorr = options.pop('autocorr', False)
         data1, data2 = generate_catalogs(size, boxsize=boxsize, n_individual_weights=0, n_bitwise_weights=0)
         ref = TwoPointCounter(mode=mode, edges=edges, positions1=data1[:3], positions2=None if autocorr else data2[:3],
                               weights1=None, weights2=None, position_type='xyz', boxsize=boxsize, los='z', **options)
-        test = AnalyticTwoPointCounter(mode, edges, boxsize, size1=len(data1[0]), size2=None if autocorr else len(data2[0]))
-        ratio = np.absolute(test.wcounts / ref.wcounts - 1)
-        assert np.all(ratio < 0.1)
+        test = AnalyticTwoPointCounter(mode, edges, boxsize, size1=len(data1[0]), size2=None if autocorr else len(data2[0]), selection_attrs=options.get('selection_attrs', {}))
+        diff = np.abs(test.wcounts - ref.wcounts)
+        assert np.all(diff <= 3. * np.sqrt(ref.wcounts))
+        if mode == 'smu' and (edges[1][0], edges[1][-1]) == (-1, 1):
+            test2 = AnalyticTwoPointCounter('s', edges[:1], boxsize, size1=len(data1[0]), size2=None if autocorr else len(data2[0]), selection_attrs=options.get('selection_attrs', {}))
+            assert np.allclose(test2.wcounts, test.wcounts.sum(axis=-1))
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             fn = os.path.join(tmp_dir, 'tmp.npy')
+            bak = np.copy(test.wcounts)
             test.save(fn)
             test = TwoPointCounter.load(fn)
-            ratio = np.absolute(test.wcounts / ref.wcounts - 1)
-            assert np.all(ratio < 0.1)
+            assert np.allclose(test.wcounts, bak)
             ref = test.copy()
             test.rebin((2, 2) if len(edges) == 2 else (2,))
             assert np.allclose(np.sum(test.wcounts), np.sum(ref.wcounts))

diff --git a/pycorr/twopoint_counter.py b/pycorr/twopoint_counter.py
@@ -1429,7 +1429,7 @@ class AnalyticTwoPointCounter(BaseTwoPointCounter):
     """
     name = 'analytic'
 
-    def __init__(self, mode, edges, boxsize, size1=10, size2=None, los='z'):
+    def __init__(self, mode, edges, boxsize, size1=10, size2=None, los='z', selection_attrs=None):
         r"""
         Initialize :class:`AnalyticTwoPointCounter`, and set :attr:`wcounts` and :attr:`sep`.
 
@@ -1474,17 +1474,44 @@ def __init__(self, mode, edges, boxsize, size1=10, size2=None, los='z'):
         self._set_default_seps()
         self._set_reversible()
         self.wnorm = self.normalization()
+        self._set_selection(selection_attrs)
         self.run()
 
     def run(self):
         """Set analytical two-point counts."""
+        if any(name != "rp" for name in self.selection_attrs.keys()):
+            raise TwoPointCounterError("Analytic random counts not implemented for selections other than rp")
+        rp_selection = self.selection_attrs.get("rp", None)
+        if rp_selection is not None and self.mode not in ['s', 'smu']:
+            raise TwoPointCounterError("Analytic random counts not implemented for rp selection with mode {}".format(self.mode))
         if self.mode == 's':
             v = 4. / 3. * np.pi * self.edges[0]**3
             dv = np.diff(v, axis=0)
+            if rp_selection:
+                v_rpcut = [4. / 3. * np.pi * (self.edges[0]**3 - np.fmax(self.edges[0]**2 - rp_cut**2, 0)**1.5) for rp_cut in rp_selection] # volume of the intersection of a cylinder with radius rp_cut and a sphere with radius s
+                v_rpcut = v_rpcut[1] - v_rpcut[0] # the volume that is removed between two rp cut limits
+                dv_rpcut = np.diff(v_rpcut, axis=0) # volume in each bin removed by rp selection
+                dv = np.where(dv_rpcut >= 1e-8 * dv, dv_rpcut, 0) # ensure that the volume is not negative and further remove small positive values that may arise due to rounding errors; assumes that dv is accurate
         elif self.mode == 'smu':
             # we bin in mu
             v = 2. / 3. * np.pi * self.edges[0][:, None]**3 * self.edges[1]
             dv = np.diff(np.diff(v, axis=0), axis=-1)
+            if rp_selection:
+                s, mu = self.edges[0][:, None], self.edges[1][None, :]
+                sin_theta = np.sqrt(1 - mu**2) * np.ones_like(s)
+                v_rpcut = []
+                for rp_cut in rp_selection:
+                    ss = s * np.ones_like(mu); c = ss * sin_theta > rp_cut; ss[c] = rp_cut / sin_theta[c] # this prevents division by zero, should work when rp_cut = 0, infinity or s = 0
+                    r = ss * sin_theta # = min(rp_cut, s * sin(theta))
+                    h = ss * mu # = cot(theta) * r, but avoids ambiguity/division by zero
+                    v_rpcut.append(2. / 3. * np.pi * (s**3 - (s**2 - r**2)**1.5 + r**2 * h + 2 * (mu > 0) * ((s**2 - r**2)**1.5 - np.fmax(s**2 - rp_cut**2, 0)**1.5))) # volume of the intersection of a cylinder with radius rp_cut and a spherical sector/cone between -1 and mu with radius s.
+                    # it can be decomposed into (1) intersection of a cylinder with the sphere, (2) a usual cylinder, (3) a usual cone and (4) only for mu>0 - intersection of the sphere with the space between two cylinders.
+                    # r is the radius of (1-3) from the line of sight; h is the height of the cylinder (2) and the cone (3).
+                    # the radii of cylinders for (4) are r and R = min(rp_cut, s), and r <= R always.
+                    # it may seem that (4) becomes a more complicated shape if r < s * sin(theta), but this can only happen if rp_cut < s * sin(theta) <= s, then r = R = rp_cut, and we obtain zero volume as it should be.
+                v_rpcut = v_rpcut[1] - v_rpcut[0] # the volume that is removed between two rp cut limits
+                dv_rpcut = np.diff(np.diff(v_rpcut, axis=0), axis=-1) # volume in each bin removed by rp selection
+                dv = np.where(dv_rpcut >= 1e-8 * dv, dv_rpcut, 0) # ensure that the volume is not negative and further remove small positive values that may arise due to rounding errors; assumes that dv is accurate
         elif self.mode == 'rppi':
             v = np.pi * self.edges[0][:, None]**2 * self.edges[1]
             dv = np.diff(np.diff(v, axis=0), axis=1)