[code_writer] add 'make source-mma', adjust the generated code

gudrunhe · Apr 29, 2024 · b146463 · b146463
1 parent d299c3a
commit b146463
Show file tree

Hide file tree

Showing 12 changed files with 407 additions and 116 deletions.
diff --git a/pySecDec/code_writer/make_package.py b/pySecDec/code_writer/make_package.py
@@ -605,6 +605,16 @@ def _make_sector_distsrc_files(sector_order_names):
             [f"distsrc/sector_{s}_{o}.cu" for s, o in orders]
     return " \\\n\t".join(files)
 
+def _make_sector_mma_files(sector_order_names):
+    """
+    Produce a Makefile-formatted list of source files that
+    export_sector will produce for a given sector for the
+    Mathematica output.
+    """
+    orders = sector_order_names.values()
+    files = [f"mma/sector_{s}_{o}.m" for s, o in orders]
+    return " \\\n\t".join(files)
+
 def _derivative_muliindex_to_name(basename, multiindex):
     '''
     Convert a derivative multiindex as returned by
@@ -1356,6 +1366,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
     sector_order_names = _make_sector_order_names(sector_index, regulator_powers, highest_poles_current_sector)
     sector_cpp_files = _make_sector_cpp_files(sector_index, sector_order_names, contour_deformation_polynomial is not None)
     sector_distsrc_files = _make_sector_distsrc_files(sector_order_names)
+    sector_mma_files = _make_sector_mma_files(sector_order_names)
     regulator_powers = _make_FORM_shifted_orders(regulator_powers)
 
     # parse template file "sector.h"
@@ -1374,6 +1385,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
     template_replacements['sector_cpp_files'] = sector_cpp_files
     template_replacements['sector_hpp_files'] = sector_cpp_files.replace(".cpp", ".hpp")
     template_replacements['sector_distsrc_files'] = sector_distsrc_files
+    template_replacements['sector_mma_files'] = sector_mma_files
     template_replacements['sector_codegen_sources'] = \
             "codegen/sector%i.h" % sector_index if contour_deformation_polynomial is None else \
             "codegen/sector%i.h codegen/contour_deformation_sector%i.h" % (sector_index, sector_index)
@@ -1386,7 +1398,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
                         template_replacements)
     for key in 'functions', 'cal_I_derivatives', 'decomposed_polynomial_derivatives','insert_cal_I_procedure','insert_other_procedure','insert_decomposed_procedure', \
             'integrand_definition_procedure','highest_regulator_poles','required_orders','regulator_powers','number_of_orders', \
-            'sector_index', 'sector_cpp_files', 'sector_hpp_files', 'sector_distsrc_files', 'sector_codegen_sources':
+            'sector_index', 'sector_cpp_files', 'sector_hpp_files', 'sector_distsrc_files', 'sector_mma_files', 'sector_codegen_sources':
         del template_replacements[key]
 
     if contour_deformation_polynomial is not None:

diff --git a/pySecDec/code_writer/templates/make_package/Makefile b/pySecDec/code_writer/templates/make_package/Makefile
@@ -2,6 +2,7 @@ include Makefile.conf
 include $(wildcard codegen/sector*.d)
 
 source : $(SECTOR_CPP)
+source-mma : $(SECTOR_MMA)
 
 lib$(NAME).a : $(patsubst %%.cpp,%%.o,$(SECTOR_CPP)) src/integrands.o src/pole_structures.o src/prefactor.o
 	@rm -f $@
@@ -48,6 +49,10 @@ codegen/sector%%.done: codegen/sector%%.h
 	$(PYTHON) '$(SECDEC_CONTRIB)/bin/export_sector' $(patsubst %%.h,%%.info,$<) ./
 	touch $@
 
+codegen/sector%%.mma.done: codegen/sector%%.done
+	$(PYTHON) '$(SECDEC_CONTRIB)/bin/export_sector' --subset=mma $(patsubst %%.done,%%.info,$<) ./
+	touch $@
+
 # The following is for the distributed evaluation.
 
 SECTOR_ORDERS:=$(patsubst src/sector_%%.cpp,%%,$(filter src/sector_%%.cpp,$(SECTOR_CPP)))

diff --git a/pySecDec/code_writer/templates/make_package/codegen/sector.d b/pySecDec/code_writer/templates/make_package/codegen/sector.d
@@ -2,6 +2,10 @@ SECTOR%(sector_index)i_CPP = \
 	%(sector_cpp_files)s
 SECTOR%(sector_index)i_DISTSRC = \
 	%(sector_distsrc_files)s
+SECTOR%(sector_index)i_MMA = \
+	%(sector_mma_files)s
 SECTOR_CPP += $(SECTOR%(sector_index)i_CPP)
+SECTOR_MMA += $(SECTOR%(sector_index)i_MMA)
 
 $(SECTOR%(sector_index)i_DISTSRC) $(SECTOR%(sector_index)i_CPP) $(patsubst %%.cpp,%%.hpp,$(SECTOR%(sector_index)i_CPP)) : codegen/sector%(sector_index)i.done ;
+$(SECTOR%(sector_index)i_MMA) : codegen/sector%(sector_index)i.mma.done ;
diff --git a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp
@@ -1,8 +1,8 @@
 #define SECDEC_RESULT_IS_COMPLEX 1
 #include "common_cpu.h"
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {*presult = nan("U"); return 1; }
-#define SecDecInternalSignCheckErrorContourDeformation(id) {*presult = nan("F"); return 2; }
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) {*presult = NAN; return 1; }
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) {*presult = NAN; return 2; }
 
 extern "C" int
 builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, order=0
@@ -23,7 +23,7 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
     const real_t m3sq = realp[3];
     const real_t SecDecInternalLambda0 = deformp[0];
     const real_t SecDecInternalLambda1 = deformp[1];
-    const real_t invlattice = 1.0/lattice;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
     resultvec_t acc = RESULTVEC_ZERO;
     uint64_t index = index1;
     int_t li_x0 = mulmod(genvec[0], index, lattice);
@@ -150,9 +150,9 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
         auto tmp3_64 = -tmp3_16 + tmp3_63;
         auto tmp3_65 = x1*tmp3_54*tmp3_34*__PowCall1*__PowCall2*__DenominatorCall1;
         auto _SignCheckExpression = SecDecInternalImagPart(tmp3_64);
-        if (unlikely(_SignCheckExpression>0)) SecDecInternalSignCheckErrorContourDeformation(1);
+        SecDecInternalSignCheckContourDeformation(!(_SignCheckExpression<=0), 1);
         auto tmp3_66 = SecDecInternalRealPart(tmp3_51);
-        if (unlikely(tmp3_66<0)) SecDecInternalSignCheckErrorPositivePolynomial(1);
+        SecDecInternalSignCheckPositivePolynomial(!(tmp3_66>=0), 1);
         acc = acc + w*(tmp3_65);
     }
     *presult = componentsum(acc);

diff --git a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu
@@ -10,19 +10,19 @@
 
 #define sum_kernel(name, value_t) \
     extern "C" __global__ void \
-    name(value_t *dst, value_t *src, uint64_t n) \
+    name(value_t * __restrict__ dst, value_t * __restrict__ src, uint64_t n) \
     { \
         uint64_t bid = blockIdx.x; \
         uint64_t tid = threadIdx.x; \
         uint64_t idx = (bid*128 + tid)*8; \
-        value_t val1 = (idx+0 < n) ? src[idx+0] : value_t(0); \
-        value_t val2 = (idx+1 < n) ? src[idx+1] : value_t(0); \
-        value_t val3 = (idx+2 < n) ? src[idx+2] : value_t(0); \
-        value_t val4 = (idx+3 < n) ? src[idx+3] : value_t(0); \
-        value_t val5 = (idx+4 < n) ? src[idx+4] : value_t(0); \
-        value_t val6 = (idx+5 < n) ? src[idx+5] : value_t(0); \
-        value_t val7 = (idx+6 < n) ? src[idx+6] : value_t(0); \
-        value_t val8 = (idx+7 < n) ? src[idx+7] : value_t(0); \
+        value_t val1 = (idx+0 < n) ? src[idx+0] : value_t{}; \
+        value_t val2 = (idx+1 < n) ? src[idx+1] : value_t{}; \
+        value_t val3 = (idx+2 < n) ? src[idx+2] : value_t{}; \
+        value_t val4 = (idx+3 < n) ? src[idx+3] : value_t{}; \
+        value_t val5 = (idx+4 < n) ? src[idx+4] : value_t{}; \
+        value_t val6 = (idx+5 < n) ? src[idx+5] : value_t{}; \
+        value_t val7 = (idx+6 < n) ? src[idx+6] : value_t{}; \
+        value_t val8 = (idx+7 < n) ? src[idx+7] : value_t{}; \
         value_t val = ((val1 + val2) + (val3 + val4)) + ((val5 + val6) + (val7 + val8)); \
         typedef cub::BlockReduce<value_t, 128, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> Reduce; \
         __shared__ typename Reduce::TempStorage shared; \
@@ -33,8 +33,8 @@
 sum_kernel(sum_d_b128_x1024, real_t)
 sum_kernel(sum_c_b128_x1024, complex_t)
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {val = nan("U"); break;}
-#define SecDecInternalSignCheckErrorContourDeformation(id) {val = nan("F"); break;}
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) { val = REAL_NAN; break; }
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) { val = REAL_NAN; break; }
 
 extern "C" __global__ void
 builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, order=0
@@ -58,15 +58,15 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
     const real_t m3sq = realp[3];
     const real_t SecDecInternalLambda0 = deformp[0];
     const real_t SecDecInternalLambda1 = deformp[1];
-    const real_t invlattice = 1.0/lattice;
-    result_t val = 0.0;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
+    result_t val = {};
     uint64_t index = index1 + (bid*128 + tid)*8;
     uint64_t li_x0 = mulmod(index, genvec[0], lattice);
     uint64_t li_x1 = mulmod(index, genvec[1], lattice);
     for (uint64_t i = 0; (i < 8) && (index < index2); i++, index++) {
-        real_t x0 = warponce(li_x0*invlattice + shift[0], 1.0);
+        real_t x0 = warponce(invlattice*(double)li_x0 + shift[0], 1.0);
         li_x0 = warponce_i(li_x0 + genvec[0], lattice);
-        real_t x1 = warponce(li_x1*invlattice + shift[1], 1.0);
+        real_t x1 = warponce(invlattice*(double)li_x1 + shift[1], 1.0);
         li_x1 = warponce_i(li_x1 + genvec[1], lattice);
         real_t w_x0 = korobov3x3_w(x0);
         real_t w_x1 = korobov3x3_w(x1);
@@ -197,10 +197,10 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
         auto tmp3_76 = tmp3_75 + tmp3_74 + tmp3_72;
         auto tmp3_77 = __DenominatorCall1*tmp3_76*tmp3_60*tmp3_36*__PowCall1*__PowCall2;
         auto _SignCheckExpression = SecDecInternalImagPart(tmp3_70);
-        if (unlikely(_SignCheckExpression>0)) SecDecInternalSignCheckErrorContourDeformation(1);
+        SecDecInternalSignCheckContourDeformation(!(_SignCheckExpression<=0), 1);
         auto tmp3_78 = SecDecInternalRealPart(tmp3_57);
-        if (unlikely(tmp3_78<0)) SecDecInternalSignCheckErrorPositivePolynomial(1);
-        val += w*(tmp3_77);
+        SecDecInternalSignCheckPositivePolynomial(!(tmp3_78>=0), 1);
+        val = val + w*(tmp3_77);
     }
     // Sum up 128*8=1024 values across 4 warps.
     typedef cub::BlockReduce<result_t, 128, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> Reduce;