From b1464637ee98b6afa6996b1a443fac5cde83b740 Mon Sep 17 00:00:00 2001
From: Vitaly Magerya <magv@tx97.net>
Date: Wed, 20 Mar 2024 17:14:54 +0100
Subject: [PATCH] [code_writer] add 'make source-mma', adjust the generated
 code

---
 pySecDec/code_writer/make_package.py          |  14 +-
 .../templates/make_package/Makefile           |   5 +
 .../templates/make_package/codegen/sector.d   |   4 +
 .../make_package/distsrc/builtin.cpp          |  10 +-
 .../templates/make_package/distsrc/builtin.cu |  36 +--
 .../make_package/distsrc/common_cpu.h         | 155 +++++++++++--
 .../make_package/distsrc/common_cuda.h        |  51 +++--
 .../src/SecDecInternalFunctions.hpp           |  12 +-
 .../templates/sum_package/Makefile            |   6 +-
 pySecDecContrib/bin/export_sector             | 210 ++++++++++++++----
 .../lib/write_contour_deformation.frm         |   8 +-
 pySecDecContrib/lib/write_integrand.frm       |  12 +-
 12 files changed, 407 insertions(+), 116 deletions(-)

diff --git a/pySecDec/code_writer/make_package.py b/pySecDec/code_writer/make_package.py
index b4bd948e..212b9a64 100644
--- a/pySecDec/code_writer/make_package.py
+++ b/pySecDec/code_writer/make_package.py
@@ -605,6 +605,16 @@ def _make_sector_distsrc_files(sector_order_names):
             [f"distsrc/sector_{s}_{o}.cu" for s, o in orders]
     return " \\\n\t".join(files)
 
+def _make_sector_mma_files(sector_order_names):
+    """
+    Produce a Makefile-formatted list of source files that
+    export_sector will produce for a given sector for the
+    Mathematica output.
+    """
+    orders = sector_order_names.values()
+    files = [f"mma/sector_{s}_{o}.m" for s, o in orders]
+    return " \\\n\t".join(files)
+
 def _derivative_muliindex_to_name(basename, multiindex):
     '''
     Convert a derivative multiindex as returned by
@@ -1356,6 +1366,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
     sector_order_names = _make_sector_order_names(sector_index, regulator_powers, highest_poles_current_sector)
     sector_cpp_files = _make_sector_cpp_files(sector_index, sector_order_names, contour_deformation_polynomial is not None)
     sector_distsrc_files = _make_sector_distsrc_files(sector_order_names)
+    sector_mma_files = _make_sector_mma_files(sector_order_names)
     regulator_powers = _make_FORM_shifted_orders(regulator_powers)
 
     # parse template file "sector.h"
@@ -1374,6 +1385,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
     template_replacements['sector_cpp_files'] = sector_cpp_files
     template_replacements['sector_hpp_files'] = sector_cpp_files.replace(".cpp", ".hpp")
     template_replacements['sector_distsrc_files'] = sector_distsrc_files
+    template_replacements['sector_mma_files'] = sector_mma_files
     template_replacements['sector_codegen_sources'] = \
             "codegen/sector%i.h" % sector_index if contour_deformation_polynomial is None else \
             "codegen/sector%i.h codegen/contour_deformation_sector%i.h" % (sector_index, sector_index)
@@ -1386,7 +1398,7 @@ def update_derivatives(basename, derivative_tracker, full_expression, derivative
                         template_replacements)
     for key in 'functions', 'cal_I_derivatives', 'decomposed_polynomial_derivatives','insert_cal_I_procedure','insert_other_procedure','insert_decomposed_procedure', \
             'integrand_definition_procedure','highest_regulator_poles','required_orders','regulator_powers','number_of_orders', \
-            'sector_index', 'sector_cpp_files', 'sector_hpp_files', 'sector_distsrc_files', 'sector_codegen_sources':
+            'sector_index', 'sector_cpp_files', 'sector_hpp_files', 'sector_distsrc_files', 'sector_mma_files', 'sector_codegen_sources':
         del template_replacements[key]
 
     if contour_deformation_polynomial is not None:
diff --git a/pySecDec/code_writer/templates/make_package/Makefile b/pySecDec/code_writer/templates/make_package/Makefile
index 5354fdc4..406932a2 100644
--- a/pySecDec/code_writer/templates/make_package/Makefile
+++ b/pySecDec/code_writer/templates/make_package/Makefile
@@ -2,6 +2,7 @@ include Makefile.conf
 include $(wildcard codegen/sector*.d)
 
 source : $(SECTOR_CPP)
+source-mma : $(SECTOR_MMA)
 
 lib$(NAME).a : $(patsubst %%.cpp,%%.o,$(SECTOR_CPP)) src/integrands.o src/pole_structures.o src/prefactor.o
 	@rm -f $@
@@ -48,6 +49,10 @@ codegen/sector%%.done: codegen/sector%%.h
 	$(PYTHON) '$(SECDEC_CONTRIB)/bin/export_sector' $(patsubst %%.h,%%.info,$<) ./
 	touch $@
 
+codegen/sector%%.mma.done: codegen/sector%%.done
+	$(PYTHON) '$(SECDEC_CONTRIB)/bin/export_sector' --subset=mma $(patsubst %%.done,%%.info,$<) ./
+	touch $@
+
 # The following is for the distributed evaluation.
 
 SECTOR_ORDERS:=$(patsubst src/sector_%%.cpp,%%,$(filter src/sector_%%.cpp,$(SECTOR_CPP)))
diff --git a/pySecDec/code_writer/templates/make_package/codegen/sector.d b/pySecDec/code_writer/templates/make_package/codegen/sector.d
index 34a8945a..7fd482c7 100644
--- a/pySecDec/code_writer/templates/make_package/codegen/sector.d
+++ b/pySecDec/code_writer/templates/make_package/codegen/sector.d
@@ -2,6 +2,10 @@ SECTOR%(sector_index)i_CPP = \
 	%(sector_cpp_files)s
 SECTOR%(sector_index)i_DISTSRC = \
 	%(sector_distsrc_files)s
+SECTOR%(sector_index)i_MMA = \
+	%(sector_mma_files)s
 SECTOR_CPP += $(SECTOR%(sector_index)i_CPP)
+SECTOR_MMA += $(SECTOR%(sector_index)i_MMA)
 
 $(SECTOR%(sector_index)i_DISTSRC) $(SECTOR%(sector_index)i_CPP) $(patsubst %%.cpp,%%.hpp,$(SECTOR%(sector_index)i_CPP)) : codegen/sector%(sector_index)i.done ;
+$(SECTOR%(sector_index)i_MMA) : codegen/sector%(sector_index)i.mma.done ;
diff --git a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp
index 28794822..e5834e7a 100644
--- a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp
+++ b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cpp
@@ -1,8 +1,8 @@
 #define SECDEC_RESULT_IS_COMPLEX 1
 #include "common_cpu.h"
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {*presult = nan("U"); return 1; }
-#define SecDecInternalSignCheckErrorContourDeformation(id) {*presult = nan("F"); return 2; }
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) {*presult = NAN; return 1; }
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) {*presult = NAN; return 2; }
 
 extern "C" int
 builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, order=0
@@ -23,7 +23,7 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
     const real_t m3sq = realp[3];
     const real_t SecDecInternalLambda0 = deformp[0];
     const real_t SecDecInternalLambda1 = deformp[1];
-    const real_t invlattice = 1.0/lattice;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
     resultvec_t acc = RESULTVEC_ZERO;
     uint64_t index = index1;
     int_t li_x0 = mulmod(genvec[0], index, lattice);
@@ -150,9 +150,9 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
         auto tmp3_64 = -tmp3_16 + tmp3_63;
         auto tmp3_65 = x1*tmp3_54*tmp3_34*__PowCall1*__PowCall2*__DenominatorCall1;
         auto _SignCheckExpression = SecDecInternalImagPart(tmp3_64);
-        if (unlikely(_SignCheckExpression>0)) SecDecInternalSignCheckErrorContourDeformation(1);
+        SecDecInternalSignCheckContourDeformation(!(_SignCheckExpression<=0), 1);
         auto tmp3_66 = SecDecInternalRealPart(tmp3_51);
-        if (unlikely(tmp3_66<0)) SecDecInternalSignCheckErrorPositivePolynomial(1);
+        SecDecInternalSignCheckPositivePolynomial(!(tmp3_66>=0), 1);
         acc = acc + w*(tmp3_65);
     }
     *presult = componentsum(acc);
diff --git a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu
index 1fa51914..ce76ed9f 100644
--- a/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu
+++ b/pySecDec/code_writer/templates/make_package/distsrc/builtin.cu
@@ -10,19 +10,19 @@
 
 #define sum_kernel(name, value_t) \
     extern "C" __global__ void \
-    name(value_t *dst, value_t *src, uint64_t n) \
+    name(value_t * __restrict__ dst, value_t * __restrict__ src, uint64_t n) \
     { \
         uint64_t bid = blockIdx.x; \
         uint64_t tid = threadIdx.x; \
         uint64_t idx = (bid*128 + tid)*8; \
-        value_t val1 = (idx+0 < n) ? src[idx+0] : value_t(0); \
-        value_t val2 = (idx+1 < n) ? src[idx+1] : value_t(0); \
-        value_t val3 = (idx+2 < n) ? src[idx+2] : value_t(0); \
-        value_t val4 = (idx+3 < n) ? src[idx+3] : value_t(0); \
-        value_t val5 = (idx+4 < n) ? src[idx+4] : value_t(0); \
-        value_t val6 = (idx+5 < n) ? src[idx+5] : value_t(0); \
-        value_t val7 = (idx+6 < n) ? src[idx+6] : value_t(0); \
-        value_t val8 = (idx+7 < n) ? src[idx+7] : value_t(0); \
+        value_t val1 = (idx+0 < n) ? src[idx+0] : value_t{}; \
+        value_t val2 = (idx+1 < n) ? src[idx+1] : value_t{}; \
+        value_t val3 = (idx+2 < n) ? src[idx+2] : value_t{}; \
+        value_t val4 = (idx+3 < n) ? src[idx+3] : value_t{}; \
+        value_t val5 = (idx+4 < n) ? src[idx+4] : value_t{}; \
+        value_t val6 = (idx+5 < n) ? src[idx+5] : value_t{}; \
+        value_t val7 = (idx+6 < n) ? src[idx+6] : value_t{}; \
+        value_t val8 = (idx+7 < n) ? src[idx+7] : value_t{}; \
         value_t val = ((val1 + val2) + (val3 + val4)) + ((val5 + val6) + (val7 + val8)); \
         typedef cub::BlockReduce<value_t, 128, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> Reduce; \
         __shared__ typename Reduce::TempStorage shared; \
@@ -33,8 +33,8 @@
 sum_kernel(sum_d_b128_x1024, real_t)
 sum_kernel(sum_c_b128_x1024, complex_t)
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {val = nan("U"); break;}
-#define SecDecInternalSignCheckErrorContourDeformation(id) {val = nan("F"); break;}
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) { val = REAL_NAN; break; }
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) { val = REAL_NAN; break; }
 
 extern "C" __global__ void
 builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, order=0
@@ -58,15 +58,15 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
     const real_t m3sq = realp[3];
     const real_t SecDecInternalLambda0 = deformp[0];
     const real_t SecDecInternalLambda1 = deformp[1];
-    const real_t invlattice = 1.0/lattice;
-    result_t val = 0.0;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
+    result_t val = {};
     uint64_t index = index1 + (bid*128 + tid)*8;
     uint64_t li_x0 = mulmod(index, genvec[0], lattice);
     uint64_t li_x1 = mulmod(index, genvec[1], lattice);
     for (uint64_t i = 0; (i < 8) && (index < index2); i++, index++) {
-        real_t x0 = warponce(li_x0*invlattice + shift[0], 1.0);
+        real_t x0 = warponce(invlattice*(double)li_x0 + shift[0], 1.0);
         li_x0 = warponce_i(li_x0 + genvec[0], lattice);
-        real_t x1 = warponce(li_x1*invlattice + shift[1], 1.0);
+        real_t x1 = warponce(invlattice*(double)li_x1 + shift[1], 1.0);
         li_x1 = warponce_i(li_x1 + genvec[1], lattice);
         real_t w_x0 = korobov3x3_w(x0);
         real_t w_x1 = korobov3x3_w(x1);
@@ -197,10 +197,10 @@ builtin__gauge( // sunset, nu=(1,2,3), realp=(q2, m1sq, m2sq, m3sq), sector=1, o
         auto tmp3_76 = tmp3_75 + tmp3_74 + tmp3_72;
         auto tmp3_77 = __DenominatorCall1*tmp3_76*tmp3_60*tmp3_36*__PowCall1*__PowCall2;
         auto _SignCheckExpression = SecDecInternalImagPart(tmp3_70);
-        if (unlikely(_SignCheckExpression>0)) SecDecInternalSignCheckErrorContourDeformation(1);
+        SecDecInternalSignCheckContourDeformation(!(_SignCheckExpression<=0), 1);
         auto tmp3_78 = SecDecInternalRealPart(tmp3_57);
-        if (unlikely(tmp3_78<0)) SecDecInternalSignCheckErrorPositivePolynomial(1);
-        val += w*(tmp3_77);
+        SecDecInternalSignCheckPositivePolynomial(!(tmp3_78>=0), 1);
+        val = val + w*(tmp3_77);
     }
     // Sum up 128*8=1024 values across 4 warps.
     typedef cub::BlockReduce<result_t, 128, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> Reduce;
diff --git a/pySecDec/code_writer/templates/make_package/distsrc/common_cpu.h b/pySecDec/code_writer/templates/make_package/distsrc/common_cpu.h
index 3476c96c..f859858f 100644
--- a/pySecDec/code_writer/templates/make_package/distsrc/common_cpu.h
+++ b/pySecDec/code_writer/templates/make_package/distsrc/common_cpu.h
@@ -5,6 +5,8 @@ typedef int64_t int_t;
 typedef double real_t;
 typedef std::complex<real_t> complex_t;
 
+#define REAL_NAN NAN
+
 // Vector extension are available in GCC since v4.9, and in Clang
 // since v3.5. Ternary operator works on vectors since GCC v4.9,
 // and Clang v10.0.
@@ -49,9 +51,14 @@ typedef std::complex<real_t> complex_t;
 mathfn int_t warponce_i(const int_t a, const int_t b)
 { int_t ab = a - b; return ab >= 0 ? ab : a; }
 
-#define SecDecInternalDenominator(x) (1.0/(x))
+#define SecDecInternalQuo(n, d) (((real_t)(n))/(d))
 #define i_ (complex_t{0,1})
 
+mathfn real_t SecDecInternalDenominator(real_t x) { return real_t(1)/x; }
+mathfn complex_t SecDecInternalDenominator(complex_t x) { return real_t(1)/x; }
+
+mathfn real_t SecDecInternalSqr(const real_t &a) { return a*a; }
+mathfn complex_t SecDecInternalSqr(const complex_t &a) { return a*a; }
 mathfn real_t SecDecInternalRealPart(const real_t &a) { return a; }
 mathfn real_t SecDecInternalImagPart(const real_t &a) { return 0; }
 mathfn real_t SecDecInternalAbs(const real_t &a) { return std::abs(a); }
@@ -59,6 +66,21 @@ mathfn real_t SecDecInternalAbs(const complex_t &a) { return std::abs(a); }
 mathfn complex_t SecDecInternalI(const real_t &a) { return complex_t{0, a}; }
 mathfn complex_t SecDecInternalI(const complex_t &a) { return complex_t{-a.imag(), a.real()}; }
 
+template<typename T> mathfn T
+SecDecInternalNPow(const T &a, unsigned n) {
+    T s = T(1.0);
+    T r = a;
+    if (likely(n > 0)) {
+        for (;;) {
+            if (n %% 2) { s = s*r; }
+            n /= 2;
+            if (n == 0) break;
+            r = SecDecInternalSqr(r);
+        }
+    }
+    return s;
+}
+
 static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t k) {
     // assume 0 <= a,b <= k < 2^53
     if (k <= 3037000499) { // floor(Int, sqrt(2^63-1))
@@ -135,6 +157,9 @@ DEF_SCALAR_OPERATOR(realvec_t, operator /, realvec_t, real_t, /)
     mathfn realvec_t vec_min(const realvec_t &a, const realvec_t &b)
     { return realvec_t{a.x < b.x ? a.x : b.x}; }
 
+    mathfn realvec_t vec_le_choose(const realvec_t &a, real_t b, const realvec_t &t, const realvec_t &f)
+    { return realvec_t{a.x <= b ? t.x : f.x}; }
+
     mathfn realvec_t warponce(const realvec_t &a, const real_t b)
     { realvec_t ab = realvec_t{a.x - b};
       return realvec_t{ab.x >= 0 ? ab.x : a.x}; }
@@ -153,6 +178,12 @@ DEF_SCALAR_OPERATOR(realvec_t, operator /, realvec_t, real_t, /)
                          a.x[2] < b.x[2] ? a.x[2] : b.x[2],
                          a.x[3] < b.x[3] ? a.x[3] : b.x[3] }}; }
 
+    mathfn realvec_t vec_le_choose(const realvec_t &a, real_t b, const realvec_t &t, const realvec_t &f)
+    { return realvec_t{{ a.x[0] <= b ? t.x[0] : f.x[0],
+                         a.x[1] <= b ? t.x[1] : f.x[1],
+                         a.x[2] <= b ? t.x[2] : f.x[2],
+                         a.x[3] <= b ? t.x[3] : f.x[3] }}; }
+
     mathfn realvec_t warponce(const realvec_t &a, const real_t b)
     { realvec_t ab = a - b;
       return realvec_t{{ ab.x[0] >= 0 ? ab.x[0] : a.x[0],
@@ -188,12 +219,13 @@ mathfn realvec_t vec_min(const real_t &a, const realvec_t &b)
     static inline realvec_t fname(const realvec_t &a, a1decl a1) \
     { return realvec_t{{ fn(a.x[0], a1), fn(a.x[1], a1), fn(a.x[2], a1), fn(a.x[3], a1) }}; }
 
-DEF_SCALAR_BOOL_OPERATOR(operator >, realvec_t, real_t, >, ||)
-DEF_SCALAR_BOOL_OPERATOR(operator <, realvec_t, real_t, <, ||)
+DEF_SCALAR_BOOL_OPERATOR(operator >=, realvec_t, real_t, >=, &&)
+DEF_SCALAR_BOOL_OPERATOR(operator <=, realvec_t, real_t, <=, &&)
 DEF_FUNCTION(realvec_t, SecDecInternalAbs, realvec_t, std::abs)
 
 mathfn realvec_t SecDecInternalRealPart(const realvec_t &a) { return a; }
 mathfn realvec_t SecDecInternalImagPart(const realvec_t &a) { return REALVEC_ZERO; }
+mathfn realvec_t SecDecInternalSqr(const realvec_t &a) { return a*a; }
 
 mathfn real_t componentsum(const realvec_t &a)
 { return a.x[0] + a.x[1] + a.x[2] + a.x[3]; }
@@ -203,23 +235,111 @@ mathfn real_t componentmin(const realvec_t &a)
   real_t m2 = a.x[2] < a.x[3] ? a.x[2] : a.x[3];
   return m1 < m2 ? m1 : m2; }
 
-mathfn realvec_t clamp01(const realvec_t &a)
-{ return vec_max(vec_min(a, REALVEC_CONST(1)), REALVEC_CONST(0)); }
-
 mathfn realvec_t none_f(const realvec_t &x) { return x; }
 mathfn realvec_t none_w(const realvec_t &x) { return REALVEC_CONST(1); }
 mathfn realvec_t baker_f(const realvec_t &x) { return vec_min(2*x, 2-2*x); }
 mathfn realvec_t baker_w(const realvec_t &x) { return REALVEC_CONST(1); }
-mathfn realvec_t korobov1x1_f(const realvec_t &x) { return x*x*((-2)*x + 3); }
-mathfn realvec_t korobov1x1_w(const realvec_t &x) { return (1 - x)*x*6; }
-mathfn realvec_t korobov2x2_f(const realvec_t &x) { return x*x*x*((6*x - 15)*x + 10); }
-mathfn realvec_t korobov2x2_w(const realvec_t &x) { auto xx = (1 - x)*x; return xx*xx*30; }
-mathfn realvec_t korobov3x3_f(const realvec_t &x) { auto xx = x*x; return xx*xx*((((-20)*x + 70)*x - 84)*x + 35); }
-mathfn realvec_t korobov3x3_w(const realvec_t &x) { auto xx = (1 - x)*x; return xx*xx*xx*140; }
-mathfn realvec_t korobov4x4_f(const realvec_t &x) { auto xx = x*x; return xx*xx*x*((((70*x - 315)*x + 540)*x - 420)*x + 126); }
-mathfn realvec_t korobov4x4_w(const realvec_t &x) { auto xx = (1 - x)*x; auto xx2 = xx*xx; return xx2*xx2*630; }
-mathfn realvec_t korobov5x5_f(const realvec_t &x) { auto x3 = x*x*x; return x3*x3*((((((-252)*x + 1386)*x - 3080)*x + 3465)*x - 1980)*x + 462); }
-mathfn realvec_t korobov5x5_w(const realvec_t &x) { auto xx = (1 - x)*x; auto xx2 = xx*xx; return xx2*xx2*xx*2772; }
+
+mathfn realvec_t korobov1x1_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 6*u;
+}
+mathfn realvec_t korobov1x1_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y2 = SecDecInternalSqr(y);
+    auto h = y2*(3 - 2*y);
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov2x2_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 30*SecDecInternalSqr(u);
+}
+mathfn realvec_t korobov2x2_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y3 = y*SecDecInternalSqr(y);
+    auto h = y3*(10 + y*(-15 + 6*y));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov3x3_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 140*u*SecDecInternalSqr(u);
+}
+mathfn realvec_t korobov3x3_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y4 = SecDecInternalSqr(SecDecInternalSqr(y));
+    auto h = y4*(35 + y*(-84 + (70 - 20*y)*y));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov4x4_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 630*SecDecInternalSqr(SecDecInternalSqr(u));
+}
+mathfn realvec_t korobov4x4_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y5 = y*SecDecInternalSqr(SecDecInternalSqr(y));
+    auto h = y5*(126 + y*(-420 + y*(540 + y*(-315 + 70*y))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov5x5_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 2772*u*SecDecInternalSqr(SecDecInternalSqr(u));
+}
+mathfn realvec_t korobov5x5_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y6 = SecDecInternalSqr(y*SecDecInternalSqr(y));
+    auto h = y6*(462 + y*(-1980 + y*(3465 + y*(-3080 + (1386 - 252*y)*y))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov6x6_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 12012*SecDecInternalSqr(u*SecDecInternalSqr(u));
+}
+mathfn realvec_t korobov6x6_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y7 = y*SecDecInternalSqr(y*SecDecInternalSqr(y));
+    auto h = y7*(1716 + y*(-9009 + y*(20020 + y*(-24024 + y*(16380 + y*(-6006 + 924*y))))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov7x7_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 51480*u*SecDecInternalSqr(u*SecDecInternalSqr(u));
+}
+mathfn realvec_t korobov7x7_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y8 = SecDecInternalSqr(SecDecInternalSqr(SecDecInternalSqr(y)));
+    auto h = y8*(6435 + y*(-40040 + y*(108108 + y*(-163800 + y*(150150 + y*(-83160 + (25740 - 3432*y)*y))))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov8x8_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 218790*SecDecInternalSqr(SecDecInternalSqr(SecDecInternalSqr(u)));
+}
+mathfn realvec_t korobov8x8_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y9 = y*SecDecInternalSqr(SecDecInternalSqr(SecDecInternalSqr(y)));
+    auto h = y9*(24310 + y*(-175032 + y*(556920 + y*(-1021020 + y*(1178100 + y*(-875160 + y*(408408 + y*(-109395 + 12870*y))))))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov9x9_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 923780*u*SecDecInternalSqr(SecDecInternalSqr(SecDecInternalSqr(u)));
+}
+mathfn realvec_t korobov9x9_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y10 = SecDecInternalSqr(y*SecDecInternalSqr(SecDecInternalSqr(y)));
+    auto h = y10*(92378 + y*(-755820 + y*(2771340 + y*(-5969040 + y*(8314020 + y*(-7759752 + y*(4849845 + y*(-1956240 + (461890 - 48620*y)*y))))))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
+mathfn realvec_t korobov10x10_w(const realvec_t &x) {
+    auto u = (1-x)*x;
+    return 3879876*SecDecInternalSqr(u*SecDecInternalSqr(SecDecInternalSqr(u)));
+}
+mathfn realvec_t korobov10x10_f(const realvec_t &x) {
+    auto y = vec_le_choose(x, 0.5, x, 1-x);
+    auto y11 = y*SecDecInternalSqr(y*SecDecInternalSqr(SecDecInternalSqr(y)));
+    auto h = y11*(352716 + y*(-3233230 + y*(13430340 + y*(-33256080 + y*(54318264 + y*(-61108047 + y*(47927880 + y*(-25865840 + y*(9189180 + y*(-1939938 + 184756*y))))))))));
+    return vec_le_choose(x, 0.5, h, 1-h);
+}
 
 // Complex vectors
 
@@ -318,6 +438,9 @@ mathfn real_t SecDecInternalRealPart(const complex_t &a) { return a.real(); }
 mathfn real_t SecDecInternalImagPart(const complex_t &a) { return a.imag(); }
 mathfn realvec_t SecDecInternalRealPart(const complexvec_t &a) { return a.re; }
 mathfn realvec_t SecDecInternalImagPart(const complexvec_t &a) { return a.im; }
+mathfn realvec_t SecDecInternalDenominator(const realvec_t &x) { return REALVEC_CONST(1)/x; }
+mathfn complexvec_t SecDecInternalDenominator(const complexvec_t &x) { return REALVEC_CONST(1)/x; }
+mathfn complexvec_t SecDecInternalSqr(const complexvec_t &a) { return a*a; }
 
 #define DEF_CC_FUNCTION(fname, fn) \
     static inline complexvec_t fname(const complexvec_t &a) { \
diff --git a/pySecDec/code_writer/templates/make_package/distsrc/common_cuda.h b/pySecDec/code_writer/templates/make_package/distsrc/common_cuda.h
index 71c80b94..42d76066 100644
--- a/pySecDec/code_writer/templates/make_package/distsrc/common_cuda.h
+++ b/pySecDec/code_writer/templates/make_package/distsrc/common_cuda.h
@@ -2,12 +2,16 @@
 
 #include <cinttypes>
 #include <cub/block/block_reduce.cuh>
+#include <math_constants.h>
 #include <thrust/complex.h>
 
+#define likely(x) __builtin_expect((x), 1)
+#define unlikely(x) __builtin_expect((x), 0)
 #define mathfn __device__ static inline
 
 typedef double real_t;
 typedef thrust::complex<real_t> complex_t;
+#define REAL_NAN CUDART_NAN
 
 #if SECDEC_RESULT_IS_COMPLEX 
     typedef complex_t result_t;
@@ -29,8 +33,8 @@ typedef thrust::complex<real_t> complex_t;
     { return pow(x, n); }
 #endif
 
-mathfn double sqr(const double x) { return x*x; }
-mathfn complex_t sqr(const complex_t x) { return x*x; }
+mathfn double SecDecInternalSqr(const double x) { return x*x; }
+mathfn complex_t SecDecInternalSqr(const complex_t x) { return x*x; }
 mathfn real_t SecDecInternalRealPart(const real_t x) { return x; }
 mathfn real_t SecDecInternalRealPart(const complex_t x) { return x.real(); }
 mathfn real_t SecDecInternalImagPart(const real_t x) { return 0; }
@@ -40,11 +44,24 @@ mathfn complex_t SecDecInternalI(const complex_t x) { return complex_t{-x.imag()
 
 mathfn real_t exp(int n) { return exp(real_t(n)); }
 
-#define SecDecInternalDenominator(x) (1.0/(x))
-#define i_ (complex_t{0,1})
+template<typename T> mathfn T
+SecDecInternalNPow(const T &a, unsigned n) {
+    T s = T(1.0);
+    T r = a;
+    if (likely(n > 0)) {
+        for (;;) {
+            if (n %% 2) { s = s*r; }
+            n /= 2;
+            if (n == 0) break;
+            r = SecDecInternalSqr(r);
+        }
+    }
+    return s;
+}
 
-#define likely(x) __builtin_expect((x), 1)
-#define unlikely(x) __builtin_expect((x), 0)
+#define SecDecInternalQuo(n, d) (((real_t)(n))/(d))
+#define SecDecInternalDenominator(x) ((real_t)1/(x))
+#define i_ (complex_t{0,1})
 
 mathfn real_t none_f(real_t x) { return x; }
 mathfn real_t none_w(real_t x) { return 1; }
@@ -53,42 +70,42 @@ mathfn real_t baker_w(real_t x) { return 1; }
 mathfn real_t korobov1x1_w(const real_t x) { auto u = (1-x)*x; return 6*u; }
 mathfn real_t korobov1x1_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y2 = sqr(y);
+    auto y2 = SecDecInternalSqr(y);
     auto h = y2*(3 - 2*y);
     return (x <= 0.5) ? h : 1-h;
 }
-mathfn real_t korobov2x2_w(const real_t x) { auto u = (1-x)*x; return 30*sqr(u); }
+mathfn real_t korobov2x2_w(const real_t x) { auto u = (1-x)*x; return 30*SecDecInternalSqr(u); }
 mathfn real_t korobov2x2_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y3 = y*sqr(y);
+    auto y3 = y*SecDecInternalSqr(y);
     auto h = y3*(10 + y*(-15 + 6*y));
     return (x <= 0.5) ? h : 1-h;
 }
-mathfn real_t korobov3x3_w(const real_t x) { auto u = (1-x)*x; return 140*u*sqr(u); }
+mathfn real_t korobov3x3_w(const real_t x) { auto u = (1-x)*x; return 140*u*SecDecInternalSqr(u); }
 mathfn real_t korobov3x3_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y4 = sqr(sqr(y));
+    auto y4 = SecDecInternalSqr(SecDecInternalSqr(y));
     auto h = y4*(35 + y*(-84 + (70 - 20*y)*y));
     return (x <= 0.5) ? h : 1-h;
 }
-mathfn real_t korobov4x4_w(const real_t x) { auto u = (1-x)*x; return 630*sqr(sqr(u)); }
+mathfn real_t korobov4x4_w(const real_t x) { auto u = (1-x)*x; return 630*SecDecInternalSqr(SecDecInternalSqr(u)); }
 mathfn real_t korobov4x4_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y5 = y*sqr(sqr(y));
+    auto y5 = y*SecDecInternalSqr(SecDecInternalSqr(y));
     auto h = y5*(126 + y*(-420 + y*(540 + y*(-315 + 70*y))));
     return (x <= 0.5) ? h : 1-h;
 }
-mathfn real_t korobov5x5_w(const real_t x) { auto u = (1-x)*x; return 2772*u*sqr(sqr(u)); }
+mathfn real_t korobov5x5_w(const real_t x) { auto u = (1-x)*x; return 2772*u*SecDecInternalSqr(SecDecInternalSqr(u)); }
 mathfn real_t korobov5x5_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y6 = sqr(y*sqr(y));
+    auto y6 = SecDecInternalSqr(y*SecDecInternalSqr(y));
     auto h = y6*(462 + y*(-1980 + y*(3465 + y*(-3080 + (1386 - 252*y)*y))));
     return (x <= 0.5) ? h : 1-h;
 }
-mathfn real_t korobov6x6_w(const real_t x) { auto u = (1-x)*x; return 12012*sqr(u*sqr(u)); }
+mathfn real_t korobov6x6_w(const real_t x) { auto u = (1-x)*x; return 12012*SecDecInternalSqr(u*SecDecInternalSqr(u)); }
 mathfn real_t korobov6x6_f(const real_t x) {
     auto y = (x <= 0.5) ? x : 1-x;
-    auto y7 = y*sqr(y*sqr(y));
+    auto y7 = y*SecDecInternalSqr(y*SecDecInternalSqr(y));
     auto h = y7*(1716 + y*(-9009 + y*(20020 + y*(-24024 + y*(16380 + y*(-6006 + 924*y))))));
     return (x <= 0.5) ? h : 1-h;
 }
diff --git a/pySecDec/code_writer/templates/make_package/src/SecDecInternalFunctions.hpp b/pySecDec/code_writer/templates/make_package/src/SecDecInternalFunctions.hpp
index 49533bc1..65b0ae8c 100644
--- a/pySecDec/code_writer/templates/make_package/src/SecDecInternalFunctions.hpp
+++ b/pySecDec/code_writer/templates/make_package/src/SecDecInternalFunctions.hpp
@@ -269,6 +269,8 @@ namespace %(name)s
     SecDecFn real_t SecDecInternalImagPart(const complex_t x) { return x.imag(); }
     SecDecFn complex_t SecDecInternalI(const real_t x) { return complex_t{0, x}; }
     SecDecFn complex_t SecDecInternalI(const complex_t x) { return complex_t{-x.imag(), x.real()}; }
+    SecDecFn real_t SecDecInternalSqr(const real_t x) { return x*x; }
+    SecDecFn complex_t SecDecInternalSqr(const complex_t x) { return x*x; }
 
     #undef %(name)s_contour_deformation
     #undef %(name)s_has_complex_parameters
@@ -285,6 +287,8 @@ namespace %(name)s
 #define SecDecInternalLog(x) log(x)
 #define SecDecInternalExp(x) exp(x)
 #define SecDecInternalPow(x, n) pow(x, n)
+#define SecDecInternalNPow(x, n) pow(x, n)
+#define SecDecInternalQuo(n, d) ((real_t)(n)/(d))
 
 #ifdef SECDEC_WITH_CUDA
 #define SecDecInternalAbs(x) thrust::abs(complex_t{x})
@@ -296,15 +300,15 @@ namespace %(name)s
 #define unlikely(x) (x)
 
 #define SecDecInternalOutputDeformationParameters(i, v) output_deformation_parameters[i] = (v);
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) \
-    { \
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) \
+    if (unlikely(cond)) { \
       secdecutil::ResultInfo current_result; \
       current_result.return_value = secdecutil::ResultInfo::ReturnValue::sign_check_error_positive_polynomial; \
       current_result.signCheckId = (id); \
       result_info->fill_if_empty_threadsafe(current_result); \
     };
-#define SecDecInternalSignCheckErrorContourDeformation(id) \
-    { \
+#define SecDecInternalSignCheckContourDeformation(cond, id) \
+    if (unlikely(cond)) { \
       secdecutil::ResultInfo current_result; \
       current_result.return_value = secdecutil::ResultInfo::ReturnValue::sign_check_error_contour_deformation; \
       current_result.signCheckId = (id); \
diff --git a/pySecDec/code_writer/templates/sum_package/Makefile b/pySecDec/code_writer/templates/sum_package/Makefile
index 50a9461f..71820ad4 100644
--- a/pySecDec/code_writer/templates/sum_package/Makefile
+++ b/pySecDec/code_writer/templates/sum_package/Makefile
@@ -83,7 +83,11 @@ endif
 
 # Source generation without compilation
 
+source: $(foreach I,$(INTEGRALS),$I/source)
+source-mma: $(foreach I,$(INTEGRALS),$I/source-mma)
+
 $(foreach I,$(INTEGRALS),$I/source)::
 	$(MAKE) -C $(dir $@) source
 
-source: $(foreach I,$(INTEGRALS),$I/source)
+$(foreach I,$(INTEGRALS),$I/source-mma)::
+	$(MAKE) -C $(dir $@) source-mma
diff --git a/pySecDecContrib/bin/export_sector b/pySecDecContrib/bin/export_sector
index 8c046909..6b1df07d 100755
--- a/pySecDecContrib/bin/export_sector
+++ b/pySecDecContrib/bin/export_sector
@@ -15,6 +15,7 @@
 
 import collections
 import contextlib
+import getopt
 import glob
 import os
 import os.path
@@ -56,6 +57,9 @@ def getintlist(text, separator=","):
 def sed(text, rx, template):
     return re.sub(rx, template, text, flags=re.M)
 
+def grep(text, rx):
+    return re.findall(rx, text, flags=re.M)
+
 def cleanup_code(text):
     """
     Take a code dump from FORM, reformat it into C++, rename
@@ -66,20 +70,30 @@ def cleanup_code(text):
     code = sed(code, r"SecDecInternalAbbreviation\[([0-9]+)\]", r"tmp1_\1")
     code = sed(code, r"SecDecInternalAbbreviations[0-9]+\(([0-9]+)\)", r"tmp1_\1")
     code = sed(code, r"SecDecInternalSecondAbbreviation\[([0-9]+)\]", r"tmp2_\1")
+    code = sed(code, r"SecDecInternalSecondAbbreviation\(([0-9]+)\)", r"tmp2_\1")
     term = "[+-]?[a-zA-Z0-9_*]+"
-    code = sed(code, fr"pow\(({term}),2\)", r"\1*\1")
-    code = sed(code, fr"pow\(({term}),3\)", r"\1*\1*\1")
-    code = sed(code, fr"pow\(({term}),4\)", r"(\1*\1)*(\1*\1)")
-    code = sed(code, r"pow\(", r"SecDecInternalPow(")
-    code = sed(code, r"log\(", r"SecDecInternalLog(")
+    code = sed(code, fr"\bpow\(({term}),2\)", r"SecDecInternalSqr(\1)")
+    code = sed(code, fr"\bpow\(({term}),3\)", r"SecDecInternalSqr(\1)*\1")
+    code = sed(code, fr"\bpow\(({term}),4\)", r"SecDecInternalSqr(SecDecInternalSqr(\1))")
+    code = sed(code, fr"\bpow\(({term}),([0-9]+)\)", r"SecDecInternalNPow(\1,\2)")
+    code = sed(code, r"\bpow\(", r"SecDecInternalPow(")
+    code = sed(code, r"\blog\(", r"SecDecInternalLog(")
+    code = sed(code, r"if *\((.*)\) *SecDecInternalSignCheckError(.*)\(", r"SecDecInternalSignCheck\2(\1,")
+    # Convert i_*expr into SecDecInternalI(expr)
     code = sed(code, fr"= *({term})\*i_\*({term});", r"= SecDecInternalI(\1*\2);")
     code = sed(code, fr"= *({term})\*i_;", r"= SecDecInternalI(\1);")
     code = sed(code, fr"= *([+-]?)i_\*({term});", r"= SecDecInternalI(\1\2);")
     code = sed(code, fr"= *([+-]?)i_;", r"= SecDecInternalI(\g<1>1);")
-    code = sed(code, r"if *\((.*)\) *SecDecInternalSignCheck", r"if (unlikely(\1)) SecDecInternalSignCheck")
+    # Make numbers either integrers or rationals (via SecDecInternalQuo)
     code = sed(code, r"[.]E[+]0([^0-9])", r"\1")
-    code = sed(code, r" *= *", " = ")
-    code = sed(code, r" *[+] *([^0-9])", r" + \1")
+    code = sed(code, r"[.]([^0-9])", r"\1")
+    assert "." not in code
+    assert re.search(r"\b[0-9]*[eE][0-9+-]", code) is None
+    code = sed(code, r"([0-9]+)/([0-9]+)", r"SecDecInternalQuo(\1,\2)")
+    assert "/" not in code
+    # Put some cosmetic spaces back in for prettier formatting
+    code = sed(code, fr"\b *= *\b", r" = ")
+    code = sed(code, r"\b *[+] *\b", r" + ")
     code = sed(code, r" *, *", ", ")
     code = sed(code, r"  *", r" ")
     # Switch to static single assignment form, for variable type
@@ -111,6 +125,23 @@ def cleanup_code(text):
     code = sed(code, r"^result_t (.*SecDecInternal.*Part.*)$", r"real_t \1")
     return code
 
+assert cleanup_code("x=1*2./3.E+0*4.E+0;") == "auto x = 1*SecDecInternalQuo(2, 3)*4;"
+assert cleanup_code("x=1/2*2/3;") == "auto x = SecDecInternalQuo(1, 2)*SecDecInternalQuo(2, 3);"
+assert cleanup_code("x1e2=x+y;x<=1;y+=z;") == "auto x1e2 = x + y;\nx<=1;\ny+=z;"
+
+def code_variables(code):
+    return list(set(grep(code, "^auto ([^ ]+) ")))
+
+def code_to_mma(code):
+    code = sed(code, r"^auto ", "")
+    code = sed(code, r"^return\(", "Return(")
+    code = sed(code, r"\(", "[")
+    code = sed(code, r"\)", "]")
+    code = sed(code, r"_", "$")
+    code = sed(code, r"\bi\b", "I")
+    #code = sed(code, r"([^a-zA-Z0-9_$])\[([^\]]*)\]", r"\1(\2)")
+    return code
+
 def template_writer(template_source, *argnames):
     """
     A templating language: turns each `${code}` into `{code}`,
@@ -422,8 +453,8 @@ DIST_SECTOR_ORDER_CPP = template_writer("""\
 #define SECDEC_RESULT_IS_COMPLEX ${1 if complex else 0}
 #include "common_cpu.h"
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {*presult = nan("U"); return 1; }
-#define SecDecInternalSignCheckErrorContourDeformation(id) {*presult = nan("F"); return 2; }
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) {*presult = REAL_NAN; return 1; }
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) {*presult = REAL_NAN; return 2; }
 
 extern "C" int
 ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
@@ -445,7 +476,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 @@ for j, v in enumerate(getlist(i.order_deformationParameters)):
     const real_t ${v} = deformp[${j}];
 @@ pass
-    const real_t invlattice = 1.0/lattice;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
     resultvec_t acc = RESULTVEC_ZERO;
     uint64_t index = index1;
 @@ intvars = getlist(i.order_integrationVariables)
@@ -467,7 +498,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 @@ for k in range(1, VECSIZE):
         if (unlikely(index + ${k} >= index2)) w.x[${k}] = 0;
 @@ for j, v in enumerate(intvars):
-        ${v} = clamp01(${i.qmcTransform}_f(${v}));
+        ${v} = ${i.qmcTransform}_f(${v});
 @@ for line in cleanup_code(i.order_integrandBody).splitlines():
         ${line.replace("return(", "acc = acc + w*(")}
 @@ pass
@@ -497,7 +528,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}__maxdeformp(
     const complex_t ${v} = complexp[${j}]; (void)${v};
 @@     ndeformp = len(getlist(i.order_deformationParameters))
 @@     deformp_init = ', '.join(['REALVEC_CONST(10.0)']*ndeformp)
-    const real_t invlattice = 1.0/lattice;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
     realvec_t deformp[${ndeformp}] = { ${deformp_init} };
     uint64_t index = index1;
 @@     intvars = getlist(i.order_integrationVariables)
@@ -512,7 +543,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}__maxdeformp(
         realvec_t ${v} = {{ ${li_list} }};
         ${v} = warponce(${v} + shift[${j}], 1);
 @@     for j, v in enumerate(intvars):
-        ${v} = clamp01(${i.qmcTransform}_f(${v}));
+        ${v} = ${i.qmcTransform}_f(${v});
 @@     code = cleanup_code(i.order_optimizeDeformationParametersBody)
 @@     for line in code.splitlines():
         ${line}
@@ -540,9 +571,9 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}__fpolycheck(
 @@     for j, v in enumerate(getlist(i.complexParameters)):
     const complex_t ${v} = complexp[${j}]; (void)${v};
 @@     for j, v in enumerate(getlist(i.order_deformationParameters)):
-    const real_t ${v} = deformp[${j}];
+    const real_t ${v} = deformp[${j}]; (void)${v};
 @@     pass
-    const real_t invlattice = 1.0/lattice;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
     uint64_t index = index1;
 @@     intvars = getlist(i.order_integrationVariables)
 @@     for j, v in enumerate(intvars):
@@ -556,12 +587,12 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}__fpolycheck(
         realvec_t ${v} = {{ ${li_list} }};
         ${v} = warponce(${v} + shift[${j}], 1);
 @@     for j, v in enumerate(intvars):
-        ${v} = clamp01(${i.qmcTransform}_f(${v}));
+        ${v} = ${i.qmcTransform}_f(${v});
 @@     code = cleanup_code(i.order_contourDeformationPolynomialBody)
 @@     for line in code.splitlines():
         ${line.replace("return(", "auto fpoly_im = SecDecInternalImagPart(")}
 @@     pass
-        if (unlikely(fpoly_im > 0)) return 1;
+        if (unlikely(!(fpoly_im <= 0))) return 1;
     }
     return 0;
 }
@@ -572,8 +603,8 @@ DIST_SECTOR_ORDER_CU = template_writer("""\
 #define SECDEC_RESULT_IS_COMPLEX ${1 if complex else 0}
 #include "common_cuda.h"
 
-#define SecDecInternalSignCheckErrorPositivePolynomial(id) {val = nan("U"); break;}
-#define SecDecInternalSignCheckErrorContourDeformation(id) {val = nan("F"); break;}
+#define SecDecInternalSignCheckPositivePolynomial(cond, id) if (unlikely(cond)) {val = REAL_NAN; break;}
+#define SecDecInternalSignCheckContourDeformation(cond, id) if (unlikely(cond)) {val = REAL_NAN; break;}
 
 extern "C" __global__ void
 ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
@@ -596,10 +627,10 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 @@ for j, v in enumerate(getlist(i.complexParameters)):
     const complex_t ${v} = complexp[${j}]; (void)${v};
 @@ for j, v in enumerate(getlist(i.order_deformationParameters)):
-    const real_t ${v} = deformp[${j}];
+    const real_t ${v} = deformp[${j}]; (void)${v};
 @@ pass
-    const real_t invlattice = 1.0/lattice;
-    result_t val = 0.0;
+    const real_t invlattice = SecDecInternalDenominator((real_t)(double)lattice);
+    result_t val = (result_t)0;
     uint64_t index = index1 + (bid*128 + tid)*8;
 @@ intvars = getlist(i.order_integrationVariables)
 @@ for j, v in enumerate(intvars):
@@ -607,7 +638,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 @@ pass
     for (uint64_t i = 0; (i < 8) && (index < index2); i++, index++) {
 @@ for j, v in enumerate(intvars):
-        real_t ${v} = warponce(li_${v}*invlattice + shift[${j}], 1.0);
+        real_t ${v} = warponce(invlattice*(double)li_${v} + shift[${j}], 1.0);
         li_${v} = warponce_i(li_${v} + genvec[${j}], lattice);
 @@ for j, v in enumerate(intvars):
         real_t w_${v} = ${i.qmcTransform}_w(${v});
@@ -616,7 +647,7 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 @@ for j, v in enumerate(intvars):
         ${v} = ${i.qmcTransform}_f(${v});
 @@ for line in cleanup_code(i.order_integrandBody).splitlines():
-        ${line.replace("return(", "val += w*(")}
+        ${line.replace("return(", "val = val + w*(")}
 @@ pass
     }
     // Sum up 128*8=1024 values across 4 warps.
@@ -627,36 +658,127 @@ ${i.namespace}__sector_${i.sector}_order_${i.order_name}(
 }
 """, "i")
 
+SECTOR_ORDER_MMA = template_writer("""\
+SecDecInternalQuo[n_,d_] := n/d;
+SecDecInternalI[ex_] := I * ex;
+SecDecInternalRealPart[ex_] := Re[ex];
+SecDecInternalImagPart[ex_] := Im[ex];
+SecDecInternalAbs[ex_] := Abs[ex];
+SecDecInternalDenominator[ex_] := 1/ex;
+SecDecInternalNPow[ex_, n_] := ex^n;
+SecDecInternalLog[ex_] := Conjugate[Log[Conjugate[ex]]];
+SecDecInternalPow[ex_, n_] := Conjugate[Power[Conjugate[ex],n]];
+SecDecInternalSignCheckPositivePolynomial[cond_, id_] If[cond, Throw[$PositivePolynomial[id]]];
+SecDecInternalSignCheckContourDeformation[cond_, id_] If[cond, Throw[$ContourDeformation[id]]];
+SecDecInternalSqr[ex_] := ex*ex;
+
+${i.namespace.replace("_", "$")}$sector$${i.sector}$order$${i.order_name}Integrand[
+    {${", ".join([x + "_" for x in getlist(i.order_integrationVariables)])}, ___},
+    {${", ".join([x + "_" for x in getlist(i.realParameters) + getlist(i.complexParameters)])}},
+    {${", ".join([x + "_" for x in getlist(i.order_deformationParameters)])}}
+] :=
+@@ code = cleanup_code(i.order_integrandBody).replace("_", "$")
+@@ varnames = code_variables(code)
+Module[{${", ".join(varnames)}},
+@@ for line in code_to_mma(code).splitlines():
+    ${line}
+@@ pass
+];
+
+${i.namespace.replace("_", "$")}$sector$${i.sector}$order$${i.order_name}MaxDeformP[
+    {${", ".join([x + "_" for x in getlist(i.order_integrationVariables)])}, ___},
+    {${", ".join([x + "_" for x in getlist(i.realParameters) + getlist(i.complexParameters)])}}
+@@ if int(i.contourDeformation):
+] :=
+@@     code = cleanup_code(i.order_optimizeDeformationParametersBody).replace("_", "$")
+@@     varnames = code_variables(code)
+Module[{${", ".join(varnames)}, SecDecInternalOutputDeformationParameters, deformp},
+    deformp = Table[0, ${len(getlist(i.order_deformationParameters))}];
+    SecDecInternalOutputDeformationParameters[i_, v_] := (deformp[[i+1]] = v);
+@@     for line in code_to_mma(code).splitlines():
+    ${line}
+@@     pass
+    deformp
+];
+@@ else:
+] := {}
+@@ pass
+
+${i.namespace.replace("_", "$")}$sector$${i.sector}$order$${i.order_name}F[
+    {${", ".join([x + "_" for x in getlist(i.order_integrationVariables)])}, ___},
+    {${", ".join([x + "_" for x in getlist(i.realParameters) + getlist(i.complexParameters)])}},
+    {${", ".join([x + "_" for x in getlist(i.order_deformationParameters)])}}
+@@ if int(i.contourDeformation):
+] :=
+@@     code = cleanup_code(i.order_contourDeformationPolynomialBody).replace("_", "$")
+@@     varnames = code_variables(code)
+Module[{${", ".join(varnames)}},
+@@     for line in code_to_mma(code).splitlines():
+    ${line}
+@@     pass
+];
+@@ else:
+] := None
+@@ pass
+
+${i.namespace.replace("_", "$")}$sector$${i.sector}$order$${i.order_name}DeformOK[
+    integrationVariables_List,
+    parameters_List,
+    deformationParameters_List
+@@ if int(i.contourDeformation):
+] :=
+  SecDecInternalImagPart[
+    ${i.namespace.replace("_", "$")}$sector$${i.sector}$order$${i.order_name}F[
+      integrationVariables, parameters, deformationParameters]] <= 0
+@@ else:
+] := True
+@@ pass
+""", "i")
 
 if __name__ == "__main__":
 
-    if len(sys.argv) != 3:
-        print("usage: ${sys.argv[0]} sector.info destination-dir")
+    subset = ["cpp", "disteval"]
+    optlist, arglist = getopt.gnu_getopt(sys.argv[1:], "", ["subset="])
+    for opt, val in optlist:
+        if opt == "--subset": subset = val.split(",")
+    if len(arglist) != 2:
+        print("usage: ${sys.argv[0]} [--subset=cpp,disteval,mma] sector.info destination-dir")
         exit(1)
-
-    sectorfile = sys.argv[1]
-    dstdir = sys.argv[2]
+    sectorfile = arglist[0]
+    dstdir = arglist[1]
 
     info = load_info(sectorfile)
 
-    fname = os.path.join(dstdir, "src/sector_" + info["sector"] + ".cpp")
-    with open(fname, "w") as f:
-        SECTOR_CPP(f, DictionaryWrapper(info))
+    if "cpp" in subset:
+        fname = os.path.join(dstdir, "src/sector_" + info["sector"] + ".cpp")
+        with open(fname, "w") as f:
+            SECTOR_CPP(f, DictionaryWrapper(info))
+    if "mma" in subset:
+        os.makedirs("mma", exist_ok=True)
 
     for oidx in range(1, int(info["numOrders"]) + 1):
         so = "sector_" + info["sector"] + "_" + info[f"order{oidx}_name"]
-        files = {
-            f"distsrc/{so}.cpp": DIST_SECTOR_ORDER_CPP,
-            f"distsrc/{so}.cu": DIST_SECTOR_ORDER_CU,
-            f"src/{so}.cpp": SECTOR_ORDER_CPP,
-            f"src/{so}.hpp": SECTOR_ORDER_HPP,
-        }
-        if int(info["contourDeformation"]):
+        files = {}
+        if "cpp" in subset:
+            files.update({
+                f"src/{so}.cpp": SECTOR_ORDER_CPP,
+                f"src/{so}.hpp": SECTOR_ORDER_HPP,
+            })
+            if int(info["contourDeformation"]):
+                files.update({
+                    f"src/contour_deformation_{so}.cpp": CONTOUR_DEFORMATION_SECTOR_ORDER_CPP,
+                    f"src/contour_deformation_{so}.hpp": CONTOUR_DEFORMATION_SECTOR_ORDER_HPP,
+                    f"src/optimize_deformation_parameters_{so}.cpp": OPTIMIZE_DEFORMATION_PARAMETERS_SECTOR_ORDER_CPP,
+                    f"src/optimize_deformation_parameters_{so}.hpp": OPTIMIZE_DEFORMATION_PARAMETERS_SECTOR_ORDER_HPP,
+                })
+        if "disteval" in subset:
+            files.update({
+                f"distsrc/{so}.cpp": DIST_SECTOR_ORDER_CPP,
+                f"distsrc/{so}.cu": DIST_SECTOR_ORDER_CU,
+            })
+        if "mma" in subset:
             files.update({
-                f"src/contour_deformation_{so}.cpp": CONTOUR_DEFORMATION_SECTOR_ORDER_CPP,
-                f"src/contour_deformation_{so}.hpp": CONTOUR_DEFORMATION_SECTOR_ORDER_HPP,
-                f"src/optimize_deformation_parameters_{so}.cpp": OPTIMIZE_DEFORMATION_PARAMETERS_SECTOR_ORDER_CPP,
-                f"src/optimize_deformation_parameters_{so}.hpp": OPTIMIZE_DEFORMATION_PARAMETERS_SECTOR_ORDER_HPP,
+                f"mma/{so}.m": SECTOR_ORDER_MMA,
             })
         # Mungle the key names so that templates could access
         # the current order keys as `order_xxx`, instead of
diff --git a/pySecDecContrib/lib/write_contour_deformation.frm b/pySecDecContrib/lib/write_contour_deformation.frm
index 3d2f6f10..c085ffe2 100644
--- a/pySecDecContrib/lib/write_contour_deformation.frm
+++ b/pySecDecContrib/lib/write_contour_deformation.frm
@@ -156,7 +156,7 @@
 
   #optimize expressionF
 
-  Format float 20;
+  Format rational;
   Format C;
   Format 255;
 
@@ -264,7 +264,7 @@
   .sort
 
 * write the optimization symbols
-  Format float 20;
+  Format rational;
   Format C;
   Format 255;
   #write <sector`sectorID'.info> "%O"
@@ -280,10 +280,10 @@
       Format C;
       Format 255;
       #write <sector`sectorID'.info> "SecDecInternalOutputDeformationParameters(`$cppidx',"
-      Format float 20;
+      Format rational;
       Format C;
       Format 255;
-      #write <sector`sectorID'.info> "1.0/SecDecInternalAbs(SecDecInternalRealPart(%E)));" expr(#@FAIL@#)
+      #write <sector`sectorID'.info> "SecDecInternalDenominator(SecDecInternalAbs(SecDecInternalRealPart(%E))));" expr(#@FAIL@#)
     #EndIf
   #EndDo
 
diff --git a/pySecDecContrib/lib/write_integrand.frm b/pySecDecContrib/lib/write_integrand.frm
index 9282d590..006c5234 100644
--- a/pySecDecContrib/lib/write_integrand.frm
+++ b/pySecDecContrib/lib/write_integrand.frm
@@ -891,7 +891,7 @@ Format 255;
   #EndIf
 
 * define the abbreviations in c
-  Format float 20;
+  Format rational;
   Format C;
   Format 255;
 
@@ -1037,7 +1037,7 @@ Format 255;
 *   optimize and write to c file
 *   {
     skip unparsed;
-    Format float 20;
+    Format rational;
     Format C;
     Format O`optimizationLevel';
     Format 255;
@@ -1085,7 +1085,7 @@ Format 255;
           #EndIf
           #write <sector`sectorID'.info> "%E);" exponent(#@FAIL@#)
           Format C;
-          Format float 20;
+          Format rational;
           Format 255;
           drop exponent;
         #Else
@@ -1138,7 +1138,7 @@ Format 255;
     #EndDo
   #EndIf
 
-  Format float 20;
+  Format rational;
   Format C;
   Format O`optimizationLevel';
   Format 255;
@@ -1172,7 +1172,7 @@ Format 255;
 
       #If termsin(expr) > 0
         #write <sector`sectorID'.info> "SecDecInternalSignCheckExpression = SecDecInternalImagPart(%E);" expr(#@FAIL@#)
-        #write <sector`sectorID'.info> "if (SecDecInternalSignCheckExpression > 0) SecDecInternalSignCheckErrorContourDeformation(`signCheckId');"
+        #write <sector`sectorID'.info> "if (!(SecDecInternalSignCheckExpression <= 0)) SecDecInternalSignCheckErrorContourDeformation(`signCheckId');"
       #EndIf
 
     #EndDo
@@ -1190,7 +1190,7 @@ Format 255;
 
       #If termsin(expr) > 0
         #write <sector`sectorID'.info> "SecDecInternalSignCheckExpression = SecDecInternalRealPart(%E);" expr(#@FAIL@#)
-        #write <sector`sectorID'.info> "if (SecDecInternalSignCheckExpression < 0)  SecDecInternalSignCheckErrorPositivePolynomial(`signCheckId');"
+        #write <sector`sectorID'.info> "if (!(SecDecInternalSignCheckExpression >= 0)) SecDecInternalSignCheckErrorPositivePolynomial(`signCheckId');"
       #EndIf
 
     #EndDo