KhronosGroup · shajder · Nov 22, 2024
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -102,6 +102,42 @@ const cl_half specialValuesHalf[] = {
 
 constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
 
+// Reference for (p0, p1) via job->f->func.f_ff; data must point to a TestInfo.
+cl_float ComputeRefDouble(cl_float p0, cl_float p1, void *data)
+{
+    const TestInfo *info = static_cast<const TestInfo *>(data);
+    return info->f->func.f_ff(p0, p1);
+}
+
+// Reference for (p0, p1) via job->f->hfunc.f_ff_f; data points to a TestInfo.
+cl_float ComputeRefSingle(cl_float p0, cl_float p1, void *data)
+{
+    const TestInfo *info = static_cast<const TestInfo *>(data);
+    return info->f->hfunc.f_ff_f(p0, p1);
+}
+
+// Scope guard: forces FTZ (ftz != 0) and RTZ rounding (gIsInRTZMode, to match
+// the device); the destructor restores both. Non-copyable: one restore only.
+struct CorrectRoundScopeGuard
+{
+    explicit CorrectRoundScopeGuard(int ftz): mFtz(ftz)
+    {
+        memset(&mOldMode, 0, sizeof(mOldMode));
+        if (mFtz) ForceFTZ(&mOldMode);
+        if (gIsInRTZMode) mOldRoundMode = set_round(kRoundTowardZero, kfloat);
+    }
+    CorrectRoundScopeGuard(const CorrectRoundScopeGuard &) = delete;
+    CorrectRoundScopeGuard &operator=(const CorrectRoundScopeGuard &) = delete;
+    ~CorrectRoundScopeGuard()
+    {
+        if (mFtz) RestoreFPState(&mOldMode);
+        if (gIsInRTZMode) (void)set_round(mOldRoundMode, kfloat);
+    }
+    int mFtz; // nonzero: FTZ was forced in the ctor and is undone in the dtor
+    FPU_mode_type mOldMode; // FP state saved by ForceFTZ (zeroed otherwise)
+    RoundingMode mOldRoundMode = kRoundToNearestEven; // mode before set_round
+};
+
 cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
 {
     TestInfo *job = (TestInfo *)data;
@@ -110,19 +146,20 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     cl_uint base = job_id * (cl_uint)job->step;
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     float ulps = job->ulps;
-    fptr func = job->f->func;
     int ftz = job->ftz;
     MTdata d = tinfo->d;
     cl_int error;
 
     const char *name = job->f->name;
     cl_half *r = 0;
     std::vector<float> s(0), s2(0);
-    RoundingMode oldRoundMode;
 
     cl_event e[VECTOR_SIZE_COUNT];
     cl_half *out[VECTOR_SIZE_COUNT];
 
+    auto ref_fnptr = &ComputeRefDouble;
+    if (0 == strcmp(job->f->name, "divide")) ref_fnptr = &ComputeRefSingle;
+
     if (gHostFill)
     {
         // start the map of the output arrays
@@ -270,13 +307,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     }
 
     // Calculate the correctly rounded reference result
-    FPU_mode_type oldMode;
-    memset(&oldMode, 0, sizeof(oldMode));
-    if (ftz) ForceFTZ(&oldMode);
-
-    // Set the rounding mode to match the device
-    oldRoundMode = kRoundToNearestEven;
-    if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+    CorrectRoundScopeGuard crsg(ftz);
 
     // Calculate the correctly rounded reference result
     r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
@@ -287,11 +318,9 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     {
         s[j] = HTF(p[j]);
         s2[j] = HTF(p2[j]);
-        r[j] = HFF(func.f_ff(s[j], s2[j]));
+        r[j] = HFF(ref_fnptr(s[j], s2[j], data));
     }
 
-    if (ftz) RestoreFPState(&oldMode);
-
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -320,7 +349,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             if (r[j] != q[j])
             {
                 float test = HTF(q[j]);
-                float correct = func.f_ff(s[j], s2[j]);
+                float correct = HTF(r[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -353,8 +382,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                         double correct2, correct3;
                         float err2, err3;
 
-                        correct2 = func.f_ff(0.0, s2[j]);
-                        correct3 = func.f_ff(-0.0, s2[j]);
+                        correct2 = ref_fnptr(0.0, s2[j], data);
+                        correct3 = ref_fnptr(-0.0, s2[j], data);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
@@ -393,10 +422,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                             double correct4, correct5;
                             float err4, err5;
 
-                            correct2 = func.f_ff(0.0, 0.0);
-                            correct3 = func.f_ff(-0.0, 0.0);
-                            correct4 = func.f_ff(0.0, -0.0);
-                            correct5 = func.f_ff(-0.0, -0.0);
+                            correct2 = ref_fnptr(0.0, 0.0, data);
+                            correct3 = ref_fnptr(-0.0, 0.0, data);
+                            correct4 = ref_fnptr(0.0, -0.0, data);
+                            correct5 = ref_fnptr(-0.0, -0.0, data);
 
                             // Per section 10 paragraph 6, accept any result if
                             // an input or output is a infinity or NaN or
@@ -446,9 +475,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                         double correct2, correct3;
                         float err2, err3;
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-
+                        correct2 = ref_fnptr(s[j], 0.0, data);
+                        correct3 = ref_fnptr(s[j], -0.0, data);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
@@ -499,8 +527,6 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
         if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],

diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
@@ -32,28 +32,35 @@
 #define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            { NULL }, _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY,          \
+            INFINITY, _rmode, RELAXED_OFF, _type                               \
     }
 #define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
                   _type, _relaxed_embedded_ulp)                                \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp,                \
+            { NULL }, _ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp,      \
             _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY,     \
-            INFINITY, _rmode, RELAXED_OFF, _type                               \
+            { NULL }, { NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp,     \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
                        _rmode, _type)                                          \
     {                                                                          \
-        STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
-            _type                                                              \
+        STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, { NULL },   \
+            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
+            RELAXED_OFF, _type                                                 \
+    }
+#define OPERATOR_ENTRY_EXT(_name, _operator, _ulp, _embedded_ulp, _half_ulp,   \
+                           _rmode, _type)                                      \
+    {                                                                          \
+        STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, { NULL },   \
+            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
+            RELAXED_OFF, _type                                                 \
     }
 
 #define unaryF NULL
@@ -81,37 +88,48 @@
 #define reference_divide NULL
 #define reference_dividel NULL
 #define reference_relaxed_divide NULL
+#define reference_sqrtf NULL
+#define reference_dividef NULL
 
 #else // FUNCTION_LIST_ULPS_ONLY
 
 #define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            { (void*)reference_##_name }, _ulp, _ulp, _half_ulp,               \
+            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
     }
 #define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
                   _type, _relaxed_embedded_ulp)                                \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l },                                   \
-            { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp,     \
-            _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode,        \
-            RELAXED_ON, _type                                                  \
+            { (void*)reference_##relaxed_##_name }, { NULL }, _ulp, _ulp,      \
+            _half_ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp,     \
+            _rmode, RELAXED_ON, _type                                          \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
-            { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF,      \
-            _type                                                              \
+            { (void*)reference_##_name }, { NULL }, { NULL }, { NULL }, _ulp,  \
+            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
+            RELAXED_OFF, _type                                                 \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
                        _rmode, _type)                                          \
     {                                                                          \
         STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
-            { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp,  \
+            { (void*)reference_##_name##l }, { NULL }, { NULL }, _ulp, _ulp,   \
+            _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
+            _type                                                              \
+    }
+#define OPERATOR_ENTRY_EXT(_name, _operator, _ulp, _embedded_ulp, _half_ulp,   \
+                           _rmode, _type)                                      \
+    {                                                                          \
+        STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
+            { (void*)reference_##_name##l }, { NULL },                         \
+            { (void*)reference_##_name##f }, _ulp, _ulp, _half_ulp,            \
             _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
     }
 
@@ -266,6 +284,7 @@ const Func functionList[] = {
       { (void*)reference_copysignf },
       { (void*)reference_copysign },
       { (void*)reference_copysignf },
+      { (void*)reference_copysignf },
       0.0f,
       0.0f,
       0.0f,
@@ -367,6 +386,7 @@ const Func functionList[] = {
       { (void*)reference_sqrt },
       { (void*)reference_sqrtl },
       { NULL },
+      { (void*)reference_sqrtf },
       3.0f,
       0.0f,
       0.0f,
@@ -380,7 +400,8 @@ const Func functionList[] = {
       "sqrt",
       { (void*)reference_sqrt },
       { nullptr },
-      { NULL },
+      { nullptr },
+      { nullptr },
       0.0f,
       INFINITY,
       INFINITY,
@@ -423,6 +444,7 @@ const Func functionList[] = {
       { (void*)reference_divide },
       { (void*)reference_dividel },
       { (void*)reference_relaxed_divide },
+      { (void*)reference_dividef },
       2.5f,
       0.0f,
       0.0f,
@@ -437,6 +459,7 @@ const Func functionList[] = {
       { (void*)reference_divide },
       { nullptr },
       { nullptr },
+      { nullptr },
       0.0f,
       INFINITY,
       INFINITY,

diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
@@ -33,6 +33,7 @@
 union fptr {
     void *p;
     double (*f_f)(double);
+    float (*f_f_f)(float);
     double (*f_u)(cl_uint);
     int (*i_f)(double);
     int (*i_f_f)(float);
@@ -84,6 +85,7 @@ struct Func
     fptr func;
     dptr dfunc;
     fptr rfunc;
+    fptr hfunc;
     float float_ulps;
     float double_ulps;
     float half_ulps;

diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
@@ -5771,4 +5771,7 @@ long double reference_erfcl(long double x) { return erfc(x); }
 long double reference_erfl(long double x) { return erf(x); }
 
 double reference_erfc(double x) { return erfc(x); }
-double reference_erf(double x) { return erf(x); }
+double reference_erf(double x) { return erf(x); }
+// Single-precision references: computed in float, not via the double paths.
+float reference_sqrtf(float x) { return sqrtf(x); }
+float reference_dividef(float x, float y) { return x / y; }
diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
@@ -240,4 +240,7 @@ long double reference_erfcl(long double x);
 long double reference_erfl(long double x);
 double reference_erfc(double x);
 double reference_erf(double x);
+
+float reference_sqrtf(float x);
+float reference_dividef(float x, float y);
 #endif