From b981be01012859e3d61e134932d8dd6cdfedc9a2 Mon Sep 17 00:00:00 2001 From: mloubout Date: Sat, 18 Jan 2025 18:16:03 -0500 Subject: [PATCH] compiler: fix missing algorithm include for min/max --- devito/operator/operator.py | 5 +++-- devito/passes/iet/languages/CXX.py | 1 + devito/passes/iet/misc.py | 31 ++++++++++++++++-------------- tests/test_gpu_openacc.py | 13 ++++++++++++- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 3966da4378..a5fef9b84f 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -470,6 +470,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs): * Finalize (e.g., symbol definitions, array casts) """ name = kwargs.get("name", "Kernel") + lang = cls._Target.lang() # Wrap the IET with an EntryFunction (a special Callable representing # the entry point of the generated library) @@ -488,10 +489,10 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs): cls._Target.instrument(graph, profiler=profiler, **kwargs) # Extract the necessary macros from the symbolic objects - generate_macros(graph, **kwargs) + generate_macros(graph, lang=lang, **kwargs) # Add type specific metadata - lower_dtypes(graph, lang=cls._Target.lang(), **kwargs) + lower_dtypes(graph, lang=lang, **kwargs) # Target-independent optimizations minimize_symbols(graph) diff --git a/devito/passes/iet/languages/CXX.py b/devito/passes/iet/languages/CXX.py index 17003c0d8f..bfa6bebe35 100644 --- a/devito/passes/iet/languages/CXX.py +++ b/devito/passes/iet/languages/CXX.py @@ -45,6 +45,7 @@ class CXXBB(LangBB): mapper = { 'header-memcpy': 'string.h', + 'header-algorithm': 'algorithm', 'host-alloc': lambda i, j, k: Call('posix_memalign', (i, j, k)), 'host-alloc-pin': lambda i, j, k: diff --git a/devito/passes/iet/misc.py b/devito/passes/iet/misc.py index 28e1cc4f7b..d978936053 100644 --- a/devito/passes/iet/misc.py +++ b/devito/passes/iet/misc.py @@ -144,7 +144,7 @@ def 
generate_macros(graph, **kwargs): @iet_pass -def _generate_macros(iet, tracker=None, **kwargs): +def _generate_macros(iet, tracker=None, lang=None, **kwargs): # Derive the Macros necessary for the FIndexeds iet = _generate_macros_findexeds(iet, tracker=tracker, **kwargs) @@ -152,7 +152,8 @@ def _generate_macros(iet, tracker=None, **kwargs): headers = sorted((ccode(define), ccode(expr)) for define, expr in headers) # Generate Macros from higher-level SymPy objects - headers.extend(_generate_macros_math(iet)) + mheaders, includes = _generate_macros_math(iet, lang=lang) + headers.extend(mheaders) # Remove redundancies while preserving the order headers = filter_ordered(headers) @@ -160,7 +161,6 @@ def _generate_macros(iet, tracker=None, **kwargs): # Some special Symbols may represent Macros defined in standard libraries, # so we need to include the respective includes limits = FindApplications(ValueLimit).visit(iet) - includes = set() if limits & (set(limits_mapper[np.int32]) | set(limits_mapper[np.int64])): includes.add('limits.h') elif limits & (set(limits_mapper[np.float32]) | set(limits_mapper[np.float64])): @@ -195,35 +195,38 @@ def _generate_macros_findexeds(iet, sregistry=None, tracker=None, **kwargs): return iet -def _generate_macros_math(iet): +def _generate_macros_math(iet, lang=None): headers = [] + includes = [] for i in FindApplications().visit(iet): - headers.extend(_lower_macro_math(i)) + header, include = _lower_macro_math(i, lang) + headers.extend(header) + includes.extend(include) - return headers + return headers, set(includes) - {None} @singledispatch -def _lower_macro_math(expr): - return () +def _lower_macro_math(expr, lang): + return (), {} @_lower_macro_math.register(Min) @_lower_macro_math.register(sympy.Min) -def _(expr): +def _(expr, lang): if has_integer_args(*expr.args) and len(expr.args) == 2: - return (('MIN(a,b)', ('(((a) < (b)) ? (a) : (b))')),) + return (('MIN(a,b)', ('(((a) < (b)) ? 
(a) : (b))')),), {} else: - return () + return (), (lang.get('header-algorithm'),) @_lower_macro_math.register(Max) @_lower_macro_math.register(sympy.Max) -def _(expr): +def _(expr, lang): if has_integer_args(*expr.args) and len(expr.args) == 2: - return (('MAX(a,b)', ('(((a) > (b)) ? (a) : (b))')),) + return (('MAX(a,b)', ('(((a) > (b)) ? (a) : (b))')),), {} else: - return () + return (), (lang.get('header-algorithm'),) @_lower_macro_math.register(SafeInv) diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py index bdf732a12d..8c4813db0b 100644 --- a/tests/test_gpu_openacc.py +++ b/tests/test_gpu_openacc.py @@ -2,7 +2,7 @@ import numpy as np from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator, - norm, solve) + norm, solve, Max) from conftest import skipif, assert_blocking, opts_device_tiling from devito.data import LEFT from devito.exceptions import InvalidOperator @@ -171,6 +171,17 @@ def test_multi_tile_blocking_structure(self): assert len(iters) == len(v) assert all(i.step == j for i, j in zip(iters, v)) + def test_std_max(self): + grid = Grid(shape=(3, 3, 3)) + x, y, z = grid.dimensions + + u = Function(name='u', grid=grid) + + op = Operator(Eq(u, Max(1.2 * x / y, 2.3 * y / x)), + platform='nvidiaX', language='openacc') + + assert '<algorithm>' in str(op) + class TestOperator: