diff --git a/loki/transformations/tests/test_array_indexing.py b/loki/transformations/tests/test_array_indexing.py index bef668ff8..9abf12a68 100644 --- a/loki/transformations/tests/test_array_indexing.py +++ b/loki/transformations/tests/test_array_indexing.py @@ -22,7 +22,9 @@ LowerConstantArrayIndices, remove_explicit_array_dimensions, add_explicit_array_dimensions ) -from loki.transformations.transpile import FortranCTransformation +from loki.transformations.transpile import ( + FortranCTransformation, FortranISOCWrapperTransformation +) @pytest.fixture(scope='function', name='builder') @@ -546,8 +548,13 @@ def validate_routine(routine): f2c_routine = Subroutine.from_source(fcode, frontend=frontend) f2c = FortranCTransformation() f2c.apply(source=f2c_routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation() + f2cwrap.apply(source=f2c_routine, path=tmp_path) libname = f'fc_{f2c_routine.name}_{start_index}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [tmp_path/f'{f2c_routine.name}_fc.F90', tmp_path/f'{f2c_routine.name}_c.c'], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transf_flatten_arr_fc_mod.transf_flatten_arr_fc f2c_x1, f2c_x2, f2c_x3, f2c_x4 = init_arguments(l1, l2, l3, l4, flattened=True) fc_function(f2c_x1, f2c_x2, f2c_x3, f2c_x4, l1, l2, l3, l4) diff --git a/loki/transformations/transpile/__init__.py b/loki/transformations/transpile/__init__.py index b70ac3348..359e638cb 100644 --- a/loki/transformations/transpile/__init__.py +++ b/loki/transformations/transpile/__init__.py @@ -6,4 +6,5 @@ # nor does it submit to any jurisdiction. 
from loki.transformations.transpile.fortran_c import * # noqa +from loki.transformations.transpile.fortran_iso_c_wrapper import * # noqa from loki.transformations.transpile.fortran_python import * # noqa diff --git a/loki/transformations/transpile/fortran_c.py b/loki/transformations/transpile/fortran_c.py index 69f1396d0..3711dbbeb 100644 --- a/loki/transformations/transpile/fortran_c.py +++ b/loki/transformations/transpile/fortran_c.py @@ -6,43 +6,41 @@ # nor does it submit to any jurisdiction. from pathlib import Path -from collections import OrderedDict -from loki.backend import cgen, fgen, cudagen, cppgen +from loki.backend import cgen, cudagen, cppgen from loki.batch import Transformation from loki.expression import ( - symbols as sym, Variable, InlineCall, RangeIndex, Scalar, Array, + symbols as sym, Variable, InlineCall, Scalar, Array, ProcedureSymbol, Dereference, Reference, ExpressionRetriever, SubstituteExpressionsMapper ) from loki.ir import ( - Section, Import, Intrinsic, Interface, CallStatement, - VariableDeclaration, TypeDef, Assignment, Transformer, FindNodes, - Pragma, Comment, SubstituteExpressions, FindInlineCalls + Import, Intrinsic, Interface, CallStatement, Assignment, + Transformer, FindNodes, Comment, SubstituteExpressions, + FindInlineCalls ) from loki.logging import debug -from loki.module import Module from loki.sourcefile import Sourcefile -from loki.subroutine import Subroutine from loki.tools import as_tuple, flatten -from loki.types import BasicType, DerivedType, SymbolAttributes +from loki.types import BasicType, DerivedType from loki.transformations.array_indexing import ( shift_to_zero_indexing, invert_array_indices, resolve_vector_notation, normalize_array_shape_and_access, flatten_arrays ) -from loki.transformations.utilities import ( - convert_to_lower_case, replace_intrinsics, sanitise_imports -) -from loki.transformations.sanitise import do_resolve_associates from loki.transformations.inline import ( 
inline_constant_parameters, inline_elemental_functions ) +from loki.transformations.sanitise import do_resolve_associates +from loki.transformations.utilities import ( + convert_to_lower_case, replace_intrinsics, sanitise_imports +) __all__ = ['FortranCTransformation'] + class DeReferenceTrafo(Transformer): """ Transformation to apply/insert Dereference = `*` and @@ -96,30 +94,22 @@ def visit_CallStatement(self, o, **kwargs): class FortranCTransformation(Transformation): """ - Fortran-to-C transformation that translates the given routine - into C and generates the corresponding ISO-C wrappers. + Fortran-to-C transformation that translates the given routine into C. Parameters ---------- inline_elementals : bool, optional Inline known elemental function via expression substitution. Default is ``True``. - use_c_ptr : bool, optional - Use ``c_ptr`` for array declarations in the F2C wrapper and ``c_loc(...)`` to pass - the corresponding argument. Default is ``False``. - codegen : - Wrapper function calling the Stringifier instance. - path : str, optional - Path to generate C sources. + language : str + C-style language to generate; should be one of ``['c', 'cpp', 'cuda']``. """ # pylint: disable=unused-argument # Set of standard module names that have no C equivalent __fortran_intrinsic_modules = ['ISO_FORTRAN_ENV', 'ISO_C_BINDING'] - def __init__(self, inline_elementals=True, use_c_ptr=False, path=None, language='c'): + def __init__(self, inline_elementals=True, language='c'): self.inline_elementals = inline_elementals - self.use_c_ptr = use_c_ptr - self.path = Path(path) if path is not None else None self.language = language.lower() self._supported_languages = ['c', 'cpp', 'cuda'] @@ -133,40 +123,18 @@ def __init__(self, inline_elementals=True, use_c_ptr=False, path=None, language= raise ValueError(f'language "{self.language}" is not supported!' 
f' (supported languages: "{self._supported_languages}")') - # Maps from original type name to ISO-C and C-struct types - self.c_structs = OrderedDict() - def file_suffix(self): if self.language == 'cpp': return '.cpp' return '.c' - def transform_module(self, module, **kwargs): - if self.path is None: - path = Path(kwargs.get('path')) - else: - path = self.path - role = kwargs.get('role', 'kernel') - - for name, td in module.typedef_map.items(): - self.c_structs[name.lower()] = self.c_struct_typedef(td) - - if role == 'header': - # Generate Fortran wrapper module - wrapper = self.generate_iso_c_wrapper_module(module) - self.wrapperpath = (path/wrapper.name.lower()).with_suffix('.F90') - Sourcefile.to_file(source=fgen(wrapper), path=self.wrapperpath) - - # Generate C header file from module - c_header = self.generate_c_header(module) - self.c_path = (path/c_header.name.lower()).with_suffix('.h') - Sourcefile.to_file(source=self.codegen(c_header), path=self.c_path) - def transform_subroutine(self, routine, **kwargs): - if self.path is None: - path = Path(kwargs.get('path')) + if 'path' in kwargs: + path = kwargs.get('path') else: - path = self.path + build_args = kwargs.get('build_args') + path = Path(build_args.get('output_dir')) + role = kwargs.get('role', 'kernel') item = kwargs.get('item', None) depths = kwargs.get('depths', None) @@ -185,28 +153,13 @@ def transform_subroutine(self, routine, **kwargs): self.interface_to_import(routine, targets) return - for arg in routine.arguments: - if isinstance(arg.type.dtype, DerivedType): - self.c_structs[arg.type.dtype.name.lower()] = self.c_struct_typedef(arg.type) - # for calls and inline calls: convert kwarguments to arguments self.convert_kwargs_to_args(routine, targets) if role == 'kernel': - # Generate Fortran wrapper module - bind_name = None if self.language in ['c', 'cpp'] else f'{routine.name.lower()}_c_launch' - wrapper = self.generate_iso_c_wrapper_routine(routine, self.c_structs, bind_name=bind_name) - contains 
= Section(body=(Intrinsic('CONTAINS'), wrapper)) - self.wrapperpath = (path/wrapper.name.lower()).with_suffix('.F90') - module = Module(name=f'{wrapper.name.upper()}_MOD', contains=contains) - module.spec = Section(body=(Import(module='iso_c_binding'),)) - # Generate C source file from Loki IR c_kernel = self.generate_c_kernel(routine, targets=targets) - self.c_path = (path/c_kernel.name.lower()).with_suffix(self.file_suffix()) - Sourcefile.to_file(source=fgen(module), path=self.wrapperpath) - # Generate C source file from Loki IR for successor in successors: c_kernel.spec.prepend(Import(module=f'{successor.ir.name.lower()}_c.h', c_import=True)) @@ -214,8 +167,8 @@ def transform_subroutine(self, routine, **kwargs): if self.language != 'c': c_kernel_launch = c_kernel.clone(name=f"{c_kernel.name}_launch", prefix="extern_c") self.generate_c_kernel_launch(c_kernel_launch, c_kernel) - self.c_path = (path/c_kernel_launch.name.lower()).with_suffix('.h') - Sourcefile.to_file(source=self.codegen(c_kernel_launch, extern=True), path=self.c_path) + c_path = (path/c_kernel_launch.name.lower()).with_suffix('.h') + Sourcefile.to_file(source=self.codegen(c_kernel_launch, extern=True), path=c_path) assignments = FindNodes(Assignment).visit(c_kernel.body) assignments2remove = ['griddim', 'blockdim'] @@ -225,9 +178,8 @@ def transform_subroutine(self, routine, **kwargs): if depth > 1: c_kernel.spec.prepend(Import(module=f'{c_kernel.name.lower()}.h', c_import=True)) - self.c_path = (path/c_kernel.name.lower()).with_suffix(self.file_suffix()) - Sourcefile.to_file(source=self.codegen(c_kernel, extern=self.language=='cpp'), - path=self.c_path) + c_path = (path/c_kernel.name.lower()).with_suffix(self.file_suffix()) + Sourcefile.to_file(source=self.codegen(c_kernel, extern=self.language=='cpp'), path=c_path) header_path = (path/c_kernel.name.lower()).with_suffix('.h') Sourcefile.to_file(source=self.codegen(c_kernel, header=True), path=header_path) @@ -267,299 +219,6 @@ def 
interface_to_import(self, routine, targets): if removal_map: routine.spec = Transformer(removal_map).visit(routine.spec) - def c_struct_typedef(self, derived): - """ - Create the :class:`TypeDef` for the C-wrapped struct definition. - """ - typename = f'{derived.name if isinstance(derived, TypeDef) else derived.dtype.name}_c' - typedef = TypeDef(name=typename.lower(), body=(), bind_c=True) # pylint: disable=unexpected-keyword-arg - if isinstance(derived, TypeDef): - variables = derived.variables - else: - variables = derived.dtype.typedef.variables - declarations = [] - for v in variables: - ctype = v.type.clone(kind=self.iso_c_intrinsic_kind(v.type, typedef)) - vnew = v.clone(name=v.basename.lower(), scope=typedef, type=ctype) - declarations += (VariableDeclaration(symbols=(vnew,)),) - typedef._update(body=as_tuple(declarations)) - return typedef - - def iso_c_intrinsic_import(self, scope): - import_symbols = ['c_int', 'c_double', 'c_float'] - if self.use_c_ptr: - import_symbols += ['c_ptr', 'c_loc'] - symbols = as_tuple(Variable(name=name, scope=scope) for name in import_symbols) - isoc_import = Import(module='iso_c_binding', symbols=symbols) - return isoc_import - - def iso_c_intrinsic_kind(self, _type, scope, **kwargs): - is_array = kwargs.get('is_array', False) - if _type.dtype == BasicType.INTEGER: - return Variable(name='c_int', scope=scope) - if _type.dtype == BasicType.REAL: - kind = str(_type.kind) - if kind.lower() in ('real32', 'c_float'): - return Variable(name='c_float', scope=scope) - if kind.lower() in ('real64', 'jprb', 'selected_real_kind(13, 300)', 'c_double', 'c_ptr'): - if self.use_c_ptr and is_array: - return Variable(name='c_ptr', scope=scope) - return Variable(name='c_double', scope=scope) - return None - - @staticmethod - def c_intrinsic_kind(_type, scope): - if _type.dtype == BasicType.LOGICAL: - return Variable(name='int', scope=scope) - if _type.dtype == BasicType.INTEGER: - return Variable(name='int', scope=scope) - if _type.dtype == 
BasicType.REAL: - kind = str(_type.kind) - if kind.lower() in ('real32', 'c_float'): - return Variable(name='float', scope=scope) - if kind.lower() in ('real64', 'jprb', 'selected_real_kind(13, 300)', 'c_double'): - return Variable(name='double', scope=scope) - return None - - def generate_iso_c_wrapper_routine(self, routine, c_structs, bind_name=None): - wrapper = Subroutine(name=f'{routine.name}_fc') - - if bind_name is None: - bind_name = f'{routine.name.lower()}_c' - interface = self.generate_iso_c_interface(routine, bind_name, c_structs, scope=wrapper) - - # Generate the wrapper function - wrapper_spec = Transformer().visit(routine.spec) - wrapper_spec.prepend(self.iso_c_intrinsic_import(wrapper)) - wrapper_spec.append(struct.clone(parent=wrapper) for struct in c_structs.values()) - wrapper_spec.append(interface) - wrapper.spec = wrapper_spec - - # Create the wrapper function with casts and interface invocation - local_arg_map = OrderedDict() - casts_in = [] - casts_out = [] - for arg in routine.arguments: - if isinstance(arg.type.dtype, DerivedType): - ctype = SymbolAttributes(DerivedType(name=c_structs[arg.type.dtype.name.lower()].name)) - cvar = Variable(name=f'{arg.name}_c', type=ctype, scope=wrapper) - cast_in = InlineCall(ProcedureSymbol('transfer', scope=wrapper), - parameters=(arg,), kw_parameters={'mold': cvar}) - casts_in += [Assignment(lhs=cvar, rhs=cast_in)] - - cast_out = InlineCall(ProcedureSymbol('transfer', scope=wrapper), - parameters=(cvar,), kw_parameters={'mold': arg}) - casts_out += [Assignment(lhs=arg, rhs=cast_out)] - local_arg_map[arg.name] = cvar - - arguments = tuple(local_arg_map[a] if a in local_arg_map else Variable(name=a) - for a in routine.argnames) - use_device_addr = [] - if self.use_c_ptr: - arg_map = {} - for arg in routine.arguments: - if isinstance(arg, Array): - new_dims = tuple(sym.RangeIndex((None, None)) for _ in arg.dimensions) - arg_map[arg] = arg.clone(dimensions=new_dims, type=arg.type.clone(target=True)) - 
routine.spec = SubstituteExpressions(arg_map).visit(routine.spec) - - call_arguments = [] - for arg in routine.arguments: - if isinstance(arg, Array): - new_arg = arg.clone(dimensions=None) - c_loc = sym.InlineCall( - function=sym.ProcedureSymbol(name="c_loc", scope=routine), - parameters=(new_arg,)) - call_arguments.append(c_loc) - use_device_addr.append(arg.name) - elif isinstance(arg.type.dtype, DerivedType): - cvar = Variable(name=f'{arg.name}_c', type=ctype, scope=wrapper) - call_arguments.append(cvar) - else: - call_arguments.append(arg) - else: - call_arguments = arguments - - wrapper_body = casts_in - if self.language in ['cuda', 'hip']: - wrapper_body += [Pragma(keyword='acc', content=f'host_data use_device({", ".join(use_device_addr)})')] - wrapper_body += [ - CallStatement(name=Variable(name=interface.body[0].name), arguments=call_arguments) # pylint: disable=unsubscriptable-object - ] - if self.language in ['cuda', 'hip']: - wrapper_body += [Pragma(keyword='acc', content='end host_data')] - wrapper_body += casts_out - wrapper.body = Section(body=as_tuple(wrapper_body)) - - # Copy internal argument and declaration definitions - wrapper.variables = tuple(arg.clone(scope=wrapper) for arg in routine.arguments) + tuple(local_arg_map.values()) - wrapper.arguments = tuple(arg.clone(scope=wrapper) for arg in routine.arguments) - - # Remove any unused imports - sanitise_imports(wrapper) - return wrapper - - def generate_iso_c_wrapper_module(self, module): - """ - Generate the ISO-C wrapper module for a raw Fortran module. - - Note, we only create getter functions for module variables here, - since certain type definitions cannot be used in ISO-C interfaces - due to pointer variables, etc. 
- """ - modname = f'{module.name}_fc' - wrapper_module = Module(name=modname) - - # Generate bind(c) intrinsics for module variables - original_import = Import(module=module.name) - isoc_import = self.iso_c_intrinsic_import(module) - implicit_none = Intrinsic(text='implicit none') - spec = [original_import, isoc_import, implicit_none] - - # Create getter methods for module-level variables (I know... :( ) - if self.language == 'c': - wrappers = [] - for decl in FindNodes(VariableDeclaration).visit(module.spec): - for v in decl.symbols: - if isinstance(v.type.dtype, DerivedType) or v.type.pointer or v.type.allocatable: - continue - gettername = f'{module.name.lower()}__get__{v.name.lower()}' - getter = Subroutine(name=gettername, bind=gettername, is_function=True, parent=wrapper_module) - - getter.spec = Section(body=(Import(module=module.name, symbols=(v.clone(scope=getter), )), )) - isoctype = SymbolAttributes(v.type.dtype, kind=self.iso_c_intrinsic_kind(v.type, getter)) - if isoctype.kind in ['c_int', 'c_float', 'c_double']: - getter.spec.append(Import(module='iso_c_binding', symbols=(isoctype.kind, ))) - getter.body = Section(body=(Assignment(lhs=Variable(name=gettername, scope=getter), rhs=v),)) - getter.variables = as_tuple(Variable(name=gettername, type=isoctype, scope=getter)) - wrappers += [getter] - wrapper_module.contains = Section(body=(Intrinsic('CONTAINS'), *wrappers)) - - # Create function interface definitions for module functions - intfs = [] - for fct in module.subroutines: - if fct.is_function: - intf_fct = fct.clone(bind=f'{fct.name.lower()}') - intf_fct.body = Section(body=()) - - intf_args = [] - for arg in intf_fct.arguments: - # Only scalar, intent(in) arguments are pass by value - # Pass by reference for array types - value = isinstance(arg, Scalar) and arg.type.intent and arg.type.intent.lower() == 'in' - kind = self.iso_c_intrinsic_kind(arg.type, intf_fct) - ctype = SymbolAttributes(arg.type.dtype, value=value, kind=kind) - dimensions = 
arg.dimensions if isinstance(arg, Array) else None - var = Variable(name=arg.name, dimensions=dimensions, type=ctype, scope=intf_fct) - intf_args += (var,) - intf_fct.arguments = intf_args - sanitise_imports(intf_fct) - intfs.append(intf_fct) - spec.append(Interface(body=(as_tuple(intfs),))) - - # Remove any unused imports - sanitise_imports(wrapper_module) - return wrapper_module - - def generate_iso_c_interface(self, routine, bind_name, c_structs, scope): - """ - Generate the ISO-C subroutine interface - """ - intf_name = f'{routine.name}_iso_c' - intf_routine = Subroutine(name=intf_name, body=None, args=(), parent=scope, bind=bind_name) - intf_spec = Section(body=as_tuple(self.iso_c_intrinsic_import(intf_routine))) - if self.language == 'c': - for im in FindNodes(Import).visit(routine.spec): - if not im.c_import: - im_symbols = tuple(s.clone(scope=intf_routine) for s in im.symbols) - intf_spec.append(im.clone(symbols=im_symbols)) - intf_spec.append(Intrinsic(text='implicit none')) - intf_spec.append(c_structs.values()) - intf_routine.spec = intf_spec - - # Generate variables and types for argument declarations - for arg in routine.arguments: - if isinstance(arg.type.dtype, DerivedType): - struct_name = c_structs[arg.type.dtype.name.lower()].name - ctype = SymbolAttributes(DerivedType(name=struct_name), shape=arg.type.shape) - else: - # Only scalar, intent(in) arguments are pass by value - # Pass by reference for array types - value = isinstance(arg, Scalar) and arg.type.intent.lower() == 'in' and not arg.type.optional - kind = self.iso_c_intrinsic_kind(arg.type, intf_routine, is_array=isinstance(arg, Array)) - if self.use_c_ptr: - if isinstance(arg, Array): - ctype = SymbolAttributes(DerivedType(name="c_ptr"), value=True, kind=None) - else: - ctype = SymbolAttributes(arg.type.dtype, value=value, kind=kind) - else: - ctype = SymbolAttributes(arg.type.dtype, value=value, kind=kind) - if self.use_c_ptr: - dimensions = None - else: - dimensions = arg.dimensions if 
isinstance(arg, Array) else None - var = Variable(name=arg.name, dimensions=dimensions, type=ctype, scope=intf_routine) - intf_routine.variables += (var,) - intf_routine.arguments += (var,) - - sanitise_imports(intf_routine) - - return Interface(body=(intf_routine, )) - - def generate_c_header(self, module, **kwargs): - """ - Re-generate the C header as a module with all pertinent nodes, - but not Fortran-specific intrinsics (eg. implicit none or save). - """ - header_module = Module(name=f'{module.name}_c') - - # Generate stubs for getter functions - spec = [] - for decl in FindNodes(VariableDeclaration).visit(module.spec): - assert len(decl.symbols) == 1 - v = decl.symbols[0] - # Bail if not a basic type - if isinstance(v.type.dtype, DerivedType): - continue - ctype = self.c_intrinsic_kind(v.type, module) - tmpl_function = f'{ctype} {module.name.lower()}__get__{v.name.lower()}();' - spec += [Intrinsic(text=tmpl_function)] - - # Re-create type definitions with range indices (``:``) replaced by pointers - for td in FindNodes(TypeDef).visit(module.spec): - header_td = TypeDef(name=td.name.lower(), body=(), parent=header_module) # pylint: disable=unexpected-keyword-arg - declarations = [] - for decl in td.declarations: - variables = [] - for v in decl.symbols: - # Note that we force lower-case on all struct variables - if isinstance(v, Array): - new_shape = as_tuple(d for d in v.shape if not isinstance(d, RangeIndex)) - new_type = v.type.clone(shape=new_shape) - variables += [v.clone(name=v.name.lower(), type=new_type, scope=header_td)] - else: - variables += [v.clone(name=v.name.lower(), scope=header_td)] - declarations += [VariableDeclaration(symbols=as_tuple(variables), dimensions=decl.dimensions, - comment=decl.comment, pragma=decl.pragma)] - header_td._update(body=as_tuple(declarations)) - spec += [header_td] - - # Generate a header declaration for module routines - for fct in module.subroutines: - if fct.is_function: - fct_type = 'void' - if fct.name in 
fct.variables: - fct_type = self.c_intrinsic_kind(fct.variable_map[fct.name.lower()].type, header_module) - - args = [f'{self.c_intrinsic_kind(a.type, header_module)} {a.name.lower()}' - for a in fct.arguments] - fct_decl = f'{fct_type} {fct.name.lower()}({", ".join(args)});' - spec.append(Intrinsic(text=fct_decl)) - - header_module.spec = spec - header_module.rescope_symbols() - return header_module - @staticmethod def apply_de_reference(routine): """ diff --git a/loki/transformations/transpile/fortran_iso_c_wrapper.py b/loki/transformations/transpile/fortran_iso_c_wrapper.py new file mode 100644 index 000000000..7f77fe8f6 --- /dev/null +++ b/loki/transformations/transpile/fortran_iso_c_wrapper.py @@ -0,0 +1,509 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+
+from pathlib import Path
+from collections import OrderedDict
+
+from loki.backend import cgen, fgen, cudagen, cppgen
+from loki.batch import Transformation, ProcedureItem, ModuleItem
+from loki.expression import symbols as sym
+from loki.ir import (
+    nodes as ir, FindNodes, SubstituteExpressions, Transformer
+)
+from loki.module import Module
+from loki.sourcefile import Sourcefile
+from loki.subroutine import Subroutine
+from loki.types import BasicType, DerivedType, SymbolAttributes
+from loki.tools import as_tuple
+
+from loki.transformations.utilities import sanitise_imports
+
+
+__all__ = [
+    'c_intrinsic_kind', 'iso_c_intrinsic_import',
+    'iso_c_intrinsic_kind', 'c_struct_typedef',
+    'generate_iso_c_interface', 'generate_iso_c_wrapper_routine',
+    'generate_iso_c_wrapper_module', 'generate_c_header',
+    'FortranISOCWrapperTransformation'
+]
+
+
+class FortranISOCWrapperTransformation(Transformation):
+    """
+    Wrapper transformation that generates ISO-C Fortran wrappers and C
+    headers for translated kernels or additional header modules.
+
+    In addition to :any:`Subroutine` objects with the role
+    ``'kernel'``, this transformation will process :any:`Module`
+    objects with the role ``'header'``. This will generate ISO-C
+    wrappers for derived types and the corresponding C-compatible
+    structs in C header files.
+
+    Parameters
+    ----------
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+    language : string
+        Actual C-style language to generate; must be one of ``'c'``,
+        ``'cpp'`` or ``'cuda'`` for C, C++ and CUDA respectively.
+
+    Raises
+    ------
+    ValueError
+        If ``language`` is not one of the supported languages.
+    """
+
+    item_filter = (ProcedureItem, ModuleItem)
+
+    _supported_languages = ['c', 'cpp', 'cuda']
+
+    def __init__(self, use_c_ptr=False, language='c'):
+        self.use_c_ptr = use_c_ptr
+        self.language = language.lower()
+
+        if self.language == 'c':
+            self.codegen = cgen
+        elif self.language == 'cpp':
+            self.codegen = cppgen
+        elif self.language == 'cuda':
+            self.codegen = cudagen
+        else:
+            raise ValueError(f'language "{self.language}" is not supported!'
+                             f' (supported languages: "{self._supported_languages}")')
+
+    @staticmethod
+    def _output_path(kwargs):
+        """
+        Determine the output directory from the transformation kwargs.
+
+        An explicit ``path`` takes precedence; otherwise ``output_dir``
+        is read from ``build_args``. The result is always converted to
+        a :class:`pathlib.Path`, so that plain strings are accepted too.
+        """
+        if 'path' in kwargs:
+            return Path(kwargs['path'])
+        build_args = kwargs.get('build_args')
+        return Path(build_args.get('output_dir'))
+
+    def transform_module(self, module, **kwargs):
+        """
+        Write the ISO-C wrapper module (``.F90``) and the C header
+        (``.h``) for a :any:`Module` with the role ``'header'``.
+
+        Modules with any other role are left untouched.
+        """
+        path = self._output_path(kwargs)
+        role = kwargs.get('role', 'kernel')
+
+        if role == 'header':
+            # Generate Fortran wrapper module
+            wrapper = generate_iso_c_wrapper_module(
+                module, use_c_ptr=self.use_c_ptr, language=self.language
+            )
+            wrapperpath = (path/wrapper.name.lower()).with_suffix('.F90')
+            Sourcefile.to_file(source=fgen(wrapper), path=wrapperpath)
+
+            # Generate C header file from module
+            c_header = generate_c_header(module)
+            c_path = (path/c_header.name.lower()).with_suffix('.h')
+            Sourcefile.to_file(source=self.codegen(c_header), path=c_path)
+
+    def transform_subroutine(self, routine, **kwargs):
+        """
+        Write the ISO-C Fortran wrapper module (``.F90``) for a
+        :any:`Subroutine` with the role ``'kernel'``.
+
+        Routines with any other role are left untouched.
+        """
+        path = self._output_path(kwargs)
+        role = kwargs.get('role', 'kernel')
+
+        if role == 'kernel':
+            # Collect a C-struct typedef for every derived-type argument
+            c_structs = {}
+            for arg in routine.arguments:
+                if isinstance(arg.type.dtype, DerivedType):
+                    c_structs[arg.type.dtype.name.lower()] = c_struct_typedef(
+                        arg.type, use_c_ptr=self.use_c_ptr
+                    )
+
+            # Generate Fortran wrapper module
+            bind_name = None if self.language in ['c', 'cpp'] else f'{routine.name.lower()}_c_launch'
+            wrapper = generate_iso_c_wrapper_routine(
+                routine, c_structs, bind_name=bind_name,
+                use_c_ptr=self.use_c_ptr, language=self.language
+            )
+            contains = ir.Section(body=(ir.Intrinsic('CONTAINS'), wrapper))
+            wrapperpath = (path/wrapper.name.lower()).with_suffix('.F90')
+            module = Module(name=f'{wrapper.name.upper()}_MOD', contains=contains)
+            module.spec = ir.Section(body=(ir.Import(module='iso_c_binding'),))
+            Sourcefile.to_file(source=fgen(module), path=wrapperpath)
+
+
+def c_intrinsic_kind(_type, scope):
+    """
+    Determine the intrinsic C-type for a given symbol table entry.
+
+    Parameters
+    ----------
+    _type : :any:`SymbolAttributes`
+        The symbols type attribute to determine type and kind
+    scope : :any:`Scope`
+        The containing scope in which to clone the type symbol
+
+    Returns
+    -------
+    :any:`Variable` or None
+        Symbol named after the C type (``int``, ``float`` or
+        ``double``), or ``None`` if the type/kind is not recognised.
+    """
+    if _type.dtype == BasicType.LOGICAL:
+        return sym.Variable(name='int', scope=scope)
+    if _type.dtype == BasicType.INTEGER:
+        return sym.Variable(name='int', scope=scope)
+    if _type.dtype == BasicType.REAL:
+        kind = str(_type.kind).lower()
+        if kind in ('real32', 'c_float'):
+            return sym.Variable(name='float', scope=scope)
+        if kind in ('real64', 'jprb', 'selected_real_kind(13, 300)', 'c_double'):
+            return sym.Variable(name='double', scope=scope)
+    return None
+
+
+def iso_c_intrinsic_import(scope, use_c_ptr=False):
+    """
+    Create :any:`Import` object for the intrinsic C base types.
+
+    Parameters
+    ----------
+    scope : :any:`Scope`
+        The scope in which to create the import node and type symbols.
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+    """
+    import_symbols = ['c_int', 'c_double', 'c_float']
+    if use_c_ptr:
+        import_symbols += ['c_ptr', 'c_loc']
+    symbols = as_tuple(sym.Variable(name=name, scope=scope) for name in import_symbols)
+    return ir.Import(module='iso_c_binding', symbols=symbols)
+
+
+def iso_c_intrinsic_kind(_type, scope, is_array=False, use_c_ptr=False):
+    """
+    Determine the intrinsic ISO-C type for a given symbol table entry.
+
+    Parameters
+    ----------
+    _type : :any:`SymbolAttributes`
+        The symbols type attribute to determine type and kind
+    scope : :any:`Scope`
+        The scope in which to create the kind symbol
+    is_array : bool
+        Flag indicating if the passed type belongs to an array symbol.
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+
+    Returns
+    -------
+    :any:`Variable` or None
+        Symbol named after the ISO-C kind, or ``None`` if the
+        type/kind is not recognised.
+    """
+    if _type.dtype == BasicType.INTEGER:
+        return sym.Variable(name='c_int', scope=scope)
+
+    if _type.dtype == BasicType.REAL:
+        kind = str(_type.kind).lower()
+        if kind in ('real32', 'c_float'):
+            return sym.Variable(name='c_float', scope=scope)
+        if kind in ('real64', 'jprb', 'selected_real_kind(13, 300)', 'c_double', 'c_ptr'):
+            if use_c_ptr and is_array:
+                return sym.Variable(name='c_ptr', scope=scope)
+            return sym.Variable(name='c_double', scope=scope)
+
+    return None
+
+
+def c_struct_typedef(derived, use_c_ptr=False):
+    """
+    Create the :class:`TypeDef` for the C-wrapped struct definition.
+
+    The new type definition is named after the original type with the
+    suffix ``'_c'`` (lower-cased) and declares every member with its
+    name lower-cased and its kind replaced by the ISO-C kind.
+
+    Parameters
+    ----------
+    derived : :any:`TypeDef` or :any:`SymbolAttributes`
+        Either the type definition itself, or the type attribute of
+        a symbol whose ``dtype`` carries the type definition.
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+    """
+    typename = f'{derived.name if isinstance(derived, ir.TypeDef) else derived.dtype.name}_c'
+    typedef = ir.TypeDef(name=typename.lower(), body=(), bind_c=True)  # pylint: disable=unexpected-keyword-arg
+    if isinstance(derived, ir.TypeDef):
+        variables = derived.variables
+    else:
+        variables = derived.dtype.typedef.variables
+    declarations = []
+    for v in variables:
+        ctype = v.type.clone(kind=iso_c_intrinsic_kind(v.type, typedef, use_c_ptr=use_c_ptr))
+        vnew = v.clone(name=v.basename.lower(), scope=typedef, type=ctype)
+        declarations += (ir.VariableDeclaration(symbols=(vnew,)),)
+    typedef._update(body=as_tuple(declarations))
+    return typedef
+
+
+def generate_iso_c_interface(routine, bind_name, c_structs, scope, use_c_ptr=False, language='c'):
+    """
+    Generate the ISO-C subroutine :any:`Interface` object for a given :any:`Subroutine`.
+
+    Parameters
+    ----------
+    routine : :any:`Subroutine`
+        The subroutine for which to generate the interface
+    bind_name : str
+        Name of the C-function to which this interface corresponds.
+    c_structs : dict of str to :any:`TypeDef`
+        Map from lower-case Fortran derived type name to the
+        C-struct type definition
+    scope : :any:`Scope`
+        Parent scope in which to create the :any:`Interface`
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+    language : string
+        C-style language to generate; if this is ``'c'``, we resolve
+        non-C imports.
+    """
+    intf_name = f'{routine.name}_iso_c'
+    intf_routine = Subroutine(name=intf_name, body=None, args=(), parent=scope, bind=bind_name)
+    intf_spec = ir.Section(
+        body=as_tuple(iso_c_intrinsic_import(intf_routine, use_c_ptr=use_c_ptr))
+    )
+    if language == 'c':
+        for im in FindNodes(ir.Import).visit(routine.spec):
+            if not im.c_import:
+                im_symbols = tuple(s.clone(scope=intf_routine) for s in im.symbols)
+                intf_spec.append(im.clone(symbols=im_symbols))
+    intf_spec.append(ir.Intrinsic(text='implicit none'))
+    intf_spec.append(c_structs.values())
+    intf_routine.spec = intf_spec
+
+    # Generate variables and types for argument declarations
+    for arg in routine.arguments:
+        is_array = isinstance(arg, sym.Array)
+        if isinstance(arg.type.dtype, DerivedType):
+            struct_name = c_structs[arg.type.dtype.name.lower()].name
+            ctype = SymbolAttributes(DerivedType(name=struct_name), shape=arg.type.shape)
+        else:
+            # Only scalar, intent(in) arguments are pass by value
+            # Pass by reference for array types.
+            # A missing intent is treated as "not intent(in)" rather
+            # than failing on ``None.lower()``.
+            value = (
+                isinstance(arg, sym.Scalar)
+                and (arg.type.intent or '').lower() == 'in'
+                and not arg.type.optional
+            )
+            kind = iso_c_intrinsic_kind(arg.type, intf_routine, is_array=is_array)
+            if use_c_ptr and is_array:
+                ctype = SymbolAttributes(DerivedType(name="c_ptr"), value=True, kind=None)
+            else:
+                ctype = SymbolAttributes(arg.type.dtype, value=value, kind=kind)
+        # With ``c_ptr`` arguments no dimensions are declared at all
+        dimensions = arg.dimensions if is_array and not use_c_ptr else None
+        var = sym.Variable(name=arg.name, dimensions=dimensions, type=ctype, scope=intf_routine)
+        intf_routine.variables += (var,)
+        intf_routine.arguments += (var,)
+
+    sanitise_imports(intf_routine)
+
+    return ir.Interface(body=(intf_routine, ))
+
+
+def generate_iso_c_wrapper_routine(routine, c_structs, bind_name=None, use_c_ptr=False, language='c'):
+    """
+    Generate Fortran ISO-C wrapper :any:`Subroutine` that corresponds
+    to a transpiled C method.
+
+    The new wrapper subroutine will have the suffix ``'_fc'`` appended
+    to the original subroutine name and, unless :data:`bind_name` is
+    given, bind to a C function with the suffix ``'_c'``.
+
+    This method will call :meth:`generate_iso_c_interface` to generate
+    the ISO-C compatible interface for the C function and generate a
+    wrapper :any:`Subroutine` that converts the native Fortran arguments
+    to a call to the C function with ISO-C compatible arguments.
+
+    Note
+    ----
+    If :data:`use_c_ptr` is set, the spec of :data:`routine` is
+    modified in place: array arguments are re-declared with
+    deferred-shape dimensions (``:``) and the ``target`` attribute.
+
+    Parameters
+    ----------
+    routine : :any:`Subroutine`
+        The subroutine for which to generate the interface
+    c_structs : dict of str to :any:`TypeDef`
+        Map from lower-case Fortran derived type name to the
+        C-struct type definition
+    bind_name : str
+        Name of the C-function to which this interface corresponds.
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+    language : string
+        C-style language to generate; if this is ``'c'``, we resolve
+        non-C imports.
+    """
+    wrapper = Subroutine(name=f'{routine.name}_fc')
+
+    if bind_name is None:
+        bind_name = f'{routine.name.lower()}_c'
+    interface = generate_iso_c_interface(
+        routine, bind_name, c_structs, scope=wrapper, use_c_ptr=use_c_ptr, language=language
+    )
+
+    # Generate the wrapper function
+    wrapper_spec = Transformer().visit(routine.spec)
+    wrapper_spec.prepend(iso_c_intrinsic_import(wrapper, use_c_ptr=use_c_ptr))
+    wrapper_spec.append(struct.clone(parent=wrapper) for struct in c_structs.values())
+    wrapper_spec.append(interface)
+    wrapper.spec = wrapper_spec
+
+    # Create the wrapper function with casts and interface invocation
+    local_arg_map = OrderedDict()
+    casts_in = []
+    casts_out = []
+    for arg in routine.arguments:
+        if isinstance(arg.type.dtype, DerivedType):
+            ctype = SymbolAttributes(DerivedType(name=c_structs[arg.type.dtype.name.lower()].name))
+            cvar = sym.Variable(name=f'{arg.name}_c', type=ctype, scope=wrapper)
+            cast_in = sym.InlineCall(sym.ProcedureSymbol('transfer', scope=wrapper),
+                                     parameters=(arg,), kw_parameters={'mold': cvar})
+            casts_in += [ir.Assignment(lhs=cvar, rhs=cast_in)]
+
+            cast_out = sym.InlineCall(sym.ProcedureSymbol('transfer', scope=wrapper),
+                                      parameters=(cvar,), kw_parameters={'mold': arg})
+            casts_out += [ir.Assignment(lhs=arg, rhs=cast_out)]
+            local_arg_map[arg.name] = cvar
+
+    arguments = tuple(local_arg_map[a] if a in local_arg_map else sym.Variable(name=a)
+                      for a in routine.argnames)
+    use_device_addr = []
+    if use_c_ptr:
+        arg_map = {}
+        for arg in routine.arguments:
+            if isinstance(arg, sym.Array):
+                new_dims = tuple(sym.RangeIndex((None, None)) for _ in arg.dimensions)
+                arg_map[arg] = arg.clone(dimensions=new_dims, type=arg.type.clone(target=True))
+        routine.spec = SubstituteExpressions(arg_map).visit(routine.spec)
+
+        call_arguments = []
+        for arg in routine.arguments:
+            if isinstance(arg, sym.Array):
+                new_arg = arg.clone(dimensions=None)
+                c_loc = sym.InlineCall(
+                    function=sym.ProcedureSymbol(name="c_loc", scope=routine),
+                    parameters=(new_arg,))
+                call_arguments.append(c_loc)
+                use_device_addr.append(arg.name)
+            elif isinstance(arg.type.dtype, DerivedType):
+                # Re-use the struct variable created for the casts above,
+                # so that each derived-type argument keeps its own type
+                call_arguments.append(local_arg_map[arg.name])
+            else:
+                call_arguments.append(arg)
+    else:
+        call_arguments = arguments
+
+    # Assemble the body in a fresh list, leaving ``casts_in`` untouched
+    wrapper_body = list(casts_in)
+    if language in ['cuda', 'hip']:
+        wrapper_body += [
+            ir.Pragma(keyword='acc', content=f'host_data use_device({", ".join(use_device_addr)})')
+        ]
+    wrapper_body += [
+        ir.CallStatement(name=sym.Variable(name=interface.body[0].name), arguments=call_arguments)
+    ]
+    if language in ['cuda', 'hip']:
+        wrapper_body += [ir.Pragma(keyword='acc', content='end host_data')]
+    wrapper_body += casts_out
+    wrapper.body = ir.Section(body=as_tuple(wrapper_body))
+
+    # Copy internal argument and declaration definitions
+    wrapper.variables = tuple(arg.clone(scope=wrapper) for arg in routine.arguments) + tuple(local_arg_map.values())
+    wrapper.arguments = tuple(arg.clone(scope=wrapper) for arg in routine.arguments)
+
+    # Remove any unused imports
+    sanitise_imports(wrapper)
+    return wrapper
+
+
+def generate_iso_c_wrapper_module(module, use_c_ptr=False, language='c'):
+    """
+    Generate the ISO-C wrapper module for a raw Fortran module.
+
+    The new wrapper module will have the suffix ``'_fc'`` appended to
+    the name and contain ISO-C function interfaces for contained
+    :any:`Subroutine` objects. This method will call
+    :meth:`generate_iso_c_routine` to generate the ISO-C compatible
+    procedure interfaces.
+
+    Note
+    ----
+    If the module contains global variables, we generate templated
+    getter functions here, as global Fortran variables are not
+    accessible via ISO-C interfaces.
+
+    Parameters
+    ----------
+    module : :any:`Module`
+        The module for which to generate the interface module
+    use_c_ptr : bool, optional
+        Use ``c_ptr`` for array declarations and ``c_loc(...)`` to
+        pass the corresponding argument. Default is ``False``.
+ language : string + C-style language to generate; if this is ``'c'``, we resolve + non-C imports. + """ + modname = f'{module.name}_fc' + wrapper_module = Module(name=modname) + + # Create getter methods for module-level variables (I know... :( ) + if language == 'c': + wrappers = [] + for decl in FindNodes(ir.VariableDeclaration).visit(module.spec): + for v in decl.symbols: + if isinstance(v.type.dtype, DerivedType) or v.type.pointer or v.type.allocatable: + continue + gettername = f'{module.name.lower()}__get__{v.name.lower()}' + getter = Subroutine(name=gettername, bind=gettername, is_function=True, parent=wrapper_module) + + getter.spec = ir.Section( + body=(ir.Import(module=module.name, symbols=(v.clone(scope=getter), )), ) + ) + isoctype = SymbolAttributes( + v.type.dtype, kind=iso_c_intrinsic_kind(v.type, getter, use_c_ptr=use_c_ptr) + ) + if isoctype.kind in ['c_int', 'c_float', 'c_double']: + getter.spec.append(ir.Import(module='iso_c_binding', symbols=(isoctype.kind, ))) + getter.body = ir.Section( + body=(ir.Assignment(lhs=sym.Variable(name=gettername, scope=getter), rhs=v),) + ) + getter.variables = as_tuple(sym.Variable(name=gettername, type=isoctype, scope=getter)) + wrappers += [getter] + wrapper_module.contains = ir.Section(body=(ir.Intrinsic('CONTAINS'), *wrappers)) + + # Remove any unused imports + sanitise_imports(wrapper_module) + return wrapper_module + + +def generate_c_header(module): + """ + Re-generate the C header as a module with all pertinent nodes, + but not Fortran-specific intrinsics (eg. implicit none or save). + + The new header module will have the suffix ``'_c'`` appended to + the original module name. 
+ + Parameters + ---------- + module : :any:`Module` + The module for which to generate the C header + """ + header_module = Module(name=f'{module.name}_c') + + # Generate stubs for getter functions + spec = [] + for decl in FindNodes(ir.VariableDeclaration).visit(module.spec): + assert len(decl.symbols) == 1 + v = decl.symbols[0] + # Bail if not a basic type + if isinstance(v.type.dtype, DerivedType): + continue + ctype = c_intrinsic_kind(v.type, scope=module) + tmpl_function = f'{ctype} {module.name.lower()}__get__{v.name.lower()}();' + spec += [ir.Intrinsic(text=tmpl_function)] + + # Re-create type definitions with range indices (``:``) replaced by pointers + for td in FindNodes(ir.TypeDef).visit(module.spec): + header_td = ir.TypeDef(name=td.name.lower(), body=(), parent=header_module) # pylint: disable=unexpected-keyword-arg + declarations = [] + for decl in td.declarations: + variables = [] + for v in decl.symbols: + # Note that we force lower-case on all struct variables + if isinstance(v, sym.Array): + new_shape = as_tuple(d for d in v.shape if not isinstance(d, sym.RangeIndex)) + new_type = v.type.clone(shape=new_shape) + variables += [v.clone(name=v.name.lower(), type=new_type, scope=header_td)] + else: + variables += [v.clone(name=v.name.lower(), scope=header_td)] + declarations += [ir.VariableDeclaration( + symbols=as_tuple(variables), dimensions=decl.dimensions, + comment=decl.comment, pragma=decl.pragma + )] + header_td._update(body=as_tuple(declarations)) + spec += [header_td] + + header_module.spec = spec + header_module.rescope_symbols() + return header_module diff --git a/loki/transformations/transpile/tests/test_scc_cuda.py b/loki/transformations/transpile/tests/test_scc_cuda.py new file mode 100644 index 000000000..cecde153a --- /dev/null +++ b/loki/transformations/transpile/tests/test_scc_cuda.py @@ -0,0 +1,256 @@ +# (C) Copyright 2018- ECMWF. 
+# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from pathlib import Path +import pytest + +from loki import Scheduler, Dimension, read_file +from loki.frontend import available_frontends +from loki.ir import nodes as ir, FindNodes + +from loki.transformations.transpile import ( + FortranCTransformation, FortranISOCWrapperTransformation +) +from loki.transformations.single_column import ( + SCCLowLevelHoist, SCCLowLevelParametrise +) + + +@pytest.fixture(scope='module', name='horizontal') +def fixture_horizontal(): + return Dimension(name='horizontal', size='nlon', index='jl', bounds=('start', 'iend')) + + +@pytest.fixture(scope='module', name='vertical') +def fixture_vertical(): + return Dimension(name='vertical', size='nz', index='jk') + + +@pytest.fixture(scope='module', name='blocking') +def fixture_blocking(): + return Dimension(name='blocking', size='nb', index='b') + + +@pytest.fixture(scope='module', name='here') +def fixture_here(): + return Path(__file__).parent + + +@pytest.fixture(name='config') +def fixture_config(): + """ + Default configuration dict with basic options. 
+ """ + return { + 'default': { + 'mode': 'idem', + 'role': 'kernel', + 'expand': True, + 'strict': False, # cudafor import + }, + 'routines': { + 'driver': {'role': 'driver'} + } + } + +def remove_whitespace_linebreaks(text): + return text.replace(' ', '').replace('\n', ' ').replace('\r', '').replace('\t', '').lower() + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_scc_cuda_parametrise(tmp_path, here, frontend, config, horizontal, vertical, blocking): + """ + Test SCC-CUF transformation type 0, thus including parametrising (array dimension(s)) + """ + + proj = here / '../../tests/sources/projSccCuf/module' + + scheduler = Scheduler( + paths=[proj], config=config, seed_routines=['driver'], + output_dir=tmp_path, frontend=frontend, xmods=[tmp_path] + ) + + dic2p = {'nz': 137} + cuda_transform = SCCLowLevelParametrise( + horizontal=horizontal, vertical=vertical, block_dim=blocking, + transformation_type='parametrise', + dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True, + use_c_ptr=True, dic2p=dic2p, path=here, mode='cuda' + ) + scheduler.process(transformation=cuda_transform) + f2c_transformation = FortranCTransformation(language='cuda') + scheduler.process(transformation=f2c_transformation) + f2cwrap = FortranISOCWrapperTransformation(language='cuda', use_c_ptr=True) + scheduler.process(transformation=f2cwrap) + + kernel = scheduler['kernel_mod#kernel'].ir + kernel_variable_map = kernel.variable_map + assert kernel_variable_map[horizontal.index].type.intent is None + assert kernel_variable_map[horizontal.index].scope == kernel + device = scheduler['kernel_mod#device'].ir + device_variable_map = device.variable_map + assert device_variable_map[horizontal.index].type.intent.lower() == 'in' + assert device_variable_map[horizontal.index].scope == device + + fc_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_fc.F90')) + c_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.c')) + 
c_kernel_header = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.h')) + c_kernel_launch = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c_launch.h')) + c_device = remove_whitespace_linebreaks(read_file(tmp_path/'device_c.c')) + c_elemental_device = remove_whitespace_linebreaks(read_file(tmp_path/'elemental_device_c.c')) + c_some_func = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.c')) + c_some_func_header = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.h')) + + calls = FindNodes(ir.CallStatement).visit(scheduler["driver_mod#driver"].ir.body) + assert len(calls) == 3 + for call in calls: + assert str(call.name).lower() == 'kernel' + assert call.pragma[0].keyword == 'loki' + assert 'removed_loop' in call.pragma[0].content + # kernel_fc.F90 + assert '!$acchost_datause_device(q,t,z)' in fc_kernel + assert 'kernel_iso_c(start,nlon,c_loc(q),c_loc(t),c_loc(z),nb,tot,iend)' in fc_kernel + assert 'bind(c,name="kernel_c_launch")' in fc_kernel + assert 'useiso_c_binding' in fc_kernel + # kernel_c.c + assert '#include' in c_kernel + assert '#include' in c_kernel + assert '#include"kernel_c.h"' in c_kernel + assert '#include"kernel_c_launch.h"' in c_kernel + assert 'include"elemental_device_c.h"' in c_kernel + assert 'include"device_c.h"' in c_kernel + assert 'include"some_func_c.h"' in c_kernel + assert '__global__voidkernel_c' in c_kernel + assert 'jl=threadidx.x;' in c_kernel + assert 'b=blockidx.x;' in c_kernel + assert 'device_c(' in c_kernel + assert 'elemental_device_c(' in c_kernel + assert '=some_func_c(' in c_kernel + # kernel_c.h + assert '__global__voidkernel_c' in c_kernel_header + assert 'jl=threadidx.x;' not in c_kernel_header + assert 'b=blockidx.x;' not in c_kernel_header + # kernel_c_launch.h + assert 'extern"c"' in c_kernel_launch + assert 'voidkernel_c_launch(' in c_kernel_launch + assert 'structdim3blockdim;' in c_kernel_launch + assert 'structdim3griddim;' in c_kernel_launch + assert 'griddim=dim3(' in 
c_kernel_launch + assert 'blockdim=dim3(' in c_kernel_launch + assert 'kernel_c<<>>(' in c_kernel_launch + assert 'cudadevicesynchronize();' in c_kernel_launch + # device_c.c + assert '#include' in c_device + assert '#include' in c_device + assert '#include"device_c.h"' in c_device + # elemental_device_c.c + assert '__device__voiddevice_c(' in c_device + assert '#include' in c_elemental_device + assert '#include' in c_elemental_device + assert '#include"elemental_device_c.h"' in c_elemental_device + # some_func_c.c + assert 'doublesome_func_c(doublea)' in c_some_func + assert 'returnsome_func' in c_some_func + # some_func_c.h + assert 'doublesome_func_c(doublea);' in c_some_func_header + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_scc_cuda_hoist(tmp_path, here, frontend, config, horizontal, vertical, blocking): + """ + Test SCC-CUF transformation type 0, thus including parametrising (array dimension(s)) + """ + + proj = here / '../../tests/sources/projSccCuf/module' + + scheduler = Scheduler( + paths=[proj], config=config, seed_routines=['driver'], + output_dir=tmp_path, frontend=frontend, xmods=[tmp_path] + ) + + cuda_transform = SCCLowLevelHoist( + horizontal=horizontal, vertical=vertical, block_dim=blocking, + transformation_type='parametrise', + dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True, + use_c_ptr=True, path=here, mode='cuda' + ) + scheduler.process(transformation=cuda_transform) + f2c_transformation = FortranCTransformation(language='cuda') + scheduler.process(transformation=f2c_transformation) + f2cwrap = FortranISOCWrapperTransformation(language='cuda', use_c_ptr=True) + scheduler.process(transformation=f2cwrap) + + kernel = scheduler['kernel_mod#kernel'].ir + kernel_variable_map = kernel.variable_map + assert kernel_variable_map[horizontal.index].type.intent is None + assert kernel_variable_map[horizontal.index].scope == kernel + device = scheduler['kernel_mod#device'].ir + device_variable_map 
= device.variable_map + assert device_variable_map[horizontal.index].type.intent.lower() == 'in' + assert device_variable_map[horizontal.index].scope == device + + fc_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_fc.F90')) + c_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.c')) + c_kernel_header = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.h')) + c_kernel_launch = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c_launch.h')) + c_device = remove_whitespace_linebreaks(read_file(tmp_path/'device_c.c')) + c_elemental_device = remove_whitespace_linebreaks(read_file(tmp_path/'elemental_device_c.c')) + c_some_func = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.c')) + c_some_func_header = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.h')) + + calls = FindNodes(ir.CallStatement).visit(scheduler["driver_mod#driver"].ir.body) + assert len(calls) == 3 + for call in calls: + assert str(call.name).lower() == 'kernel' + assert call.pragma[0].keyword == 'loki' + assert 'removed_loop' in call.pragma[0].content + # kernel_fc.F90 + assert '!$acchost_datause_device(q,t,z,local_z,device_local_x)' in fc_kernel + assert 'kernel_iso_c(start,nlon,nz,c_loc(q),c_loc(t),c_loc(z)' in fc_kernel + assert 'c_loc(z),nb,tot,iend,c_loc(local_z),c_loc(device_local_x))' in fc_kernel + assert 'bind(c,name="kernel_c_launch")' in fc_kernel + assert 'useiso_c_binding' in fc_kernel + # kernel_c.c + assert '#include' in c_kernel + assert '#include' in c_kernel + assert '#include"kernel_c.h"' in c_kernel + assert '#include"kernel_c_launch.h"' in c_kernel + assert '#include"elemental_device_c.h"' in c_kernel + assert '#include"device_c.h"' in c_kernel + assert 'include"some_func_c.h"' in c_kernel + assert '__global__voidkernel_c' in c_kernel + assert 'jl=threadidx.x;' in c_kernel + assert 'b=blockidx.x;' in c_kernel + assert 'device_c(' in c_kernel + assert 'elemental_device_c(' in c_kernel + assert '=some_func_c(' in 
c_kernel + # kernel_c.h + assert '__global__voidkernel_c' in c_kernel_header + assert 'jl=threadidx.x;' not in c_kernel_header + assert 'b=blockidx.x;' not in c_kernel_header + # kernel_c_launch.h + assert 'extern"c"' in c_kernel_launch + assert 'voidkernel_c_launch(' in c_kernel_launch + assert 'structdim3blockdim;' in c_kernel_launch + assert 'structdim3griddim;' in c_kernel_launch + assert 'griddim=dim3(' in c_kernel_launch + assert 'blockdim=dim3(' in c_kernel_launch + assert 'kernel_c<<>>(' in c_kernel_launch + assert 'cudadevicesynchronize();' in c_kernel_launch + # device_c.c + assert '#include' in c_device + assert '#include' in c_device + assert '#include"device_c.h"' in c_device + assert '__device__voiddevice_c(' in c_device + # elemental_device_c.c + assert '#include' in c_elemental_device + assert '#include' in c_elemental_device + assert '#include"elemental_device_c.h"' in c_elemental_device + # some_func_c.c + assert 'doublesome_func_c(doublea)' in c_some_func + assert 'returnsome_func' in c_some_func + # some_func_c.h + assert 'doublesome_func_c(doublea);' in c_some_func_header diff --git a/loki/transformations/transpile/tests/test_transpile.py b/loki/transformations/transpile/tests/test_transpile.py index 03c083612..95f8336de 100644 --- a/loki/transformations/transpile/tests/test_transpile.py +++ b/loki/transformations/transpile/tests/test_transpile.py @@ -5,21 +5,38 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
-from pathlib import Path import pytest import numpy as np -from loki import Subroutine, Module, cgen, cppgen, cudagen, FindNodes, Dimension, Scheduler, read_file +from loki import Subroutine, Module, cgen, cppgen, cudagen, FindNodes from loki.build import jit_compile, jit_compile_lib, clean_test, Builder, Obj import loki.expression.symbols as sym from loki.frontend import available_frontends from loki import ir -from loki.transformations.transpile import FortranCTransformation -from loki.transformations.single_column import SCCLowLevelHoist, SCCLowLevelParametrise +from loki.transformations.transpile import ( + FortranCTransformation, FortranISOCWrapperTransformation +) # pylint: disable=too-many-lines + +def wrapperpath(path, module_or_routine): + """ + Utility that generates the ``_fc.F90`` path for Fortran wrappers + """ + name = f'{module_or_routine.name}_fc' + return (path/name.lower()).with_suffix('.F90') + + +def cpath(path, module_or_routine, suffix='.c'): + """ + Utility that generates the ``_c.h`` path for Fortran wrappers + """ + name = f'{module_or_routine.name}_c' + return (path/name.lower()).with_suffix(suffix) + + @pytest.fixture(scope='function', name='builder') def fixture_builder(tmp_path): yield Builder(source_dirs=tmp_path, build_dir=tmp_path) @@ -68,9 +85,9 @@ def convert_case(_str, case_sensitive): inline_call_assignment = ir.Assignment(lhs=routine.variable_map['a'], rhs=inline_call) routine.body = (routine.body, assignment, call, inline_call_assignment) - f2c = FortranCTransformation(language=language, use_c_ptr=language=='cuda') + f2c = FortranCTransformation(language=language) f2c.apply(source=routine, path=tmp_path) - ccode = f2c.c_path.read_text().replace(' ', '').replace('\n', ' ').replace('\r', '').replace('\t', '') + ccode = cpath(tmp_path, routine).read_text().replace(' ', '').replace('\n', ' ').replace('\r', '').replace('\t', '') assert convert_case('transpile_case_sensitivity_c(inta,intsOmE_vAr,intoTher_VaR)', case_sensitive) in 
ccode assert convert_case('a=threadIdx%x;', case_sensitive) in ccode assert convert_case('somE_cALl(a);', case_sensitive) in ccode @@ -124,14 +141,20 @@ def test_transpile_simple_loops(tmp_path, builder, frontend, use_c_ptr): [13., 23., 33., 43.]]) # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.simple_loops_fc_mod.simple_loops_fc # check the generated F2C wrapper - with open(f2c.wrapperpath, 'r') as f2c_f: + with open(wrapperpath(tmp_path, routine), 'r') as f2c_f: f2c_str = f2c_f.read().upper().replace(' ', '') if use_c_ptr: assert f2c_str.count('TARGET') == 2 @@ -217,14 +240,20 @@ def test_transpile_arguments(tmp_path, builder, frontend, use_c_ptr): assert a == 8 and np.isclose(b, 3.2) and np.isclose(c, 4.1) # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transpile_arguments_fc_mod.transpile_arguments_fc # check the 
generated F2C wrapper - with open(f2c.wrapperpath, 'r') as f2c_f: + with open(wrapperpath(tmp_path, routine), 'r') as f2c_f: f2c_str = f2c_f.read().upper().replace(' ', '') if use_c_ptr: assert f2c_str.count('TARGET') == 2 @@ -299,15 +328,16 @@ def test_transpile_derived_type(tmp_path, builder, frontend, use_c_ptr): assert a_struct.c == 12. # Translate the header module to expose parameters - mod2c = FortranCTransformation(use_c_ptr=use_c_ptr) - mod2c.apply(source=module, path=tmp_path, role='header') + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=module, path=tmp_path, role='header') # Create transformation object and apply - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path, role='kernel') + f2cwrap.apply(source=routine, path=tmp_path, role='kernel') # Build and wrap the cross-compiled library - sources = [module, f2c.wrapperpath, f2c.c_path] + sources = [module, wrapperpath(tmp_path, routine), cpath(tmp_path, routine)] libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' c_kernel = jit_compile_lib(sources=sources, path=tmp_path, name=libname, builder=builder) @@ -373,15 +403,16 @@ def test_transpile_associates(tmp_path, builder, frontend, use_c_ptr): assert a_struct.c == 24. 
# Translate the header module to expose parameters - mod2c = FortranCTransformation() - mod2c.apply(source=module, path=tmp_path, role='header') + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=module, path=tmp_path, role='header') # Create transformation object and apply - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path, role='kernel') + f2cwrap.apply(source=routine, path=tmp_path, role='kernel') # Build and wrap the cross-compiled library - sources = [module, f2c.wrapperpath, f2c.c_path] + sources = [module, wrapperpath(tmp_path, routine), cpath(tmp_path, routine)] libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' c_kernel = jit_compile_lib(sources=sources, path=tmp_path, name=libname, builder=builder) @@ -474,16 +505,20 @@ def test_transpile_module_variables(tmp_path, builder, frontend, use_c_ptr): assert a == 3 and b == 5. and c == 4. # Translate the header module to expose parameters - mod2c = FortranCTransformation(use_c_ptr=use_c_ptr) - mod2c.apply(source=module, path=tmp_path, role='header') + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=module, path=tmp_path, role='header') # Create transformation object and apply - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path, role='kernel') + f2cwrap.apply(source=routine, path=tmp_path, role='kernel') # Build and wrap the cross-compiled library - sources = [module, mod2c.wrapperpath, f2c.wrapperpath, f2c.c_path] - wrap = [tmp_path/'mod_var_type_mod.f90', f2c.wrapperpath.name] + sources = [ + module, wrapperpath(tmp_path, module), + wrapperpath(tmp_path, routine), cpath(tmp_path, routine) + ] + wrap = [tmp_path/'mod_var_type_mod.f90', wrapperpath(tmp_path, routine).name] libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' c_kernel = 
jit_compile_lib(sources=sources, wrap=wrap, path=tmp_path, name=libname, builder=builder) @@ -535,10 +570,16 @@ def test_transpile_vectorization(tmp_path, builder, frontend, use_c_ptr): assert v2[0] == 1. and np.all(v2[1:] == 4.) # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transp_vect_fc_mod.transp_vect_fc # Test the trnapiled C kernel @@ -586,10 +627,16 @@ def test_transpile_intrinsics(tmp_path, builder, frontend, use_c_ptr): assert vmin_nested == 1. and vmax_nested == 5. # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transpile_intrinsics_fc_mod.transpile_intrinsics_fc vmin, vmax, vabs, vmin_nested, vmax_nested = fc_function(v1, v2, v3, v4) @@ -650,10 +697,16 @@ def test_transpile_loop_indices(tmp_path, builder, frontend, use_c_ptr): assert mask3[-1] == 3. 
# Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transp_loop_ind_fc_mod.transp_loop_ind_fc mask1 = np.zeros(shape=(n,), order='F', dtype=np.int32) @@ -707,10 +760,16 @@ def test_transpile_logical_statements(tmp_path, builder, frontend, use_c_ptr): assert v_val[0] and not v_val[1] # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.logical_stmts_fc_mod.logical_stmts_fc for v1 in range(2): @@ -773,10 +832,16 @@ def test_transpile_multibody_conditionals(tmp_path, builder, frontend, use_c_ptr clean_test(filepath) # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" 
if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.multibody_cond_fc_mod.multibody_cond_fc out1, out2 = fc_function(5) @@ -840,10 +905,16 @@ def test_transpile_inline_elemental_functions(tmp_path, builder, frontend, use_c # Now transpile with supplied elementals but without module routine = Subroutine.from_source(fcode, definitions=module, frontend=frontend, xmods=[tmp_path]) - f2c = FortranCTransformation(inline_elementals=True, use_c_ptr=use_c_ptr) + f2c = FortranCTransformation(inline_elementals=True) f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_mod = c_kernel.inline_elemental_fc_mod v2, v3 = fc_mod.inline_elemental_fc(11.) 
@@ -913,10 +984,16 @@ def test_transpile_inline_elementals_recursive(tmp_path, builder, frontend, use_ # Now transpile with supplied elementals but without module routine = Subroutine.from_source(fcode, definitions=module, frontend=frontend, xmods=[tmp_path]) - f2c = FortranCTransformation(inline_elementals=True, use_c_ptr=use_c_ptr) + f2c = FortranCTransformation(inline_elementals=True) f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_mod = c_kernel.inline_elementals_rec_fc_mod v2, v3 = fc_mod.inline_elementals_rec_fc(10.) @@ -962,14 +1039,20 @@ def test_transpile_expressions(tmp_path, builder, frontend, use_c_ptr): assert np.all(vector == [i * scalar for i in range(1, n+1)]) # Generate and test the transpiled C kernel - f2c = FortranCTransformation(use_c_ptr=use_c_ptr) + f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(use_c_ptr=use_c_ptr) + f2cwrap.apply(source=routine, path=tmp_path) + libname = f'fc_{routine.name}{"_c_ptr" if use_c_ptr else ""}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transpile_expressions_fc_mod.transpile_expressions_fc # Make sure minus signs are represented correctly in the C code - ccode = f2c.c_path.read_text() + ccode = cpath(tmp_path, routine).read_text() # double minus due to index shift to 0 assert 
'vector[i - 1 - 1]' in ccode or 'vector[-1 + i - 1]' in ccode assert 'vector[i - 1]' in ccode @@ -983,11 +1066,10 @@ def test_transpile_expressions(tmp_path, builder, frontend, use_c_ptr): assert np.all(vector == [i * scalar for i in range(1, n+1)]) -@pytest.mark.parametrize('use_c_ptr', (False, True)) @pytest.mark.parametrize('frontend', available_frontends()) @pytest.mark.parametrize('language', ('c', 'cuda')) @pytest.mark.parametrize('chevron', (False, True)) -def test_transpile_call(tmp_path, frontend, use_c_ptr, language, chevron): +def test_transpile_call(tmp_path, frontend, language, chevron): fcode_module = """ module transpile_call_kernel_mod implicit none @@ -1022,11 +1104,11 @@ def test_transpile_call(tmp_path, frontend, use_c_ptr, language, chevron): if chevron: calls = FindNodes(ir.CallStatement).visit(routine.body) calls[0]._update(chevron=(sym.IntLiteral(1), sym.IntLiteral(1))) - f2c = FortranCTransformation(use_c_ptr=use_c_ptr, path=tmp_path, language=language) + f2c = FortranCTransformation(language=language) f2c.apply(source=module.subroutine_map['transpile_call_kernel'], path=tmp_path, role='kernel') - ccode_kernel = f2c.c_path.read_text().replace(' ', '').replace('\n', '') + ccode_kernel = cpath(tmp_path, module.routines[0]).read_text().replace(' ', '').replace('\n', '') f2c.apply(source=routine, path=tmp_path, role='kernel') - ccode_driver = f2c.c_path.read_text().replace(' ', '').replace('\n', '') + ccode_driver = cpath(tmp_path, routine).read_text().replace(' ', '').replace('\n', '') assert "int*a,intb,int*c" in ccode_kernel # check for applied Dereference @@ -1221,13 +1303,18 @@ def test_transpile_multiconditional_simple(tmp_path, builder, frontend, codegen) # apply F2C trafo f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation() + f2cwrap.apply(source=routine, path=tmp_path) # check whether 'switch' statement is within C code assert 'switch' in codegen(routine) # compile C 
version libname = f'fc_{routine.name}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.multi_cond_simple_fc_mod.multi_cond_simple_fc # test C version for i, val in enumerate(test_vals): @@ -1284,13 +1371,18 @@ def test_transpile_multiconditional(tmp_path, builder, frontend): # apply F2C trafo f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation() + f2cwrap.apply(source=routine, path=tmp_path) # check whether 'switch' statement is within C code assert 'switch' in cgen(routine) # compile C version libname = f'fc_{routine.name}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.multi_cond_fc_mod.multi_cond_fc # test C version for val in test_results: @@ -1354,9 +1446,11 @@ def init_var(dtype, val=0): # apply F2C trafo f2c = FortranCTransformation() f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation() + f2cwrap.apply(source=routine, path=tmp_path) # check whether correct modulo was inserted - ccode = Path(f2c.c_path).read_text() + ccode = cpath(tmp_path, routine).read_text() if dtype == 'integer' and not add_float: assert '%' in ccode if dtype == 'real' or add_float: @@ -1364,7 +1458,10 @@ def init_var(dtype, val=0): # compile C version libname = f'fc_{routine.name}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine)], + path=tmp_path, name=libname, 
builder=builder + ) fc_function = c_kernel.transpile_special_functions_fc_mod.transpile_special_functions_fc # test C version for i, val in enumerate(test_vals): @@ -1372,6 +1469,7 @@ def init_var(dtype, val=0): fc_function(in_var, out_var) assert int(out_var) == expected_results[i] + @pytest.mark.parametrize('frontend', available_frontends()) def test_transpile_interface_to_module(tmp_path, frontend): driver_fcode = """ @@ -1417,232 +1515,6 @@ def test_transpile_interface_to_module(tmp_path, frontend): assert imports[0].symbols == ('KERNEL_FC',) -@pytest.fixture(scope='module', name='horizontal') -def fixture_horizontal(): - return Dimension(name='horizontal', size='nlon', index='jl', bounds=('start', 'iend')) - - -@pytest.fixture(scope='module', name='vertical') -def fixture_vertical(): - return Dimension(name='vertical', size='nz', index='jk') - - -@pytest.fixture(scope='module', name='blocking') -def fixture_blocking(): - return Dimension(name='blocking', size='nb', index='b') - - -@pytest.fixture(scope='module', name='here') -def fixture_here(): - return Path(__file__).parent - - -@pytest.fixture(name='config') -def fixture_config(): - """ - Default configuration dict with basic options. 
- """ - return { - 'default': { - 'mode': 'idem', - 'role': 'kernel', - 'expand': True, - 'strict': False, # cudafor import - }, - 'routines': { - 'driver': {'role': 'driver'} - } - } - -def remove_whitespace_linebreaks(text): - return text.replace(' ', '').replace('\n', ' ').replace('\r', '').replace('\t', '').lower() - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_scc_cuda_parametrise(tmp_path, here, frontend, config, horizontal, vertical, blocking): - """ - Test SCC-CUF transformation type 0, thus including parametrising (array dimension(s)) - """ - - proj = here / '../../tests/sources/projSccCuf/module' - - scheduler = Scheduler(paths=[proj], config=config, seed_routines=['driver'], frontend=frontend, xmods=[tmp_path]) - - dic2p = {'nz': 137} - cuda_transform = SCCLowLevelParametrise( - horizontal=horizontal, vertical=vertical, block_dim=blocking, - transformation_type='parametrise', - dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True, - use_c_ptr=True, dic2p=dic2p, path=here, mode='cuda' - ) - scheduler.process(transformation=cuda_transform) - f2c_transformation = FortranCTransformation(path=tmp_path, language='cuda', use_c_ptr=True) - scheduler.process(transformation=f2c_transformation) - - kernel = scheduler['kernel_mod#kernel'].ir - kernel_variable_map = kernel.variable_map - assert kernel_variable_map[horizontal.index].type.intent is None - assert kernel_variable_map[horizontal.index].scope == kernel - device = scheduler['kernel_mod#device'].ir - device_variable_map = device.variable_map - assert device_variable_map[horizontal.index].type.intent.lower() == 'in' - assert device_variable_map[horizontal.index].scope == device - - fc_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_fc.F90')) - c_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.c')) - c_kernel_header = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.h')) - c_kernel_launch = 
remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c_launch.h')) - c_device = remove_whitespace_linebreaks(read_file(tmp_path/'device_c.c')) - c_elemental_device = remove_whitespace_linebreaks(read_file(tmp_path/'elemental_device_c.c')) - c_some_func = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.c')) - c_some_func_header = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.h')) - - calls = FindNodes(ir.CallStatement).visit(scheduler["driver_mod#driver"].ir.body) - assert len(calls) == 3 - for call in calls: - assert str(call.name).lower() == 'kernel' - assert call.pragma[0].keyword == 'loki' - assert 'removed_loop' in call.pragma[0].content - # kernel_fc.F90 - assert '!$acchost_datause_device(q,t,z)' in fc_kernel - assert 'kernel_iso_c(start,nlon,c_loc(q),c_loc(t),c_loc(z),nb,tot,iend)' in fc_kernel - assert 'bind(c,name="kernel_c_launch")' in fc_kernel - assert 'useiso_c_binding' in fc_kernel - # kernel_c.c - assert '#include' in c_kernel - assert '#include' in c_kernel - assert '#include"kernel_c.h"' in c_kernel - assert '#include"kernel_c_launch.h"' in c_kernel - assert 'include"elemental_device_c.h"' in c_kernel - assert 'include"device_c.h"' in c_kernel - assert 'include"some_func_c.h"' in c_kernel - assert '__global__voidkernel_c' in c_kernel - assert 'jl=threadidx.x;' in c_kernel - assert 'b=blockidx.x;' in c_kernel - assert 'device_c(' in c_kernel - assert 'elemental_device_c(' in c_kernel - assert '=some_func_c(' in c_kernel - # kernel_c.h - assert '__global__voidkernel_c' in c_kernel_header - assert 'jl=threadidx.x;' not in c_kernel_header - assert 'b=blockidx.x;' not in c_kernel_header - # kernel_c_launch.h - assert 'extern"c"' in c_kernel_launch - assert 'voidkernel_c_launch(' in c_kernel_launch - assert 'structdim3blockdim;' in c_kernel_launch - assert 'structdim3griddim;' in c_kernel_launch - assert 'griddim=dim3(' in c_kernel_launch - assert 'blockdim=dim3(' in c_kernel_launch - assert 'kernel_c<<>>(' in 
c_kernel_launch - assert 'cudadevicesynchronize();' in c_kernel_launch - # device_c.c - assert '#include' in c_device - assert '#include' in c_device - assert '#include"device_c.h"' in c_device - # elemental_device_c.c - assert '__device__voiddevice_c(' in c_device - assert '#include' in c_elemental_device - assert '#include' in c_elemental_device - assert '#include"elemental_device_c.h"' in c_elemental_device - # some_func_c.c - assert 'doublesome_func_c(doublea)' in c_some_func - assert 'returnsome_func' in c_some_func - # some_func_c.h - assert 'doublesome_func_c(doublea);' in c_some_func_header - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_scc_cuda_hoist(tmp_path, here, frontend, config, horizontal, vertical, blocking): - """ - Test SCC-CUF transformation type 0, thus including parametrising (array dimension(s)) - """ - - proj = here / '../../tests/sources/projSccCuf/module' - - scheduler = Scheduler(paths=[proj], config=config, seed_routines=['driver'], frontend=frontend, xmods=[tmp_path]) - - cuda_transform = SCCLowLevelHoist( - horizontal=horizontal, vertical=vertical, block_dim=blocking, - transformation_type='parametrise', - dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True, - use_c_ptr=True, path=here, mode='cuda' - ) - scheduler.process(transformation=cuda_transform) - f2c_transformation = FortranCTransformation(path=tmp_path, language='cuda', use_c_ptr=True) - scheduler.process(transformation=f2c_transformation) - - kernel = scheduler['kernel_mod#kernel'].ir - kernel_variable_map = kernel.variable_map - assert kernel_variable_map[horizontal.index].type.intent is None - assert kernel_variable_map[horizontal.index].scope == kernel - device = scheduler['kernel_mod#device'].ir - device_variable_map = device.variable_map - assert device_variable_map[horizontal.index].type.intent.lower() == 'in' - assert device_variable_map[horizontal.index].scope == device - - fc_kernel = 
remove_whitespace_linebreaks(read_file(tmp_path/'kernel_fc.F90')) - c_kernel = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.c')) - c_kernel_header = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c.h')) - c_kernel_launch = remove_whitespace_linebreaks(read_file(tmp_path/'kernel_c_launch.h')) - c_device = remove_whitespace_linebreaks(read_file(tmp_path/'device_c.c')) - c_elemental_device = remove_whitespace_linebreaks(read_file(tmp_path/'elemental_device_c.c')) - c_some_func = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.c')) - c_some_func_header = remove_whitespace_linebreaks(read_file(tmp_path/'some_func_c.h')) - - calls = FindNodes(ir.CallStatement).visit(scheduler["driver_mod#driver"].ir.body) - assert len(calls) == 3 - for call in calls: - assert str(call.name).lower() == 'kernel' - assert call.pragma[0].keyword == 'loki' - assert 'removed_loop' in call.pragma[0].content - # kernel_fc.F90 - assert '!$acchost_datause_device(q,t,z,local_z,device_local_x)' in fc_kernel - assert 'kernel_iso_c(start,nlon,nz,c_loc(q),c_loc(t),c_loc(z)' in fc_kernel - assert 'c_loc(z),nb,tot,iend,c_loc(local_z),c_loc(device_local_x))' in fc_kernel - assert 'bind(c,name="kernel_c_launch")' in fc_kernel - assert 'useiso_c_binding' in fc_kernel - # kernel_c.c - assert '#include' in c_kernel - assert '#include' in c_kernel - assert '#include"kernel_c.h"' in c_kernel - assert '#include"kernel_c_launch.h"' in c_kernel - assert '#include"elemental_device_c.h"' in c_kernel - assert '#include"device_c.h"' in c_kernel - assert 'include"some_func_c.h"' in c_kernel - assert '__global__voidkernel_c' in c_kernel - assert 'jl=threadidx.x;' in c_kernel - assert 'b=blockidx.x;' in c_kernel - assert 'device_c(' in c_kernel - assert 'elemental_device_c(' in c_kernel - assert '=some_func_c(' in c_kernel - # kernel_c.h - assert '__global__voidkernel_c' in c_kernel_header - assert 'jl=threadidx.x;' not in c_kernel_header - assert 'b=blockidx.x;' not in c_kernel_header 
- # kernel_c_launch.h - assert 'extern"c"' in c_kernel_launch - assert 'voidkernel_c_launch(' in c_kernel_launch - assert 'structdim3blockdim;' in c_kernel_launch - assert 'structdim3griddim;' in c_kernel_launch - assert 'griddim=dim3(' in c_kernel_launch - assert 'blockdim=dim3(' in c_kernel_launch - assert 'kernel_c<<>>(' in c_kernel_launch - assert 'cudadevicesynchronize();' in c_kernel_launch - # device_c.c - assert '#include' in c_device - assert '#include' in c_device - assert '#include"device_c.h"' in c_device - assert '__device__voiddevice_c(' in c_device - # elemental_device_c.c - assert '#include' in c_elemental_device - assert '#include' in c_elemental_device - assert '#include"elemental_device_c.h"' in c_elemental_device - # some_func_c.c - assert 'doublesome_func_c(doublea)' in c_some_func - assert 'returnsome_func' in c_some_func - # some_func_c.h - assert 'doublesome_func_c(doublea);' in c_some_func_header - - @pytest.mark.parametrize('frontend', available_frontends()) @pytest.mark.parametrize('language', ['c', 'cpp']) def test_transpile_optional_args(tmp_path, builder, frontend, language): @@ -1718,13 +1590,18 @@ def init_out_vars(): clean_test(filepath) - # transpile + # transpile f2c = FortranCTransformation(language=language) f2c.apply(source=routine, path=tmp_path) + f2cwrap = FortranISOCWrapperTransformation(language=language) + f2cwrap.apply(source=routine, path=tmp_path) # compile and testC/C++ version libname = f'fc_{routine.name}_{language}_{frontend}' - c_kernel = jit_compile_lib([f2c.wrapperpath, f2c.c_path], path=tmp_path, name=libname, builder=builder) + c_kernel = jit_compile_lib( + [wrapperpath(tmp_path, routine), cpath(tmp_path, routine, suffix=f'.{language}')], + path=tmp_path, name=libname, builder=builder + ) fc_function = c_kernel.transpile_optional_args_fc_mod.transpile_optional_args_fc if language != 'c': out_var, out_var2 = init_out_vars() diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 
8e9f747bb..77ec6a4fb 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -43,7 +43,9 @@ ExtractSCATransformation, CLAWTransformation, SCCVectorPipeline, SCCHoistPipeline, SCCStackPipeline, SCCRawStackPipeline, ) -from loki.transformations.transpile import FortranCTransformation +from loki.transformations.transpile import ( + FortranCTransformation, FortranISOCWrapperTransformation +) from loki.transformations.block_index_transformations import ( LowerBlockIndexTransformation, InjectBlockIndexTransformation, LowerBlockLoopTransformation @@ -417,14 +419,18 @@ def convert( mode = mode.replace('-', '_') # Sanitize mode string if mode in ['c', 'cuda_parametrise', 'cuda_hoist']: if mode == 'c': - f2c_transformation = FortranCTransformation(path=build) + f2c_transformation = FortranCTransformation() + f2c_wrapper = FortranISOCWrapperTransformation() elif mode in ['cuda_parametrise', 'cuda_hoist']: - f2c_transformation = FortranCTransformation(path=build, language='cuda', use_c_ptr=True) + f2c_transformation = FortranCTransformation(language='cuda') + f2c_wrapper = FortranISOCWrapperTransformation(language='cuda', use_c_ptr=True) else: assert False scheduler.process(f2c_transformation) + scheduler.process(f2c_wrapper) + build_args['output_dir'] = build for h in definitions: - f2c_transformation.apply(h, role='header') + f2c_wrapper.apply(h, role='header', build_args=build_args) # Housekeeping: Inject our re-named kernel and auto-wrapped it in a module dependency = DependencyTransformation(suffix='_FC', module_suffix='_MOD') scheduler.process(dependency)