diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b1208e667..e267a64bf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -47,8 +47,7 @@ jobs: - name: Testing run: | mkdir --parents src/api/python/tmp - PYTHONPATH="$PYTHONPATH:$PWD/src/" bin/run_tests - + LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH PATH=$PWD/bin:/usr/lib/llvm-10/bin:$PATH PYTHONPATH="$PYTHONPATH:$PWD/src/:/usr/lib/llvm-10/build/utils/lit/" bin/run_tests - name: "List generated files" run: | @@ -64,4 +63,4 @@ jobs: name: daphne path: | bin/ - lib/ \ No newline at end of file + lib/ diff --git a/.gitignore b/.gitignore index ca9e5dec4..7110d9658 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ build_*/ /lib /tmp +# runtime dump +**/*.ll + # documentation build output doc_build/ @@ -25,7 +28,26 @@ __pycache__/ .idea/ .clion.source.upload.marker +# local test/dev scripts tmpdaphne.daphne +*.daphne +*.mlir +*.log + +# tags file +tags +tags.lock +tags.temp + +# clangd cache +.cache/ + +# gdb +.gdb_history + +# compile commands +compile_commands.json + # release scripts output /artifacts @@ -36,3 +58,7 @@ profiler/ precompiled-dependencies/ /cmake*/ /data + +# Allow .daphne and .mlir files in test/ +!test/**/*.mlir +!test/**/*.daphne diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d4940066..51c895637 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,4 +182,5 @@ add_subdirectory(src/util) add_dependencies(CompilerUtils MLIRDaphneTransformsIncGen) +add_subdirectory(daphne-opt) add_subdirectory(test) diff --git a/UserConfig.json b/UserConfig.json index 8f8e73075..5443be282 100644 --- a/UserConfig.json +++ b/UserConfig.json @@ -3,6 +3,7 @@ "use_vectorized_exec": false, "use_obj_ref_mgnt": true, "cuda_fuse_any": false, + "use_mlir_codegen": false, "vectorized_single_queue": false, "debug_llvm": false, "explain_kernels": false, @@ -14,6 +15,7 @@ "explain_type_adaptation": false, "explain_vectorized": false, "explain_obj_ref_mgnt": false, + 
"explain_mlir_codegen": false, "taskPartitioningScheme": "STATIC", "numberOfThreads": -1, "minimumTaskSize": 1, diff --git a/containers/daphne.Dockerfile b/containers/daphne.Dockerfile index 28b138aa4..96ac02124 100644 --- a/containers/daphne.Dockerfile +++ b/containers/daphne.Dockerfile @@ -64,7 +64,7 @@ LABEL "org.opencontainers.image.version"="$TIMESTAMP" LABEL "org.opencontainers.image.created"="${CREATION_DATE}" LABEL "org.opencontainers.image.revision"="${GIT_HASH}" RUN apt-get -qq -y update && apt-get -y upgrade && apt-get -y --no-install-recommends install \ - libtinfo6 libssl1.1 zlib1g python3-numpy python3-pandas \ + libtinfo6 libssl1.1 zlib1g python3-numpy python3-pandas\ && apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=daphne-build $DAPHNE_DIR/bin/* /usr/local/bin COPY --from=daphne-build $DAPHNE_DIR/lib/* /usr/local/lib diff --git a/daphne-opt/CMakeLists.txt b/daphne-opt/CMakeLists.txt new file mode 100644 index 000000000..b89da923a --- /dev/null +++ b/daphne-opt/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright 2023 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) +set(LIBS + ${dialect_libs} + ${conversion_libs} + + MLIRDaphne + MLIRAnalysis + MLIRCallInterfaces + MLIRCastInterfaces + MLIRExecutionEngine + MLIRIR + # MLIRLLVMCommonConversion + MLIRLLVMToLLVMIRTranslation + # MLIRMemRefDialect + # MLIRLLVMDialect + MLIRParser + MLIRPass + MLIRSideEffectInterfaces + MLIRSupport + MLIRTargetLLVMIRExport + MLIRTransforms + MLIROptLib + ) +add_llvm_executable(daphne-opt daphne-opt.cpp) +set_target_properties(daphne-opt PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) + +llvm_update_compile_flags(daphne-opt) +target_link_libraries(daphne-opt PRIVATE ${LIBS}) + +mlir_check_all_link_libraries(daphne-opt) diff --git a/daphne-opt/daphne-opt.cpp b/daphne-opt/daphne-opt.cpp new file mode 100644 index 000000000..380f0b5cf --- /dev/null +++ b/daphne-opt/daphne-opt.cpp @@ -0,0 +1,62 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "daphne-opt.h" + +#include + +#include "ir/daphneir/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Passes.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/InitAllDialects.h" +#include "mlir/InitAllPasses.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" + +int main(int argc, char **argv) { + mlir::registerAllPasses(); + // NOTE: One can also register standalone passes here. + mlir::daphne::registerDaphnePasses(); + + mlir::DialectRegistry registry; + registry.insert(); + // Add the following to include *all* MLIR Core dialects, or selectively + // include what you need like above. You only need to register dialects that + // will be *parsed* by the tool, not the one generated + // registerAllDialects(registry); + + return mlir::asMainReturnCode(mlir::MlirOptMain( + argc, argv, "Standalone DAPHNE optimizing compiler driver\n", + registry)); +} diff --git a/daphne-opt/daphne-opt.h b/daphne-opt/daphne-opt.h new file mode 100644 index 000000000..3b0f77bea --- /dev/null +++ b/daphne-opt/daphne-opt.h @@ -0,0 +1,24 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DAPHNEOPT_DAPHNEOP_H +#define DAPHNEOPT_DAPHNEOP_H + +#include "mlir/IR/Dialect.h" + +#include "ir/daphneir/Daphne.h" + +#endif // DAPHNEOPT_DAPHNEOP_H diff --git a/doc/Codegen.md b/doc/Codegen.md new file mode 100644 index 000000000..8d690c1bc --- /dev/null +++ b/doc/Codegen.md @@ -0,0 +1,100 @@ +# Code Generation with MLIR + +This document describes the process of directly generating code with the MLIR +framework. + +## Motivation + +DAPHNE provides a kernel for (almost) every DaphneIR operation which reside in +`src/runtime/local/kernels/`. These are precompiled as a shared library and +linked during compile-time. Even though these kernels can be highly optimized +and thus achieve great runtime characteristics, they may not provide a desired +level of extensibility for custom value types. They may also be lacking +information only available at compile-time that could enable further +optimizations. Additionally, through the process of progressively lowering the +input IR, the code generation pipeline may enable more optimization +possibilities such as operator or loop fusion. + + +As an alternative way to implement our operators we provide the code generation +pipeline which progressively lowers the DaphneIR available after parsing the +DaphneDSL script to operations in either the same dialect or operations from +other dialects. 
With that, we can optionally replace certain kernels by +generating code directly, and also perform a hybrid compilation approach where +we mix kernel calls with code generation in order to exploit advantages of +both precompiled kernel libraries and code generation. Code generation passes +are found in `src/compiler/lowering/`. + + +## Guidelines + +Currently, the code generation pipeline is enabled with the CLI flag +`--mlir-codegen`. This adds the following passes that perform transformations and +lowerings: + +- [DenseMatrixOptPass](/src/compiler/lowering/DaphneOptPass.cpp) +- [MatMulOpLoweringPass](/src/compiler/lowering/MatMulOpLowering.cpp) +- [AggAllLoweringPass](/src/compiler/lowering/AggAllOpLowering.cpp) +- [MapOpLoweringPass](/src/compiler/lowering/MapOpLowering.cpp) +- InlinerPass +- [LowerEwOpPass](/src/compiler/lowering/EwOpsLowering.cpp) +- ConvertMathToLLVMPass +- [ModOpLoweringPass](/src/compiler/lowering/ModOpLowering.cpp) +- Canonicalizer +- CSE +- LoopFusion +- AffineScalarReplacement +- LowerAffinePass + +These passes are added in the `DaphneIrExecutor::buildCodegenPipeline` +function. The `--mlir-hybrid-codegen` flag disables the `MatMulOpLoweringPass` since the +kernel implementation vastly outperforms the generated code of this pass. + + +#### Runtime Interoperability + +Runtime interoperability with the `DenseMatrix` object is achieved with two +kernels in `src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h` and +`src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h` and the corresponding +DaphneOps `Daphne_ConvertMemRefToDenseMatrix` and +`Daphne_ConvertDenseMatrixToMemRef`. These kernels define how a MemRef is +passed to a kernel and how a kernel can return a `StridedMemRefType`. + + +#### Debugging + +In order to enable our debug `PrintIRPass` pass, one has to add `--explain +mlir_codegen` when running `daphne`. Additionally, it is recommended to use the +`daphne-opt` tool to test passes in isolation.
One just has to provide the +input IR for a pass to `daphne-opt` and the correct flag to run the pass (or +multiple passes) on the IR. `daphne-opt` provides all the functionality of the +`mlir-opt` tool. + +`daphne-opt --lower-ew --debug-only=dialect-conversion ew.mlir` performs the +`LowerEwOpPass` on the input file `ew.mlir` while providing dialect conversion +debug information. + + + +#### Testing + +To test the generated code, there currently are two different approaches. + +End-to-end tests can be found under `test/api/cli/codegen/` and are part of the +existing Catch2 test-suite with its own tag, `TAG_CODEGEN`. + +Additionally, there are tests that check the generated IR by running the +`llvm-lit`, `daphne-opt`, and `FileCheck` utilities. These tests reside under +`test/compiler/lowering/`. They are `.mlir` files containing the input IR of a +certain pass, or pass pipeline, and the `llvm-lit` directive at the top of the +file (`RUN:`). In that line we specify how `llvm-lit` executes the test. For example, +`// RUN: daphne-opt --lower-ew %s | FileCheck %s` means that `daphne-opt` is +called with the `--lower-ew` flag and the current file as input; the output of +that, in addition to the file itself, is piped to `FileCheck`. `FileCheck` uses +the comments in the `.mlir` file to check for certain conditions, e.g., `// +CHECK-NOT: daphne.ewAdd` looks through the IR and fails if `daphne.ewAdd` can be +found. These `llvm-lit` tests are all run by the `codegen` testcase in +`test/codegen/Codegen.cpp`. + + +All codegen tests can be executed by running `bin/run_tests '[codegen]'`.
diff --git a/doc/GettingStarted.md b/doc/GettingStarted.md index 98d02b0f0..20d072d05 100644 --- a/doc/GettingStarted.md +++ b/doc/GettingStarted.md @@ -42,29 +42,30 @@ launching DAPHNE via Docker (see below) should work the same way as in a native ### Software -| tool/lib | version known to work (*) | comment | -|--------------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| -| GCC/G++ | 9.3.0 | Last checked version: 12.2 | -| clang | 10.0.0 | | -| cmake | 3.20 | On Ubuntu 20.04, install by `sudo snap install cmake --classic` to fulfill the version requirement; `apt` provides only version 3.16.3. | -| git | 2.25.1 | | -| libssl-dev | 1.1.1 | Dependency introduced while optimizing grpc build (which used to build ssl unnecessarily) | -| libpfm4-dev | 4.10 | This dependency is needed for profiling support [DAPHNE-#479] | -| lld | 10.0.0 | | -| ninja | 1.10.0 | | -| pkg-config | 0.29.1 | | -| python3 | 3.8.5 | | -| numpy | 1.19.5 | | -| pandas | 0.25.3 | | -| java (e.g. 
openjdk) | 11 (1.7 should be fine) | | -| gfortran | 9.3.0 | | -| uuid-dev | | | -| wget | | Used to fetch additional dependencies and other artefacts | -| jq | | json commandline processor used in docker image generation scripts | -| *** | *** | *** | -| CUDA SDK | 11.7.1 | Optional for CUDA ops | -| OneAPI SDK | 2022.x | Optional for OneAPI ops | -| Intel FPGA SDK or OneAPI FPGA Add-On | 2022.x | Optional for FPGAOPENCL ops | +| tool/lib | version known to work (*) | comment | +|--------------------------------------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------| +| GCC/G++ | 9.3.0 | Last checked version: 12.2 | +| clang | 10.0.0 | | +| cmake | 3.20 | On Ubuntu 20.04, install by `sudo snap install cmake --classic` to fulfill the version requirement; `apt` provides only version 3.16.3. | +| git | 2.25.1 | | +| libssl-dev | 1.1.1 | Dependency introduced while optimizing grpc build (which used to build ssl unnecessarily) | +| libpfm4-dev | 4.10 | This dependency is needed for profiling support [DAPHNE-#479] | +| lld | 10.0.0 | | +| ninja | 1.10.0 | | +| pkg-config | 0.29.1 | | +| python3 | 3.8.5 | | +| numpy | 1.19.5 | | +| pandas | 0.25.3 | | +| java (e.g. openjdk) | 11 (1.7 should be fine) | | +| gfortran | 9.3.0 | | +| uuid-dev | | | +| llvm-10-tools | 10, 15 | On Ubuntu 22.04 you may need to install a newer `llvm-*-tools` version, such as `llvm-15-tools`. 
| +| wget | | Used to fetch additional dependencies and other artefacts | +| jq | | json commandline processor used in docker image generation scripts | +| *** | *** | *** | +| CUDA SDK | 11.7.1 | Optional for CUDA ops | +| OneAPI SDK | 2022.x | Optional for OneAPI ops | +| Intel FPGA SDK or OneAPI FPGA Add-On | 2022.x | Optional for FPGAOPENCL ops | ### Hardware diff --git a/install-ubuntu-packages.sh b/install-ubuntu-packages.sh index 8281493d9..6644005db 100644 --- a/install-ubuntu-packages.sh +++ b/install-ubuntu-packages.sh @@ -15,5 +15,6 @@ # limitations under the License. # This is a convenience script to install the required packages on Ubuntu 20+ systems to compile DAPHNE +# On Ubuntu 22.04 you may need to change the version of llvm-10-tools to a newer one, such as llvm-15-tools. sudo apt install build-essential clang cmake git libssl-dev libpfm4-dev lld ninja-build pkg-config python3-numpy \ - python3-pandas default-jdk-headless gfortran uuid-dev wget unzip jq + python3-pandas default-jdk-headless gfortran uuid-dev wget unzip jq llvm-10-tools diff --git a/src/api/cli/DaphneUserConfig.h b/src/api/cli/DaphneUserConfig.h index 92a5e6b23..3b7a2de93 100644 --- a/src/api/cli/DaphneUserConfig.h +++ b/src/api/cli/DaphneUserConfig.h @@ -42,6 +42,8 @@ struct DaphneUserConfig { bool use_obj_ref_mgnt = true; bool use_ipa_const_propa = true; bool use_phy_op_selection = true; + bool use_mlir_codegen = false; + bool use_mlir_hybrid_codegen = false; bool cuda_fuse_any = false; bool vectorized_single_queue = false; bool prePartitionRows = false; @@ -63,6 +65,8 @@ struct DaphneUserConfig { bool explain_type_adaptation = false; bool explain_vectorized = false; bool explain_obj_ref_mgnt = false; + bool explain_mlir_codegen = false; + SelfSchedulingScheme taskPartitioningScheme = STATIC; QueueTypeOption queueSetupScheme = CENTRALIZED; VictimSelectionLogic victimSelection = SEQPRI; diff --git a/src/api/internal/daphne_internal.cpp b/src/api/internal/daphne_internal.cpp index 
138012c79..5ba81007c 100644 --- a/src/api/internal/daphne_internal.cpp +++ b/src/api/internal/daphne_internal.cpp @@ -256,6 +256,14 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int "libdir", cat(daphneOptions), desc("The directory containing kernel libraries") ); + static opt mlirCodegen( + "mlir-codegen", cat(daphneOptions), + desc("Enables lowering of certain DaphneIR operations on DenseMatrix to low-level MLIR operations.") + ); + static opt performHybridCodegen( + "mlir-hybrid-codegen", cat(daphneOptions), + desc("Enables prototypical hybrid code generation combining pre-compiled kernels and MLIR code generation.") + ); enum ExplainArgs { kernels, @@ -268,7 +276,8 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int phy_op_selection, type_adaptation, vectorized, - obj_ref_mgnt + obj_ref_mgnt, + mlir_codegen }; static llvm::cl::list explainArgList( @@ -286,7 +295,8 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int clEnumVal(vectorized, "Show DaphneIR after vectorization"), clEnumVal(obj_ref_mgnt, "Show DaphneIR after managing object references"), clEnumVal(kernels, "Show DaphneIR after kernel lowering"), - clEnumVal(llvm, "Show DaphneIR after llvm lowering")), + clEnumVal(llvm, "Show DaphneIR after llvm lowering"), + clEnumVal(mlir_codegen, "Show DaphneIR after MLIR codegen")), CommaSeparated); static llvm::cl::list scriptArgs1( @@ -367,6 +377,9 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int user_config.use_obj_ref_mgnt = !noObjRefMgnt; user_config.use_ipa_const_propa = !noIPAConstPropa; user_config.use_phy_op_selection = !noPhyOpSelection; + user_config.use_mlir_codegen = mlirCodegen; + user_config.use_mlir_hybrid_codegen = performHybridCodegen; + if(!libDir.getValue().empty()) user_config.libdir = libDir.getValue(); user_config.library_paths.push_back(user_config.libdir + "/libAllKernels.so"); @@ -428,6 +441,9 @@ int 
startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int case obj_ref_mgnt: user_config.explain_obj_ref_mgnt = true; break; + case mlir_codegen: + user_config.explain_mlir_codegen = true; + break; } } diff --git a/src/compiler/execution/DaphneIrExecutor.cpp b/src/compiler/execution/DaphneIrExecutor.cpp index 2376ad20b..1c5ab19f5 100644 --- a/src/compiler/execution/DaphneIrExecutor.cpp +++ b/src/compiler/execution/DaphneIrExecutor.cpp @@ -14,234 +14,310 @@ * limitations under the License. */ +#include "DaphneIrExecutor.h" + #include #include -#include "DaphneIrExecutor.h" +#include +#include + +#include +#include +#include #include "llvm/Support/TargetSelect.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" +#include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Bufferization/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/BuiltinOps.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/Passes.h" -#include -#include #include "mlir/Support/LogicalResult.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Transforms/Passes.h" -#include -#include -#include - -DaphneIrExecutor::DaphneIrExecutor(bool selectMatrixRepresentations, DaphneUserConfig cfg) : userConfig_(std::move(cfg)), - 
selectMatrixRepresentations_(selectMatrixRepresentations) { +DaphneIrExecutor::DaphneIrExecutor(bool selectMatrixRepresentations, + DaphneUserConfig cfg) + : userConfig_(std::move(cfg)), + selectMatrixRepresentations_(selectMatrixRepresentations) { // register loggers - if(userConfig_.log_ptr != nullptr) - userConfig_.log_ptr->registerLoggers(); + if (userConfig_.log_ptr != nullptr) userConfig_.log_ptr->registerLoggers(); context_.getOrLoadDialect(); context_.getOrLoadDialect(); context_.getOrLoadDialect(); context_.getOrLoadDialect(); context_.getOrLoadDialect(); + context_.getOrLoadDialect(); + context_.getOrLoadDialect(); + context_.getOrLoadDialect(); + context_.getOrLoadDialect(); llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); } -bool DaphneIrExecutor::runPasses(mlir::ModuleOp module) -{ - // FIXME: operations in `template` functions (functions with unknown inputs) can't be verified +bool DaphneIrExecutor::runPasses(mlir::ModuleOp module) { + // FIXME: operations in `template` functions (functions with unknown inputs) + // can't be verified // as their type constraints are not met. - //if (failed(mlir::verify(module))) { - //module->emitError("failed to verify the module right after parsing"); - //return false; + // if (failed(mlir::verify(module))) { + // module->emitError("failed to verify the module right after parsing"); + // return false; //} - if (module) { - // This flag is really useful to figure out why the lowering failed - llvm::DebugFlag = userConfig_.debug_llvm; - { - mlir::PassManager pm(&context_); - // TODO Enable the verifier for all passes where it is possible. - // Originally, it was only turned off for the SpecializeGenericFunctionsPass. 
- pm.enableVerifier(false); - - if(userConfig_.explain_parsing) - pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing:")); - - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::createCSEPass()); - if(userConfig_.explain_parsing_simplified) - pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing and some simplifications:")); - - pm.addPass(mlir::daphne::createRewriteSqlOpPass()); // calls SQL Parser - if(userConfig_.explain_sql) - pm.addPass(mlir::daphne::createPrintIRPass("IR after SQL parsing:")); - - pm.addPass(mlir::daphne::createSpecializeGenericFunctionsPass(userConfig_)); - if(userConfig_.explain_property_inference) - pm.addPass(mlir::daphne::createPrintIRPass("IR after inference:")); - - if(failed(pm.run(module))) { - module->dump(); - module->emitError("module pass error"); - return false; - } - } + if (!module) return false; + + // This flag is really useful to figure out why the lowering failed + llvm::DebugFlag = userConfig_.debug_llvm; + { mlir::PassManager pm(&context_); + // TODO Enable the verifier for all passes where it is possible. + // Originally, it was only turned off for the + // SpecializeGenericFunctionsPass. + pm.enableVerifier(false); + + if (userConfig_.explain_parsing) + pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing:")); - // Note that property inference and canonicalization have already been done - // in the SpecializeGenericFunctionsPass, so actually, it's not necessary - // here anymore. - // TODO There is a cyclic dependency between (shape) inference and - // constant folding (included in canonicalization), at the moment we - // run only three iterations of both passes (see #173). 
- pm.addNestedPass(mlir::daphne::createInferencePass()); pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createCSEPass()); + if (userConfig_.explain_parsing_simplified) + pm.addPass(mlir::daphne::createPrintIRPass( + "IR after parsing and some simplifications:")); - if(selectMatrixRepresentations_) - pm.addNestedPass(mlir::daphne::createSelectMatrixRepresentationsPass()); - if(userConfig_.explain_select_matrix_repr) - pm.addPass(mlir::daphne::createPrintIRPass("IR after selecting matrix representations:")); + pm.addPass(mlir::daphne::createRewriteSqlOpPass()); // calls SQL Parser + if (userConfig_.explain_sql) + pm.addPass( + mlir::daphne::createPrintIRPass("IR after SQL parsing:")); - if(userConfig_.use_phy_op_selection) { - pm.addPass(mlir::daphne::createPhyOperatorSelectionPass()); - pm.addPass(mlir::createCSEPass()); + pm.addPass( + mlir::daphne::createSpecializeGenericFunctionsPass(userConfig_)); + if (userConfig_.explain_property_inference) + pm.addPass(mlir::daphne::createPrintIRPass("IR after inference:")); + + if (failed(pm.run(module))) { + module->dump(); + module->emitError("module pass error"); + return false; } - if(userConfig_.explain_phy_op_selection) - pm.addPass(mlir::daphne::createPrintIRPass("IR after selecting physical operators:")); + } - pm.addNestedPass(mlir::daphne::createAdaptTypesToKernelsPass()); - if(userConfig_.explain_type_adaptation) - pm.addPass(mlir::daphne::createPrintIRPass("IR after type adaptation:")); + mlir::PassManager pm(&context_); + // Note that property inference and canonicalization have already been done + // in the SpecializeGenericFunctionsPass, so actually, it's not necessary + // here anymore. + + // TODO There is a cyclic dependency between (shape) inference and + // constant folding (included in canonicalization), at the moment we + // run only three iterations of both passes (see #173). 
+ pm.addNestedPass(mlir::daphne::createInferencePass()); + pm.addPass(mlir::createCanonicalizerPass()); + + if (selectMatrixRepresentations_) + pm.addNestedPass( + mlir::daphne::createSelectMatrixRepresentationsPass()); + if (userConfig_.explain_select_matrix_repr) + pm.addPass(mlir::daphne::createPrintIRPass( + "IR after selecting matrix representations:")); + + if (userConfig_.use_phy_op_selection) { + pm.addPass(mlir::daphne::createPhyOperatorSelectionPass()); + pm.addPass(mlir::createCSEPass()); + } + if (userConfig_.explain_phy_op_selection) + pm.addPass(mlir::daphne::createPrintIRPass( + "IR after selecting physical operators:")); + + pm.addNestedPass( + mlir::daphne::createAdaptTypesToKernelsPass()); + if (userConfig_.explain_type_adaptation) + pm.addPass( + mlir::daphne::createPrintIRPass("IR after type adaptation:")); #if 0 - if (userConfig_.use_distributed) { - pm.addPass(mlir::daphne::createDistributeComputationsPass()); - //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution:")); - pm.addPass(mlir::createCSEPass()); - //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - CSE:")); - pm.addPass(mlir::createCanonicalizerPass()); - //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - canonicalization:")); - pm.addNestedPass(mlir::daphne::createWhileLoopInvariantCodeMotionPass()); - //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - WhileLICM:")); - } + if (userConfig_.use_distributed) { + pm.addPass(mlir::daphne::createDistributeComputationsPass()); + //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution")); + pm.addPass(mlir::createCSEPass()); + //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - CSE")); + pm.addPass(mlir::createCanonicalizerPass()); + //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - canonicalization")); + pm.addNestedPass(mlir::daphne::createWhileLoopInvariantCodeMotionPass()); + 
//pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - WhileLICM")); + } #endif - - // For now, in order to use the distributed runtime we also require the vectorized engine to be enabled - // to create pipelines. Therefore, *if* distributed runtime is enabled, we need to make a vectorization pass. - if(userConfig_.use_vectorized_exec || userConfig_.use_distributed) { - // TODO: add inference here if we have rewrites that could apply to vectorized pipelines due to smaller sizes - pm.addNestedPass(mlir::daphne::createVectorizeComputationsPass()); - pm.addPass(mlir::createCanonicalizerPass()); - } - if(userConfig_.explain_vectorized) - pm.addPass(mlir::daphne::createPrintIRPass("IR after vectorization:")); - - if (userConfig_.use_distributed) - pm.addPass(mlir::daphne::createDistributePipelinesPass()); - if (userConfig_.enable_profiling) - pm.addNestedPass(mlir::daphne::createProfilingPass()); + // For now, in order to use the distributed runtime we also require the + // vectorized engine to be enabled to create pipelines. Therefore, *if* + // distributed runtime is enabled, we need to make a vectorization pass. 
+ if (userConfig_.use_vectorized_exec || userConfig_.use_distributed) { + // TODO: add inference here if we have rewrites that could apply to + // vectorized pipelines due to smaller sizes + pm.addNestedPass( + mlir::daphne::createVectorizeComputationsPass()); + pm.addPass(mlir::createCanonicalizerPass()); + } + if (userConfig_.explain_vectorized) + pm.addPass(mlir::daphne::createPrintIRPass("IR after vectorization:")); - pm.addNestedPass(mlir::daphne::createInsertDaphneContextPass(userConfig_)); + if (userConfig_.use_distributed) + pm.addPass(mlir::daphne::createDistributePipelinesPass()); + + if (userConfig_.use_mlir_codegen || userConfig_.use_mlir_hybrid_codegen) buildCodegenPipeline(pm); + + if (userConfig_.enable_profiling) + pm.addNestedPass( + mlir::daphne::createProfilingPass()); + + pm.addNestedPass( + mlir::daphne::createInsertDaphneContextPass(userConfig_)); #ifdef USE_CUDA - if(userConfig_.use_cuda) - pm.addNestedPass(mlir::daphne::createMarkCUDAOpsPass(userConfig_)); + if (userConfig_.use_cuda) + pm.addNestedPass( + mlir::daphne::createMarkCUDAOpsPass(userConfig_)); #endif #ifdef USE_FPGAOPENCL - if(userConfig_.use_fpgaopencl) - pm.addNestedPass(mlir::daphne::createMarkFPGAOPENCLOpsPass(userConfig_)); + if (userConfig_.use_fpgaopencl) + pm.addNestedPass( + mlir::daphne::createMarkFPGAOPENCLOpsPass(userConfig_)); #endif - // Tidy up the IR before managing object reference counters with IncRefOp and DecRefOp. - // This is important, because otherwise, an SSA value whose references are managed could - // be cleared away by common subexpression elimination (CSE), while retaining its - // IncRefOps/DecRefOps, which could lead to double frees etc. - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::createCSEPass()); + // Tidy up the IR before managing object reference counters with IncRefOp + // and DecRefOp. 
This is important, because otherwise, an SSA value whose + // references are managed could be cleared away by common subexpression + // elimination (CSE), while retaining its IncRefOps/DecRefOps, which could + // lead to double frees etc. + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createCSEPass()); - if(userConfig_.use_obj_ref_mgnt) - pm.addNestedPass(mlir::daphne::createManageObjRefsPass()); - if(userConfig_.explain_obj_ref_mgnt) - pm.addPass(mlir::daphne::createPrintIRPass("IR after managing object references:")); + if (userConfig_.use_obj_ref_mgnt) + pm.addNestedPass( + mlir::daphne::createManageObjRefsPass()); + if (userConfig_.explain_obj_ref_mgnt) + pm.addPass(mlir::daphne::createPrintIRPass( + "IR after managing object references:")); - pm.addNestedPass(mlir::daphne::createRewriteToCallKernelOpPass()); - if(userConfig_.explain_kernels) - pm.addPass(mlir::daphne::createPrintIRPass("IR after kernel lowering:")); + pm.addNestedPass( + mlir::daphne::createRewriteToCallKernelOpPass()); + if (userConfig_.explain_kernels) + pm.addPass( + mlir::daphne::createPrintIRPass("IR after kernel lowering:")); - pm.addPass(mlir::createConvertSCFToCFPass()); - pm.addNestedPass(mlir::LLVM::createRequestCWrappersPass()); - pm.addPass(mlir::daphne::createLowerToLLVMPass(userConfig_)); - pm.addPass(mlir::createReconcileUnrealizedCastsPass()); - if(userConfig_.explain_llvm) - pm.addPass(mlir::daphne::createPrintIRPass("IR after llvm lowering:")); + pm.addPass(mlir::createConvertSCFToCFPass()); + pm.addNestedPass( + mlir::LLVM::createRequestCWrappersPass()); + pm.addPass(mlir::daphne::createLowerToLLVMPass(userConfig_)); + pm.addPass(mlir::createReconcileUnrealizedCastsPass()); + if (userConfig_.explain_llvm) + pm.addPass(mlir::daphne::createPrintIRPass("IR after llvm lowering:")); - if (failed(pm.run(module))) { - module->dump(); - module->emitError("module pass error"); - return false; - } - return true; + if (failed(pm.run(module))) { + module->dump(); + 
module->emitError("module pass error"); + return false; } - return false; + + return true; } -std::unique_ptr DaphneIrExecutor::createExecutionEngine(mlir::ModuleOp module) -{ - if (module) { - // An optimization pipeline to use within the execution engine. - auto optPipeline = mlir::makeOptimizingTransformer(0, 0, nullptr); - std::vector sharedLibRefs; - // This next line adds to our Linux platform lock-in - std::string daphne_executable_dir(std::filesystem::canonical("/proc/self/exe").parent_path()); - if(userConfig_.libdir.empty()) { - sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libAllKernels.so")); - sharedLibRefs.emplace_back(sharedLibRefPaths.back()); - } - else { - sharedLibRefs.insert(sharedLibRefs.end(), userConfig_.library_paths.begin(), userConfig_.library_paths.end()); - } +std::unique_ptr DaphneIrExecutor::createExecutionEngine( + mlir::ModuleOp module) { + if (!module) return nullptr; + // An optimization pipeline to use within the execution engine. 
+ unsigned optLevel = 0; + unsigned sizeLevel = 0; + llvm::TargetMachine *targetMachine = nullptr; + auto optPipeline = mlir::makeOptimizingTransformer(optLevel, sizeLevel, targetMachine); + std::vector sharedLibRefs; + // This next line adds to our Linux platform lock-in + std::string daphne_executable_dir( + std::filesystem::canonical("/proc/self/exe").parent_path()); + if (userConfig_.libdir.empty()) { + sharedLibRefPaths.push_back( + std::string(daphne_executable_dir + "/../lib/libAllKernels.so")); + sharedLibRefs.emplace_back(sharedLibRefPaths.back()); + } else { + sharedLibRefs.insert(sharedLibRefs.end(), + userConfig_.library_paths.begin(), + userConfig_.library_paths.end()); + } #ifdef USE_CUDA - if(userConfig_.use_cuda) { - sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libCUDAKernels.so")); - sharedLibRefs.emplace_back(sharedLibRefPaths.back()); - } + if (userConfig_.use_cuda) { + sharedLibRefPaths.push_back( + std::string(daphne_executable_dir + "/../lib/libCUDAKernels.so")); + sharedLibRefs.emplace_back(sharedLibRefPaths.back()); + } #endif - + #ifdef USE_FPGAOPENCL - if(userConfig_.use_fpgaopencl) { - sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libFPGAOPENCLKernels.so")); - sharedLibRefs.emplace_back(sharedLibRefPaths.back()); - } + if (userConfig_.use_fpgaopencl) { + sharedLibRefPaths.push_back(std::string( + daphne_executable_dir + "/../lib/libFPGAOPENCLKernels.so")); + sharedLibRefs.emplace_back(sharedLibRefPaths.back()); + } #endif - registerLLVMDialectTranslation(context_); - // module.dump(); - mlir::ExecutionEngineOptions options; - options.llvmModuleBuilder = nullptr; - options.transformer = optPipeline; - options.jitCodeGenOptLevel = llvm::CodeGenOpt::Level::Default; - options.sharedLibPaths = llvm::ArrayRef(sharedLibRefs); - options.enableObjectDump = true; - options.enableGDBNotificationListener = true; - options.enablePerfNotificationListener = true; - auto maybeEngine = 
mlir::ExecutionEngine::create(module, options); - - if (!maybeEngine) { - llvm::errs() << "Failed to create JIT-Execution engine: " - << maybeEngine.takeError(); - return nullptr; - } - return std::move(maybeEngine.get()); + registerLLVMDialectTranslation(context_); + // module.dump(); + mlir::ExecutionEngineOptions options; + options.llvmModuleBuilder = nullptr; + options.transformer = optPipeline; + options.jitCodeGenOptLevel = llvm::CodeGenOpt::Level::Default; + options.sharedLibPaths = llvm::ArrayRef(sharedLibRefs); + options.enableObjectDump = true; + options.enableGDBNotificationListener = true; + options.enablePerfNotificationListener = true; + auto maybeEngine = mlir::ExecutionEngine::create(module, options); + + if (!maybeEngine) { + llvm::errs() << "Failed to create JIT-Execution engine: " + << maybeEngine.takeError(); + return nullptr; } - return nullptr; + return std::move(maybeEngine.get()); +} + +void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) { + if (userConfig_.explain_mlir_codegen) + pm.addPass( + mlir::daphne::createPrintIRPass("IR before codegen pipeline")); + + pm.addPass(mlir::daphne::createDaphneOptPass()); + + if (!userConfig_.use_mlir_hybrid_codegen) { + pm.addPass(mlir::daphne::createMatMulOpLoweringPass()); + } + + pm.addPass(mlir::daphne::createAggAllOpLoweringPass()); + pm.addPass(mlir::daphne::createMapOpLoweringPass()); + pm.addPass(mlir::createInlinerPass()); + + pm.addPass(mlir::daphne::createEwOpLoweringPass()); + pm.addPass(mlir::createConvertMathToLLVMPass()); + pm.addPass(mlir::daphne::createModOpLoweringPass()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createCSEPass()); + pm.addNestedPass(mlir::createLoopFusionPass()); + pm.addNestedPass( + mlir::createAffineScalarReplacementPass()); + pm.addPass(mlir::createLowerAffinePass()); + + if (userConfig_.explain_mlir_codegen) + pm.addPass( + mlir::daphne::createPrintIRPass("IR after codegen pipeline")); } diff --git 
a/src/compiler/execution/DaphneIrExecutor.h b/src/compiler/execution/DaphneIrExecutor.h index 05d32d7b1..ef1c32d13 100644 --- a/src/compiler/execution/DaphneIrExecutor.h +++ b/src/compiler/execution/DaphneIrExecutor.h @@ -19,6 +19,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include +#include "mlir/Pass/PassManager.h" class DaphneIrExecutor { @@ -36,5 +37,7 @@ class DaphneIrExecutor bool selectMatrixRepresentations_; // Storage for lib paths needed for StringRefs std::vector sharedLibRefPaths; + + void buildCodegenPipeline(mlir::PassManager &); }; diff --git a/src/compiler/explanation/PrintIRPass.cpp b/src/compiler/explanation/PrintIRPass.cpp index 6dabf88d6..3adf1bf5b 100644 --- a/src/compiler/explanation/PrintIRPass.cpp +++ b/src/compiler/explanation/PrintIRPass.cpp @@ -17,34 +17,39 @@ #include #include -#include #include +#include using namespace mlir; /** * @brief A compiler pass that simply prints the IR. - * + * * Useful for manual testing and debugging, since this pass can easily be * integrated after any other pass to have a look at the IR. 
*/ class PrintIRPass : public PassWrapper> { - std::string message; - -public: - PrintIRPass(const std::string message) : message(message) { - // - } - + + public: + PrintIRPass(const std::string message) : message(message) {} + void runOnOperation() final; + + StringRef getArgument() const final { return "print-ir"; } + StringRef getDescription() const final { + return "Pass for debugging purposes, prints the IR at the current " + "stage in the compilation pipeline."; + } }; void PrintIRPass::runOnOperation() { std::cerr << message << std::endl; - + auto module = getOperation(); - module.dump(); + OpPrintingFlags flags = {}; + flags.enableDebugInfo(/*enable=*/false, /*prettyForm=*/false); + module.print(llvm::errs(), flags); } std::unique_ptr daphne::createPrintIRPass(const std::string message) { diff --git a/src/compiler/inference/AdaptTypesToKernelsPass.cpp b/src/compiler/inference/AdaptTypesToKernelsPass.cpp index 94a261866..22812ba9d 100644 --- a/src/compiler/inference/AdaptTypesToKernelsPass.cpp +++ b/src/compiler/inference/AdaptTypesToKernelsPass.cpp @@ -40,6 +40,10 @@ using namespace mlir; struct AdaptTypesToKernelsPass : public PassWrapper> { void runOnOperation() final; + StringRef getArgument() const final { return "adapt-types-to-kernels"; } + StringRef getDescription() const final { + return "TODO"; + } }; void AdaptTypesToKernelsPass::runOnOperation() diff --git a/src/compiler/inference/InferencePass.cpp b/src/compiler/inference/InferencePass.cpp index 414a2afb6..c0af79ac0 100644 --- a/src/compiler/inference/InferencePass.cpp +++ b/src/compiler/inference/InferencePass.cpp @@ -519,8 +519,11 @@ class InferencePass : public PassWrapper daphne::createInferencePass(daphne::InferenceConfig cfg) { return std::make_unique(cfg); -} \ No newline at end of file +} diff --git a/src/compiler/inference/SelectMatrixRepresentationsPass.cpp b/src/compiler/inference/SelectMatrixRepresentationsPass.cpp index 11f74280b..9049b0a9c 100644 --- 
a/src/compiler/inference/SelectMatrixRepresentationsPass.cpp +++ b/src/compiler/inference/SelectMatrixRepresentationsPass.cpp @@ -161,6 +161,9 @@ class SelectMatrixRepresentationsPass : public PassWrappergetOperandTypes())); } + StringRef getArgument() const final { return "select-matrix-representations"; } + StringRef getDescription() const final { return "TODO"; } + static bool returnsKnownProperties(Operation *op) { return llvm::any_of(op->getResultTypes(), [](Type rt) { if(auto mt = rt.dyn_cast()) @@ -172,4 +175,4 @@ class SelectMatrixRepresentationsPass : public PassWrapper daphne::createSelectMatrixRepresentationsPass() { return std::make_unique(); -} \ No newline at end of file +} diff --git a/src/compiler/lowering/AggAllOpLowering.cpp b/src/compiler/lowering/AggAllOpLowering.cpp new file mode 100644 index 000000000..f3f16f861 --- /dev/null +++ b/src/compiler/lowering/AggAllOpLowering.cpp @@ -0,0 +1,180 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/ArrayRef.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h" +#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/Transforms/FuncConversions.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/UseDefLists.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +class SumAllOpLowering : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + daphne::AllAggSumOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + mlir::daphne::MatrixType matrixType = + adaptor.getArg().getType().dyn_cast(); + + auto loc = op->getLoc(); + auto nR = matrixType.getNumRows(); + auto nC = matrixType.getNumCols(); + + auto matrixElementType = matrixType.getElementType(); + auto 
memRefType = mlir::MemRefType::get({nR, nC}, matrixElementType); + auto memRef = rewriter.create( + op->getLoc(), memRefType, adaptor.getArg()); + + Value sum = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + + SmallVector loopIvs; + SmallVector forOps; + auto outerLoop = + rewriter.create(loc, 0, nR, 1, ValueRange{sum}); + for (Operation &nested : *outerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(outerLoop.getInductionVar()); + // outer loop body + rewriter.setInsertionPointToStart(outerLoop.getBody()); + Value sum_iter = rewriter.create( + loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0)); + // inner loop + auto innerLoop = + rewriter.create(loc, 0, nC, 1, ValueRange{sum_iter}); + for (Operation &nested : *innerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(innerLoop.getInductionVar()); + // inner loop body + rewriter.setInsertionPointToStart(innerLoop.getBody()); + // load value from memref + auto elementLoad = + rewriter.create(loc, memRef, loopIvs); + // sum loop iter arg and memref value + mlir::Value inner_sum = rewriter.create( + loc, innerLoop.getRegionIterArgs()[0], elementLoad); + // yield inner loop result + rewriter.setInsertionPointToEnd(innerLoop.getBody()); + rewriter.create(loc, inner_sum); + // yield outer loop result + rewriter.setInsertionPointToEnd(outerLoop.getBody()); + mlir::Value outer_sum = rewriter.create( + loc, outerLoop.getRegionIterArgs()[0], innerLoop.getResult(0)); + rewriter.create(loc, outer_sum); + + rewriter.setInsertionPointAfter(outerLoop); + rewriter.create(loc, adaptor.getArg()); + // replace sumAll op with result of loops + rewriter.replaceOp(op, outerLoop.getResult(0)); + + return success(); + } +}; + +namespace { +/** + * @brief Lowers the daphne::AggAll operator to a set of affine loops and + * performs the aggregation on a MemRef which is created from the input + * DenseMatrix. 
+ * + * This rewrite may enable loop fusion of the produced affine loops by + * running the loop fusion pass. + */ +struct AggAllLoweringPass + : public mlir::PassWrapper> { + explicit AggAllLoweringPass() {} + + StringRef getArgument() const final { return "lower-agg"; } + StringRef getDescription() const final { + return "Lowers AggAll operators to a set of affine loops and performs " + "the aggregation on a MemRef which is created from the input " + "DenseMatrix."; + } + + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; +}; +} // end anonymous namespace + +void AggAllLoweringPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + LowerToLLVMOptions llvmOptions(&getContext()); + LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + + target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); + + target.addIllegalOp(); + + patterns.insert(&getContext()); + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { + signalPassFailure(); + } +} + +std::unique_ptr mlir::daphne::createAggAllOpLoweringPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/CMakeLists.txt b/src/compiler/lowering/CMakeLists.txt index 0484a8b5c..6b9ac25af 100644 --- a/src/compiler/lowering/CMakeLists.txt +++ b/src/compiler/lowering/CMakeLists.txt @@ -27,6 +27,12 @@ add_mlir_dialect_library(MLIRDaphneTransforms SpecializeGenericFunctionsPass.cpp VectorizeComputationsPass.cpp WhileLoopInvariantCodeMotionPass.cpp + DaphneOptPass.cpp + EwOpsLowering.cpp + ModOpLowering.cpp + MapOpLowering.cpp + MatMulOpLowering.cpp + AggAllOpLowering.cpp DEPENDS MLIRDaphneOpsIncGen @@ -35,9 +41,14 @@ 
add_mlir_dialect_library(MLIRDaphneTransforms LINK_COMPONENTS Core ) + target_link_libraries(MLIRDaphneTransforms PUBLIC CompilerUtils + MLIRSCFToControlFlow MLIRArithToLLVM + MLIRMemRefToLLVM + MLIRAffineToStandard + MLIRLinalgToStandard MLIRControlFlowToLLVM MLIRFuncToLLVM MLIRFuncTransforms diff --git a/src/compiler/lowering/DaphneOptPass.cpp b/src/compiler/lowering/DaphneOptPass.cpp new file mode 100644 index 000000000..8795962e2 --- /dev/null +++ b/src/compiler/lowering/DaphneOptPass.cpp @@ -0,0 +1,102 @@ +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "llvm/Support/Debug.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +#define DEBUG_TYPE "dm-opt" + +using namespace mlir; + +class IntegerModOpt : public mlir::OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + [[nodiscard]] static bool optimization_viable(mlir::daphne::EwModOp op) { + if (!op.getRhs().getType().isUnsignedInteger()) return false; + + std::pair isConstant = + CompilerUtils::isConstant(op.getRhs()); + // Apply (lhs % rhs) to (lhs & (rhs - 1)) optimization when rhs is a power of two + return isConstant.first && (isConstant.second & (isConstant.second - 1)) == 0; + } + + mlir::LogicalResult matchAndRewrite( + mlir::daphne::EwModOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value cst_one = rewriter.create( + op.getLoc(), static_cast(1)); + mlir::Value sub = rewriter.create( + 
op.getLoc(), adaptor.getRhs(), cst_one); + mlir::Value andOp = rewriter.create( + op.getLoc(), adaptor.getLhs(), sub); + rewriter.replaceOp(op, andOp); + return success(); + } +}; + +namespace { +/** + * @brief This pass transforms operations (currently limited to the EwModOp) in + * the DaphneDialect to a different set of operations also from the + * DaphneDialect. + */ +struct DenseMatrixOptPass + : public mlir::PassWrapper> { + explicit DenseMatrixOptPass() {} + + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + StringRef getArgument() const final { return "opt-daphne"; } + StringRef getDescription() const final { + return "Performs optimizations on the DaphneIR by transforming " + "operations in the DaphneDialect to a set of other operation " + "also from the DaphneDialect."; + } +}; +} // end anonymous namespace + +void DenseMatrixOptPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + mlir::LowerToLLVMOptions llvmOptions(&getContext()); + mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + typeConverter.addConversion([](Type type) { return type; }); + + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + + target.addDynamicallyLegalOp( + [&](mlir::daphne::EwModOp op) { + return !IntegerModOpt::optimization_viable(op); + }); + + patterns.insert(typeConverter, &getContext()); + + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { + signalPassFailure(); + } +} + +std::unique_ptr mlir::daphne::createDaphneOptPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/DistributeComputationsPass.cpp b/src/compiler/lowering/DistributeComputationsPass.cpp index d57a00a62..09b57a9a2 100644 --- a/src/compiler/lowering/DistributeComputationsPass.cpp +++ 
b/src/compiler/lowering/DistributeComputationsPass.cpp @@ -73,6 +73,9 @@ struct DistributeComputationsPass : public PassWrapper> { void runOnOperation() final; + + StringRef getArgument() const final { return "distribute-computation"; } + StringRef getDescription() const final { return "TODO"; } }; } diff --git a/src/compiler/lowering/DistributePipelinesPass.cpp b/src/compiler/lowering/DistributePipelinesPass.cpp index ae4ce4698..d4ea14468 100644 --- a/src/compiler/lowering/DistributePipelinesPass.cpp +++ b/src/compiler/lowering/DistributePipelinesPass.cpp @@ -67,6 +67,9 @@ struct DistributePipelinesPass : public PassWrapper> { void runOnOperation() final; + + StringRef getArgument() const final { return "distribute-pipelines"; } + StringRef getDescription() const final { return "TODO"; } }; void DistributePipelinesPass::runOnOperation() diff --git a/src/compiler/lowering/EwOpsLowering.cpp b/src/compiler/lowering/EwOpsLowering.cpp new file mode 100644 index 000000000..d892fdfe8 --- /dev/null +++ b/src/compiler/lowering/EwOpsLowering.cpp @@ -0,0 +1,344 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/UseDefLists.h" +#include "mlir/IR/Value.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +template +struct UnaryOpLowering : public mlir::OpConversionPattern { + using OpAdaptor = typename mlir::OpConversionPattern::OpAdaptor; + + public: + UnaryOpLowering(mlir::TypeConverter &typeConverter, mlir::MLIRContext *ctx) + : mlir::OpConversionPattern(typeConverter, ctx) { + this->setDebugName("EwDaphneOpsLowering"); + } + + mlir::LogicalResult matchAndRewrite( + UnaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type type = op.getType(); + + if (type.isa()) { + rewriter.replaceOpWithNewOp(op.getOperation(), + adaptor.getOperands()); + } else if (type.isa()) { + rewriter.replaceOpWithNewOp(op.getOperation(), + adaptor.getOperands()); + } else { + return mlir::failure(); + } + return mlir::success(); + } +}; + +template +class BinaryOpLowering final : public mlir::OpConversionPattern { + using OpAdaptor = typename mlir::OpConversionPattern::OpAdaptor; + + public: + BinaryOpLowering(mlir::TypeConverter &typeConverter, mlir::MLIRContext *ctx) + : 
mlir::OpConversionPattern(typeConverter, ctx) { + this->setDebugName("EwDaphneOpLowering"); + } + + mlir::LogicalResult convertEwScalar( + BinaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto lhs = adaptor.getLhs(); + auto rhs = adaptor.getRhs(); + auto loc = op.getLoc(); + + if (lhs.getType().template isa() && + rhs.getType().template isa()) { + rewriter.replaceOpWithNewOp(op.getOperation(), + adaptor.getOperands()); + return mlir::success(); + } + + Value castedLhs = this->typeConverter->materializeTargetConversion( + rewriter, loc, + rewriter.getIntegerType( + adaptor.getRhs().getType().getIntOrFloatBitWidth()), + ValueRange{adaptor.getLhs()}); + + Value castedRhs = this->typeConverter->materializeTargetConversion( + rewriter, loc, + rewriter.getIntegerType( + adaptor.getRhs().getType().getIntOrFloatBitWidth()), + ValueRange{adaptor.getRhs()}); + + Value binaryOp = rewriter.create(loc, castedLhs, castedRhs); + + Value res = this->typeConverter->materializeSourceConversion( + rewriter, loc, lhs.getType(), ValueRange{binaryOp}); + + rewriter.replaceOp(op, res); + return mlir::success(); + } + + mlir::LogicalResult matchAndRewrite( + BinaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto lhs = adaptor.getLhs(); + auto rhs = adaptor.getRhs(); + + // no matrix + if (!lhs.getType().template isa() && + !rhs.getType().template isa()) + return convertEwScalar(op, adaptor, rewriter); + + // for now assume matrix is LHS and RHS is non matrix + mlir::daphne::MatrixType lhsMatrixType = + adaptor.getLhs() + .getType() + .template dyn_cast(); + auto matrixElementType = lhsMatrixType.getElementType(); + auto lhsRows = lhsMatrixType.getNumRows(); + auto lhsCols = lhsMatrixType.getNumCols(); + auto lhsMemRefType = + mlir::MemRefType::get({lhsRows, lhsCols}, matrixElementType); + + mlir::Type elementType{}; + mlir::Value memRefLhs = + rewriter.create( + op->getLoc(), lhsMemRefType, 
adaptor.getLhs()); + + mlir::Value memRefRhs{}; + bool isMatrixMatrix = + rhs.getType().template isa(); + + if (isMatrixMatrix) { + memRefRhs = + rewriter.create( + op->getLoc(), lhsMemRefType, adaptor.getRhs()); + elementType = lhsMemRefType.getElementType(); + } else { + elementType = rhs.getType(); + } + + mlir::Value outputMemRef = + insertMemRefAlloc(lhsMemRefType, op->getLoc(), rewriter); + + SmallVector lowerBounds(/*Rank=*/2, /*Value=*/0); + SmallVector steps(/*Rank=*/2, /*Value=*/1); + buildAffineLoopNest( + rewriter, op.getLoc(), lowerBounds, + {lhsMatrixType.getNumRows(), lhsMatrixType.getNumCols()}, steps, + [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) { + mlir::Value loadLhs = + nestedBuilder.create(loc, memRefLhs, ivs); + mlir::Value binaryOp{}; + + if (adaptor.getRhs() + .getType() + .template isa()) { + binaryOp = nestedBuilder.create(loc, loadLhs, + adaptor.getRhs()); + + nestedBuilder.create(loc, binaryOp, + outputMemRef, ivs); + return; + } + + mlir::Value rhs{}; + if (isMatrixMatrix) + rhs = + nestedBuilder.create(loc, memRefRhs, ivs); + else + rhs = adaptor.getRhs(); + + // is integer + if (elementType.isInteger( + elementType.getIntOrFloatBitWidth())) { + Value castedLhs = + this->typeConverter->materializeTargetConversion( + nestedBuilder, loc, + nestedBuilder.getIntegerType( + lhsMemRefType.getElementTypeBitWidth()), + ValueRange{loadLhs}); + + Value castedRhs = + this->typeConverter->materializeTargetConversion( + nestedBuilder, loc, + nestedBuilder.getIntegerType( + lhsMemRefType.getElementTypeBitWidth()), + ValueRange{rhs}); + + binaryOp = + nestedBuilder.create(loc, castedLhs, castedRhs); + Value castedRes = + this->typeConverter->materializeSourceConversion( + nestedBuilder, loc, elementType, + ValueRange{binaryOp}); + nestedBuilder.create(loc, castedRes, + outputMemRef, ivs); + } else { + // is float + binaryOp = nestedBuilder.create(loc, loadLhs, rhs); + nestedBuilder.create(loc, binaryOp, + outputMemRef, ivs); + } + 
}); + mlir::Value output = convertMemRefToDenseMatrix( + op->getLoc(), rewriter, outputMemRef, op.getType()); + + rewriter.replaceOp(op, output); + return mlir::success(); + } +}; + +// clang-format off +// math::sqrt only supports floating point, DAPHNE promotes argument type of sqrt to f32/64 +using SqrtOpLowering = UnaryOpLowering; +using AbsOpLowering = UnaryOpLowering; +using AddOpLowering = BinaryOpLowering; +using SubOpLowering = BinaryOpLowering; +using MulOpLowering = BinaryOpLowering; +using DivOpLowering = BinaryOpLowering; +using PowOpLowering = BinaryOpLowering; +// clang-format on + +namespace { +/** + * @brief This pass lowers element-wise operations to affine loop + * structures and arithmetic operations. + * + * This rewrite may enable loop fusion of the produced affine loops by + * running the loop fusion pass. + */ +struct EwOpLoweringPass + : public mlir::PassWrapper> { + explicit EwOpLoweringPass() {} + + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + StringRef getArgument() const final { return "lower-ew"; } + StringRef getDescription() const final { + return "This pass lowers element-wise operations to affine-loop " + "structures and arithmetic operations."; + } +}; +} // end anonymous namespace + +void populateLowerEwOpConversionPatterns(mlir::LLVMTypeConverter &typeConverter, + mlir::RewritePatternSet &patterns) { + // clang-format off + patterns.insert< + AddOpLowering, + SubOpLowering, + MulOpLowering, + SqrtOpLowering, + AbsOpLowering, + DivOpLowering, + PowOpLowering>(typeConverter, patterns.getContext()); + // clang-format on +} + +void EwOpLoweringPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + mlir::LowerToLLVMOptions llvmOptions(&getContext()); + mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + typeConverter.addConversion(convertInteger); + 
typeConverter.addConversion(convertFloat); + typeConverter.addConversion([](Type type) { return type; }); + typeConverter.addArgumentMaterialization(materializeCastFromIllegal); + typeConverter.addSourceMaterialization(materializeCastToIllegal); + typeConverter.addTargetMaterialization(materializeCastFromIllegal); + + target.addLegalDialect(); + + target.addDynamicallyLegalOp( + [](Operation *op) { + return op->getOperandTypes()[0].isa(); + }); + + target.addDynamicallyLegalOp([](Operation *op) { + if (op->getOperandTypes()[0].isa() && + op->getOperandTypes()[1].isa()) { + mlir::daphne::MatrixType lhs = + op->getOperandTypes()[0] + .template dyn_cast(); + mlir::daphne::MatrixType rhs = + op->getOperandTypes()[1] + .template dyn_cast(); + if (lhs.getNumRows() != rhs.getNumRows() || + lhs.getNumCols() != rhs.getNumCols() || + lhs.getNumRows() == -1 || lhs.getNumCols() == -1) + return true; + + return false; + } + + if (op->getOperandTypes()[0].isa()) { + mlir::daphne::MatrixType lhsMatrixType = + op->getOperandTypes()[0].dyn_cast(); + return lhsMatrixType.getNumRows() == -1 || lhsMatrixType.getNumCols() == -1; + } + + return false; + }); + + populateLowerEwOpConversionPatterns(typeConverter, patterns); + + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} + +std::unique_ptr mlir::daphne::createEwOpLoweringPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/LowerToLLVMPass.cpp b/src/compiler/lowering/LowerToLLVMPass.cpp index 6baa7e4ce..6fd9c975e 100644 --- a/src/compiler/lowering/LowerToLLVMPass.cpp +++ b/src/compiler/lowering/LowerToLLVMPass.cpp @@ -18,16 +18,23 @@ #include "ir/daphneir/Passes.h" #include "compiler/utils/CompilerUtils.h" +#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" + +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include 
"mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Transforms/DialectConversion.h" #include @@ -41,35 +48,6 @@ using namespace mlir; // be combined into a single variadic result. const std::string ATTR_HASVARIADICRESULTS = "hasVariadicResults"; -#if 0 -// At the moment, all of these operations are lowered to kernel calls. -template -struct BinaryOpLowering : public OpConversionPattern -{ - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(BinaryOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override - { - Type type = op.getType(); - if (type.isa()) { - rewriter.replaceOpWithNewOp(op.getOperation(), adaptor.getOperands()); - } - else if (type.isa()) { - rewriter.replaceOpWithNewOp(op.getOperation(), adaptor.getOperands()); - } - else { - return failure(); - } - return success(); - } -}; -using AddOpLowering = BinaryOpLowering; -using SubOpLowering = BinaryOpLowering; -using MulOpLowering = BinaryOpLowering; -#endif - struct ReturnOpLowering : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -308,16 +286,18 @@ class CallKernelOpLowering : public OpConversionPattern auto loc = op.getLoc(); auto inputOutputTypes = getLLVMInputOutputTypes( - loc, rewriter.getContext(), typeConverter, - op.getResultTypes(), 
ValueRange(adaptor.getOperands()).getTypes(), - hasVarRes, rewriter.getIndexType()); + loc, rewriter.getContext(), typeConverter, op.getResultTypes(), + ValueRange(adaptor.getOperands()).getTypes(), hasVarRes, + rewriter.getIndexType()); // create function protoype and get `FlatSymbolRefAttr` to it auto kernelRef = getOrInsertFunctionAttr( - rewriter, module, op.getCalleeAttr().getValue(), - getKernelFuncSignature(rewriter.getContext(), inputOutputTypes)); + rewriter, module, op.getCalleeAttr().getValue(), + getKernelFuncSignature(rewriter.getContext(), inputOutputTypes)); - auto kernelOperands = allocOutputReferences(loc, rewriter, adaptor.getOperands(), inputOutputTypes, op->getNumResults(), hasVarRes); + auto kernelOperands = allocOutputReferences( + loc, rewriter, adaptor.getOperands(), inputOutputTypes, + op->getNumResults(), hasVarRes); // call function // The kernel call has an empty list of return types, because our @@ -934,6 +914,7 @@ void DaphneLowerToLLVMPass::runOnOperation() RewritePatternSet patterns(&getContext()); LowerToLLVMOptions llvmOptions(&getContext()); + // llvmOptions.useBarePtrCallConv = true; LLVMTypeConverter typeConverter(&getContext(), llvmOptions); typeConverter.addConversion([&](daphne::MatrixType t) { @@ -985,9 +966,13 @@ void DaphneLowerToLLVMPass::runOnOperation() LLVMConversionTarget target(getContext()); // populate dialect conversions - arith::populateArithToLLVMConversionPatterns(typeConverter, patterns); - populateFuncToLLVMConversionPatterns(typeConverter, patterns); + mlir::linalg::populateLinalgToStandardConversionPatterns(patterns); + populateAffineToStdConversionPatterns(patterns); + populateSCFToControlFlowConversionPatterns(patterns); + mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, patterns); + populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns); cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns); + populateFuncToLLVMConversionPatterns(typeConverter, 
patterns); populateReturnOpTypeConversionPattern(patterns, typeConverter); target.addLegalOp(); diff --git a/src/compiler/lowering/ManageObjRefsPass.cpp b/src/compiler/lowering/ManageObjRefsPass.cpp index b819912b6..90120163f 100644 --- a/src/compiler/lowering/ManageObjRefsPass.cpp +++ b/src/compiler/lowering/ManageObjRefsPass.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -26,11 +27,11 @@ using namespace mlir; /** * @brief Inserts DaphneIR operations for managing the reference counters of * runtime data objects. - * + * * Thus, it takes care of freeing data objects (e.g., intermediate results) at * the right points. The operations employed for reference management are * `IncRefOp` and `DecRefOp`. - * + * * The core ideas are: * - We decrease the reference counter of each SSA value (block argument or * op result) to prevent memory leaks. @@ -48,12 +49,23 @@ struct ManageObjRefsPass : public PassWrapper(builder.getUnknownLoc(), + v.getDefiningOp()->getOperand(0)); +} + /** * @brief Inserts a `DecRefOp` in the right place, to decrease the reference * counter of the given value. - * + * * @param builder * @param v */ @@ -62,21 +74,22 @@ void processValue(OpBuilder builder, Value v) { // removed soon anyway). // We only need to manage the reference counters of DAPHNE data objects // like matrices and frames (not of scalars). + + Operation* defOp = v.getDefiningOp(); + if (defOp && llvm::isa(defOp)) + processMemRefInterop(builder, v); + if(!v.getType().isa()) return; - - Operation * defOp = v.getDefiningOp(); - Operation * decRefAfterOp = nullptr; - if(v.use_empty()) { + Operation* decRefAfterOp = nullptr; + if (v.use_empty()) { // If the given SSA value has no uses, we want to decrease its // reference counter directly after its definition (nullptr for block // args). Note that ideally, there should be no unused SSA values. 
- if(defOp) - decRefAfterOp = defOp; + if (defOp) decRefAfterOp = defOp; // else: decRefAfterOp stays nullptr - } - else { + } else { // If the given SSA value has uses, we need to find the last of them. // Note that the iterator over the uses provided by the value does not // seem to follow any useful order, in general, so we need to find out @@ -85,26 +98,15 @@ void processValue(OpBuilder builder, Value v) { // value in the block where the value was defined, to simplify things. // So if the user of the value is in a descendant block, we need to // find its parent op in the block where the given value is defined. - Operation * lastUseOp = nullptr; - // TODO What about Block::findAncestorInBlock()? - for(OpOperand & use : v.getUses()) { - Operation * thisUseOp = use.getOwner(); - // Find parent op in the block where v is defined. - while(thisUseOp->getBlock() != v.getParentBlock()) - thisUseOp = thisUseOp->getParentOp(); - // Determine if this is a later use. - if(!lastUseOp || lastUseOp->isBeforeInBlock(thisUseOp)) - lastUseOp = thisUseOp; - } - decRefAfterOp = lastUseOp; + decRefAfterOp = findLastUseOfSSAValue(v); } // At this point, decRefAfterOp is nullptr, or the last user of v, or the // defining op of v. - + if(decRefAfterOp) { // The given value is used and/or an OpResult. - + // Don't insert a DecRefOp if the last user is a terminator. if(decRefAfterOp->hasTrait()) // The value is handed out of its block (e.g., return, yield, ...). @@ -116,7 +118,7 @@ void processValue(OpBuilder builder, Value v) { // runtime is on the main branch. // Don't insert a DecRefOp if there is already one. Currently, this can // happen only on the distributed worker, since the IR it gets already - // contains + // contains if(isa(decRefAfterOp)) return; @@ -136,7 +138,7 @@ void processValue(OpBuilder builder, Value v) { else builder.setInsertionPointToStart(pb); } - + // Finally create the DecRefOp. 
builder.create(builder.getUnknownLoc(), v); } @@ -144,9 +146,9 @@ void processValue(OpBuilder builder, Value v) { /** * @brief Inserts an `IncRefOp` for the given value if its type is a DAPHNE * data type (matrix, frame). - * + * * If the type is unknown, throw an exception. - * + * * @param v * @param b */ @@ -164,7 +166,7 @@ void incRefIfObj(Value v, OpBuilder & b) { /** * @brief Inserts an `IncRefOp` for each operand of the given operation whose * type is a DAPHNE data type (matrix, frame), right before the operation. - * + * * @param op * @param b */ @@ -177,7 +179,7 @@ void incRefArgs(Operation& op, OpBuilder & b) { /** * @brief Manages the reference counters of all values defined in the given * block by inserting `IncRefOp` and `DecRefOp` in the right places. - * + * * @param builder * @param b */ @@ -185,14 +187,14 @@ void processBlock(OpBuilder builder, Block * b) { // Make sure that the reference counters of block arguments are decreased. for(BlockArgument& arg : b->getArguments()) processValue(builder, arg); - + // Make sure the the reference counters of op results are decreased, and // Increase the reference counters of operands where necessary. for(Operation& op : b->getOperations()) { // 1) Increase the reference counters of operands, if necessary. // TODO We could use traits to identify those cases. - + // Casts that will not call a kernel. if(auto co = dyn_cast(op)) { if(co.isTrivialCast() || co.isRemovePropertyCast()) @@ -228,13 +230,13 @@ void processBlock(OpBuilder builder, Block * b) { // Note: We do not increase the reference counters of the arguments // of vectorized pipelines, because internally, a pipeline processes // views into its inputs. These are individual data objects. - - + + // 2) Make sure the the reference counters of op results are decreased. for(Value v : op.getResults()) processValue(builder, v); - - + + // 3) Recurse into the op, if it has regions. 
for(Region& r : op.getRegions()) for(Block& b2 : r.getBlocks()) @@ -252,4 +254,4 @@ void ManageObjRefsPass::runOnOperation() std::unique_ptr daphne::createManageObjRefsPass() { return std::make_unique(); -} \ No newline at end of file +} diff --git a/src/compiler/lowering/MapOpLowering.cpp b/src/compiler/lowering/MapOpLowering.cpp new file mode 100644 index 000000000..27fff5dcc --- /dev/null +++ b/src/compiler/lowering/MapOpLowering.cpp @@ -0,0 +1,146 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +class InlineMapOpLowering + : public mlir::OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult matchAndRewrite( + mlir::daphne::MapOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + + mlir::daphne::MatrixType lhsMatrixType = + op->getOperandTypes().front().dyn_cast(); + auto matrixElementType = lhsMatrixType.getElementType(); + auto lhsMemRefType = mlir::MemRefType::get( + {lhsMatrixType.getNumRows(), lhsMatrixType.getNumCols()}, matrixElementType); + + mlir::Value lhs = + rewriter.create( + loc, lhsMemRefType, adaptor.getArg()); + mlir::ModuleOp module = op->getParentOfType(); + func::FuncOp udfFuncOp = + module.lookupSymbol(op.getFunc()); + + SmallVector loopIvs; + + auto outerLoop = + rewriter.create(loc, 0, lhsMatrixType.getNumRows(), 1); + for (Operation &nested : *outerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(outerLoop.getInductionVar()); + + // outer loop body + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto innerLoop = + rewriter.create(loc, 0, lhsMatrixType.getNumCols(), 1); + for (Operation &nested : *innerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(innerLoop.getInductionVar()); + rewriter.create(loc); + 
rewriter.setInsertionPointToStart(innerLoop.getBody()); + + // inner loop body + mlir::Value lhsValue = rewriter.create(loc, lhs, loopIvs); + mlir::Value res = + rewriter.create(loc, udfFuncOp, ValueRange{lhsValue}) + ->getResult(0); + rewriter.create(loc, res, lhs, loopIvs); + rewriter.create(loc); + + rewriter.setInsertionPointAfter(outerLoop); + mlir::Value output = convertMemRefToDenseMatrix(op->getLoc(), rewriter, + lhs, op.getType()); + rewriter.replaceOp(op, output); + return mlir::success(); + } +}; + +namespace { +/** + * @brief The MapOpLoweringPass rewrites the daphne::MapOp operator + * to a set of perfectly nested affine loops and inserts for each element a call + * to the UDF assigned to the daphne::MapOp. + * + * This rewrite enables subsequent inlining pass to completely replace + * the daphne::MapOp by inlining the produced CallOps from this pass. + */ +struct MapOpLoweringPass + : public mlir::PassWrapper> { + explicit MapOpLoweringPass() {} + + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + StringRef getArgument() const final { return "lower-map"; } + StringRef getDescription() const final { + return "Lowers the daphne.mapOp operation to" + "a set of affine loops, directly calling the UDF. 
" + "Subsequent use of the inlining pass may inline the call to the " + "UDF."; + } +}; +} // end anonymous namespace + +void MapOpLoweringPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + mlir::LowerToLLVMOptions llvmOptions(&getContext()); + mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + target.addLegalDialect(); + + target.addIllegalOp(); + + patterns.insert(&getContext()); + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { + signalPassFailure(); + } +} + +std::unique_ptr mlir::daphne::createMapOpLoweringPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/MatMulOpLowering.cpp b/src/compiler/lowering/MatMulOpLowering.cpp new file mode 100644 index 000000000..6c401e266 --- /dev/null +++ b/src/compiler/lowering/MatMulOpLowering.cpp @@ -0,0 +1,236 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/ArrayRef.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h" +#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/Transforms/FuncConversions.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/UseDefLists.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +static constexpr int ROW = 0; +static constexpr int COL = 1; + +void affineMatMul(mlir::Value &lhs, mlir::Value &rhs, mlir::Value &output, + ConversionPatternRewriter &rewriter, mlir::Location loc, + ArrayRef lhsShape, ArrayRef rhsShape, + mlir::MLIRContext *ctx) { + SmallVector loopIvs; + + // row loop + auto rowLoop = rewriter.create(loc, 0, lhsShape[ROW], 1); + for (Operation &nested : *rowLoop.getBody()) { + rewriter.eraseOp(&nested); + } + + // row loop body + 
rewriter.setInsertionPointToStart(rowLoop.getBody()); + + // fma loop + auto innerLoop = rewriter.create(loc, 0, rhsShape[ROW], 1); + for (Operation &nested : *innerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + rewriter.setInsertionPointToStart(innerLoop.getBody()); + + // col loop + auto colLoop = rewriter.create(loc, 0, rhsShape[COL], 1); + for (Operation &nested : *colLoop.getBody()) { + rewriter.eraseOp(&nested); + } + + // col loop body + rewriter.setInsertionPointToStart(colLoop.getBody()); + + loopIvs.push_back(rowLoop.getInductionVar()); + loopIvs.push_back(colLoop.getInductionVar()); + loopIvs.push_back(innerLoop.getInductionVar()); + + // load + mlir::Value a = rewriter.create( + loc, lhs, ValueRange{loopIvs[0], loopIvs[2]}); + mlir::Value b = rewriter.create( + loc, rhs, ValueRange{loopIvs[2], loopIvs[1]}); + mlir::Value c = rewriter.create( + loc, output, ValueRange{loopIvs[0], loopIvs[1]}); + + // fma + mlir::Value fma = rewriter.create(loc, a, b, c); + + // store + rewriter.create(loc, fma, output, + ValueRange{loopIvs[0], loopIvs[1]}); + + // AffineYieldOp at end of loop blocks + rewriter.setInsertionPointToEnd(rowLoop.getBody()); + rewriter.create(loc); + rewriter.setInsertionPointToEnd(colLoop.getBody()); + rewriter.create(loc); + rewriter.setInsertionPointToEnd(innerLoop.getBody()); + rewriter.create(loc); + rewriter.setInsertionPointAfter(rowLoop); +} + +class MatMulLowering : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + daphne::MatMulOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + mlir::daphne::MatrixType lhsMatrixType = + adaptor.getLhs().getType().dyn_cast(); + mlir::daphne::MatrixType rhsMatrixType = + adaptor.getRhs().getType().dyn_cast(); + + auto lhsRows = lhsMatrixType.getNumRows(); + auto lhsCols = lhsMatrixType.getNumCols(); + + auto rhsRows = rhsMatrixType.getNumRows(); + auto rhsCols = 
rhsMatrixType.getNumCols(); + + auto matrixElementType = lhsMatrixType.getElementType(); + + // TODO(phil): if shape is unknown, e.g., row/col = -1 we currently + // can't create a MemRefType + auto lhsMemRefType = + mlir::MemRefType::get({lhsRows, lhsCols}, matrixElementType); + auto rhsMemRefType = + mlir::MemRefType::get({rhsRows, rhsCols}, matrixElementType); + + mlir::MemRefType outputMemRefType = + mlir::MemRefType::get({lhsRows, rhsCols}, matrixElementType); + + // daphne::Matrix -> memref + mlir::Value lhs = + rewriter.create( + op->getLoc(), lhsMemRefType, adaptor.getLhs()); + mlir::Value rhs = + rewriter.create( + op->getLoc(), rhsMemRefType, adaptor.getRhs()); + + // Alloc output memref + mlir::Value outputMemRef = + insertMemRefAlloc(outputMemRefType, loc, rewriter); + + // Fill the output MemRef + affineFillMemRef(0.0, rewriter, loc, outputMemRefType.getShape(), + op->getContext(), outputMemRef, matrixElementType); + // Do the actual MatMul with hand built codegen + affineMatMul(lhs, rhs, outputMemRef, rewriter, loc, + lhsMemRefType.getShape(), rhsMemRefType.getShape(), + op->getContext()); + + mlir::Value DM = convertMemRefToDenseMatrix(loc, rewriter, outputMemRef, + op.getType()); + + rewriter.replaceOp(op, DM); + return success(); + } +}; + +namespace { +/** + * @brief The MatMulLoweringPass rewrites the MatMulOp from the DaphneDialect + * to a affine loop structure implementing a naive iterative matrix + * multiplication. + * + * The naive iterative algorithm is simply a perfectly nested + * loop algorithm running in O(n^3) performing the 3 load operations in it's + * inner loop body, calculates an FMA and stores the result in the output + * matrix. 
+ */ +struct MatMulLoweringPass + : public mlir::PassWrapper> { + explicit MatMulLoweringPass() {} + + StringRef getArgument() const final { return "lower-mm"; } + StringRef getDescription() const final { + return "This pass lowers the MatMulOp to an affine loop structure " + "performing a naive iterative matrix multiplication."; + } + + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; +}; +} // end anonymous namespace + +void MatMulLoweringPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + LowerToLLVMOptions llvmOptions(&getContext()); + LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + + target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); + + target.addIllegalOp(); + + patterns.insert(&getContext()); + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { + signalPassFailure(); + } +} + +std::unique_ptr mlir::daphne::createMatMulOpLoweringPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/ModOpLowering.cpp b/src/compiler/lowering/ModOpLowering.cpp new file mode 100644 index 000000000..05fdf7ea4 --- /dev/null +++ b/src/compiler/lowering/ModOpLowering.cpp @@ -0,0 +1,226 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "compiler/utils/CompilerUtils.h" +#include "compiler/utils/LoweringUtils.h" +#include "ir/daphneir/Daphne.h" +#include "ir/daphneir/Passes.h" +#include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +class EwModOpLowering + : public mlir::OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + [[nodiscard]] bool optimization_viable(mlir::Value divisor) const { + std::pair isConstant = + CompilerUtils::isConstant(divisor); + return isConstant.first && (isConstant.second & (isConstant.second - 1)) == 0; + } + + void optimizeEwModOp(mlir::Value memRef, mlir::Value divisor, + ArrayRef shape, + ConversionPatternRewriter &rewriter, + Location loc) const { + // divisor - 1 + mlir::Value cst_one = rewriter.create( + loc, rewriter.getI64Type(), rewriter.getI64IntegerAttr(1)); + + auto casted_divisor = typeConverter->materializeTargetConversion( + rewriter, loc, rewriter.getI64Type(), ValueRange{divisor}); + + mlir::Value rhs = + rewriter.create(loc, casted_divisor, cst_one); + + SmallVector lowerBounds(/*Rank=*/2, /*Value=*/0); + SmallVector steps(/*Rank=*/2, 
/*Value=*/1); + buildAffineLoopNest( + rewriter, loc, lowerBounds, shape, steps, + [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) { + mlir::Value load = + nestedBuilder.create(loc, memRef, ivs); + mlir::Value res{}; + + Value castedLhs = + this->typeConverter->materializeTargetConversion( + nestedBuilder, loc, + nestedBuilder.getIntegerType( + divisor.getType().getIntOrFloatBitWidth()), + ValueRange{load}); + + res = nestedBuilder.create(loc, castedLhs, rhs); + Value castedRes = + this->typeConverter->materializeSourceConversion( + nestedBuilder, loc, divisor.getType(), ValueRange{res}); + + nestedBuilder.create(loc, castedRes, memRef, + ivs); + }); + } + + void lowerEwModOp(mlir::Value memRef, mlir::Value divisor, + ArrayRef shape, + ConversionPatternRewriter &rewriter, Location loc) const { + SmallVector lowerBounds(/*Rank=*/2, /*Value=*/0); + SmallVector steps(/*Rank=*/2, /*Value=*/1); + buildAffineLoopNest( + rewriter, loc, lowerBounds, shape, steps, + [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) { + mlir::Value load = + nestedBuilder.create(loc, memRef, ivs); + mlir::Value res{}; + + // this is enough since divisor will be casted to float if + // matrix is float + if (divisor.getType().isa()) { + res = + nestedBuilder.create(loc, load, divisor); + nestedBuilder.create(loc, res, memRef, ivs); + return; + } + + Value castedLhs = + this->typeConverter->materializeTargetConversion( + nestedBuilder, loc, + nestedBuilder.getIntegerType( + divisor.getType().getIntOrFloatBitWidth()), + ValueRange{load}); + + Value castedRhs = + this->typeConverter->materializeTargetConversion( + nestedBuilder, loc, + nestedBuilder.getIntegerType( + divisor.getType().getIntOrFloatBitWidth()), + ValueRange{divisor}); + + res = nestedBuilder.create(loc, castedLhs, + castedRhs); + Value castedRes = + this->typeConverter->materializeSourceConversion( + nestedBuilder, loc, divisor.getType(), ValueRange{res}); + + nestedBuilder.create(loc, castedRes, memRef, + 
ivs); + }); + } + + mlir::LogicalResult matchAndRewrite( + mlir::daphne::EwModOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::daphne::MatrixType lhsTensor = + adaptor.getLhs().getType().dyn_cast(); + auto lhsRows = lhsTensor.getNumRows(); + auto lhsCols = lhsTensor.getNumCols(); + + auto lhsMemRefType = mlir::MemRefType::get({lhsRows, lhsCols}, + lhsTensor.getElementType()); + + // daphne::Matrix -> memref + mlir::Value lhs = + rewriter.create( + op->getLoc(), lhsMemRefType, adaptor.getLhs()); + mlir::Value rhs = adaptor.getRhs(); + + if (optimization_viable(rhs)) + optimizeEwModOp(lhs, rhs, + {lhsTensor.getNumRows(), lhsTensor.getNumCols()}, + rewriter, op->getLoc()); + else + lowerEwModOp(lhs, rhs, + {lhsTensor.getNumRows(), lhsTensor.getNumCols()}, + rewriter, op->getLoc()); + + mlir::Value output = convertMemRefToDenseMatrix(op->getLoc(), rewriter, + lhs, op.getType()); + rewriter.replaceOp(op, output); + return success(); + } +}; + +namespace { +/** + * @brief Performs an integer mod optimization on the EwModOp operator by + * lowering to an affine loop structure and performing the mod op on values + * loaded from a MemRef. + * + * If possible, we additionally perform the integer modulo optimization by + * replacing the modulo with an bitwise AND and a subtraction. 
+ */ +struct ModOpLoweringPass + : public mlir::PassWrapper> { + explicit ModOpLoweringPass() {} + + void getDependentDialects(mlir::DialectRegistry &registry) const override { + registry + .insert(); + } + void runOnOperation() final; + + StringRef getArgument() const final { return "lower-mod"; } + StringRef getDescription() const final { + return "Performs an integer mod optimization on the EwModOp operator " + "by lowering to an affine loop structure " + "and performing the mod op on values loaded from a MemRef."; + } +}; +} // end anonymous namespace + +void ModOpLoweringPass::runOnOperation() { + mlir::ConversionTarget target(getContext()); + mlir::RewritePatternSet patterns(&getContext()); + mlir::LowerToLLVMOptions llvmOptions(&getContext()); + mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions); + + typeConverter.addConversion(convertInteger); + typeConverter.addConversion(convertFloat); + typeConverter.addConversion([](Type type) { return type; }); + typeConverter.addArgumentMaterialization(materializeCastFromIllegal); + typeConverter.addSourceMaterialization(materializeCastToIllegal); + typeConverter.addTargetMaterialization(materializeCastFromIllegal); + + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + + target.addIllegalOp(); + + patterns.insert(typeConverter, &getContext()); + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, std::move(patterns)))) { + signalPassFailure(); + } +} + +std::unique_ptr mlir::daphne::createModOpLoweringPass() { + return std::make_unique(); +} diff --git a/src/compiler/lowering/RewriteSqlOpPass.cpp b/src/compiler/lowering/RewriteSqlOpPass.cpp index 401544ac3..9c3d2d32d 100644 --- a/src/compiler/lowering/RewriteSqlOpPass.cpp +++ b/src/compiler/lowering/RewriteSqlOpPass.cpp @@ -85,6 +85,9 @@ namespace : public PassWrapper > { void runOnOperation() final; + + 
StringRef getArgument() const final { return "rewrite-sqlop"; } + StringRef getDescription() const final { return "TODO"; } }; } diff --git a/src/compiler/lowering/RewriteToCallKernelOpPass.cpp b/src/compiler/lowering/RewriteToCallKernelOpPass.cpp index b9e78f319..4454aaec8 100644 --- a/src/compiler/lowering/RewriteToCallKernelOpPass.cpp +++ b/src/compiler/lowering/RewriteToCallKernelOpPass.cpp @@ -18,9 +18,14 @@ #include "ir/daphneir/Daphne.h" #include "ir/daphneir/Passes.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/BuiltinDialect.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/IR/IRMapping.h" @@ -364,6 +369,7 @@ namespace // Inject the current DaphneContext as the last input parameter to // all kernel calls, unless it's a CreateDaphneContextOp. + if(!llvm::isa(op)) newOperands.push_back(dctx); @@ -494,8 +500,12 @@ void RewriteToCallKernelOpPass::runOnOperation() // Specification of (il)legal dialects/operations. All DaphneIR operations // but those explicitly marked as legal will be replaced by CallKernelOp. 
ConversionTarget target(getContext()); - target.addLegalDialect(); - target.addLegalOp(); + target.addLegalDialect(); + + target.addLegalOp(); target.addIllegalDialect(); target.addLegalOp< daphne::ConstantOp, @@ -504,6 +514,8 @@ void RewriteToCallKernelOpPass::runOnOperation() daphne::CreateVariadicPackOp, daphne::StoreVariadicPackOp, daphne::VectorizedPipelineOp, + scf::ForOp, + memref::LoadOp, daphne::GenericCallOp, daphne::MapOp >(); diff --git a/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp b/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp index 453f65525..15ebd9b03 100644 --- a/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp +++ b/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp @@ -387,6 +387,9 @@ namespace { public: void runOnOperation() final; + + StringRef getArgument() const final { return "specialize-generic-funcs"; } + StringRef getDescription() const final { return "TODO"; } }; } diff --git a/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp b/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp index 58c042af0..8e933155e 100644 --- a/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp +++ b/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp @@ -36,6 +36,9 @@ using namespace mlir; struct WhileLoopInvariantCodeMotionPass : public PassWrapper > { void runOnOperation() final; + + StringRef getArgument() const final { return "while-loop-invariant-code-motion"; } + StringRef getDescription() const final { return "TODO"; } }; void WhileLoopInvariantCodeMotionPass::runOnOperation() { diff --git a/src/compiler/utils/CMakeLists.txt b/src/compiler/utils/CMakeLists.txt index a7acd88bf..73e8cd7c0 100644 --- a/src/compiler/utils/CMakeLists.txt +++ b/src/compiler/utils/CMakeLists.txt @@ -14,9 +14,10 @@ add_library(CompilerUtils STATIC CompilerUtils.cpp + LoweringUtils.cpp TypePrinting.cpp ) target_link_libraries(CompilerUtils PUBLIC DaphneMetaDataParser -) \ No newline at end of file +) diff --git 
a/src/compiler/utils/CompilerUtils.cpp b/src/compiler/utils/CompilerUtils.cpp index 9ec231f55..43fb800f1 100644 --- a/src/compiler/utils/CompilerUtils.cpp +++ b/src/compiler/utils/CompilerUtils.cpp @@ -57,6 +57,14 @@ std::pair CompilerUtils::isConstant(mlir::Value v) { ); } + +template<> +std::pair CompilerUtils::isConstant(mlir::Value v) { + return isConstantHelper( + v, [](mlir::IntegerAttr attr){return attr.getValue().getLimitedValue();} + ); +} + template<> std::pair CompilerUtils::isConstant(mlir::Value v) { return isConstantHelper( diff --git a/src/compiler/utils/CompilerUtils.h b/src/compiler/utils/CompilerUtils.h index b934f55ea..13e4973b6 100644 --- a/src/compiler/utils/CompilerUtils.h +++ b/src/compiler/utils/CompilerUtils.h @@ -178,6 +178,9 @@ struct CompilerUtils { return "Descriptor"; else if(t.isa()) return "Target"; + else if(auto memRefType = t.dyn_cast()) { + return "StridedMemRefType_" + mlirTypeToCppTypeName(memRefType.getElementType(), false) + "_2"; + } std::string typeName; llvm::raw_string_ostream rsos(typeName); @@ -261,4 +264,4 @@ struct CompilerUtils { return vt; } -}; \ No newline at end of file +}; diff --git a/src/compiler/utils/LoweringUtils.cpp b/src/compiler/utils/LoweringUtils.cpp new file mode 100644 index 000000000..943dbd304 --- /dev/null +++ b/src/compiler/utils/LoweringUtils.cpp @@ -0,0 +1,188 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "LoweringUtils.h" + +#include + +#include "ir/daphneir/Daphne.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Dialect/Affine/Passes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Transforms/Passes.h" + +/// Insert an allocation for the given MemRefType. +mlir::Value insertMemRefAlloc(mlir::MemRefType type, mlir::Location loc, + mlir::PatternRewriter &rewriter) { + auto alloc = rewriter.create(loc, type); + + // Make sure to allocate at the beginning of the block. + auto *parentBlock = alloc->getBlock(); + alloc->moveBefore(&parentBlock->front()); + + return alloc; +} + +void insertMemRefDealloc(mlir::Value memref, mlir::Location loc, + mlir::PatternRewriter &rewriter) { + auto dealloc = rewriter.create(loc, memref); + dealloc->moveBefore(&memref.getParentBlock()->back()); +} + +// TODO(phil) try to provide function templates to remove duplication +void affineFillMemRefInt(int value, mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::ArrayRef shape, + mlir::MLIRContext *ctx, mlir::Value memRef, + mlir::Type elemType) { + constexpr int ROW = 0; + constexpr int COL = 1; + mlir::Value fillValue = rewriter.create( + loc, rewriter.getI64Type(), rewriter.getI64IntegerAttr(value)); + + llvm::SmallVector loopIvs; + + auto outerLoop = rewriter.create(loc, 0, shape[ROW], 1); + for (mlir::Operation &nested : *outerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(outerLoop.getInductionVar()); + + // outer loop body + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto innerLoop = rewriter.create(loc, 0, shape[COL], 1); + for (mlir::Operation &nested : *innerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(innerLoop.getInductionVar()); + rewriter.create(loc); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + rewriter.create(loc, fillValue, memRef, loopIvs); + + rewriter.create(loc); + 
rewriter.setInsertionPointAfter(outerLoop); +} + +void affineFillMemRef(double value, mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::ArrayRef shape, + mlir::MLIRContext *ctx, mlir::Value memRef, + mlir::Type elemType) { + constexpr int ROW = 0; + constexpr int COL = 1; + mlir::Value fillValue = rewriter.create( + loc, elemType, rewriter.getFloatAttr(elemType, value)); + + llvm::SmallVector loopIvs; + + auto outerLoop = rewriter.create(loc, 0, shape[ROW], 1); + for (mlir::Operation &nested : *outerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(outerLoop.getInductionVar()); + + // outer loop body + rewriter.setInsertionPointToStart(outerLoop.getBody()); + auto innerLoop = rewriter.create(loc, 0, shape[COL], 1); + for (mlir::Operation &nested : *innerLoop.getBody()) { + rewriter.eraseOp(&nested); + } + loopIvs.push_back(innerLoop.getInductionVar()); + rewriter.create(loc); + rewriter.setInsertionPointToStart(innerLoop.getBody()); + rewriter.create(loc, fillValue, memRef, loopIvs); + + rewriter.create(loc); + rewriter.setInsertionPointAfter(outerLoop); +} + +mlir::Value convertMemRefToDenseMatrix( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::Value memRef, mlir::Type type) { + auto extractStridedMetadataOp = + rewriter.create(loc, memRef); + // aligned ptr (memref.data) + mlir::Value alignedPtr = + rewriter.create(loc, + memRef); + // offset + mlir::Value offset = extractStridedMetadataOp.getOffset(); + // strides + mlir::ResultRange strides = extractStridedMetadataOp.getStrides(); + // sizes + mlir::ResultRange sizes = extractStridedMetadataOp.getSizes(); + + return rewriter.create( + loc, type, alignedPtr, offset, sizes[0], sizes[1], strides[0], + strides[1]); +} + +mlir::Type convertFloat(mlir::FloatType floatType) { + return mlir::IntegerType::get(floatType.getContext(), + floatType.getIntOrFloatBitWidth()); +} + +mlir::Type convertInteger(mlir::IntegerType intType) { + return 
mlir::IntegerType::get(intType.getContext(), + intType.getIntOrFloatBitWidth()); +} + +llvm::Optional materializeCastFromIllegal(mlir::OpBuilder &builder, + mlir::Type type, + mlir::ValueRange inputs, + mlir::Location loc) { + mlir::Type fromType = getElementTypeOrSelf(inputs[0].getType()); + mlir::Type toType = getElementTypeOrSelf(type); + + if ((!fromType.isSignedInteger() && !fromType.isUnsignedInteger()) || + !toType.isSignlessInteger()) + return std::nullopt; + // Use unrealized conversion casts to do signful->signless conversions. + return builder + .create(loc, type, inputs[0]) + ->getResult(0); +} + +llvm::Optional materializeCastToIllegal(mlir::OpBuilder &builder, + mlir::Type type, + mlir::ValueRange inputs, + mlir::Location loc) { + mlir::Type fromType = getElementTypeOrSelf(inputs[0].getType()); + mlir::Type toType = getElementTypeOrSelf(type); + + if (!fromType.isSignlessInteger() || + (!toType.isSignedInteger() && !toType.isUnsignedInteger())) + return std::nullopt; + // Use unrealized conversion casts to do signless->signful conversions. + return builder + .create(loc, type, inputs[0]) + ->getResult(0); +} + +mlir::Operation *findLastUseOfSSAValue(mlir::Value &v) { + mlir::Operation *lastUseOp = nullptr; + + for (mlir::OpOperand &use : v.getUses()) { + mlir::Operation *thisUseOp = use.getOwner(); + // Find parent op in the block where v is defined. + while (thisUseOp->getBlock() != v.getParentBlock()) + thisUseOp = thisUseOp->getParentOp(); + // Determine if this is a later use. 
+ if (!lastUseOp || lastUseOp->isBeforeInBlock(thisUseOp)) + lastUseOp = thisUseOp; + } + + return lastUseOp; +} diff --git a/src/compiler/utils/LoweringUtils.h b/src/compiler/utils/LoweringUtils.h new file mode 100644 index 000000000..5555b1324 --- /dev/null +++ b/src/compiler/utils/LoweringUtils.h @@ -0,0 +1,65 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Visitors.h" +#include "mlir/Transforms/DialectConversion.h" + +mlir::Value insertMemRefAlloc(mlir::MemRefType type, mlir::Location loc, + mlir::PatternRewriter &rewriter); + +void insertMemRefDealloc(mlir::Value memref, mlir::Location loc, + mlir::PatternRewriter &rewriter); + +void affineFillMemRefInt(int value, mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::ArrayRef shape, + mlir::MLIRContext *ctx, mlir::Value memRef, + mlir::Type elemType); + +void affineFillMemRef(double value, mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::ArrayRef shape, + mlir::MLIRContext *ctx, mlir::Value memRef, + mlir::Type elemType); + +mlir::Value convertMemRefToDenseMatrix(mlir::Location, + mlir::ConversionPatternRewriter &, + 
mlir::Value memRef, mlir::Type); + +llvm::Optional materializeCastFromIllegal(mlir::OpBuilder &builder, + mlir::Type type, + mlir::ValueRange inputs, + mlir::Location loc); + +llvm::Optional materializeCastToIllegal(mlir::OpBuilder &builder, + mlir::Type type, + mlir::ValueRange inputs, + mlir::Location loc); + +mlir::Type convertFloat(mlir::FloatType floatType); + +mlir::Type convertInteger(mlir::IntegerType intType); + +mlir::Operation *findLastUseOfSSAValue(mlir::Value &v); diff --git a/src/ir/daphneir/CMakeLists.txt b/src/ir/daphneir/CMakeLists.txt index c6ef724e3..6036aefcd 100644 --- a/src/ir/daphneir/CMakeLists.txt +++ b/src/ir/daphneir/CMakeLists.txt @@ -52,4 +52,4 @@ add_mlir_dialect_library(MLIRDaphne LINK_LIBS PUBLIC CompilerUtils -) \ No newline at end of file +) diff --git a/src/ir/daphneir/Daphne.h b/src/ir/daphneir/Daphne.h index f5cd35985..73a2e6b23 100644 --- a/src/ir/daphneir/Daphne.h +++ b/src/ir/daphneir/Daphne.h @@ -33,6 +33,7 @@ #include "mlir/IR/AttrTypeSubElements.h" #pragma GCC diagnostic pop +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/src/ir/daphneir/DaphneDialect.cpp b/src/ir/daphneir/DaphneDialect.cpp index ded725c4f..205e7c4e9 100644 --- a/src/ir/daphneir/DaphneDialect.cpp +++ b/src/ir/daphneir/DaphneDialect.cpp @@ -16,24 +16,33 @@ #include #include + #include + +#include "mlir/Support/LogicalResult.h" #define GET_OP_CLASSES #include #define GET_TYPEDEF_CLASSES -#include +#include +#include +#include + #include +#include +#include "llvm/ADT/ArrayRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpDefinition.h" #include 
"mlir/IR/OpImplementation.h" #include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/IR/SymbolTable.h" -#include "mlir/IR/BuiltinOps.h" #include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" @@ -41,12 +50,46 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Interfaces/VectorInterfaces.h" #include "mlir/Interfaces/ViewLikeInterface.h" +#include "mlir/Transforms/InliningUtils.h" #include #include #include #include +struct DaphneInlinerInterface : public mlir::DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + + bool isLegalToInline(mlir::Operation *call, mlir::Operation *callable, + bool wouldBeCloned) const final { + return true; + } + + bool isLegalToInline(mlir::Operation *, mlir::Region *, bool, mlir::IRMapping &) const final { + return true; + } + + bool isLegalToInline(mlir::Region *, mlir::Region *, bool, mlir::IRMapping &) const final { + return true; + } + + void handleTerminator(mlir::Operation *op, + mlir::ArrayRef valuesToRepl) const final { + auto returnOp = mlir::dyn_cast(op); + + // Replace the values directly with the return operands. 
+ assert(returnOp.getNumOperands() == valuesToRepl.size()); + for (const auto &it : llvm::enumerate(returnOp.getOperands())) + valuesToRepl[it.index()].replaceAllUsesWith(it.value()); + } + + mlir::Operation *materializeCallConversion(mlir::OpBuilder &builder, mlir::Value input, + mlir::Type resultType, + mlir::Location conversionLoc) const final { + return builder.create(conversionLoc, resultType, input); + } +}; + void mlir::daphne::DaphneDialect::initialize() { addOperations< @@ -57,6 +100,7 @@ void mlir::daphne::DaphneDialect::initialize() #define GET_TYPEDEF_LIST #include >(); + addInterfaces(); } mlir::Operation *mlir::daphne::DaphneDialect::materializeConstant(OpBuilder &builder, @@ -179,6 +223,9 @@ mlir::Type mlir::daphne::DaphneDialect::parseType(mlir::DialectAsmParser &parser else if (keyword == "String") { return StringType::get(parser.getBuilder().getContext()); } + else if (keyword == "DaphneContext") { + return mlir::daphne::DaphneContextType::get(parser.getBuilder().getContext()); + } else { parser.emitError(parser.getCurrentLocation()) << "Parsing failed, keyword `" << keyword << "` not recognized!"; return nullptr; @@ -363,6 +410,7 @@ ::mlir::LogicalResult mlir::daphne::MatrixType::verify( // Value type is known. 
|| elementType.isSignedInteger(64) || elementType.isUnsignedInteger(8) + || elementType.isUnsignedInteger(64) || elementType.isF32() || elementType.isF64() || elementType.isIndex() @@ -783,6 +831,10 @@ mlir::OpFoldResult mlir::daphne::EwAndOp::fold(FoldAdaptor adaptor) { return {}; } +mlir::OpFoldResult mlir::daphne::EwBitwiseAndOp::fold(FoldAdaptor adaptor) { + return {}; +} + mlir::OpFoldResult mlir::daphne::EwOrOp::fold(FoldAdaptor adaptor) { ArrayRef operands = adaptor.getOperands(); auto boolOp = [](const bool &a, const bool &b) { return a || b; }; @@ -1323,4 +1375,35 @@ mlir::LogicalResult mlir::daphne::CondOp::canonicalize(mlir::daphne::CondOp op, return mlir::success(); } -} \ No newline at end of file +} + +mlir::LogicalResult mlir::daphne::ConvertDenseMatrixToMemRef::canonicalize( + mlir::daphne::ConvertDenseMatrixToMemRef op, + mlir::PatternRewriter &rewriter) { + // removes unnecessary conversions of MemRef -> DM -> MemRef + mlir::Operation *dmNode = op->getOperand(0).getDefiningOp(); + + // getDefiningOp() is null when the operand is a block argument. + if (!dmNode || !llvm::isa(dmNode)) + return failure(); + + mlir::Operation *originalMemRefOp = + dmNode->getPrevNode()->getOperand(0).getDefiningOp(); + op.replaceAllUsesWith(originalMemRefOp); + + rewriter.eraseOp(op); + if (dmNode->getUsers().empty()) rewriter.eraseOp(dmNode); + + return mlir::success(); +} + +mlir::LogicalResult mlir::daphne::ConvertMemRefToDenseMatrix::canonicalize( + mlir::daphne::ConvertMemRefToDenseMatrix op, + mlir::PatternRewriter &rewriter) { + mlir::Operation *extractPtr = op->getPrevNode(); + // No preceding op (or one without operands): nothing to hoist. + if (!extractPtr || extractPtr->getNumOperands() == 0) + return failure(); + auto srcMemRef = extractPtr->getOperand(0).getDefiningOp(); + // The source memref may be a block argument, which has no defining op. 
+ if (!srcMemRef) + return failure(); + extractPtr->moveAfter(srcMemRef); + op->moveAfter(extractPtr); + + return mlir::success(); +} + diff --git a/src/ir/daphneir/DaphneDistributableOpInterface.cpp b/src/ir/daphneir/DaphneDistributableOpInterface.cpp index 20ee390a0..416179a6c 100644 --- a/src/ir/daphneir/DaphneDistributableOpInterface.cpp +++ b/src/ir/daphneir/DaphneDistributableOpInterface.cpp @@ -134,6 +134,9 @@ IMPL_EWBINARYOP(EwAndOp) 
IMPL_EWBINARYOP(EwOrOp) IMPL_EWBINARYOP(EwXorOp) +// Bitwise +IMPL_EWBINARYOP(EwBitwiseAndOp); + // Strings IMPL_EWBINARYOP(EwConcatOp) @@ -170,4 +173,4 @@ std::vector daphne::RowAggMaxOp::createEquivalentDistributedDAG( std::vector daphne::RowAggMaxOp::getOperandDistrPrimitives() { return {false}; -} \ No newline at end of file +} diff --git a/src/ir/daphneir/DaphneOps.td b/src/ir/daphneir/DaphneOps.td index b16310932..e4cd6a96b 100644 --- a/src/ir/daphneir/DaphneOps.td +++ b/src/ir/daphneir/DaphneOps.td @@ -34,8 +34,11 @@ include "ir/daphneir/DaphneTypeInferenceTraits.td" include "ir/daphneir/CUDASupport.td" include "ir/daphneir/FPGAOPENCLSupport.td" +include "mlir/Dialect/LLVMIR/LLVMTypes.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/CallInterfaces.td" +include "mlir/IR/AttrTypeBase.td" // **************************************************************************** // Custom constraints @@ -66,6 +69,28 @@ class TypesMatchOrOneIsMatrixOfOther : PredOpTrait< class Daphne_Op traits = []> : Op; +// **************************************************************************** +// DAPHNE Runtime Interoperability +// **************************************************************************** + +def Daphne_ConvertMemRefToDenseMatrix : Daphne_Op<"convertMemRefToDenseMatrix"> { + let summary = "Return a DenseMatrix."; + let description = [{ Constructs a DenseMatrix given a rank 2 StridedMemRefType. 
}]; + + /* let arguments = (ins AnyMemRef:$arg); */ + let hasCanonicalizeMethod = 1; + let arguments = (ins Size:$base, Size:$offset, Size:$size0, Size:$size1, Size:$stride0, Size:$stride1); + let results = (outs MatrixOrU:$res); +} + +def Daphne_ConvertDenseMatrixToMemRef : Daphne_Op<"convertDenseMatrixToMemRef", [Pure]> { + let summary = "Given a DenseMatrix, return a StridedMemRefType."; + let description = [{ Constructs a StridedMemRefType with rank 2 from a DenseMatrix* with already allocated memory. }]; + let hasCanonicalizeMethod = 1; + let arguments = (ins MatrixOrU:$arg); + let results = (outs AnyMemRef:$output); +} + // **************************************************************************** // Data generation // **************************************************************************** @@ -110,7 +135,6 @@ def Daphne_RandMatrixOp : Daphne_Op<"randMatrix", [ NumRowsFromIthScalar<0>, NumColsFromIthScalar<1>, DeclareOpInterfaceMethods, SparsityFromIthScalar<4>, CastArgsToResTypeRandMatrixOp ]> { - //let arguments = (ins Size:$numRows, Size:$numCols, AnyScalar:$min, AnyScalar:$max, F64:$sparsity, Seed:$seed, StrScalar:$pdf); let arguments = (ins Size:$numRows, Size:$numCols, AnyScalar:$min, AnyScalar:$max, F64:$sparsity, IntScalar:$seed); let results = (outs MatrixOrU:$res); } @@ -173,7 +197,8 @@ def Daphne_MatMulOp : Daphne_Op<"matMul", [ class Daphne_EwUnaryOp traits = []> : Daphne_Op { let arguments = (ins AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$arg); let results = (outs AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$res); @@ -228,7 +253,8 @@ class Daphne_EwBinaryOp traits = []> DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, ShapeEwBinary, - CastArgsToResType + CastArgsToResType, + NoMemoryEffect ])> { let arguments = (ins AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$lhs, AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$rhs); let results = (outs AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, 
Unknown]>:$res); @@ -279,6 +305,12 @@ def Daphne_EwAndOp : Daphne_EwBinaryOp<"ewAnd", NumScalar, [Commutative, Valu def Daphne_EwOrOp : Daphne_EwBinaryOp<"ewOr" , NumScalar, [Commutative, ValueTypeFromArgsInt]>; def Daphne_EwXorOp : Daphne_EwBinaryOp<"ewXor", NumScalar, [Commutative, ValueTypeFromArgsInt]>; +// ---------------------------------------------------------------------------- +// Bitwise +// ---------------------------------------------------------------------------- + +def Daphne_EwBitwiseAndOp : Daphne_EwBinaryOp<"ewBitwiseAnd", NumScalar, [Commutative, ValueTypeFromArgsInt]>; + // ---------------------------------------------------------------------------- // Strings // ---------------------------------------------------------------------------- @@ -1288,7 +1320,7 @@ def Daphne_PrintOp : Daphne_Op<"print"> { // TODO We might change it to only accept scalars here and enforce toString // for matrices and frames. But currently, we need it like that for the // rest of the program. 
- let arguments = (ins AnyTypeOf<[AnyScalar, MatrixOrFrame, Unknown]>:$arg, BoolScalar:$newline, BoolScalar:$err); + let arguments = (ins AnyTypeOf<[AnyScalar, MatrixOrFrame, AnyMemRef, Unknown]>:$arg, BoolScalar:$newline, BoolScalar:$err); let results = (outs); // no results } diff --git a/src/ir/daphneir/DaphneVectorizableOpInterface.cpp b/src/ir/daphneir/DaphneVectorizableOpInterface.cpp index 0ab264dc6..9785c9ba3 100644 --- a/src/ir/daphneir/DaphneVectorizableOpInterface.cpp +++ b/src/ir/daphneir/DaphneVectorizableOpInterface.cpp @@ -189,6 +189,9 @@ IMPL_SPLIT_COMBINE_EWBINARYOP(EwAndOp) IMPL_SPLIT_COMBINE_EWBINARYOP(EwOrOp) IMPL_SPLIT_COMBINE_EWBINARYOP(EwXorOp) +// Bitwise +IMPL_SPLIT_COMBINE_EWBINARYOP(EwBitwiseAndOp); + // Strings IMPL_SPLIT_COMBINE_EWBINARYOP(EwConcatOp) diff --git a/src/ir/daphneir/Passes.h b/src/ir/daphneir/Passes.h index 3f7d7ef18..ec9c5f45a 100644 --- a/src/ir/daphneir/Passes.h +++ b/src/ir/daphneir/Passes.h @@ -43,8 +43,15 @@ namespace mlir::daphne { std::unique_ptr createAdaptTypesToKernelsPass(); std::unique_ptr createDistributeComputationsPass(); std::unique_ptr createDistributePipelinesPass(); + std::unique_ptr createMapOpLoweringPass(); + std::unique_ptr createEwOpLoweringPass(); + std::unique_ptr createModOpLoweringPass(); std::unique_ptr createInferencePass(InferenceConfig cfg = {false, true, true, true, true}); std::unique_ptr createInsertDaphneContextPass(const DaphneUserConfig& cfg); + std::unique_ptr createDaphneOptPass(); + std::unique_ptr createMatMulOpLoweringPass(); + std::unique_ptr createAggAllOpLoweringPass(); + std::unique_ptr createMemRefTestPass(); std::unique_ptr createProfilingPass(); std::unique_ptr createLowerToLLVMPass(const DaphneUserConfig& cfg); std::unique_ptr createManageObjRefsPass(); diff --git a/src/ir/daphneir/Passes.td b/src/ir/daphneir/Passes.td index 20fc2a5ee..39725a131 100644 --- a/src/ir/daphneir/Passes.td +++ b/src/ir/daphneir/Passes.td @@ -55,4 +55,25 @@ def WhileLoopInvariantCodeMotionPass : 
Pass<"while-loop-invariant-code-motion", let constructor = "mlir::daphne::createWhileLoopInvariantCodeMotionPass()"; } -#endif // SRC_IR_DAPHNEIR_PASSES_TD \ No newline at end of file +def AggAllLoweringPass : Pass<"lower-agg", "::mlir::func::FuncOp"> { + let constructor = "mlir::daphne::createAggAllOpLoweringPass()"; +} + +def MatMulOpLoweringPass : Pass<"lower-mm", "::mlir::func::FuncOp"> { + let constructor = "mlir::daphne::createMatMulOpLoweringPass()"; +} + +def DaphneOpsOptPass : Pass<"opt-daphne", "::mlir::func::FuncOp"> { + let constructor = "mlir::daphne::createDaphneOptPass()"; +} + +def MapOpLoweringPass: Pass<"lower-map", "::mlir::func::FuncOp"> { + let constructor = "mlir::daphne::createMapOpLoweringPass()"; +} + +def LowerEwOpPass: Pass<"lower-ew", "::mlir::func::FuncOp"> { + let constructor = "mlir::daphne::createEwOpLoweringPass()"; +} + + +#endif // SRC_IR_DAPHNEIR_PASSES_TD diff --git a/src/parser/config/ConfigParser.cpp b/src/parser/config/ConfigParser.cpp index d34e446a1..3debbab4d 100644 --- a/src/parser/config/ConfigParser.cpp +++ b/src/parser/config/ConfigParser.cpp @@ -51,6 +51,8 @@ void ConfigParser::readUserConfig(const std::string& filename, DaphneUserConfig& config.use_ipa_const_propa = jf.at(DaphneConfigJsonParams::USE_IPA_CONST_PROPA).get(); if (keyExists(jf, DaphneConfigJsonParams::USE_PHY_OP_SELECTION)) config.use_phy_op_selection = jf.at(DaphneConfigJsonParams::USE_PHY_OP_SELECTION).get(); + if (keyExists(jf, DaphneConfigJsonParams::USE_MLIR_CODEGEN)) + config.use_mlir_codegen = jf.at(DaphneConfigJsonParams::USE_MLIR_CODEGEN).get(); if (keyExists(jf, DaphneConfigJsonParams::CUDA_FUSE_ANY)) config.cuda_fuse_any = jf.at(DaphneConfigJsonParams::CUDA_FUSE_ANY).get(); if (keyExists(jf, DaphneConfigJsonParams::VECTORIZED_SINGLE_QUEUE)) @@ -79,6 +81,8 @@ void ConfigParser::readUserConfig(const std::string& filename, DaphneUserConfig& config.explain_vectorized = jf.at(DaphneConfigJsonParams::EXPLAIN_VECTORIZED).get(); if (keyExists(jf, 
DaphneConfigJsonParams::EXPLAIN_OBJ_REF_MGNT)) config.explain_obj_ref_mgnt = jf.at(DaphneConfigJsonParams::EXPLAIN_OBJ_REF_MGNT).get(); + if (keyExists(jf, DaphneConfigJsonParams::EXPLAIN_MLIR_CODEGEN)) + config.explain_mlir_codegen = jf.at(DaphneConfigJsonParams::EXPLAIN_MLIR_CODEGEN).get(); if (keyExists(jf, DaphneConfigJsonParams::TASK_PARTITIONING_SCHEME)) { config.taskPartitioningScheme = jf.at(DaphneConfigJsonParams::TASK_PARTITIONING_SCHEME).get(); if (config.taskPartitioningScheme == SelfSchedulingScheme::INVALID) { @@ -142,4 +146,4 @@ void ConfigParser::checkAnyUnexpectedKeys(const nlohmann::basic_json<>& j, const .append("' file")); } } -} \ No newline at end of file +} diff --git a/src/parser/config/JsonParams.h b/src/parser/config/JsonParams.h index 8e9c1c257..172143258 100644 --- a/src/parser/config/JsonParams.h +++ b/src/parser/config/JsonParams.h @@ -30,6 +30,7 @@ struct DaphneConfigJsonParams { inline static const std::string USE_OBJ_REF_MGNT = "use_obj_ref_mgnt"; inline static const std::string USE_IPA_CONST_PROPA = "use_ipa_const_propa"; inline static const std::string USE_PHY_OP_SELECTION = "use_phy_op_selection"; + inline static const std::string USE_MLIR_CODEGEN = "use_mlir_codegen"; inline static const std::string CUDA_FUSE_ANY = "cuda_fuse_any"; inline static const std::string VECTORIZED_SINGLE_QUEUE = "vectorized_single_queue"; @@ -45,6 +46,7 @@ struct DaphneConfigJsonParams { inline static const std::string EXPLAIN_TYPE_ADAPTATION = "explain_type_adaptation"; inline static const std::string EXPLAIN_VECTORIZED = "explain_vectorized"; inline static const std::string EXPLAIN_OBJ_REF_MGNT = "explain_obj_ref_mgnt"; + inline static const std::string EXPLAIN_MLIR_CODEGEN = "explain_mlir_codegen"; inline static const std::string TASK_PARTITIONING_SCHEME = "taskPartitioningScheme"; inline static const std::string NUMBER_OF_THREADS = "numberOfThreads"; inline static const std::string MINIMUM_TASK_SIZE = "minimumTaskSize"; @@ -53,13 +55,14 @@ struct 
DaphneConfigJsonParams { inline static const std::string LIBRARY_PATHS = "library_paths"; inline static const std::string DAPHNEDSL_IMPORT_PATHS = "daphnedsl_import_paths"; inline static const std::string LOGGING = "logging"; - + inline static const std::string JSON_PARAMS[] = { USE_CUDA_, USE_VECTORIZED_EXEC, USE_OBJ_REF_MGNT, USE_IPA_CONST_PROPA, USE_PHY_OP_SELECTION, + USE_MLIR_CODEGEN, CUDA_FUSE_ANY, VECTORIZED_SINGLE_QUEUE, DEBUG_LLVM, @@ -73,6 +76,7 @@ struct DaphneConfigJsonParams { EXPLAIN_PHY_OP_SELECTION, EXPLAIN_TYPE_ADAPTATION, EXPLAIN_VECTORIZED, + EXPLAIN_MLIR_CODEGEN, EXPLAIN_OBJ_REF_MGNT, TASK_PARTITIONING_SCHEME, NUMBER_OF_THREADS, diff --git a/src/runtime/local/kernels/BinaryOpCode.h b/src/runtime/local/kernels/BinaryOpCode.h index 7d61dc0eb..54d878b4c 100644 --- a/src/runtime/local/kernels/BinaryOpCode.h +++ b/src/runtime/local/kernels/BinaryOpCode.h @@ -18,29 +18,33 @@ enum class BinaryOpCode { // Arithmetic. - ADD, // addition - SUB, // subtraction - MUL, // multiplication - DIV, // division - POW, // to the power of - MOD, // modulus - LOG, // logarithm + ADD, // addition + SUB, // subtraction + MUL, // multiplication + DIV, // division + POW, // to the power of + MOD, // modulus + LOG, // logarithm + // Comparisons. - EQ, // equal - NEQ, // not equal - LT, // less than - LE, // less equal - GT, // greater than - GE, // greater equal - + EQ, // equal + NEQ, // not equal + LT, // less than + LE, // less equal + GT, // greater than + GE, // greater equal + // Min/max. MIN, MAX, - + // Logical. AND, OR, + + // Bitwise. 
+ BITWISE_AND, }; static std::string_view binary_op_codes[] = {"ADD", "SUB", "MUL", "DIV", "POW", "MOD", "LOG", "EQ", "NEQ", "LT", "LE", - "GT", "GE", "MIN", "MAX", "AND", "OR"}; \ No newline at end of file + "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"}; diff --git a/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h b/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h new file mode 100644 index 000000000..c281db96c --- /dev/null +++ b/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h @@ -0,0 +1,38 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "mlir/ExecutionEngine/CRunnerUtils.h" +#include "runtime/local/datastructures/DenseMatrix.h" + +template +inline StridedMemRefType convertDenseMatrixToMemRef( + const DenseMatrix *input, DCTX(ctx)) { + StridedMemRefType memRef{}; + memRef.basePtr = input->getValuesSharedPtr().get(); + memRef.data = memRef.basePtr; + memRef.offset = 0; + memRef.sizes[0] = input->getNumRows(); + memRef.sizes[1] = input->getNumCols(); + + // TODO(phil): needs to be calculated for non row-major memory layouts + memRef.strides[0] = input->getNumCols(); + memRef.strides[1] = 1; + input->increaseRefCounter(); + + return memRef; +} diff --git a/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h b/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h new file mode 100644 index 000000000..96779ea70 --- /dev/null +++ b/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "runtime/local/context/DaphneContext.h" +#include "runtime/local/datastructures/DenseMatrix.h" + +template +inline void convertMemRefToDenseMatrix(DenseMatrix*& result, size_t basePtr, + size_t offset, size_t size0, + size_t size1, size_t stride0, + size_t stride1, DCTX(ctx)) { + auto no_op_deleter = [](T*) {}; + T* valuePtr = reinterpret_cast(basePtr); + std::shared_ptr ptr(valuePtr, no_op_deleter); + result = DataObjectFactory::create>(size0, size1, ptr); +} + diff --git a/src/runtime/local/kernels/MatMul.h b/src/runtime/local/kernels/MatMul.h index d0cfefbeb..fd5ff9e19 100644 --- a/src/runtime/local/kernels/MatMul.h +++ b/src/runtime/local/kernels/MatMul.h @@ -51,4 +51,3 @@ void matMul(DTRes *& res, const DTLhs * lhs, const DTRhs * rhs, bool transa, boo - diff --git a/src/runtime/local/kernels/genKernelInst.py b/src/runtime/local/kernels/genKernelInst.py index fedbe87ee..d2b2c2a44 100755 --- a/src/runtime/local/kernels/genKernelInst.py +++ b/src/runtime/local/kernels/genKernelInst.py @@ -118,7 +118,10 @@ def generateKernelInstantiation(kernelTemplateInfo, templateValues, opCodes, out .replace(" **", "" if rp["isOutput"] else "_variadic") .replace(" *", "_variadic" if "isVariadic" in rp and rp["isVariadic"] else "") .replace("& ", "") - .replace("<", "_").replace(">", "") + .replace("<", "_") + .replace(">", "") + .replace(",", "_") + .replace(" ", "_") for rp in extendedRuntimeParams ]) if typesForName != "": diff --git a/src/runtime/local/kernels/kernels.json b/src/runtime/local/kernels/kernels.json index c7129ec5f..1ffd4d625 100644 --- a/src/runtime/local/kernels/kernels.json +++ b/src/runtime/local/kernels/kernels.json @@ -784,6 +784,80 @@ [] ] }, + { + "kernelTemplate": { + "header": "ConvertMemRefToDenseMatrix.h", + "opName": "convertMemRefToDenseMatrix", + "returnType": "void", + "templateParams": [ + { + "name": "VT", + "isDataType": false + } + ], + "runtimeParams": [ + { + "type": "DenseMatrix *&", + "name": "result" + 
}, + { + "type": "size_t", + "name": "basePtr" + }, + { + "type": "size_t", + "name": "offset" + }, + { + "type": "size_t", + "name": "size0" + }, + { + "type": "size_t", + "name": "size1" + }, + { + "type": "size_t", + "name": "stride0" + }, + { + "type": "size_t", + "name": "stride1" + } + ] + }, + "instantiations": [ + ["int64_t"], + ["uint64_t"], + ["float"], + ["double"] + ] + }, + { + "kernelTemplate": { + "header": "ConvertDenseMatrixToMemRef.h", + "opName": "convertDenseMatrixToMemRef", + "returnType": "StridedMemRefType", + "templateParams": [ + { + "name": "VT", + "isDataType": false + } + ], + "runtimeParams": [ + { + "type": "DenseMatrix *", + "name": "input" + } + ] + }, + "instantiations": [ + ["int64_t"], + ["uint64_t"], + ["float"], + ["double"] + ] + }, { "kernelTemplate": { "header": "CreateFrame.h", @@ -1086,7 +1160,7 @@ [["DenseMatrix", "double"], ["DenseMatrix", "double"], "double"], [["DenseMatrix", "int64_t"], ["DenseMatrix", "int64_t"], "int64_t"] ], - "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"] + "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"] }, { "name": ["CPP"], @@ -1097,7 +1171,7 @@ ["Frame", "Frame", "double"], ["Frame", "Frame", "int64_t"] ], - "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"] + "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"] } ] }, @@ -1143,7 +1217,7 @@ ["uint32_t", "uint32_t", "uint32_t"], ["size_t", "size_t", "size_t"] ], - "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"] + "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", 
"BITWISE_AND"] }, { "kernelTemplate": { @@ -1492,6 +1566,7 @@ [["DenseMatrix", "float"], "float"], [["DenseMatrix", "double"], "double"], [["DenseMatrix", "int64_t"], "int64_t"], + [["DenseMatrix", "uint64_t"], "uint64_t"], [["DenseMatrix", "uint8_t"], "uint8_t"]] } ] diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e266dc80c..7d63d5976 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,15 +47,22 @@ set(TEST_SOURCES api/cli/vectorized/MultiThreadedOpsTest.cpp api/cli/vectorized/VectorizedPipelineTest.cpp api/cli/Utils.cpp - + api/python/DaphneLibTest.cpp - + + api/cli/codegen/EwBinaryScalarTest.cpp + api/cli/codegen/MatMulTest.cpp + api/cli/codegen/EwOpLoopFusionTest.cpp + api/cli/codegen/AggAllTest.cpp + api/cli/codegen/MapOpTest.cpp + codegen/CodegenTest.cpp + ir/daphneir/InferTypesTest.cpp - + parser/config/ConfigParserTest.cpp - + runtime/distributed/worker/WorkerTest.cpp - + runtime/local/datastructures/CSRMatrixTest.cpp runtime/local/datastructures/DenseMatrixTest.cpp runtime/local/datastructures/FrameTest.cpp @@ -142,7 +149,7 @@ endif() add_executable(run_tests ${TEST_SOURCES}) set_target_properties(run_tests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin) -add_dependencies(run_tests daphne daphnelib DistributedWorker) +add_dependencies(run_tests daphne daphnelib DistributedWorker daphne-opt) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) set(LIBS AllKernels ${dialect_libs} DataStructures DaphneDSLParser MLIRDaphne WorkerImpl Proto DaphneConfigParser diff --git a/test/api/cli/Utils.h b/test/api/cli/Utils.h index 3768a18cd..1c2a29891 100644 --- a/test/api/cli/Utils.h +++ b/test/api/cli/Utils.h @@ -163,6 +163,31 @@ pid_t runProgramInBackground(int &out, int &err, const char * execPath, Args ... } } +/** + * @brief Executes the "run-lit.py" python script in a directory and + * captures `stdout`, `stderr`, and the status code. 
+ * + * "run-lit.py" is required to run the LLVM tool llvm-lit in order to + * test "*.mlir" files in the directory using the llvm-lit command RUN: + * in each file. + * + * @param out The stream where to direct the program's standard output. + * @param err The stream where to direct the program's standard error. + * @param dirPath The path to the directory containing the "run-lit.py" script + * and the "*.mlir" test cases. + * @param args The arguments to pass in addition to the script's path. Despite + * the variadic template, each element should be of type `char *`. The last one + * does *not* need to be a null pointer. + * @return The status code returned by the process, or `-1` if it did not exit + * normally. + */ +template +int runLIT(std::stringstream &out, std::stringstream &err, std::string dirPath, + Args... args) { + return runProgram(out, err, "/bin/python3", "python3", + (dirPath + "run-lit.py").c_str(), "-v", dirPath.c_str(), + args...); +} /** * @brief Executes DAPHNE's command line interface with the given arguments and @@ -465,4 +490,4 @@ void compareDaphneToSomeRefSimple(const std::string & dirPath, const std::string */ std::string generalizeDataTypes(const std::string& str); -#endif //TEST_API_CLI_UTILS_H \ No newline at end of file +#endif //TEST_API_CLI_UTILS_H diff --git a/test/api/cli/codegen/AggAllTest.cpp b/test/api/cli/codegen/AggAllTest.cpp new file mode 100644 index 000000000..f0c383c00 --- /dev/null +++ b/test/api/cli/codegen/AggAllTest.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include "api/cli/StatusCode.h" + +const std::string dirPath = "test/api/cli/codegen/"; + +TEST_CASE("aggAll", TAG_CODEGEN) { + std::string result = "100\n"; + + compareDaphneToStr(result, dirPath + "sum_aggall.daphne"); + compareDaphneToStr(result, dirPath + "sum_aggall.daphne", "--mlir-codegen"); +} diff --git a/test/api/cli/codegen/EwBinaryScalarTest.cpp b/test/api/cli/codegen/EwBinaryScalarTest.cpp new file mode 100644 index 000000000..224d566c3 --- /dev/null +++ b/test/api/cli/codegen/EwBinaryScalarTest.cpp @@ -0,0 +1,75 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include "api/cli/StatusCode.h" + +const std::string dirPath = "test/api/cli/codegen/"; + +void test_binary_lowering(const std::string op, + const std::string kernel_call, + const std::string lowering, + const std::string result) { + std::stringstream out; + std::stringstream err; + + int status = runDaphne(out, err, "--explain", "llvm", (dirPath + op + ".daphne").c_str()); + CHECK(status == StatusCode::SUCCESS); + + CHECK_THAT(err.str(), Catch::Contains(kernel_call)); + CHECK_THAT(err.str(), !Catch::Contains(lowering)); + CHECK(out.str() == result); + + out.str(std::string()); + err.str(std::string()); + + status = runDaphne(out, err, "--explain", "llvm", "--mlir-codegen", (dirPath + op + ".daphne").c_str()); + CHECK(status == StatusCode::SUCCESS); + + CHECK_THAT(err.str(), !Catch::Contains(kernel_call)); + CHECK_THAT(err.str(), Catch::Contains(lowering)); + CHECK(out.str() == result); +} + +TEST_CASE("ewBinaryAddScalar", TAG_CODEGEN) { + test_binary_lowering("add", "llvm.call @_ewAdd__", "llvm.add", "3\n"); +} + +TEST_CASE("ewBinarySubScalar", TAG_CODEGEN) { + test_binary_lowering("sub", "llvm.call @_ewSub__", "llvm.sub", "-1\n"); +} + +TEST_CASE("ewBinaryMulScalar", TAG_CODEGEN) { + test_binary_lowering("mul", "llvm.call @_ewMul__", "llvm.mul", "2\n"); +} + +TEST_CASE("ewBinaryDivScalar", TAG_CODEGEN) { + test_binary_lowering("div", "llvm.call @_ewDiv__", "llvm.fdiv", "1.5\n"); +} + +TEST_CASE("ewBinaryPowScalar", TAG_CODEGEN) { + test_binary_lowering("pow", "llvm.call @_ewPow__", "llvm.intr.pow", "9\n"); +} + +TEST_CASE("ewBinaryAbsScalar", TAG_CODEGEN) { + test_binary_lowering("abs", "llvm.call @_ewAbs__", "llvm.intr.fabs", "4\n"); +} diff --git a/test/api/cli/codegen/EwOpLoopFusionTest.cpp b/test/api/cli/codegen/EwOpLoopFusionTest.cpp new file mode 100644 index 000000000..46f91b7cb --- /dev/null +++ b/test/api/cli/codegen/EwOpLoopFusionTest.cpp @@ -0,0 +1,42 @@ +/* + * Copyright 2023 The DAPHNE 
Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include "api/cli/StatusCode.h" + +const std::string dirPath = "test/api/cli/codegen/"; + +TEST_CASE("ewloopfusion", TAG_CODEGEN) { + std::string result = + "DenseMatrix(2x2, double)\n" + "8 8\n" + "8 8\n" + "DenseMatrix(2x2, double)\n" + "10 10\n" + "10 10\n" + "DenseMatrix(2x2, double)\n" + "9 9\n" + "9 9\n"; + + compareDaphneToStr(result, dirPath + "fusion.daphne"); + compareDaphneToStr(result, dirPath + "fusion.daphne", "--mlir-codegen"); +} diff --git a/test/api/cli/codegen/MapOpTest.cpp b/test/api/cli/codegen/MapOpTest.cpp new file mode 100644 index 000000000..a7ccf56d0 --- /dev/null +++ b/test/api/cli/codegen/MapOpTest.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include "api/cli/StatusCode.h" + +const std::string dirPath = "test/api/cli/codegen/"; + +TEST_CASE("mapOp", TAG_CODEGEN) { + std::string result = + "DenseMatrix(2x2, double)\n" + "2.1 1\n" + "6.5 -1.2\n"; + + compareDaphneToStr(result, dirPath + "map.daphne"); + compareDaphneToStr(result, dirPath + "map.daphne", "--mlir-codegen", "--no-obj-ref-mgnt"); +} + diff --git a/test/api/cli/codegen/MatMulTest.cpp b/test/api/cli/codegen/MatMulTest.cpp new file mode 100644 index 000000000..6ae2f324a --- /dev/null +++ b/test/api/cli/codegen/MatMulTest.cpp @@ -0,0 +1,49 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include "api/cli/StatusCode.h" + +const std::string dirPath = "test/api/cli/codegen/"; + +TEST_CASE("matmul", TAG_CODEGEN) { + std::string result = + "DenseMatrix(3x3, double)\n" + "45 45 45\n" + "45 45 45\n" + "45 45 45\n"; + + compareDaphneToStr(result, dirPath + "matmul.daphne"); + compareDaphneToStr(result, dirPath + "matmul.daphne", "--mlir-codegen"); +} + + +TEST_CASE("matvec", TAG_CODEGEN) { + std::string result = + "DenseMatrix(3x1, double)\n" + "45\n" + "45\n" + "45\n"; + + compareDaphneToStr(result, dirPath + "matvec.daphne"); + compareDaphneToStr(result, dirPath + "matvec.daphne", "--mlir-codegen"); +} diff --git a/test/api/cli/codegen/abs.daphne b/test/api/cli/codegen/abs.daphne new file mode 100644 index 000000000..1cf002ecc --- /dev/null +++ b/test/api/cli/codegen/abs.daphne @@ -0,0 +1,7 @@ +// Performs an AbsOp. Used to compare precompiled kernel with codegen. Value +// extracted as scalar to avoid it being optimized out of the calculation with +// constant folding or similar. + +X = [1.0, -2.0, -4.0]; +a = as.scalar(X[2:3, 0:1]); +print(abs(a)); diff --git a/test/api/cli/codegen/add.daphne b/test/api/cli/codegen/add.daphne new file mode 100644 index 000000000..fd33984cc --- /dev/null +++ b/test/api/cli/codegen/add.daphne @@ -0,0 +1,8 @@ +// Performs an AddOp. Used to compare precompiled kernel with codegen. +// Values extracted as scalar to avoid them being optimized out of +// the calculation with constant folding or similar. + +X = [1, 2, 3]; +a = as.scalar(X[0:1, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(a + b); diff --git a/test/api/cli/codegen/div.daphne b/test/api/cli/codegen/div.daphne new file mode 100644 index 000000000..a934b84b7 --- /dev/null +++ b/test/api/cli/codegen/div.daphne @@ -0,0 +1,8 @@ +// Performs a DivOp. Used to compare precompiled kernel with codegen.
Values +// extracted as scalar to avoid them being optimized out of the calculation +// with constant folding or similar. + +X = [1.0, 2.0, 3.0]; +a = as.scalar(X[2:3, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(a / b); diff --git a/test/api/cli/codegen/fusion.daphne b/test/api/cli/codegen/fusion.daphne new file mode 100644 index 000000000..e4b81f68e --- /dev/null +++ b/test/api/cli/codegen/fusion.daphne @@ -0,0 +1,11 @@ +// Performs loop fusion on multiple EwBinaryOps. Used to compare precompiled +// kernel with codegen. + +X = fill(4.0, 2, 2); +X = X * 2.0; +Y = X + 2.0; +Z = X + 1.0; + +print(X); +print(Y); +print(Z); diff --git a/test/api/cli/codegen/log.daphne b/test/api/cli/codegen/log.daphne new file mode 100644 index 000000000..b9f86d44b --- /dev/null +++ b/test/api/cli/codegen/log.daphne @@ -0,0 +1,8 @@ +// Performs a LogOp. Used to compare precompiled kernel with codegen. Values +// extracted as scalar to avoid them being optimized out of the calculation +// with constant folding or similar. + +X = [1, 2, 3]; +a = as.scalar(X[0:1, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(ln(a, b)); diff --git a/test/api/cli/codegen/map.daphne b/test/api/cli/codegen/map.daphne new file mode 100644 index 000000000..6c9d203eb --- /dev/null +++ b/test/api/cli/codegen/map.daphne @@ -0,0 +1,10 @@ +// Performs a MapOp with the UDF `increment`. Used to compare precompiled +// kernel with codegen. + +def increment(x) { + return x + 1; +} + +X = reshape([1.1, 0.0, 5.5, -2.2], 2, 2); + +print(map(X, increment)); diff --git a/test/api/cli/codegen/matmul.daphne b/test/api/cli/codegen/matmul.daphne new file mode 100644 index 000000000..af5b46ae9 --- /dev/null +++ b/test/api/cli/codegen/matmul.daphne @@ -0,0 +1,9 @@ +// Performs a MatMulOp. Used to compare precompiled kernel with codegen. 
+ +N = 3; +A = fill(5.0, N, N); +B = fill(3.0, N, N); + +C = A@B; + +print(C); // for small matrices diff --git a/test/api/cli/codegen/matvec.daphne b/test/api/cli/codegen/matvec.daphne new file mode 100644 index 000000000..7aba59805 --- /dev/null +++ b/test/api/cli/codegen/matvec.daphne @@ -0,0 +1,9 @@ +// Performs a MatMulOp. Used to compare precompiled kernel with codegen. + +N = 3; +A = fill(5.0, N, N); +B = fill(3.0, N, 1); + +C = A@B; + +print(C); // for small matrices diff --git a/test/api/cli/codegen/mul.daphne b/test/api/cli/codegen/mul.daphne new file mode 100644 index 000000000..17ea31d5c --- /dev/null +++ b/test/api/cli/codegen/mul.daphne @@ -0,0 +1,8 @@ +// Performs a MulOp. Used to compare precompiled kernel with codegen. Values +// extracted as scalar to avoid them being optimized out of the calculation +// with constant folding or similar. + +X = [1, 2, 3]; +a = as.scalar(X[0:1, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(a * b); diff --git a/test/api/cli/codegen/pow.daphne b/test/api/cli/codegen/pow.daphne new file mode 100644 index 000000000..ff13b1b23 --- /dev/null +++ b/test/api/cli/codegen/pow.daphne @@ -0,0 +1,8 @@ +// Performs a PowOp. Used to compare precompiled kernel with codegen. Values +// extracted as scalar to avoid them being optimized out of the calculation +// with constant folding or similar. + +X = [1.0, 2.0, 3.0]; +a = as.scalar(X[2:3, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(pow(a, b)); diff --git a/test/api/cli/codegen/sub.daphne b/test/api/cli/codegen/sub.daphne new file mode 100644 index 000000000..a230024c1 --- /dev/null +++ b/test/api/cli/codegen/sub.daphne @@ -0,0 +1,8 @@ +// Compare precompiled kernel with codegen generated for the SubOp. Value +// extracted as scalar to avoid it being optimized out of the calculation with +// constant folding or similar.
+ +X = [1, 2, 3]; +a = as.scalar(X[0:1, 0:1]); +b = as.scalar(X[1:2, 0:1]); +print(a - b); diff --git a/test/api/cli/codegen/sum_aggall.daphne b/test/api/cli/codegen/sum_aggall.daphne new file mode 100644 index 000000000..77578e7c6 --- /dev/null +++ b/test/api/cli/codegen/sum_aggall.daphne @@ -0,0 +1,5 @@ +// Compare precompiled kernel with codegen generated for the AggAllOp. + +X = fill(1.0, 10, 10); +a = sum(X); +print(a); diff --git a/test/codegen/.gitignore b/test/codegen/.gitignore new file mode 100644 index 000000000..a6e9662a3 --- /dev/null +++ b/test/codegen/.gitignore @@ -0,0 +1,2 @@ +Output/** +.lit_test_times.txt diff --git a/test/codegen/CodegenTest.cpp b/test/codegen/CodegenTest.cpp new file mode 100644 index 000000000..fce4d03c9 --- /dev/null +++ b/test/codegen/CodegenTest.cpp @@ -0,0 +1,40 @@ +/* + * Copyright 2023 The DAPHNE Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "run_tests.h" + +#include "api/cli/StatusCode.h" +#include "api/cli/Utils.h" + +#include + +const std::string dirPath = "test/codegen/"; + +// Place all test files with FileCheck directives in the dirPath. +// LIT will test all *.mlir files in the directory. 
+TEST_CASE("codegen", TAG_CODEGEN) { + std::stringstream out; + std::stringstream err; + + int status = runLIT(out, err, dirPath); + +#ifndef NDEBUG + spdlog::info("runLIT return status: " + std::to_string(status)); + spdlog::info("runLIT out:\n" + out.str()); + spdlog::info("runLIT err:\n" + err.str()); +#endif + CHECK(status == StatusCode::SUCCESS); +} diff --git a/test/codegen/daphne_opt.mlir b/test/codegen/daphne_opt.mlir new file mode 100644 index 000000000..25a02d813 --- /dev/null +++ b/test/codegen/daphne_opt.mlir @@ -0,0 +1,18 @@ +// RUN: daphne-opt --opt-daphne %s | FileCheck %s + +module { + func.func @main() { + %0 = "daphne.constant"() {value = 2 : ui64} : () -> ui64 + %1 = "daphne.constant"() {value = 2 : index} : () -> index + %2 = "daphne.constant"() {value = 4 : ui64} : () -> ui64 + %3 = "daphne.constant"() {value = false} : () -> i1 + %4 = "daphne.constant"() {value = true} : () -> i1 + %5 = "daphne.fill"(%2, %1, %1) : (ui64, index, index) -> !daphne.Matrix<2x2xui64> + // CHECK-NOT: daphne.ewMod + // CHECK: daphne.ewSub + // CHECK-NEXT: daphne.ewBitwiseAnd + %6 = "daphne.ewMod"(%5, %0) : (!daphne.Matrix<2x2xui64>, ui64) -> !daphne.Matrix<2x2xui64> + "daphne.print"(%6, %4, %3) : (!daphne.Matrix<2x2xui64>, i1, i1) -> () + "daphne.return"() : () -> () + } +} diff --git a/test/codegen/ew.mlir b/test/codegen/ew.mlir new file mode 100644 index 000000000..d94cf32cd --- /dev/null +++ b/test/codegen/ew.mlir @@ -0,0 +1,105 @@ +// RUN: daphne-opt --lower-ew %s | FileCheck %s + +func.func @add() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK-NOT: daphne.ewAdd + // CHECK: arith.addf + %5 = "daphne.ewAdd"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> 
!daphne.Matrix<2x2xf64> + "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @sub() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK-NOT: daphne.ewSub + // CHECK: arith.subf + %5 = "daphne.ewSub"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64> + "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @mul() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK-NOT: daphne.ewMul + // CHECK: arith.mulf + %5 = "daphne.ewMul"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64> + "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @div() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK-NOT: daphne.ewDiv + // CHECK: arith.divf + %5 = "daphne.ewDiv"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64> + "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @sqrt() { + %0 = "daphne.constant"() 
{value = 0 : index} : () -> index + %1 = "daphne.constant"() {value = 1 : index} : () -> index + %2 = "daphne.constant"() {value = 2 : index} : () -> index + %3 = "daphne.constant"() {value = false} : () -> i1 + %4 = "daphne.constant"() {value = true} : () -> i1 + %5 = "daphne.constant"() {value = 4 : si64} : () -> si64 + %6 = "daphne.fill"(%5, %2, %2) : (si64, index, index) -> !daphne.Matrix<2x2xsi64> + %7 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xsi64>, index, index) -> !daphne.Matrix + %8 = "daphne.sliceCol"(%7, %0, %1) : (!daphne.Matrix, index, index) -> !daphne.Matrix + %9 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xsi64>, index, index) -> !daphne.Matrix + %10 = "daphne.sliceCol"(%9, %0, %1) : (!daphne.Matrix, index, index) -> !daphne.Matrix + %11 = "daphne.cast"(%10) : (!daphne.Matrix) -> si64 + %12 = "daphne.cast"(%11) : (si64) -> f64 + // CHECK-NOT: daphne.ewSqrt + // CHECK: math.sqrt + %13 = "daphne.ewSqrt"(%12) : (f64) -> f64 + "daphne.print"(%13, %4, %3) : (f64, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @abs() { + %0 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %3 = "daphne.constant"() {value = false} : () -> i1 + %4 = "daphne.constant"() {value = true} : () -> i1 + // CHECK-NOT: daphne.ewAbs + // CHECK: math.absf + %12 = "daphne.ewAbs"(%0) : (f64) -> f64 + "daphne.print"(%12, %4, %3) : (f64, i1, i1) -> () + "daphne.return"() : () -> () +} + +func.func @pow() { + %0 = "daphne.constant"() {value = 0 : index} : () -> index + %1 = "daphne.constant"() {value = 1 : index} : () -> index + %2 = "daphne.constant"() {value = 2 : index} : () -> index + %3 = "daphne.constant"() {value = false} : () -> i1 + %4 = "daphne.constant"() {value = true} : () -> i1 + %5 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %6 = "daphne.fill"(%5, %2, %2) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + %7 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xf64>, index, index) -> !daphne.Matrix + %8 = 
"daphne.sliceCol"(%7, %0, %1) : (!daphne.Matrix, index, index) -> !daphne.Matrix + %9 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xf64>, index, index) -> !daphne.Matrix + %10 = "daphne.sliceCol"(%9, %0, %1) : (!daphne.Matrix, index, index) -> !daphne.Matrix + %11 = "daphne.cast"(%10) : (!daphne.Matrix) -> f64 + // CHECK-NOT: daphne.ewPow + // CHECK: math.powf + %12 = "daphne.ewPow"(%11, %11) : (f64, f64) -> f64 + "daphne.print"(%12, %4, %3) : (f64, i1, i1) -> () + "daphne.return"() : () -> () +} diff --git a/test/codegen/fusion.mlir b/test/codegen/fusion.mlir new file mode 100644 index 000000000..43107ad22 --- /dev/null +++ b/test/codegen/fusion.mlir @@ -0,0 +1,29 @@ +// RUN: daphne-opt -pass-pipeline="builtin.module(lower-ew, canonicalize, func.func(affine-loop-fusion))" %s | FileCheck %s"""" + +func.func @main() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 1.000000e+00 : f64} : () -> f64 + %4 = "daphne.constant"() {value = 2.000000e+00 : f64} : () -> f64 + %5 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64 + %6 = "daphne.fill"(%5, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK: affine.for + // CHECK-NEXT: affine.for + // CHECK-NEXT: affine.load + // CHECK-NEXT: arith.mulf + // CHECK-NEXT: affine.store + // CHECK-NEXT: affine.load + // CHECK-NEXT: arith.addf + // CHECK-NEXT: affine.store + // CHECK-NEXT: affine.load + // CHECK-NEXT: arith.addf + // CHECK-NEXT: affine.store + %7 = "daphne.ewMul"(%6, %4) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64> + %8 = "daphne.ewAdd"(%7, %4) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64> + %9 = "daphne.ewAdd"(%7, %3) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64> + "daphne.print"(%7, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.print"(%8, %2, %1) : 
(!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.print"(%9, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () +} diff --git a/test/codegen/lit.cfg b/test/codegen/lit.cfg new file mode 100644 index 000000000..fab502252 --- /dev/null +++ b/test/codegen/lit.cfg @@ -0,0 +1,17 @@ +import lit.formats +import os + +config.name = "DAPHNE LIT config" +config.test_format = lit.formats.ShTest(True)  # True is ShTest's execute_external flag: test commands are handed to the system shell + +config.suffixes = [".mlir"]  # only files with this suffix are picked up as tests + +config.test_source_root = os.path.dirname(__file__)  # the directory holding this config file + +config.environment["PATH"] = os.path.pathsep.join( + ( + os.path.abspath("bin/"),  # relative path: resolved against the working directory lit is started from + os.path.abspath("thirdparty/build/llvm-project/bin/"),  # relative as well; presumably where FileCheck is built - verify + config.environment["PATH"],  # the inherited PATH goes last, so the two directories above win on lookup + ) +) diff --git a/test/codegen/mapop.mlir b/test/codegen/mapop.mlir new file mode 100644 index 000000000..ff8825989 --- /dev/null +++ b/test/codegen/mapop.mlir @@ -0,0 +1,26 @@ +// RUN: daphne-opt --lower-map --inline %s | FileCheck %s + +module { + func.func @"increment-1-1"(%arg0: f64) -> f64 { + %0 = "daphne.ewExp"(%arg0) : (f64) -> f64 + "daphne.return"(%0) : (f64) -> () + } + func.func @main() { + %0 = "daphne.constant"() {value = 2 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = true} : () -> i1 + %3 = "daphne.constant"() {value = 93985655361872 : ui64} : () -> ui64 // NOTE(review): looks like a raw address captured from an earlier compilation - confirm; this test only runs passes and never executes the IR + %4 = "daphne.matrixConstant"(%3) : (ui64) -> !daphne.Matrix + %5 = "daphne.reshape"(%4, %0, %0) : (!daphne.Matrix, index, index) -> !daphne.Matrix<2x2xf64> + // CHECK-NOT: daphne.map + // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}} + // CHECK: affine.for + // CHECK-NEXT: affine.for + // CHECK-NOT: func.call + // CHECK: affine.load + // CHECK-NEXT: daphne.ewExp + %6 = "daphne.map"(%5) {func = "increment-1-1"} : (!daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64> // the directives above expect a two-level loop nest with the callee's ewExp inlined (no func.call left) + "daphne.print"(%6, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> () + "daphne.return"() : () -> () + } +} diff --git a/test/codegen/matmul.mlir b/test/codegen/matmul.mlir new file mode
100644 index 000000000..6f3672be5 --- /dev/null +++ b/test/codegen/matmul.mlir @@ -0,0 +1,32 @@ +// RUN: daphne-opt --lower-mm %s | FileCheck %s + +module { + func.func @main() { + // CHECK: {{.*}}memref.alloc + %0 = "daphne.constant"() {value = 10 : index} : () -> index + %1 = "daphne.constant"() {value = false} : () -> i1 + %2 = "daphne.constant"() {value = 3.000000e+00 : f64} : () -> f64 + %3 = "daphne.constant"() {value = 5.000000e+00 : f64} : () -> f64 + %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<10x10xf64> + %5 = "daphne.fill"(%2, %0, %0) : (f64, index, index) -> !daphne.Matrix<10x10xf64> + // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}} + // CHECK-NEXT: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}} + + // Zero-initialise the allocated result memref: a two-level loop nest whose body is a single store + // CHECK: affine.for + // CHECK-NEXT: {{ *}}affine.for + // CHECK-NEXT: {{ *}}affine.store + + // MatMul: a three-level loop nest whose body is three loads, one llvm.intr.fma and one store + // CHECK: affine.for + // CHECK-NEXT: affine.for + // CHECK-NEXT: affine.for + // CHECK-NEXT: {{.*}}memref.load + // CHECK-NEXT: {{.*}}memref.load + // CHECK-NEXT: {{.*}}memref.load + // CHECK-NEXT: {{.*}}llvm.intr.fma + // CHECK-NEXT: {{.*}}memref.store + %6 = "daphne.matMul"(%4, %5, %1, %1) : (!daphne.Matrix<10x10xf64>, !daphne.Matrix<10x10xf64>, i1, i1) -> !daphne.Matrix<10x10xf64> // two 10x10 f64 inputs; both i1 arguments are false (%1) - presumably the transpose flags, verify against the op definition + "daphne.return"() : () -> () + } +} diff --git a/test/codegen/run-lit.py b/test/codegen/run-lit.py new file mode 100644 index 000000000..39898435c --- /dev/null +++ b/test/codegen/run-lit.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python + +from lit.main import main  # requires the lit package to be importable (e.g. via PYTHONPATH) +main()  # called unconditionally at module level, so it also fires if this file is ever imported diff --git a/test/codegen/sum_agg.mlir b/test/codegen/sum_agg.mlir new file mode 100644 index 000000000..d0df6ea7e --- /dev/null +++ b/test/codegen/sum_agg.mlir @@ -0,0 +1,26 @@ +// RUN: daphne-opt --lower-agg %s | FileCheck %s + +module { + func.func @main() { + %0 = "daphne.constant"() {value = true} : () -> i1 + %1 = "daphne.constant"() {value = 10 : index} : () -> index + %2 = "daphne.constant"() {value = 1000000 : si64} : () ->
si64 + %3 = "daphne.constant"() {value = false} : () -> i1 + %4 = "daphne.constant"() {value = 1.000000e+00 : f64} : () -> f64 + %5 = "daphne.fill"(%4, %1, %1) : (f64, index, index) -> !daphne.Matrix<10x10xf64> + %6 = "daphne.now"() : () -> si64 + // CHECK-NOT: sumAll + // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}} + // CHECK: affine.for + // CHECK-NEXT: arith.constant + // CHECK-NEXT: affine.for + // CHECK-NEXT: memref.load + %7 = "daphne.sumAll"(%5) : (!daphne.Matrix<10x10xf64>) -> f64 // op under test, bracketed by the two daphne.now calls (%6 before, %8 after) + %8 = "daphne.now"() : () -> si64 + "daphne.print"(%7, %0, %3) : (f64, i1, i1) -> () + %9 = "daphne.ewSub"(%8, %6) : (si64, si64) -> si64 // difference of the two daphne.now readings + %10 = "daphne.ewDiv"(%9, %2) : (si64, si64) -> si64 // that difference divided by the constant 1000000 (%2) + "daphne.print"(%10, %0, %3) : (si64, i1, i1) -> () + "daphne.return"() : () -> () + } +} diff --git a/test/tags.h b/test/tags.h index 9d977a61c..14c490cbc 100644 --- a/test/tags.h +++ b/test/tags.h @@ -24,6 +24,7 @@ #define TAG_ALGORITHMS "[algorithms]" #define TAG_CAST "[cast]" +#define TAG_CODEGEN "[codegen]" #define TAG_CONFIG "[config]" #define TAG_CONTROLFLOW "[controlflow]" #define TAG_DATASTRUCTURES "[datastructures]"