Merge remote-tracking branch 'upstream/main' into LDEProject

daphne-eu · Jan 21, 2025 · 77938fb · 77938fb
2 parents 99125d4 + ab157d9
commit 77938fb
Show file tree

Hide file tree

Showing 133 changed files with 1,884 additions and 480 deletions.
diff --git a/UserConfig.json b/UserConfig.json
@@ -24,6 +24,9 @@
     "explain_vectorized": false,
     "explain_obj_ref_mgnt": false,
     "explain_mlir_codegen": false,
+    "explain_mlir_codegen_sparsity_exploiting_op_fusion": false,
+    "explain_mlir_codegen_daphneir_to_mlir": false,
+    "explain_mlir_codegen_mlir_specific": false,
     "taskPartitioningScheme": "STATIC",
     "numberOfThreads": -1,
     "minimumTaskSize": 1,

diff --git a/doc/DaphneDSL/Builtins.md b/doc/DaphneDSL/Builtins.md
@@ -490,23 +490,39 @@ We will support set operations such as **`intersect`**, **`merge`**, and **`exce
 
 - **`cartesian`**`(lhs:frame, rhs:frame)`
 
-    Calculates the cartesian (cross) product of the two input frames.
+    Calculates the cartesian product of the two input frames.
 
-- **`innerJoin`**`(lhs:frame, rhs:frame, lhsOn:str, rhsOn:str)`
+- **`innerJoin`**`(lhs:frame, rhs:frame, lhsOn:str, rhsOn:str[, numRowRes:si64])`
 
     Performs an inner join of the two input frames on `lhs`.`lhsOn` == `rhs`.`rhsOn`.
 
-- **`semiJoin`**`(lhs:frame, rhs:frame, lhsOn:str, rhsOn:str)`
+    The parameter `numRowRes` is an optional hint for an upper bound of the number of result rows.
+    If specified, it determines the number of rows that will be allocated for the result, whereby `-1` stands for an automatically chosen size.
+    Otherwise, it defaults to `-1`.
+
+- **`semiJoin`**`(lhs:frame, rhs:frame, lhsOn:str, rhsOn:str[, numRowRes:si64])`
 
     Performs a semi join of the two input frames on `lhs`.`lhsOn` == `rhs`.`rhsOn`.
     Returns only the columns belonging to `lhs`.
+
+    The parameter `numRowRes` is an optional hint for an upper bound of the number of result rows.
+    If specified, it determines the number of rows that will be allocated for the result, whereby `-1` stands for an automatically chosen size.
+    Otherwise, it defaults to `-1`.
 
 - **`groupJoin`**`(lhs:frame, rhs:frame, lhsOn:str, rhsOn:str, rhsAgg:str)`
 
     Group-join of `lhs` and `rhs` on `lhs.lhsOn == rhs.rhsOn` with summation of `rhs.rhsAgg`.
 
 We will support more variants of joins, including (left/right) outer joins, theta joins, anti-joins, etc.
 
+### Grouping and aggregation
+
+- **`groupSum`**`(arg:frame, grpColNames:str[, grpColNames, ...], sumColName:str)`
+
+    Groups the rows in the given frame `arg` by the specified columns `grpColNames` (at least one column) and calculates the per-group sum of the column denoted by `sumColName`.
+
+    *This built-in function is currently limited in terms of functionality (aggregation only on a single column, sum as the only aggregation function). It will be extended in the future. Meanwhile, consider using DAPHNE's `sql()` built-in function for more comprehensive grouping and aggregation support.*
+
 ### Frame label manipulation
 
 - **`setColLabels`**`(arg:frame, labels:str, ...)`

diff --git a/doc/DaphneDSL/Imports.md b/doc/DaphneDSL/Imports.md
@@ -40,6 +40,8 @@ print(utils.x);
     }
 ```
 
+NOTE: To use a user config, the path to the JSON file must be passed as a CLI argument to the DAPHNE binary: `daphne --config=<filename>`.
+
 NOTE: `default_dirs` can hold many paths and it will look for the **one** specified file in each, whereas any other library names have a list consisting of **one** directory,  from which **all** files will be imported (can be easily extended to multiple directories).
 
 Example:

diff --git a/doc/development/BuildingDaphne.md b/doc/development/BuildingDaphne.md
@@ -132,6 +132,10 @@ All possible options for the build script:
 
 ---
 
+## Building on WSL
+
+When using Windows Subsystem for Linux (WSL), the default memory limit for WSL is 50% of the total memory of the underlying Windows host. This can lead to build failures due to SIGKILL for DAPHNE builds. [Advanced settings configuration in WSL](https://learn.microsoft.com/en-us/windows/wsl/wsl-config) describes how the memory limit can be configured.
+
 ## Extension
 
 ### Overview over the build script

diff --git a/doc/tutorial/sqlTutorial.md b/doc/tutorial/sqlTutorial.md
@@ -56,7 +56,7 @@ Other features we do and don't support right now can be found below.
 
 ### Supported Features
 
-* Cross Product
+* SQL Cross Product (Cartesian Product)
 * Complex Where Clauses
 * Inner Join with single and multiple join conditions separated by an "AND" Operator
 * Group By Clauses

diff --git a/src/api/cli/DaphneUserConfig.h b/src/api/cli/DaphneUserConfig.h
@@ -74,6 +74,9 @@ struct DaphneUserConfig {
     bool explain_vectorized = false;
     bool explain_obj_ref_mgnt = false;
     bool explain_mlir_codegen = false;
+    bool explain_mlir_codegen_sparsity_exploiting_op_fusion = false;
+    bool explain_mlir_codegen_daphneir_to_mlir = false;
+    bool explain_mlir_codegen_mlir_specific = false;
     bool statistics = false;
 
     bool force_cuda = false;

diff --git a/src/api/internal/daphne_internal.cpp b/src/api/internal/daphne_internal.cpp
@@ -287,26 +287,34 @@ int startDAPHNE(int argc, const char **argv, DaphneLibResult *daphneLibRes, int
         type_adaptation,
         vectorized,
         obj_ref_mgnt,
-        mlir_codegen
+        mlir_codegen,
+        mlir_codegen_sparsity_exploiting_op_fusion,
+        mlir_codegen_daphneir_to_mlir,
+        mlir_codegen_mlir_specific
     };
 
     static llvm::cl::list<ExplainArgs> explainArgList(
         "explain", cat(daphneOptions),
         llvm::cl::desc("Show DaphneIR after certain compiler passes (separate "
                        "multiple values by comma, the order is irrelevant)"),
-        llvm::cl::values(clEnumVal(parsing, "Show DaphneIR after parsing"),
-                         clEnumVal(parsing_simplified, "Show DaphneIR after parsing and some simplifications"),
-                         clEnumVal(sql, "Show DaphneIR after SQL parsing"),
-                         clEnumVal(property_inference, "Show DaphneIR after property inference"),
-                         clEnumVal(select_matrix_repr, "Show DaphneIR after selecting "
-                                                       "physical matrix representations"),
-                         clEnumVal(phy_op_selection, "Show DaphneIR after selecting physical operators"),
-                         clEnumVal(type_adaptation, "Show DaphneIR after adapting types to available kernels"),
-                         clEnumVal(vectorized, "Show DaphneIR after vectorization"),
-                         clEnumVal(obj_ref_mgnt, "Show DaphneIR after managing object references"),
-                         clEnumVal(kernels, "Show DaphneIR after kernel lowering"),
-                         clEnumVal(llvm, "Show DaphneIR after llvm lowering"),
-                         clEnumVal(mlir_codegen, "Show DaphneIR after MLIR codegen")),
+        llvm::cl::values(
+            clEnumVal(parsing, "Show DaphneIR after parsing"),
+            clEnumVal(parsing_simplified, "Show DaphneIR after parsing and some simplifications"),
+            clEnumVal(sql, "Show DaphneIR after SQL parsing"),
+            clEnumVal(property_inference, "Show DaphneIR after property inference"),
+            clEnumVal(select_matrix_repr, "Show DaphneIR after selecting "
+                                          "physical matrix representations"),
+            clEnumVal(phy_op_selection, "Show DaphneIR after selecting physical operators"),
+            clEnumVal(type_adaptation, "Show DaphneIR after adapting types to available kernels"),
+            clEnumVal(vectorized, "Show DaphneIR after vectorization"),
+            clEnumVal(obj_ref_mgnt, "Show DaphneIR after managing object references"),
+            clEnumVal(kernels, "Show DaphneIR after kernel lowering"),
+            clEnumVal(mlir_codegen, "Show DaphneIR after MLIR codegen"),
+            clEnumVal(mlir_codegen_sparsity_exploiting_op_fusion,
+                      "Show DaphneIR after MLIR codegen (sparsity-exploiting operator fusion)"),
+            clEnumVal(mlir_codegen_daphneir_to_mlir, "Show DaphneIR after MLIR codegen (DaphneIR to MLIR)"),
+            clEnumVal(mlir_codegen_mlir_specific, "Show DaphneIR after MLIR codegen (MLIR-specific)"),
+            clEnumVal(llvm, "Show DaphneIR after llvm lowering")),
         CommaSeparated);
 
     static llvm::cl::list<string> scriptArgs1("args", cat(daphneOptions),
@@ -479,6 +487,15 @@ int startDAPHNE(int argc, const char **argv, DaphneLibResult *daphneLibRes, int
         case mlir_codegen:
             user_config.explain_mlir_codegen = true;
             break;
+        case mlir_codegen_sparsity_exploiting_op_fusion:
+            user_config.explain_mlir_codegen_sparsity_exploiting_op_fusion = true;
+            break;
+        case mlir_codegen_daphneir_to_mlir:
+            user_config.explain_mlir_codegen_daphneir_to_mlir = true;
+            break;
+        case mlir_codegen_mlir_specific:
+            user_config.explain_mlir_codegen_mlir_specific = true;
+            break;
         }
     }
 

diff --git a/src/compiler/execution/DaphneIrExecutor.cpp b/src/compiler/execution/DaphneIrExecutor.cpp
@@ -262,6 +262,14 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
         pm.addPass(mlir::daphne::createPrintIRPass("IR before codegen pipeline"));
 
     pm.addPass(mlir::daphne::createDaphneOptPass());
+
+    pm.addPass(mlir::daphne::createSparsityExploitationPass());
+    // SparseExploit fuses multiple operations which only need to be lowered if still needed elsewhere.
+    // Todo: if possible, run only if SparseExploitLowering was successful.
+    pm.addPass(mlir::createCanonicalizerPass());
+    if (userConfig_.explain_mlir_codegen_sparsity_exploiting_op_fusion)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after MLIR codegen (sparsity-exploiting operator fusion):"));
+
     pm.addPass(mlir::daphne::createEwOpLoweringPass());
     pm.addPass(mlir::daphne::createAggAllOpLoweringPass());
     pm.addPass(mlir::daphne::createAggDimOpLoweringPass());
@@ -287,6 +295,9 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
             pm.addPass(mlir::daphne::createPrintIRPass("IR directly after lowering MatMulOp."));
     }
 
+    if (userConfig_.explain_mlir_codegen_daphneir_to_mlir)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after MLIR codegen (DaphneIR to MLIR):"));
+
     pm.addPass(mlir::createConvertMathToLLVMPass());
     pm.addPass(mlir::daphne::createModOpLoweringPass());
     pm.addPass(mlir::createCanonicalizerPass());
@@ -307,4 +318,6 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
 
     if (userConfig_.explain_mlir_codegen)
         pm.addPass(mlir::daphne::createPrintIRPass("IR after codegen pipeline"));
+    if (userConfig_.explain_mlir_codegen_mlir_specific)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after MLIR codegen (MLIR-specific):"));
 }
diff --git a/src/compiler/inference/SelectMatrixRepresentationsPass.cpp b/src/compiler/inference/SelectMatrixRepresentationsPass.cpp
@@ -23,7 +23,6 @@
 #include <mlir/Pass/Pass.h>
 
 #include <memory>
-#include <stdexcept>
 
 using namespace mlir;
 

diff --git a/src/compiler/lowering/AggAllOpLowering.cpp b/src/compiler/lowering/AggAllOpLowering.cpp
@@ -18,6 +18,7 @@
 #include <utility>
 
 #include "compiler/utils/LoweringUtils.h"
+#include <util/ErrorHandler.h>
 
 #include "ir/daphneir/Daphne.h"
 #include "ir/daphneir/Passes.h"
@@ -90,8 +91,9 @@ class AggAllOpLowering : public OpConversionPattern<AggOp> {
         ssize_t numCols = matrixType.getNumCols();
 
         if (numRows < 0 || numCols < 0) {
-            return rewriter.notifyMatchFailure(
-                op, "aggAllOp codegen currently only works with matrix dimensions that are known at compile time");
+            throw ErrorHandler::compilerError(
+                loc, "AggAllOpLowering",
+                "aggAllOp codegen currently only works with matrix dimensions that are known at compile time");
         }
 
         Type matrixElementType = matrixType.getElementType();

diff --git a/src/compiler/lowering/AggDimOpLowering.cpp b/src/compiler/lowering/AggDimOpLowering.cpp
@@ -18,6 +18,8 @@
 #include <utility>
 
 #include "compiler/utils/LoweringUtils.h"
+#include <util/ErrorHandler.h>
+
 #include "ir/daphneir/Daphne.h"
 #include "ir/daphneir/Passes.h"
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
@@ -100,8 +102,9 @@ class AggDimOpLowering : public OpConversionPattern<AggOp> {
         ssize_t numCols = matrixType.getNumCols();
 
         if (numRows < 0 || numCols < 0) {
-            return rewriter.notifyMatchFailure(
-                op, "aggDimOp codegen currently only works with matrix dimensions that are known at compile time");
+            throw ErrorHandler::compilerError(
+                loc, "AggDimOpLowering",
+                "aggDimOp codegen currently only works with matrix dimensions that are known at compile time");
         }
 
         Type matrixElementType = matrixType.getElementType();
@@ -236,8 +239,9 @@ class AggDimIdxOpLowering : public OpConversionPattern<AggOp> {
         ssize_t numCols = matrixType.getNumCols();
 
         if (numRows < 0 || numCols < 0) {
-            return rewriter.notifyMatchFailure(
-                op, "aggDimOp codegen currently only works with matrix dimensions that are known at compile time");
+            throw ErrorHandler::compilerError(
+                loc, "AggDimOpLowering",
+                "aggDimOp codegen currently only works with matrix dimensions that are known at compile time");
         }
 
         Type matrixElementType = matrixType.getElementType();

diff --git a/src/compiler/lowering/CMakeLists.txt b/src/compiler/lowering/CMakeLists.txt
@@ -34,6 +34,7 @@ add_mlir_dialect_library(MLIRDaphneTransforms
     AggAllOpLowering.cpp
     AggDimOpLowering.cpp
     TransposeOpLowering.cpp
+    SparsityExploitationPass.cpp
 
     SliceRowOpLowering.cpp
     SliceColOpLowering.cpp
-Original file line number
+Diff line change
@@ Expand Up / @@ -40,6 +40,8 @@ print(utils.x); @@
         }
     ```
+    NOTE: To use a user config, the path to the JSON file must be passed as a CLI argument to the DAPHNE binary: `daphne --config=<filename>`.
     NOTE: `default_dirs` can hold many paths and it will look for the **one** specified file in each, whereas any other library names have a list consisting of **one** directory,  from which **all** files will be imported (can be easily extended to multiple directories).
     Example:
@@ Expand Down @@