diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b1208e667..e267a64bf 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -47,8 +47,7 @@ jobs:
     - name: Testing
       run: |
         mkdir --parents src/api/python/tmp
-        PYTHONPATH="$PYTHONPATH:$PWD/src/" bin/run_tests
-  
+        LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH PATH=$PWD/bin:/usr/lib/llvm-10/bin:$PATH PYTHONPATH="$PYTHONPATH:$PWD/src/:/usr/lib/llvm-10/build/utils/lit/" bin/run_tests
 
     - name: "List generated files"
       run: |
@@ -64,4 +63,4 @@ jobs:
         name: daphne
         path: |
           bin/
-          lib/
\ No newline at end of file
+          lib/
diff --git a/.gitignore b/.gitignore
index ca9e5dec4..7110d9658 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ build_*/
 /lib
 /tmp
 
+# runtime dump
+**/*.ll
+
 # documentation build output
 doc_build/
 
@@ -25,7 +28,26 @@ __pycache__/
 .idea/
 .clion.source.upload.marker
 
+# local test/dev scripts
 tmpdaphne.daphne
+*.daphne
+*.mlir
+*.log
+
+# tags file
+tags
+tags.lock
+tags.temp
+
+# clangd cache
+.cache/
+
+# gdb
+.gdb_history
+
+# compile commands
+compile_commands.json
+
 
 # release scripts output
 /artifacts
@@ -36,3 +58,7 @@ profiler/
 precompiled-dependencies/
 /cmake*/
 /data
+
+# Allow .daphne and .mlir files in test/
+!test/**/*.mlir
+!test/**/*.daphne
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d4940066..51c895637 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,4 +182,5 @@ add_subdirectory(src/util)
 
 add_dependencies(CompilerUtils MLIRDaphneTransformsIncGen)
 
+add_subdirectory(daphne-opt)
 add_subdirectory(test)
diff --git a/UserConfig.json b/UserConfig.json
index 8f8e73075..5443be282 100644
--- a/UserConfig.json
+++ b/UserConfig.json
@@ -3,6 +3,7 @@
     "use_vectorized_exec": false,
     "use_obj_ref_mgnt": true,
     "cuda_fuse_any": false,
+    "use_mlir_codegen": false,
     "vectorized_single_queue": false,
     "debug_llvm": false,
     "explain_kernels": false,
@@ -14,6 +15,7 @@
     "explain_type_adaptation": false,
     "explain_vectorized": false,
     "explain_obj_ref_mgnt": false,
+    "explain_mlir_codegen": false,
     "taskPartitioningScheme": "STATIC",
     "numberOfThreads": -1,
     "minimumTaskSize": 1,
diff --git a/containers/daphne.Dockerfile b/containers/daphne.Dockerfile
index 28b138aa4..96ac02124 100644
--- a/containers/daphne.Dockerfile
+++ b/containers/daphne.Dockerfile
@@ -64,7 +64,7 @@ LABEL "org.opencontainers.image.version"="$TIMESTAMP"
 LABEL "org.opencontainers.image.created"="${CREATION_DATE}"
 LABEL "org.opencontainers.image.revision"="${GIT_HASH}"
 RUN apt-get -qq -y update && apt-get -y upgrade && apt-get -y --no-install-recommends install  \
-    libtinfo6 libssl1.1 zlib1g python3-numpy python3-pandas \
+    libtinfo6 libssl1.1 zlib1g python3-numpy python3-pandas\
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 COPY --from=daphne-build $DAPHNE_DIR/bin/* /usr/local/bin
 COPY --from=daphne-build $DAPHNE_DIR/lib/* /usr/local/lib
diff --git a/daphne-opt/CMakeLists.txt b/daphne-opt/CMakeLists.txt
new file mode 100644
index 000000000..b89da923a
--- /dev/null
+++ b/daphne-opt/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Copyright 2023 The DAPHNE Consortium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+set(LIBS
+        ${dialect_libs}
+        ${conversion_libs}
+        # Libraries named explicitly, in addition to the two property lists above.
+        MLIRDaphne
+        MLIRAnalysis
+        MLIRCallInterfaces
+        MLIRCastInterfaces
+        MLIRExecutionEngine
+        MLIRIR
+        # MLIRLLVMCommonConversion
+        MLIRLLVMToLLVMIRTranslation
+        # MLIRMemRefDialect
+        # MLIRLLVMDialect
+        MLIRParser
+        MLIRPass
+        MLIRSideEffectInterfaces
+        MLIRSupport
+        MLIRTargetLLVMIRExport
+        MLIRTransforms
+        MLIROptLib
+        )
+add_llvm_executable(daphne-opt daphne-opt.cpp)
+set_target_properties(daphne-opt PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
+# The binary is emitted into <project>/bin (see above); now set flags and link LIBS.
+llvm_update_compile_flags(daphne-opt)
+target_link_libraries(daphne-opt PRIVATE ${LIBS})
+
+mlir_check_all_link_libraries(daphne-opt)
diff --git a/daphne-opt/daphne-opt.cpp b/daphne-opt/daphne-opt.cpp
new file mode 100644
index 000000000..380f0b5cf
--- /dev/null
+++ b/daphne-opt/daphne-opt.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "daphne-opt.h"
+
+#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
+
+#include "ir/daphneir/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/InitAllPasses.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Tools/mlir-opt/MlirOptMain.h"
+
+int main(int argc, char **argv) {
+    mlir::registerAllPasses();
+    // Register the DAPHNE-specific passes in addition to the upstream MLIR ones.
+    mlir::daphne::registerDaphnePasses();
+
+    mlir::DialectRegistry registry;
+    registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect,
+                    mlir::func::FuncDialect, mlir::scf::SCFDialect,
+                    mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                    mlir::memref::MemRefDialect, mlir::linalg::LinalgDialect,
+                    mlir::math::MathDialect>();
+    // Only dialects that are *parsed* from the tool's input need to be
+    // registered above, not those that are merely generated by passes. To make
+    // *all* MLIR core dialects available instead, enable the following line:
+    // registerAllDialects(registry);
+
+    return mlir::asMainReturnCode(mlir::MlirOptMain(
+        argc, argv, "Standalone DAPHNE optimizing compiler driver\n",
+        registry));
+}
diff --git a/daphne-opt/daphne-opt.h b/daphne-opt/daphne-opt.h
new file mode 100644
index 000000000..3b0f77bea
--- /dev/null
+++ b/daphne-opt/daphne-opt.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DAPHNEOPT_DAPHNEOP_H
+#define DAPHNEOPT_DAPHNEOP_H
+
+#include "mlir/IR/Dialect.h"
+
+#include "ir/daphneir/Daphne.h"
+
+#endif // DAPHNEOPT_DAPHNEOP_H
diff --git a/doc/Codegen.md b/doc/Codegen.md
new file mode 100644
index 000000000..8d690c1bc
--- /dev/null
+++ b/doc/Codegen.md
@@ -0,0 +1,100 @@
+# Code Generation with MLIR
+
+This document describes the process of directly generating code with the MLIR
+framework.
+
+## Motivation
+
+DAPHNE provides a kernel for (almost) every DaphneIR operation; these kernels reside in
+`src/runtime/local/kernels/`. These are precompiled as a shared library and
+linked during compile-time. Even though these kernels can be highly optimized
+and thus achieve great runtime characteristics, they may not provide a desired
+level of extensibility for custom value types. They may also be lacking
+information only available at compile-time that could enable further
+optimizations. Additionally, through the process of progressively lowering the
+input IR, the code generation pipeline may enable more optimization
+possibilities such as operator or loop fusion.
+
+
+As an alternative way to implement our operators we provide the code generation
+pipeline which progressively lowers the DaphneIR available after parsing the
+DaphneDSL script to operations in either the same dialect or operations from
+other dialects. With that, we can optionally replace certain kernels by
+generating code directly, and also perform a hybrid compilation approach where
+we mix kernel calls with code generation in order to exploit advantages of
+both, precompiled kernel libraries and code generation. Code generation passes
+are found in `src/compiler/lowering/`.
+
+
+## Guidelines
+
+Currently, the code generation pipeline is enabled with the CLI flag
+`--mlir-codegen`. This adds the following passes that perform transformations and
+lowerings:
+
+- [DenseMatrixOptPass](../src/compiler/lowering/DaphneOptPass.cpp)
+- [MatMulOpLoweringPass](../src/compiler/lowering/MatMulOpLowering.cpp)
+- [AggAllLoweringPass](../src/compiler/lowering/AggAllOpLowering.cpp)
+- [MapOpLoweringPass](../src/compiler/lowering/MapOpLowering.cpp)
+- InlinerPass
+- [LowerEwOpPass](../src/compiler/lowering/EwOpsLowering.cpp)
+- ConvertMathToLLVMPass
+- [ModOpLoweringPass](../src/compiler/lowering/ModOpLowering.cpp)
+- Canonicalizer
+- CSE
+- LoopFusion
+- AffineScalarReplacement
+- LowerAffinePass
+
+These passes are added in the `DaphneIrExecutor::buildCodegenPipeline`
+function. The `--mlir-hybrid-codegen` flag disables the `MatMulOpLoweringPass` since the
+kernel implementation vastly outperforms the generated code of this pass.
+
+
+#### Runtime Interoperability
+
+Runtime interoperability with the `DenseMatrix` object is achieved with two
+kernels in `src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h` and
+`src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h` and the corresponding
+DaphneOps `Daphne_ConvertMemRefToDenseMatrix` and
+`Daphne_ConvertDenseMatrixToMemRef`. These kernels define how a MemRef is
+passed to a kernel and how a kernel can return a `StridedMemRefType`.
+
+
+#### Debugging
+
+In order to enable our debug `PrintIRPass` pass, one has to add
+`--explain mlir_codegen` when running `daphne`. Additionally, it is recommended to use the
+`daphne-opt` tool to test passes in isolation. One just has to provide the
+input IR for a pass to `daphne-opt` and the correct flag to run the pass (or
+multiple passes) on the IR. `daphne-opt` provides all the functionality of the
+`mlir-opt` tool.
+
+`daphne-opt --lower-ew --debug-only=dialect-conversion ew.mlir` performs the
+`LowerEwOpPass` on the input file `ew.mlir` while providing dialect conversion
+debug information.
+
+
+
+#### Testing
+
+To test the generated code, there currently are two different approaches.
+
+End-to-end tests can be found under `test/api/cli/codegen/` and are part of the
+existing Catch2 test-suite with its own tag, `TAG_CODEGEN`.
+
+Additionally, there are tests that check the generated IR by running the
+`llvm-lit`, `daphne-opt`, and `FileCheck` utilities. These tests reside under
+`test/compiler/lowering/`. They are `.mlir` files containing the input IR of a
+certain pass, or pass pipeline, and the `llvm-lit` directive at the top of the
+file (`RUN:`). In that line we specify how `llvm-lit` executes the test, e.g.,
+`// RUN: daphne-opt --lower-ew %s | FileCheck %s` means that `daphne-opt` is
+called with the `--lower-ew` flag and the current file as input; its output is
+piped to `FileCheck`, which also receives the file itself. `FileCheck` uses
+the comments in the `.mlir` file to check for certain conditions, e.g., `//
+CHECK-NOT: daphne.ewAdd` looks through the IR and fails if `daphne.ewAdd` can be
+found. These `llvm-lit` tests are all run by the `codegen` testcase in
+`test/codegen/Codegen.cpp`.
+
+
+All codegen tests can be executed by running `bin/run_tests '[codegen]'`.
diff --git a/doc/GettingStarted.md b/doc/GettingStarted.md
index 98d02b0f0..20d072d05 100644
--- a/doc/GettingStarted.md
+++ b/doc/GettingStarted.md
@@ -42,29 +42,30 @@ launching DAPHNE via Docker (see below) should work the same way as in a native
 
 ### Software
 
-| tool/lib                             | version known to work (*) | comment                                                                                                                                 |
-|--------------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------|
-| GCC/G++                              | 9.3.0                     | Last checked version: 12.2                                                                                                              |
-| clang                                | 10.0.0                    |                                                                                                                                         |
-| cmake                                | 3.20                      | On Ubuntu 20.04, install by `sudo snap install cmake --classic` to fulfill the version requirement; `apt` provides only version 3.16.3. |
-| git                                  | 2.25.1                    |                                                                                                                                         |
-| libssl-dev                           | 1.1.1                     | Dependency introduced while optimizing grpc build (which used to build ssl unnecessarily)                                               |
-| libpfm4-dev                          | 4.10                      | This dependency is needed for profiling support [DAPHNE-#479]                                                                           |
-| lld                                  | 10.0.0                    |                                                                                                                                         |
-| ninja                                | 1.10.0                    |                                                                                                                                         |
-| pkg-config                           | 0.29.1                    |                                                                                                                                         |
-| python3                              | 3.8.5                     |                                                                                                                                         |
-| numpy                                | 1.19.5                    |                                                                                                                                         |
-| pandas                               | 0.25.3                    |                                                                                                                                         |
-| java (e.g. openjdk)                  | 11 (1.7 should be fine)   |                                                                                                                                         |
-| gfortran                             | 9.3.0                     |                                                                                                                                         |
-| uuid-dev                             |                           |                                                                                                                                         |
-| wget                                 |                           | Used to fetch additional dependencies and other artefacts                                                                               |
-| jq                                   |                           | json commandline processor used in docker image generation scripts                                                                      |
-| ***                                  | ***                       | ***                                                                                                                                     |
-| CUDA SDK                             | 11.7.1                    | Optional for CUDA ops                                                                                                                   |
-| OneAPI SDK                           | 2022.x                    | Optional for OneAPI ops                                                                                                                 |
-| Intel FPGA SDK or OneAPI FPGA Add-On | 2022.x                    | Optional for FPGAOPENCL ops                                                                                                             |
+| tool/lib                             | version known to work (*)    | comment                                                                                                                                 |
+|--------------------------------------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------|
+| GCC/G++                              | 9.3.0                        | Last checked version: 12.2                                                                                                              |
+| clang                                | 10.0.0                       |                                                                                                                                         |
+| cmake                                | 3.20                         | On Ubuntu 20.04, install by `sudo snap install cmake --classic` to fulfill the version requirement; `apt` provides only version 3.16.3. |
+| git                                  | 2.25.1                       |                                                                                                                                         |
+| libssl-dev                           | 1.1.1                        | Dependency introduced while optimizing grpc build (which used to build ssl unnecessarily)                                               |
+| libpfm4-dev                          | 4.10                         | This dependency is needed for profiling support [DAPHNE-#479]                                                                           |
+| lld                                  | 10.0.0                       |                                                                                                                                         |
+| ninja                                | 1.10.0                       |                                                                                                                                         |
+| pkg-config                           | 0.29.1                       |                                                                                                                                         |
+| python3                              | 3.8.5                        |                                                                                                                                         |
+| numpy                                | 1.19.5                       |                                                                                                                                         |
+| pandas                               | 0.25.3                       |                                                                                                                                         |
+| java (e.g. openjdk)                  | 11 (1.7 should be fine)      |                                                                                                                                         |
+| gfortran                             | 9.3.0                        |                                                                                                                                         |
+| uuid-dev                             |                              |                                                                                                                                         |
+| llvm-10-tools                        | 10, 15                       | On Ubuntu 22.04 you may need to install a newer `llvm-*-tools` version, such as `llvm-15-tools`.                                        |
+| wget                                 |                              | Used to fetch additional dependencies and other artefacts                                                                               |
+| jq                                   |                              | json commandline processor used in docker image generation scripts                                                                      |
+| ***                                  | ***                          | ***                                                                                                                                     |
+| CUDA SDK                             | 11.7.1                       | Optional for CUDA ops                                                                                                                   |
+| OneAPI SDK                           | 2022.x                       | Optional for OneAPI ops                                                                                                                 |
+| Intel FPGA SDK or OneAPI FPGA Add-On | 2022.x                       | Optional for FPGAOPENCL ops                                                                                                             |
 
 ### Hardware
 
diff --git a/install-ubuntu-packages.sh b/install-ubuntu-packages.sh
index 8281493d9..6644005db 100644
--- a/install-ubuntu-packages.sh
+++ b/install-ubuntu-packages.sh
@@ -15,5 +15,6 @@
 # limitations under the License.
 
 # This is a convenience script to install the required packages on Ubuntu 20+ systems to compile DAPHNE
+# On Ubuntu 22.04 you may need to change the version of llvm-10-tools to a newer one, such as llvm-15-tools.
 sudo apt install build-essential clang cmake git libssl-dev libpfm4-dev lld ninja-build pkg-config python3-numpy \
- python3-pandas default-jdk-headless gfortran uuid-dev wget unzip jq
+ python3-pandas default-jdk-headless gfortran uuid-dev wget unzip jq llvm-10-tools
diff --git a/src/api/cli/DaphneUserConfig.h b/src/api/cli/DaphneUserConfig.h
index 92a5e6b23..3b7a2de93 100644
--- a/src/api/cli/DaphneUserConfig.h
+++ b/src/api/cli/DaphneUserConfig.h
@@ -42,6 +42,8 @@ struct DaphneUserConfig {
     bool use_obj_ref_mgnt = true;
     bool use_ipa_const_propa = true;
     bool use_phy_op_selection = true;
+    bool use_mlir_codegen = false;
+    bool use_mlir_hybrid_codegen = false;
     bool cuda_fuse_any = false;
     bool vectorized_single_queue = false;
     bool prePartitionRows = false;
@@ -63,6 +65,8 @@ struct DaphneUserConfig {
     bool explain_type_adaptation = false;
     bool explain_vectorized = false;
     bool explain_obj_ref_mgnt = false;
+    bool explain_mlir_codegen = false;
+
     SelfSchedulingScheme taskPartitioningScheme = STATIC;
     QueueTypeOption queueSetupScheme = CENTRALIZED;
 	VictimSelectionLogic victimSelection = SEQPRI;
diff --git a/src/api/internal/daphne_internal.cpp b/src/api/internal/daphne_internal.cpp
index 138012c79..5ba81007c 100644
--- a/src/api/internal/daphne_internal.cpp
+++ b/src/api/internal/daphne_internal.cpp
@@ -256,6 +256,14 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
             "libdir", cat(daphneOptions),
             desc("The directory containing kernel libraries")
     );
+    static opt<bool> mlirCodegen(
+        "mlir-codegen", cat(daphneOptions),
+        desc("Enables lowering of certain DaphneIR operations on DenseMatrix to low-level MLIR operations.")
+    );
+    static opt<bool> performHybridCodegen(
+        "mlir-hybrid-codegen", cat(daphneOptions),
+        desc("Enables prototypical hybrid code generation combining pre-compiled kernels and MLIR code generation.")
+    );
 
     enum ExplainArgs {
       kernels,
@@ -268,7 +276,8 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
       phy_op_selection,
       type_adaptation,
       vectorized,
-      obj_ref_mgnt
+      obj_ref_mgnt,
+      mlir_codegen
     };
 
     static llvm::cl::list<ExplainArgs> explainArgList(
@@ -286,7 +295,8 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
             clEnumVal(vectorized, "Show DaphneIR after vectorization"),
             clEnumVal(obj_ref_mgnt, "Show DaphneIR after managing object references"),
             clEnumVal(kernels, "Show DaphneIR after kernel lowering"),
-            clEnumVal(llvm, "Show DaphneIR after llvm lowering")),
+            clEnumVal(llvm, "Show DaphneIR after llvm lowering"),
+            clEnumVal(mlir_codegen, "Show DaphneIR after MLIR codegen")),
         CommaSeparated);
 
     static llvm::cl::list<string> scriptArgs1(
@@ -367,6 +377,9 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
     user_config.use_obj_ref_mgnt = !noObjRefMgnt;
     user_config.use_ipa_const_propa = !noIPAConstPropa;
     user_config.use_phy_op_selection = !noPhyOpSelection;
+    user_config.use_mlir_codegen = mlirCodegen;
+    user_config.use_mlir_hybrid_codegen = performHybridCodegen;
+
     if(!libDir.getValue().empty())
         user_config.libdir = libDir.getValue();
     user_config.library_paths.push_back(user_config.libdir + "/libAllKernels.so");
@@ -428,6 +441,9 @@ int startDAPHNE(int argc, const char** argv, DaphneLibResult* daphneLibRes, int
             case obj_ref_mgnt:
                 user_config.explain_obj_ref_mgnt = true;
                 break;
+            case mlir_codegen:
+                user_config.explain_mlir_codegen = true;
+                break;
         }
     }
 
diff --git a/src/compiler/execution/DaphneIrExecutor.cpp b/src/compiler/execution/DaphneIrExecutor.cpp
index 2376ad20b..1c5ab19f5 100644
--- a/src/compiler/execution/DaphneIrExecutor.cpp
+++ b/src/compiler/execution/DaphneIrExecutor.cpp
@@ -14,234 +14,310 @@
  *  limitations under the License.
  */
 
+#include "DaphneIrExecutor.h"
+
 #include <ir/daphneir/Daphne.h>
 #include <ir/daphneir/Passes.h>
-#include "DaphneIrExecutor.h"
+#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
+#include <mlir/Dialect/LLVMIR/Transforms/Passes.h>
+
+#include <filesystem>
+#include <memory>
+#include <utility>
 
 #include "llvm/Support/TargetSelect.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
 #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/ExecutionEngine/ExecutionEngine.h"
 #include "mlir/ExecutionEngine/OptUtils.h"
 #include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Transforms/Passes.h"
-#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
-#include <mlir/Dialect/LLVMIR/Transforms/Passes.h>
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Transforms/Passes.h"
 
-#include <filesystem>
-#include <memory>
-#include <utility>
-
-DaphneIrExecutor::DaphneIrExecutor(bool selectMatrixRepresentations, DaphneUserConfig cfg) : userConfig_(std::move(cfg)),
-        selectMatrixRepresentations_(selectMatrixRepresentations) {
+DaphneIrExecutor::DaphneIrExecutor(bool selectMatrixRepresentations,
+                                   DaphneUserConfig cfg)
+    : userConfig_(std::move(cfg)),
+      selectMatrixRepresentations_(selectMatrixRepresentations) {
     // register loggers
-    if(userConfig_.log_ptr != nullptr)
-        userConfig_.log_ptr->registerLoggers();
+    if (userConfig_.log_ptr != nullptr) userConfig_.log_ptr->registerLoggers();
 
     context_.getOrLoadDialect<mlir::daphne::DaphneDialect>();
     context_.getOrLoadDialect<mlir::arith::ArithDialect>();
     context_.getOrLoadDialect<mlir::func::FuncDialect>();
     context_.getOrLoadDialect<mlir::scf::SCFDialect>();
     context_.getOrLoadDialect<mlir::LLVM::LLVMDialect>();
+    context_.getOrLoadDialect<mlir::AffineDialect>();
+    context_.getOrLoadDialect<mlir::memref::MemRefDialect>();
+    context_.getOrLoadDialect<mlir::linalg::LinalgDialect>();
+    context_.getOrLoadDialect<mlir::math::MathDialect>();
 
     llvm::InitializeNativeTarget();
     llvm::InitializeNativeTargetAsmPrinter();
 }
 
-bool DaphneIrExecutor::runPasses(mlir::ModuleOp module)
-{
-    // FIXME: operations in `template` functions (functions with unknown inputs) can't be verified
+bool DaphneIrExecutor::runPasses(mlir::ModuleOp module) {
+    // FIXME: operations in `template` functions (functions with unknown inputs)
+    // can't be verified
     //  as their type constraints are not met.
-    //if (failed(mlir::verify(module))) {
-        //module->emitError("failed to verify the module right after parsing");
-        //return false;
+    // if (failed(mlir::verify(module))) {
+    // module->emitError("failed to verify the module right after parsing");
+    // return false;
     //}
 
-    if (module) {
-        // This flag is really useful to figure out why the lowering failed
-        llvm::DebugFlag = userConfig_.debug_llvm;
-        {
-            mlir::PassManager pm(&context_);
-            // TODO Enable the verifier for all passes where it is possible.
-            // Originally, it was only turned off for the SpecializeGenericFunctionsPass.
-            pm.enableVerifier(false);
-
-            if(userConfig_.explain_parsing)
-                pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing:"));
-
-            pm.addPass(mlir::createCanonicalizerPass());
-            pm.addPass(mlir::createCSEPass());
-            if(userConfig_.explain_parsing_simplified)
-                pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing and some simplifications:"));
-
-            pm.addPass(mlir::daphne::createRewriteSqlOpPass()); // calls SQL Parser
-            if(userConfig_.explain_sql)
-                pm.addPass(mlir::daphne::createPrintIRPass("IR after SQL parsing:"));
-
-            pm.addPass(mlir::daphne::createSpecializeGenericFunctionsPass(userConfig_));
-            if(userConfig_.explain_property_inference)
-                pm.addPass(mlir::daphne::createPrintIRPass("IR after inference:"));
-
-            if(failed(pm.run(module))) {
-                module->dump();
-                module->emitError("module pass error");
-                return false;
-            }
-        }
+    if (!module) return false;
+
+    // This flag is really useful to figure out why the lowering failed
+    llvm::DebugFlag = userConfig_.debug_llvm;
+    {
         mlir::PassManager pm(&context_);
+        // TODO Enable the verifier for all passes where it is possible.
+        // Originally, it was only turned off for the
+        // SpecializeGenericFunctionsPass.
+        pm.enableVerifier(false);
+
+        if (userConfig_.explain_parsing)
+            pm.addPass(mlir::daphne::createPrintIRPass("IR after parsing:"));
 
-        // Note that property inference and canonicalization have already been done
-        // in the SpecializeGenericFunctionsPass, so actually, it's not necessary
-        // here anymore.
-        // TODO There is a cyclic dependency between (shape) inference and
-        // constant folding (included in canonicalization), at the moment we
-        // run only three iterations of both passes (see #173).
-        pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createInferencePass());
         pm.addPass(mlir::createCanonicalizerPass());
+        pm.addPass(mlir::createCSEPass());
+        if (userConfig_.explain_parsing_simplified)
+            pm.addPass(mlir::daphne::createPrintIRPass(
+                "IR after parsing and some simplifications:"));
 
-        if(selectMatrixRepresentations_)
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createSelectMatrixRepresentationsPass());
-        if(userConfig_.explain_select_matrix_repr)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after selecting matrix representations:"));
+        pm.addPass(mlir::daphne::createRewriteSqlOpPass());  // calls SQL Parser
+        if (userConfig_.explain_sql)
+            pm.addPass(
+                mlir::daphne::createPrintIRPass("IR after SQL parsing:"));
 
-        if(userConfig_.use_phy_op_selection) {
-            pm.addPass(mlir::daphne::createPhyOperatorSelectionPass());
-            pm.addPass(mlir::createCSEPass());
+        pm.addPass(
+            mlir::daphne::createSpecializeGenericFunctionsPass(userConfig_));
+        if (userConfig_.explain_property_inference)
+            pm.addPass(mlir::daphne::createPrintIRPass("IR after inference:"));
+
+        if (failed(pm.run(module))) {
+            module->dump();
+            module->emitError("module pass error");
+            return false;
         }
-        if(userConfig_.explain_phy_op_selection)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after selecting physical operators:"));
+    }
 
-        pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createAdaptTypesToKernelsPass());
-        if(userConfig_.explain_type_adaptation)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after type adaptation:"));
+    mlir::PassManager pm(&context_);
+    // Note that property inference and canonicalization have already been done
+    // in the SpecializeGenericFunctionsPass, so actually, it's not necessary
+    // here anymore.
+
+    // TODO There is a cyclic dependency between (shape) inference and
+    // constant folding (included in canonicalization), at the moment we
+    // run only three iterations of both passes (see #173).
+    pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createInferencePass());
+    pm.addPass(mlir::createCanonicalizerPass());
+
+    if (selectMatrixRepresentations_)
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createSelectMatrixRepresentationsPass());
+    if (userConfig_.explain_select_matrix_repr)
+        pm.addPass(mlir::daphne::createPrintIRPass(
+            "IR after selecting matrix representations:"));
+
+    if (userConfig_.use_phy_op_selection) {
+        pm.addPass(mlir::daphne::createPhyOperatorSelectionPass());
+        pm.addPass(mlir::createCSEPass());
+    }
+    if (userConfig_.explain_phy_op_selection)
+        pm.addPass(mlir::daphne::createPrintIRPass(
+            "IR after selecting physical operators:"));
+
+    pm.addNestedPass<mlir::func::FuncOp>(
+        mlir::daphne::createAdaptTypesToKernelsPass());
+    if (userConfig_.explain_type_adaptation)
+        pm.addPass(
+            mlir::daphne::createPrintIRPass("IR after type adaptation:"));
 
 #if 0
-        if (userConfig_.use_distributed) {
-            pm.addPass(mlir::daphne::createDistributeComputationsPass());
-            //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution:"));
-            pm.addPass(mlir::createCSEPass());
-            //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - CSE:"));
-            pm.addPass(mlir::createCanonicalizerPass());
-            //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - canonicalization:"));
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createWhileLoopInvariantCodeMotionPass());
-            //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - WhileLICM:"));
-        }
+    if (userConfig_.use_distributed) {
+        pm.addPass(mlir::daphne::createDistributeComputationsPass());
+        //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution"));
+        pm.addPass(mlir::createCSEPass());
+        //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - CSE"));
+        pm.addPass(mlir::createCanonicalizerPass());
+        //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - canonicalization"));
+        pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createWhileLoopInvariantCodeMotionPass());
+        //pm.addPass(mlir::daphne::createPrintIRPass("IR after distribution - WhileLICM"));
+    }
 #endif
-        
-        // For now, in order to use the distributed runtime we also require the vectorized engine to be enabled
-        // to create pipelines. Therefore, *if* distributed runtime is enabled, we need to make a vectorization pass.
-        if(userConfig_.use_vectorized_exec || userConfig_.use_distributed) {
-            // TODO: add inference here if we have rewrites that could apply to vectorized pipelines due to smaller sizes
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createVectorizeComputationsPass());
-            pm.addPass(mlir::createCanonicalizerPass());
-        }
-        if(userConfig_.explain_vectorized)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after vectorization:"));
-        
-        if (userConfig_.use_distributed)
-            pm.addPass(mlir::daphne::createDistributePipelinesPass());
 
-        if (userConfig_.enable_profiling)
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createProfilingPass());
+    // For now, in order to use the distributed runtime we also require the
+    // vectorized engine to be enabled to create pipelines. Therefore, *if*
+    // distributed runtime is enabled, we need to make a vectorization pass.
+    if (userConfig_.use_vectorized_exec || userConfig_.use_distributed) {
+        // TODO: add inference here if we have rewrites that could apply to
+        // vectorized pipelines due to smaller sizes
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createVectorizeComputationsPass());
+        pm.addPass(mlir::createCanonicalizerPass());
+    }
+    if (userConfig_.explain_vectorized)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after vectorization:"));
 
-        pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createInsertDaphneContextPass(userConfig_));
+    if (userConfig_.use_distributed)
+        pm.addPass(mlir::daphne::createDistributePipelinesPass());
+
+    if (userConfig_.use_mlir_codegen || userConfig_.use_mlir_hybrid_codegen) buildCodegenPipeline(pm);
+
+    if (userConfig_.enable_profiling)
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createProfilingPass());
+
+    pm.addNestedPass<mlir::func::FuncOp>(
+        mlir::daphne::createInsertDaphneContextPass(userConfig_));
 
 #ifdef USE_CUDA
-        if(userConfig_.use_cuda)
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createMarkCUDAOpsPass(userConfig_));
+    if (userConfig_.use_cuda)
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createMarkCUDAOpsPass(userConfig_));
 #endif
 
 #ifdef USE_FPGAOPENCL
-        if(userConfig_.use_fpgaopencl)
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createMarkFPGAOPENCLOpsPass(userConfig_));
+    if (userConfig_.use_fpgaopencl)
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createMarkFPGAOPENCLOpsPass(userConfig_));
 #endif
 
-        // Tidy up the IR before managing object reference counters with IncRefOp and DecRefOp.
-        // This is important, because otherwise, an SSA value whose references are managed could
-        // be cleared away by common subexpression elimination (CSE), while retaining its
-        // IncRefOps/DecRefOps, which could lead to double frees etc.
-        pm.addPass(mlir::createCanonicalizerPass());
-        pm.addPass(mlir::createCSEPass());
+    // Tidy up the IR before managing object reference counters with IncRefOp
+    // and DecRefOp. This is important, because otherwise, an SSA value whose
+    // references are managed could be cleared away by common subexpression
+    // elimination (CSE), while retaining its IncRefOps/DecRefOps, which could
+    // lead to double frees etc.
+    pm.addPass(mlir::createCanonicalizerPass());
+    pm.addPass(mlir::createCSEPass());
 
-        if(userConfig_.use_obj_ref_mgnt)
-            pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createManageObjRefsPass());
-        if(userConfig_.explain_obj_ref_mgnt)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after managing object references:"));
+    if (userConfig_.use_obj_ref_mgnt)
+        pm.addNestedPass<mlir::func::FuncOp>(
+            mlir::daphne::createManageObjRefsPass());
+    if (userConfig_.explain_obj_ref_mgnt)
+        pm.addPass(mlir::daphne::createPrintIRPass(
+            "IR after managing object references:"));
 
-        pm.addNestedPass<mlir::func::FuncOp>(mlir::daphne::createRewriteToCallKernelOpPass());
-        if(userConfig_.explain_kernels)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after kernel lowering:"));
+    pm.addNestedPass<mlir::func::FuncOp>(
+        mlir::daphne::createRewriteToCallKernelOpPass());
+    if (userConfig_.explain_kernels)
+        pm.addPass(
+            mlir::daphne::createPrintIRPass("IR after kernel lowering:"));
 
-        pm.addPass(mlir::createConvertSCFToCFPass());
-        pm.addNestedPass<mlir::func::FuncOp>(mlir::LLVM::createRequestCWrappersPass());
-        pm.addPass(mlir::daphne::createLowerToLLVMPass(userConfig_));
-        pm.addPass(mlir::createReconcileUnrealizedCastsPass());
-        if(userConfig_.explain_llvm)
-            pm.addPass(mlir::daphne::createPrintIRPass("IR after llvm lowering:"));
+    pm.addPass(mlir::createConvertSCFToCFPass());
+    pm.addNestedPass<mlir::func::FuncOp>(
+        mlir::LLVM::createRequestCWrappersPass());
+    pm.addPass(mlir::daphne::createLowerToLLVMPass(userConfig_));
+    pm.addPass(mlir::createReconcileUnrealizedCastsPass());
+    if (userConfig_.explain_llvm)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after llvm lowering:"));
 
-        if (failed(pm.run(module))) {
-            module->dump();
-            module->emitError("module pass error");
-            return false;
-        }
-        return true;
+    if (failed(pm.run(module))) {
+        module->dump();
+        module->emitError("module pass error");
+        return false;
     }
-    return false;
+
+    return true;
 }
 
-std::unique_ptr<mlir::ExecutionEngine> DaphneIrExecutor::createExecutionEngine(mlir::ModuleOp module)
-{
-    if (module) {
-        // An optimization pipeline to use within the execution engine.
-        auto optPipeline = mlir::makeOptimizingTransformer(0, 0, nullptr);
-        std::vector<llvm::StringRef> sharedLibRefs;
-        // This next line adds to our Linux platform lock-in
-        std::string daphne_executable_dir(std::filesystem::canonical("/proc/self/exe").parent_path());
-        if(userConfig_.libdir.empty()) {
-            sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libAllKernels.so"));
-            sharedLibRefs.emplace_back(sharedLibRefPaths.back());
-        }
-        else {
-            sharedLibRefs.insert(sharedLibRefs.end(), userConfig_.library_paths.begin(), userConfig_.library_paths.end());
-        }
+std::unique_ptr<mlir::ExecutionEngine> DaphneIrExecutor::createExecutionEngine(
+    mlir::ModuleOp module) {
+    if (!module) return nullptr;
+    // An optimization pipeline to use within the execution engine.
+    unsigned optLevel = 0;
+    unsigned sizeLevel = 0;
+    llvm::TargetMachine *targetMachine = nullptr;
+    auto optPipeline = mlir::makeOptimizingTransformer(optLevel, sizeLevel, targetMachine);
+    std::vector<llvm::StringRef> sharedLibRefs;
+    // This next line adds to our Linux platform lock-in
+    std::string daphne_executable_dir(
+        std::filesystem::canonical("/proc/self/exe").parent_path());
+    if (userConfig_.libdir.empty()) {
+        sharedLibRefPaths.push_back(
+            std::string(daphne_executable_dir + "/../lib/libAllKernels.so"));
+        sharedLibRefs.emplace_back(sharedLibRefPaths.back());
+    } else {
+        sharedLibRefs.insert(sharedLibRefs.end(),
+                             userConfig_.library_paths.begin(),
+                             userConfig_.library_paths.end());
+    }
 
 #ifdef USE_CUDA
-        if(userConfig_.use_cuda) {
-            sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libCUDAKernels.so"));
-            sharedLibRefs.emplace_back(sharedLibRefPaths.back());
-        }
+    if (userConfig_.use_cuda) {
+        sharedLibRefPaths.push_back(
+            std::string(daphne_executable_dir + "/../lib/libCUDAKernels.so"));
+        sharedLibRefs.emplace_back(sharedLibRefPaths.back());
+    }
 #endif
- 
+
 #ifdef USE_FPGAOPENCL
-        if(userConfig_.use_fpgaopencl) {
-            sharedLibRefPaths.push_back(std::string(daphne_executable_dir + "/../lib/libFPGAOPENCLKernels.so"));
-            sharedLibRefs.emplace_back(sharedLibRefPaths.back());
-        }
+    if (userConfig_.use_fpgaopencl) {
+        sharedLibRefPaths.push_back(std::string(
+            daphne_executable_dir + "/../lib/libFPGAOPENCLKernels.so"));
+        sharedLibRefs.emplace_back(sharedLibRefPaths.back());
+    }
 #endif
-        registerLLVMDialectTranslation(context_);
-        // module.dump();
-        mlir::ExecutionEngineOptions options;
-        options.llvmModuleBuilder = nullptr;
-        options.transformer = optPipeline;
-        options.jitCodeGenOptLevel = llvm::CodeGenOpt::Level::Default;
-        options.sharedLibPaths = llvm::ArrayRef<llvm::StringRef>(sharedLibRefs);
-        options.enableObjectDump = true;
-        options.enableGDBNotificationListener = true;
-        options.enablePerfNotificationListener = true;
-        auto maybeEngine = mlir::ExecutionEngine::create(module, options);
-
-        if (!maybeEngine) {
-            llvm::errs() << "Failed to create JIT-Execution engine: "
-                         << maybeEngine.takeError();
-            return nullptr;
-        }
-        return std::move(maybeEngine.get());
+    registerLLVMDialectTranslation(context_);
+    // module.dump();
+    mlir::ExecutionEngineOptions options;
+    options.llvmModuleBuilder = nullptr;
+    options.transformer = optPipeline;
+    options.jitCodeGenOptLevel = llvm::CodeGenOpt::Level::Default;
+    options.sharedLibPaths = llvm::ArrayRef<llvm::StringRef>(sharedLibRefs);
+    options.enableObjectDump = true;
+    options.enableGDBNotificationListener = true;
+    options.enablePerfNotificationListener = true;
+    auto maybeEngine = mlir::ExecutionEngine::create(module, options);
+
+    if (!maybeEngine) {
+        llvm::errs() << "Failed to create JIT-Execution engine: "
+                     << maybeEngine.takeError();
+        return nullptr;
     }
-    return nullptr;
+    return std::move(maybeEngine.get());
+}
+
+/// Appends the MLIR code generation passes to `pm`, in fixed order; when
+/// explain_mlir_codegen is set, an IR dump is added before and after them.
+void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
+    const bool dumpIr = userConfig_.explain_mlir_codegen;
+    if (dumpIr)
+        pm.addPass(
+            mlir::daphne::createPrintIRPass("IR before codegen pipeline"));
+    pm.addPass(mlir::daphne::createDaphneOptPass());
+    // The matmul lowering is left out when hybrid codegen is requested.
+    if (!userConfig_.use_mlir_hybrid_codegen)
+        pm.addPass(mlir::daphne::createMatMulOpLoweringPass());
+    pm.addPass(mlir::daphne::createAggAllOpLoweringPass());
+    pm.addPass(mlir::daphne::createMapOpLoweringPass());
+    pm.addPass(mlir::createInlinerPass());
+
+    pm.addPass(mlir::daphne::createEwOpLoweringPass());
+    pm.addPass(mlir::createConvertMathToLLVMPass());
+    pm.addPass(mlir::daphne::createModOpLoweringPass());
+    pm.addPass(mlir::createCanonicalizerPass());
+    pm.addPass(mlir::createCSEPass());
+    // Function-nested loop fusion and scalar replacement, then affine lowering.
+    using mlir::func::FuncOp;
+    pm.addNestedPass<FuncOp>(mlir::createLoopFusionPass());
+    pm.addNestedPass<FuncOp>(mlir::createAffineScalarReplacementPass());
+    pm.addPass(mlir::createLowerAffinePass());
+
+    if (dumpIr)
+        pm.addPass(mlir::daphne::createPrintIRPass("IR after codegen pipeline"));
 }
diff --git a/src/compiler/execution/DaphneIrExecutor.h b/src/compiler/execution/DaphneIrExecutor.h
index 05d32d7b1..ef1c32d13 100644
--- a/src/compiler/execution/DaphneIrExecutor.h
+++ b/src/compiler/execution/DaphneIrExecutor.h
@@ -19,6 +19,7 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/ExecutionEngine/ExecutionEngine.h"
 #include <api/cli/DaphneUserConfig.h>
+#include "mlir/Pass/PassManager.h"
 
 class DaphneIrExecutor
 {
@@ -36,5 +37,7 @@ class DaphneIrExecutor
     bool selectMatrixRepresentations_;
     // Storage for lib paths needed for StringRefs
     std::vector<std::string> sharedLibRefPaths;
+
+    void buildCodegenPipeline(mlir::PassManager &);
 };
 
diff --git a/src/compiler/explanation/PrintIRPass.cpp b/src/compiler/explanation/PrintIRPass.cpp
index 6dabf88d6..3adf1bf5b 100644
--- a/src/compiler/explanation/PrintIRPass.cpp
+++ b/src/compiler/explanation/PrintIRPass.cpp
@@ -17,34 +17,39 @@
 #include <ir/daphneir/Daphne.h>
 #include <ir/daphneir/Passes.h>
 
-#include <string>
 #include <iostream>
+#include <string>
 
 using namespace mlir;
 
 /**
  * @brief A compiler pass that simply prints the IR.
- * 
+ *
  * Useful for manual testing and debugging, since this pass can easily be
  * integrated after any other pass to have a look at the IR.
  */
 class PrintIRPass : public PassWrapper<PrintIRPass, OperationPass<ModuleOp>> {
-    
     std::string message;
-    
-public:
-    PrintIRPass(const std::string message) : message(message) {
-        //
-    }
-    
+
+   public:
+    PrintIRPass(const std::string message) : message(message) {}
+
     void runOnOperation() final;
+
+    StringRef getArgument() const final { return "print-ir"; }
+    StringRef getDescription() const final {
+        return "Pass for debugging purposes, prints the IR at the current "
+               "stage in the compilation pipeline.";
+    }
 };
 
 void PrintIRPass::runOnOperation() {
     std::cerr << message << std::endl;
-    
+
     auto module = getOperation();
-    module.dump();
+    OpPrintingFlags flags = {};
+    flags.enableDebugInfo(/*enable=*/false, /*prettyForm=*/false);
+    module.print(llvm::errs(), flags);
 }
 
 std::unique_ptr<Pass> daphne::createPrintIRPass(const std::string message) {
diff --git a/src/compiler/inference/AdaptTypesToKernelsPass.cpp b/src/compiler/inference/AdaptTypesToKernelsPass.cpp
index 94a261866..22812ba9d 100644
--- a/src/compiler/inference/AdaptTypesToKernelsPass.cpp
+++ b/src/compiler/inference/AdaptTypesToKernelsPass.cpp
@@ -40,6 +40,10 @@ using namespace mlir;
 struct AdaptTypesToKernelsPass : public PassWrapper<AdaptTypesToKernelsPass, OperationPass<func::FuncOp>>
 {
     void runOnOperation() final;
+    StringRef getArgument() const final { return "adapt-types-to-kernels"; }
+    StringRef getDescription() const final {
+        return "TODO";
+    }
 };
 
 void AdaptTypesToKernelsPass::runOnOperation()
diff --git a/src/compiler/inference/InferencePass.cpp b/src/compiler/inference/InferencePass.cpp
index 414a2afb6..c0af79ac0 100644
--- a/src/compiler/inference/InferencePass.cpp
+++ b/src/compiler/inference/InferencePass.cpp
@@ -519,8 +519,11 @@ class InferencePass : public PassWrapper<InferencePass, OperationPass<func::Func
             return false;
         });
     }
+
+    StringRef getArgument() const final { return "inference"; }
+    StringRef getDescription() const final { return "TODO"; }
 };
 
 std::unique_ptr<Pass> daphne::createInferencePass(daphne::InferenceConfig cfg) {
     return std::make_unique<InferencePass>(cfg);
-}
\ No newline at end of file
+}
diff --git a/src/compiler/inference/SelectMatrixRepresentationsPass.cpp b/src/compiler/inference/SelectMatrixRepresentationsPass.cpp
index 11f74280b..9049b0a9c 100644
--- a/src/compiler/inference/SelectMatrixRepresentationsPass.cpp
+++ b/src/compiler/inference/SelectMatrixRepresentationsPass.cpp
@@ -161,6 +161,9 @@ class SelectMatrixRepresentationsPass : public PassWrapper<SelectMatrixRepresent
             f.getBody().back().getTerminator()->getOperandTypes()));
     }
 
+    StringRef getArgument() const final { return "select-matrix-representations"; }
+    StringRef getDescription() const final { return "TODO"; }
+
     static bool returnsKnownProperties(Operation *op) {
         return llvm::any_of(op->getResultTypes(), [](Type rt) {
             if(auto mt = rt.dyn_cast<daphne::MatrixType>())
@@ -172,4 +175,4 @@ class SelectMatrixRepresentationsPass : public PassWrapper<SelectMatrixRepresent
 
 std::unique_ptr<Pass> daphne::createSelectMatrixRepresentationsPass() {
     return std::make_unique<SelectMatrixRepresentationsPass>();
-}
\ No newline at end of file
+}
diff --git a/src/compiler/lowering/AggAllOpLowering.cpp b/src/compiler/lowering/AggAllOpLowering.cpp
new file mode 100644
index 000000000..f3f16f861
--- /dev/null
+++ b/src/compiler/lowering/AggAllOpLowering.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/UseDefLists.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+/// Rewrites daphne::AllAggSumOp on an f64 matrix of known shape into a 2-D
+/// affine loop nest summing the matrix's MemRef view; else match failure.
+class SumAllOpLowering : public OpConversionPattern<daphne::AllAggSumOp> {
+   public:
+    using OpConversionPattern::OpConversionPattern;
+
+    LogicalResult matchAndRewrite(
+        daphne::AllAggSumOp op, OpAdaptor adaptor,
+        ConversionPatternRewriter &rewriter) const override {
+        // dyn_cast yields a null type for non-matrix operands: do not use it.
+        auto matrixType =
+            adaptor.getArg().getType().dyn_cast<mlir::daphne::MatrixType>();
+        if (!matrixType)
+            return rewriter.notifyMatchFailure(op, "expected matrix operand");
+        auto nR = matrixType.getNumRows();
+        auto nC = matrixType.getNumCols();
+        // A negative extent (presumably "unknown") would give empty loops.
+        if (nR < 0 || nC < 0)
+            return rewriter.notifyMatchFailure(op, "unknown matrix shape");
+        // The accumulators below are f64 and combined with arith.addf.
+        auto matrixElementType = matrixType.getElementType();
+        if (!matrixElementType.isF64())
+            return rewriter.notifyMatchFailure(op, "expected f64 elements");
+
+        auto loc = op->getLoc();
+        auto memRefType = mlir::MemRefType::get({nR, nC}, matrixElementType);
+        auto memRef = rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+            loc, memRefType, adaptor.getArg());
+        Value sum = rewriter.create<mlir::arith::ConstantOp>(
+            loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+        // Outer loop over rows; carries the running total as its iter arg.
+        auto outerLoop =
+            rewriter.create<AffineForOp>(loc, 0, nR, 1, ValueRange{sum});
+        for (Operation &nested : *outerLoop.getBody())
+            rewriter.eraseOp(&nested);
+        rewriter.setInsertionPointToStart(outerLoop.getBody());
+        Value sum_iter = rewriter.create<mlir::arith::ConstantOp>(
+            loc, rewriter.getF64Type(), rewriter.getF64FloatAttr(0));
+        // Inner loop over columns; carries the per-row partial sum.
+        auto innerLoop =
+            rewriter.create<AffineForOp>(loc, 0, nC, 1, ValueRange{sum_iter});
+        for (Operation &nested : *innerLoop.getBody())
+            rewriter.eraseOp(&nested);
+        SmallVector<Value, 2> loopIvs{outerLoop.getInductionVar(),
+                                      innerLoop.getInductionVar()};
+        // Inner body: load element [row, col] and add it to the partial sum.
+        rewriter.setInsertionPointToStart(innerLoop.getBody());
+        auto elementLoad =
+            rewriter.create<memref::LoadOp>(loc, memRef, loopIvs);
+        mlir::Value inner_sum = rewriter.create<mlir::arith::AddFOp>(
+            loc, innerLoop.getRegionIterArgs()[0], elementLoad);
+        rewriter.setInsertionPointToEnd(innerLoop.getBody());
+        rewriter.create<AffineYieldOp>(loc, inner_sum);
+        // Outer body tail: fold the finished row sum into the running total.
+        rewriter.setInsertionPointToEnd(outerLoop.getBody());
+        mlir::Value outer_sum = rewriter.create<mlir::arith::AddFOp>(
+            loc, outerLoop.getRegionIterArgs()[0], innerLoop.getResult(0));
+        rewriter.create<AffineYieldOp>(loc, outer_sum);
+
+        rewriter.setInsertionPointAfter(outerLoop);
+        rewriter.create<daphne::DecRefOp>(loc, adaptor.getArg());
+        // replace sumAll op with result of loops
+        rewriter.replaceOp(op, outerLoop.getResult(0));
+        return success();
+    }
+};
+
+namespace {
+/**
+ * @brief Lowers daphne::AllAggSumOp to a nest of affine loops that performs
+ * the aggregation on a MemRef which is created from the input DenseMatrix.
+ *
+ * Running loop fusion afterwards may fuse the produced affine loops.
+ */
+struct AggAllLoweringPass
+    : public mlir::PassWrapper<AggAllLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit AggAllLoweringPass() {}
+
+    StringRef getArgument() const final { return "lower-agg"; }
+    StringRef getDescription() const final {
+        return "Lowers AggAll operators to a set of affine loops and performs "
+               "the aggregation on a MemRef which is created from the input "
+               "DenseMatrix.";
+    }
+
+    // The lowering creates arith, affine and memref ops, so each of these
+    // dialects has to be declared here to be loaded before the pass runs.
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry.insert<mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                        mlir::arith::ArithDialect, mlir::memref::MemRefDialect>();
+    }
+    void runOnOperation() final;
+};
+}  // end anonymous namespace
+
+/// Applies SumAllOpLowering to the module as a partial conversion in which
+/// daphne::AllAggSumOp is illegal; signals pass failure if that fails.
+void AggAllLoweringPass::runOnOperation() {
+    mlir::ConversionTarget target(getContext());
+    mlir::RewritePatternSet patterns(&getContext());
+
+    target.addLegalDialect<mlir::memref::MemRefDialect>();
+    target.addLegalDialect<mlir::arith::ArithDialect>();
+    target.addLegalDialect<mlir::scf::SCFDialect>();
+    target.addLegalDialect<mlir::AffineDialect>();
+    target.addLegalDialect<mlir::linalg::LinalgDialect>();
+    target.addLegalDialect<mlir::LLVM::LLVMDialect>();
+
+    target.addLegalOp<mlir::daphne::ConvertDenseMatrixToMemRef>();
+    target.addLegalOp<mlir::daphne::ConvertMemRefToDenseMatrix>();
+    target.addLegalOp<mlir::daphne::DecRefOp>();
+
+    target.addIllegalOp<mlir::daphne::AllAggSumOp>();
+
+    patterns.insert<SumAllOpLowering>(&getContext());
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns))))
+        signalPassFailure();
+}
+
+/// Creates the pass that lowers daphne::AllAggSumOp to affine loops.
+std::unique_ptr<mlir::Pass> mlir::daphne::createAggAllOpLoweringPass() {
+    return std::make_unique<AggAllLoweringPass>();
+}
diff --git a/src/compiler/lowering/CMakeLists.txt b/src/compiler/lowering/CMakeLists.txt
index 0484a8b5c..6b9ac25af 100644
--- a/src/compiler/lowering/CMakeLists.txt
+++ b/src/compiler/lowering/CMakeLists.txt
@@ -27,6 +27,12 @@ add_mlir_dialect_library(MLIRDaphneTransforms
     SpecializeGenericFunctionsPass.cpp
     VectorizeComputationsPass.cpp
     WhileLoopInvariantCodeMotionPass.cpp
+    DaphneOptPass.cpp
+    EwOpsLowering.cpp
+    ModOpLowering.cpp
+    MapOpLowering.cpp
+    MatMulOpLowering.cpp
+    AggAllOpLowering.cpp
 
     DEPENDS
     MLIRDaphneOpsIncGen
@@ -35,9 +41,14 @@ add_mlir_dialect_library(MLIRDaphneTransforms
     LINK_COMPONENTS
     Core
 )
+
 target_link_libraries(MLIRDaphneTransforms PUBLIC
     CompilerUtils
+    MLIRSCFToControlFlow
     MLIRArithToLLVM
+    MLIRMemRefToLLVM
+    MLIRAffineToStandard
+    MLIRLinalgToStandard
     MLIRControlFlowToLLVM
     MLIRFuncToLLVM
     MLIRFuncTransforms
diff --git a/src/compiler/lowering/DaphneOptPass.cpp b/src/compiler/lowering/DaphneOptPass.cpp
new file mode 100644
index 000000000..8795962e2
--- /dev/null
+++ b/src/compiler/lowering/DaphneOptPass.cpp
@@ -0,0 +1,102 @@
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+#define DEBUG_TYPE "dm-opt"
+
+using namespace mlir;
+
+class IntegerModOpt : public mlir::OpConversionPattern<mlir::daphne::EwModOp> {
+   public:
+    using OpConversionPattern::OpConversionPattern;
+    // lhs % rhs == lhs & (rhs - 1) holds iff rhs is a non-zero power of two.
+    [[nodiscard]] static bool optimization_viable(mlir::daphne::EwModOp op) {
+        if (!op.getRhs().getType().isUnsignedInteger()) return false;
+        std::pair<bool, uint64_t> isConstant =
+            CompilerUtils::isConstant<uint64_t>(op.getRhs());
+        // 0 also passes the bit test, but x % 0 must not become x & (0 - 1).
+        return isConstant.first && isConstant.second != 0 &&
+               (isConstant.second & (isConstant.second - 1)) == 0;
+    }
+
+    mlir::LogicalResult matchAndRewrite(
+        mlir::daphne::EwModOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const override {
+        mlir::Value cst_one = rewriter.create<mlir::daphne::ConstantOp>(
+            op.getLoc(), static_cast<uint64_t>(1));
+        mlir::Value sub = rewriter.create<mlir::daphne::EwSubOp>(
+            op.getLoc(), adaptor.getRhs(), cst_one);
+        mlir::Value andOp = rewriter.create<mlir::daphne::EwBitwiseAndOp>(
+            op.getLoc(), adaptor.getLhs(), sub);
+        rewriter.replaceOp(op, andOp);
+        return success();
+    }
+};
+
+namespace {
+/**
+ * @brief Module-level pass that rewrites operations of the DaphneDialect
+ * (currently only the EwModOp) into other operations of the DaphneDialect.
+ * The pass argument is "opt-daphne"; the rewrite lives in runOnOperation().
+ */
+struct DenseMatrixOptPass
+    : public mlir::PassWrapper<DenseMatrixOptPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit DenseMatrixOptPass() {}
+
+    StringRef getArgument() const final { return "opt-daphne"; }
+    StringRef getDescription() const final {
+        return "Performs optimizations on the DaphneIR by transforming "
+               "operations in the DaphneDialect to a set of other operation "
+               "also from the DaphneDialect.";
+    }
+
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry.insert<mlir::LLVM::LLVMDialect, mlir::arith::ArithDialect,
+                        mlir::daphne::DaphneDialect>();
+    }
+    void runOnOperation() final;
+};
+}  // end anonymous namespace
+
+void DenseMatrixOptPass::runOnOperation() {
+    mlir::MLIRContext *ctx = &getContext();
+    mlir::LowerToLLVMOptions llvmOptions(ctx);
+    mlir::LLVMTypeConverter typeConverter(ctx, llvmOptions);
+    // Identity conversion registered on top of the LLVM type converter.
+    typeConverter.addConversion([](Type type) { return type; });
+
+    mlir::ConversionTarget target(*ctx);
+    target.addLegalDialect<mlir::BuiltinDialect, mlir::arith::ArithDialect,
+                           mlir::daphne::DaphneDialect>();
+    // EwModOp stays legal unless IntegerModOpt can rewrite it.
+    target.addDynamicallyLegalOp<mlir::daphne::EwModOp>(
+        [](mlir::daphne::EwModOp candidate) {
+            return !IntegerModOpt::optimization_viable(candidate);
+        });
+
+    mlir::RewritePatternSet patterns(ctx);
+    patterns.insert<IntegerModOpt>(typeConverter, ctx);
+
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
+        signalPassFailure();
+    }
+}
+
+std::unique_ptr<mlir::Pass> mlir::daphne::createDaphneOptPass() {
+    return std::unique_ptr<mlir::Pass>(std::make_unique<DenseMatrixOptPass>());
+}  // hands ownership of a fresh DenseMatrixOptPass to the caller
diff --git a/src/compiler/lowering/DistributeComputationsPass.cpp b/src/compiler/lowering/DistributeComputationsPass.cpp
index d57a00a62..09b57a9a2 100644
--- a/src/compiler/lowering/DistributeComputationsPass.cpp
+++ b/src/compiler/lowering/DistributeComputationsPass.cpp
@@ -73,6 +73,9 @@ struct DistributeComputationsPass
     : public PassWrapper<DistributeComputationsPass, OperationPass<ModuleOp>>
 {
     void runOnOperation() final;
+
+    StringRef getArgument() const final { return "distribute-computation"; }
+    StringRef getDescription() const final { return "TODO"; }
 };
 }
 
diff --git a/src/compiler/lowering/DistributePipelinesPass.cpp b/src/compiler/lowering/DistributePipelinesPass.cpp
index ae4ce4698..d4ea14468 100644
--- a/src/compiler/lowering/DistributePipelinesPass.cpp
+++ b/src/compiler/lowering/DistributePipelinesPass.cpp
@@ -67,6 +67,9 @@ struct DistributePipelinesPass
     : public PassWrapper<DistributePipelinesPass, OperationPass<ModuleOp>>
 {
     void runOnOperation() final;
+
+    StringRef getArgument() const final { return "distribute-pipelines"; }
+    StringRef getDescription() const final { return "TODO"; }
 };
 
 void DistributePipelinesPass::runOnOperation()
diff --git a/src/compiler/lowering/EwOpsLowering.cpp b/src/compiler/lowering/EwOpsLowering.cpp
new file mode 100644
index 000000000..d892fdfe8
--- /dev/null
+++ b/src/compiler/lowering/EwOpsLowering.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/IR/UseDefLists.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+// Replaces a unary op with IOp (integer result) or FOp (float result).
+template <class UnaryOp, class IOp, class FOp>
+struct UnaryOpLowering : public mlir::OpConversionPattern<UnaryOp> {
+    using OpAdaptor = typename mlir::OpConversionPattern<UnaryOp>::OpAdaptor;
+
+   public:
+    UnaryOpLowering(mlir::TypeConverter &typeConverter, mlir::MLIRContext *ctx)
+        : mlir::OpConversionPattern<UnaryOp>(typeConverter, ctx) {
+        this->setDebugName("EwDaphneOpsLowering");
+    }
+
+    mlir::LogicalResult matchAndRewrite(
+        UnaryOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const override {
+        const mlir::Type resultType = op.getType();
+        mlir::Operation *oldOp = op.getOperation();
+        if (resultType.isa<mlir::IntegerType>()) {
+            rewriter.replaceOpWithNewOp<IOp>(oldOp, adaptor.getOperands());
+            return mlir::success();
+        }
+        if (resultType.isa<mlir::FloatType>()) {
+            rewriter.replaceOpWithNewOp<FOp>(oldOp, adaptor.getOperands());
+            return mlir::success();
+        }
+        return mlir::failure();
+    }
+};
+
+// Lowers an element-wise binary op: scalar operands become a single IOp/FOp,
+// a matrix LHS becomes an affine loop nest over ConvertDenseMatrixToMemRef.
+template <class BinaryOp, class IOp, class FOp>
+class BinaryOpLowering final : public mlir::OpConversionPattern<BinaryOp> {
+    using OpAdaptor = typename mlir::OpConversionPattern<BinaryOp>::OpAdaptor;
+
+   public:
+    BinaryOpLowering(mlir::TypeConverter &typeConverter, mlir::MLIRContext *ctx)
+        : mlir::OpConversionPattern<BinaryOp>(typeConverter, ctx) {
+        this->setDebugName("EwDaphneOpLowering");
+    }
+
+    // Scalar case: FOp when both operands are floats; otherwise the operands
+    // are cast to signless integers, combined with IOp, and cast back.
+    mlir::LogicalResult convertEwScalar(
+        BinaryOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const {
+        auto lhs = adaptor.getLhs();
+        auto rhs = adaptor.getRhs();
+        auto loc = op.getLoc();
+
+        if (lhs.getType().template isa<mlir::FloatType>() &&
+            rhs.getType().template isa<mlir::FloatType>()) {
+            rewriter.replaceOpWithNewOp<FOp>(op.getOperation(),
+                                             adaptor.getOperands());
+            return mlir::success();
+        }
+
+        // Each operand is cast to the signless integer of its *own* width.
+        Value castedLhs = this->typeConverter->materializeTargetConversion(
+            rewriter, loc,
+            rewriter.getIntegerType(lhs.getType().getIntOrFloatBitWidth()),
+            ValueRange{lhs});
+
+        Value castedRhs = this->typeConverter->materializeTargetConversion(
+            rewriter, loc,
+            rewriter.getIntegerType(rhs.getType().getIntOrFloatBitWidth()),
+            ValueRange{rhs});
+
+        Value binaryOp = rewriter.create<IOp>(loc, castedLhs, castedRhs);
+
+        Value res = this->typeConverter->materializeSourceConversion(
+            rewriter, loc, lhs.getType(), ValueRange{binaryOp});
+
+        rewriter.replaceOp(op, res);
+        return mlir::success();
+    }
+
+    // Matrix case (matrix LHS; RHS is a scalar or a matrix): emits a 2-d
+    // affine loop nest that stores into a MemRef from insertMemRefAlloc.
+    mlir::LogicalResult matchAndRewrite(
+        BinaryOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const override {
+        auto lhs = adaptor.getLhs();
+        auto rhs = adaptor.getRhs();
+
+        // no matrix
+        if (!lhs.getType().template isa<mlir::daphne::MatrixType>() &&
+            !rhs.getType().template isa<mlir::daphne::MatrixType>())
+            return convertEwScalar(op, adaptor, rewriter);
+
+        // for now assume matrix is LHS and RHS is non matrix
+        mlir::daphne::MatrixType lhsMatrixType =
+            lhs.getType().template dyn_cast<mlir::daphne::MatrixType>();
+        if (!lhsMatrixType)  // scalar LHS with matrix RHS is not supported
+            return rewriter.notifyMatchFailure(
+                op, "expected the left-hand side operand to be a matrix");
+        auto matrixElementType = lhsMatrixType.getElementType();
+        auto lhsRows = lhsMatrixType.getNumRows();
+        auto lhsCols = lhsMatrixType.getNumCols();
+        auto lhsMemRefType =
+            mlir::MemRefType::get({lhsRows, lhsCols}, matrixElementType);
+
+        mlir::Type elementType{};
+        mlir::Value memRefLhs =
+            rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                op->getLoc(), lhsMemRefType, adaptor.getLhs());
+
+        mlir::Value memRefRhs{};
+        bool isMatrixMatrix =
+            rhs.getType().template isa<mlir::daphne::MatrixType>();
+
+        if (isMatrixMatrix) {
+            memRefRhs =
+                rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                    op->getLoc(), lhsMemRefType, adaptor.getRhs());
+            elementType = lhsMemRefType.getElementType();
+        } else {
+            elementType = rhs.getType();
+        }
+
+        mlir::Value outputMemRef =
+            insertMemRefAlloc(lhsMemRefType, op->getLoc(), rewriter);
+
+        SmallVector<int64_t, 4> lowerBounds(/*Rank=*/2, /*Value=*/0);
+        SmallVector<int64_t, 4> steps(/*Rank=*/2, /*Value=*/1);
+        buildAffineLoopNest(
+            rewriter, op.getLoc(), lowerBounds,
+            {lhsMatrixType.getNumRows(), lhsMatrixType.getNumCols()}, steps,
+            [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) {
+                mlir::Value loadLhs =
+                    nestedBuilder.create<AffineLoadOp>(loc, memRefLhs, ivs);
+                mlir::Value binaryOp{};
+
+                if (adaptor.getRhs().getType()
+                        .template isa<mlir::FloatType>()) {
+                    binaryOp = nestedBuilder.create<FOp>(loc, loadLhs,
+                                                         adaptor.getRhs());
+
+                    nestedBuilder.create<AffineStoreOp>(loc, binaryOp,
+                                                        outputMemRef, ivs);
+                    return;
+                }
+
+                mlir::Value rhs{};
+                if (isMatrixMatrix)
+                    rhs =
+                        nestedBuilder.create<AffineLoadOp>(loc, memRefRhs, ivs);
+                else
+                    rhs = adaptor.getRhs();
+
+                // is integer
+                if (elementType.isInteger(
+                        elementType.getIntOrFloatBitWidth())) {
+                    Type signless = nestedBuilder.getIntegerType(
+                        lhsMemRefType.getElementTypeBitWidth());
+                    Value castedLhs =
+                        this->typeConverter->materializeTargetConversion(
+                            nestedBuilder, loc, signless, ValueRange{loadLhs});
+                    Value castedRhs =
+                        this->typeConverter->materializeTargetConversion(
+                            nestedBuilder, loc, signless, ValueRange{rhs});
+
+                    binaryOp =
+                        nestedBuilder.create<IOp>(loc, castedLhs, castedRhs);
+                    Value castedRes =
+                        this->typeConverter->materializeSourceConversion(
+                            nestedBuilder, loc, elementType,
+                            ValueRange{binaryOp});
+                    nestedBuilder.create<AffineStoreOp>(loc, castedRes,
+                                                        outputMemRef, ivs);
+                } else {
+                    // is float
+                    binaryOp = nestedBuilder.create<FOp>(loc, loadLhs, rhs);
+                    nestedBuilder.create<AffineStoreOp>(loc, binaryOp,
+                                                        outputMemRef, ivs);
+                }
+            });
+        mlir::Value output = convertMemRefToDenseMatrix(
+            op->getLoc(), rewriter, outputMemRef, op.getType());
+
+        rewriter.replaceOp(op, output);
+        return mlir::success();
+    }
+};
+
+// clang-format off
+// math::sqrt only supports floating point, so it fills both the integer and the float slot; DAPHNE promotes argument type of sqrt to f32/64
+using SqrtOpLowering = UnaryOpLowering<mlir::daphne::EwSqrtOp, mlir::math::SqrtOp, mlir::math::SqrtOp>;
+using AbsOpLowering = UnaryOpLowering<mlir::daphne::EwAbsOp, mlir::math::AbsIOp, mlir::math::AbsFOp>;
+using AddOpLowering = BinaryOpLowering<mlir::daphne::EwAddOp, mlir::arith::AddIOp, mlir::arith::AddFOp>;
+using SubOpLowering = BinaryOpLowering<mlir::daphne::EwSubOp, mlir::arith::SubIOp, mlir::arith::SubFOp>;
+using MulOpLowering = BinaryOpLowering<mlir::daphne::EwMulOp, mlir::arith::MulIOp, mlir::arith::MulFOp>;
+using DivOpLowering = BinaryOpLowering<mlir::daphne::EwDivOp, mlir::arith::DivSIOp, mlir::arith::DivFOp>;  // NOTE(review): DivSIOp is signed division - confirm unsigned element types are handled
+using PowOpLowering = BinaryOpLowering<mlir::daphne::EwPowOp, mlir::math::PowFOp, mlir::math::PowFOp>;  // NOTE(review): PowFOp also fills the integer slot - confirm integer operands never reach it
+// clang-format on
+
+namespace {
+/**
+ * @brief Lowers element-wise operations to affine loop structures and
+ * arithmetic operations, which may enable fusing the produced affine loops
+ * by running the loop fusion pass.
+ *
+ * The arith dialect is a dependent dialect: the patterns create arith ops.
+ */
+struct EwOpLoweringPass
+    : public mlir::PassWrapper<EwOpLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit EwOpLoweringPass() {}
+
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry.insert<mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                        mlir::memref::MemRefDialect, mlir::arith::ArithDialect,
+                        mlir::daphne::DaphneDialect, mlir::math::MathDialect>();
+    }
+    void runOnOperation() final;
+
+    StringRef getArgument() const final { return "lower-ew"; }
+    StringRef getDescription() const final {
+        return "This pass lowers element-wise operations to affine-loop "
+               "structures and arithmetic operations.";
+    }
+};
+}  // end anonymous namespace
+
+// Adds the element-wise lowering patterns declared in this file (add, sub,
+// mul, sqrt, abs, div, pow) to `patterns`, all sharing `typeConverter`.
+void populateLowerEwOpConversionPatterns(mlir::LLVMTypeConverter &typeConverter,
+                                         mlir::RewritePatternSet &patterns) {
+    mlir::MLIRContext *ctx = patterns.getContext();
+    // clang-format off
+    patterns.add<
+        AddOpLowering, SubOpLowering, MulOpLowering,
+        SqrtOpLowering, AbsOpLowering,
+        DivOpLowering, PowOpLowering
+    >(typeConverter, ctx);
+    // clang-format on
+}
+
+void EwOpLoweringPass::runOnOperation() {
+    mlir::ConversionTarget target(getContext());
+    mlir::RewritePatternSet patterns(&getContext());
+    mlir::LowerToLLVMOptions llvmOptions(&getContext());
+    mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions);
+
+    typeConverter.addConversion(convertInteger);
+    typeConverter.addConversion(convertFloat);
+    typeConverter.addConversion([](Type type) { return type; });
+    typeConverter.addArgumentMaterialization(materializeCastFromIllegal);
+    typeConverter.addSourceMaterialization(materializeCastToIllegal);
+    typeConverter.addTargetMaterialization(materializeCastFromIllegal);
+
+    target.addLegalDialect<mlir::arith::ArithDialect,
+                           mlir::memref::MemRefDialect, mlir::AffineDialect,
+                           mlir::LLVM::LLVMDialect, mlir::daphne::DaphneDialect,
+                           mlir::BuiltinDialect, mlir::math::MathDialect>();
+
+    // Unary ops with a matrix operand stay legal; only scalars are lowered.
+    target.addDynamicallyLegalOp<mlir::daphne::EwSqrtOp, mlir::daphne::EwAbsOp>(
+        [](Operation *op) {
+            return op->getOperandTypes()[0].isa<mlir::daphne::MatrixType>();
+        });
+
+    // Binary ops: the callback returns true for ops this pass must not touch.
+    target.addDynamicallyLegalOp<mlir::daphne::EwAddOp, mlir::daphne::EwSubOp,
+                                 mlir::daphne::EwMulOp, mlir::daphne::EwPowOp,
+                                 mlir::daphne::EwDivOp>([](Operation *op) {
+        // A null MatrixType means the operand is not a matrix.
+        auto lhs = op->getOperandTypes()[0]
+                       .dyn_cast<mlir::daphne::MatrixType>();
+        auto rhs = op->getOperandTypes()[1]
+                       .dyn_cast<mlir::daphne::MatrixType>();
+
+        // Neither operand is a matrix: always lowered.
+        if (!lhs && !rhs) return false;
+
+        // Only the RHS is a matrix: BinaryOpLowering assumes a matrix LHS,
+        // so the op is kept legal and left untouched by this pass.
+        if (!lhs) return true;
+
+        // LHS dimensions of -1 (presumably "unknown") are never lowered.
+        if (lhs.getNumRows() == -1 || lhs.getNumCols() == -1)
+            return true;
+
+        // matrix-matrix is lowered only for identical shapes; a matrix LHS
+        // with a non-matrix RHS is always lowered at this point.
+        return rhs && (lhs.getNumRows() != rhs.getNumRows() ||
+                       lhs.getNumCols() != rhs.getNumCols());
+    });
+
+    populateLowerEwOpConversionPatterns(typeConverter, patterns);
+
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns))))
+        signalPassFailure();
+}
+
+std::unique_ptr<mlir::Pass> mlir::daphne::createEwOpLoweringPass() {
+    return std::unique_ptr<mlir::Pass>(std::make_unique<EwOpLoweringPass>());
+}  // hands ownership of a fresh EwOpLoweringPass to the caller
diff --git a/src/compiler/lowering/LowerToLLVMPass.cpp b/src/compiler/lowering/LowerToLLVMPass.cpp
index 6baa7e4ce..6fd9c975e 100644
--- a/src/compiler/lowering/LowerToLLVMPass.cpp
+++ b/src/compiler/lowering/LowerToLLVMPass.cpp
@@ -18,16 +18,23 @@
 #include "ir/daphneir/Passes.h"
 #include "compiler/utils/CompilerUtils.h"
 
+#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
+
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
 #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Func/Transforms/FuncConversions.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 #include <memory>
@@ -41,35 +48,6 @@ using namespace mlir;
 // be combined into a single variadic result.
 const std::string ATTR_HASVARIADICRESULTS = "hasVariadicResults";
 
-#if 0
-// At the moment, all of these operations are lowered to kernel calls.
-template <typename BinaryOp, typename ReplIOp, typename ReplFOp>
-struct BinaryOpLowering : public OpConversionPattern<BinaryOp>
-{
-    using OpConversionPattern<BinaryOp>::OpConversionPattern;
-
-    LogicalResult
-    matchAndRewrite(BinaryOp op, OpAdaptor adaptor,
-                    ConversionPatternRewriter &rewriter) const override
-    {
-        Type type = op.getType();
-        if (type.isa<IntegerType>()) {
-            rewriter.replaceOpWithNewOp<ReplIOp>(op.getOperation(), adaptor.getOperands());
-        }
-        else if (type.isa<FloatType>()) {
-            rewriter.replaceOpWithNewOp<ReplFOp>(op.getOperation(), adaptor.getOperands());
-        }
-        else {
-            return failure();
-        }
-        return success();
-    }
-};
-using AddOpLowering = BinaryOpLowering<daphne::AddOp, AddIOp, AddFOp>;
-using SubOpLowering = BinaryOpLowering<daphne::SubOp, SubIOp, SubFOp>;
-using MulOpLowering = BinaryOpLowering<daphne::MulOp, MulIOp, MulFOp>;
-#endif
-
 struct ReturnOpLowering : public OpRewritePattern<daphne::ReturnOp>
 {
     using OpRewritePattern<daphne::ReturnOp>::OpRewritePattern;
@@ -308,16 +286,18 @@ class CallKernelOpLowering : public OpConversionPattern<daphne::CallKernelOp>
         auto loc = op.getLoc();
 
         auto inputOutputTypes = getLLVMInputOutputTypes(
-                                                        loc, rewriter.getContext(), typeConverter,
-                                                        op.getResultTypes(), ValueRange(adaptor.getOperands()).getTypes(),
-                                                        hasVarRes, rewriter.getIndexType());
+            loc, rewriter.getContext(), typeConverter, op.getResultTypes(),
+            ValueRange(adaptor.getOperands()).getTypes(), hasVarRes,
+            rewriter.getIndexType());
 
         // create function protoype and get `FlatSymbolRefAttr` to it
         auto kernelRef = getOrInsertFunctionAttr(
-                                                 rewriter, module, op.getCalleeAttr().getValue(),
-                                                 getKernelFuncSignature(rewriter.getContext(), inputOutputTypes));
+            rewriter, module, op.getCalleeAttr().getValue(),
+            getKernelFuncSignature(rewriter.getContext(), inputOutputTypes));
 
-        auto kernelOperands = allocOutputReferences(loc, rewriter, adaptor.getOperands(), inputOutputTypes, op->getNumResults(), hasVarRes);
+        auto kernelOperands = allocOutputReferences(
+            loc, rewriter, adaptor.getOperands(), inputOutputTypes,
+            op->getNumResults(), hasVarRes);
 
         // call function
         // The kernel call has an empty list of return types, because our
@@ -934,6 +914,7 @@ void DaphneLowerToLLVMPass::runOnOperation()
     RewritePatternSet patterns(&getContext());
 
     LowerToLLVMOptions llvmOptions(&getContext());
+    // llvmOptions.useBarePtrCallConv = true;
     LLVMTypeConverter typeConverter(&getContext(), llvmOptions);
     typeConverter.addConversion([&](daphne::MatrixType t)
     {
@@ -985,9 +966,13 @@ void DaphneLowerToLLVMPass::runOnOperation()
     LLVMConversionTarget target(getContext());
 
     // populate dialect conversions
-    arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
-    populateFuncToLLVMConversionPatterns(typeConverter, patterns);
+    mlir::linalg::populateLinalgToStandardConversionPatterns(patterns);
+    populateAffineToStdConversionPatterns(patterns);
+    populateSCFToControlFlowConversionPatterns(patterns);
+    mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
+    populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
     cf::populateControlFlowToLLVMConversionPatterns(typeConverter, patterns);
+    populateFuncToLLVMConversionPatterns(typeConverter, patterns);
     populateReturnOpTypeConversionPattern(patterns, typeConverter);
 
     target.addLegalOp<ModuleOp>();
diff --git a/src/compiler/lowering/ManageObjRefsPass.cpp b/src/compiler/lowering/ManageObjRefsPass.cpp
index b819912b6..90120163f 100644
--- a/src/compiler/lowering/ManageObjRefsPass.cpp
+++ b/src/compiler/lowering/ManageObjRefsPass.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <compiler/utils/CompilerUtils.h>
+#include <compiler/utils/LoweringUtils.h>
 #include <ir/daphneir/Daphne.h>
 #include <ir/daphneir/Passes.h>
 
@@ -26,11 +27,11 @@ using namespace mlir;
 /**
  * @brief Inserts DaphneIR operations for managing the reference counters of
  * runtime data objects.
- * 
+ *
  * Thus, it takes care of freeing data objects (e.g., intermediate results) at
  * the right points. The operations employed for reference management are
  * `IncRefOp` and `DecRefOp`.
- * 
+ *
  * The core ideas are:
  * - We decrease the reference counter of each SSA value (block argument or
  *   op result) to prevent memory leaks.
@@ -48,12 +49,23 @@ struct ManageObjRefsPass : public PassWrapper<ManageObjRefsPass, OperationPass<f
 {
     explicit ManageObjRefsPass() {}
     void runOnOperation() final;
+
+    StringRef getArgument() const final { return "manage-obj-refs"; }
+    StringRef getDescription() const final { return "TODO"; }
 };
 
+void processMemRefInterop(OpBuilder builder, Value v) {
+    // DecRef operand 0 of v's defining op after v's last use (or its def if none).
+    Operation* lastUseOp = findLastUseOfSSAValue(v);
+    builder.setInsertionPointAfter(lastUseOp ? lastUseOp : v.getDefiningOp());
+    builder.create<daphne::DecRefOp>(builder.getUnknownLoc(),
+                                     v.getDefiningOp()->getOperand(0));
+}
+
 /**
  * @brief Inserts a `DecRefOp` in the right place, to decrease the reference
  * counter of the given value.
- * 
+ *
  * @param builder
  * @param v
  */
@@ -62,21 +74,22 @@ void processValue(OpBuilder builder, Value v) {
     // removed soon anyway).
     // We only need to manage the reference counters of DAPHNE data objects
     // like matrices and frames (not of scalars).
+
+    Operation* defOp = v.getDefiningOp();
+    if (defOp && llvm::isa<daphne::ConvertDenseMatrixToMemRef>(defOp))
+        processMemRefInterop(builder, v);
+
     if(!v.getType().isa<daphne::MatrixType, daphne::FrameType>())
         return;
-    
-    Operation * defOp = v.getDefiningOp();
 
-    Operation * decRefAfterOp = nullptr;
-    if(v.use_empty()) {
+    Operation* decRefAfterOp = nullptr;
+    if (v.use_empty()) {
         // If the given SSA value has no uses, we want to decrease its
         // reference counter directly after its definition (nullptr for block
         // args). Note that ideally, there should be no unused SSA values.
-        if(defOp)
-            decRefAfterOp = defOp;
+        if (defOp) decRefAfterOp = defOp;
         // else: decRefAfterOp stays nullptr
-    }
-    else {
+    } else {
         // If the given SSA value has uses, we need to find the last of them.
         // Note that the iterator over the uses provided by the value does not
         // seem to follow any useful order, in general, so we need to find out
@@ -85,26 +98,15 @@ void processValue(OpBuilder builder, Value v) {
         // value in the block where the value was defined, to simplify things.
         // So if the user of the value is in a descendant block, we need to
         // find its parent op in the block where the given value is defined.
-        Operation * lastUseOp = nullptr;
-        // TODO What about Block::findAncestorInBlock()?
-        for(OpOperand & use : v.getUses()) {
-            Operation * thisUseOp = use.getOwner();
-            // Find parent op in the block where v is defined.
-            while(thisUseOp->getBlock() != v.getParentBlock())
-                thisUseOp = thisUseOp->getParentOp();
-            // Determine if this is a later use.
-            if(!lastUseOp || lastUseOp->isBeforeInBlock(thisUseOp))
-                lastUseOp = thisUseOp;
-        }
-        decRefAfterOp = lastUseOp;
+        decRefAfterOp = findLastUseOfSSAValue(v);
     }
 
     // At this point, decRefAfterOp is nullptr, or the last user of v, or the
     // defining op of v.
-    
+
     if(decRefAfterOp) {
         // The given value is used and/or an OpResult.
-        
+
         // Don't insert a DecRefOp if the last user is a terminator.
         if(decRefAfterOp->hasTrait<OpTrait::IsTerminator>())
             // The value is handed out of its block (e.g., return, yield, ...).
@@ -116,7 +118,7 @@ void processValue(OpBuilder builder, Value v) {
         // runtime is on the main branch.
         // Don't insert a DecRefOp if there is already one. Currently, this can
         // happen only on the distributed worker, since the IR it gets already
-        // contains 
+        // contains
         if(isa<daphne::DecRefOp>(decRefAfterOp))
             return;
 
@@ -136,7 +138,7 @@ void processValue(OpBuilder builder, Value v) {
         else
             builder.setInsertionPointToStart(pb);
     }
-    
+
     // Finally create the DecRefOp.
     builder.create<daphne::DecRefOp>(builder.getUnknownLoc(), v);
 }
@@ -144,9 +146,9 @@ void processValue(OpBuilder builder, Value v) {
 /**
  * @brief Inserts an `IncRefOp` for the given value if its type is a DAPHNE
  * data type (matrix, frame).
- * 
+ *
  * If the type is unknown, throw an exception.
- * 
+ *
  * @param v
  * @param b
  */
@@ -164,7 +166,7 @@ void incRefIfObj(Value v, OpBuilder & b) {
 /**
  * @brief Inserts an `IncRefOp` for each operand of the given operation whose
  * type is a DAPHNE data type (matrix, frame), right before the operation.
- * 
+ *
  * @param op
  * @param b
  */
@@ -177,7 +179,7 @@ void incRefArgs(Operation& op, OpBuilder & b) {
 /**
  * @brief Manages the reference counters of all values defined in the given
  * block by inserting `IncRefOp` and `DecRefOp` in the right places.
- * 
+ *
  * @param builder
  * @param b
  */
@@ -185,14 +187,14 @@ void processBlock(OpBuilder builder, Block * b) {
     // Make sure that the reference counters of block arguments are decreased.
     for(BlockArgument& arg : b->getArguments())
         processValue(builder, arg);
-    
+
     // Make sure the the reference counters of op results are decreased, and
     // Increase the reference counters of operands where necessary.
     for(Operation& op : b->getOperations()) {
         // 1) Increase the reference counters of operands, if necessary.
 
         // TODO We could use traits to identify those cases.
-        
+
         // Casts that will not call a kernel.
         if(auto co = dyn_cast<daphne::CastOp>(op)) {
             if(co.isTrivialCast() || co.isRemovePropertyCast())
@@ -228,13 +230,13 @@ void processBlock(OpBuilder builder, Block * b) {
         //   Note: We do not increase the reference counters of the arguments
         //   of vectorized pipelines, because internally, a pipeline processes
         //   views into its inputs. These are individual data objects.
-        
-        
+
+
         // 2) Make sure the the reference counters of op results are decreased.
         for(Value v : op.getResults())
             processValue(builder, v);
-        
-        
+
+
         // 3) Recurse into the op, if it has regions.
         for(Region& r : op.getRegions())
             for(Block& b2 : r.getBlocks())
@@ -252,4 +254,4 @@ void ManageObjRefsPass::runOnOperation()
 std::unique_ptr<Pass> daphne::createManageObjRefsPass()
 {
     return std::make_unique<ManageObjRefsPass>();
-}
\ No newline at end of file
+}
diff --git a/src/compiler/lowering/MapOpLowering.cpp b/src/compiler/lowering/MapOpLowering.cpp
new file mode 100644
index 000000000..27fff5dcc
--- /dev/null
+++ b/src/compiler/lowering/MapOpLowering.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+class InlineMapOpLowering
+    : public mlir::OpConversionPattern<mlir::daphne::MapOp> {
+   public:
+    using OpConversionPattern::OpConversionPattern;
+    // Rewrites a MapOp into a 2-D affine loop nest calling the UDF per element.
+    mlir::LogicalResult matchAndRewrite(
+        mlir::daphne::MapOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const override {
+        auto loc = op->getLoc();
+
+        mlir::daphne::MatrixType lhsMatrixType =
+            op->getOperandTypes().front().dyn_cast<mlir::daphne::MatrixType>();
+        auto matrixElementType = lhsMatrixType.getElementType();
+        auto lhsMemRefType = mlir::MemRefType::get(
+            {lhsMatrixType.getNumRows(), lhsMatrixType.getNumCols()}, matrixElementType);
+
+        mlir::Value lhs =
+            rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                loc, lhsMemRefType, adaptor.getArg());
+        mlir::ModuleOp module = op->getParentOfType<mlir::ModuleOp>();
+        func::FuncOp udfFuncOp =
+            module.lookupSymbol<func::FuncOp>(op.getFunc());
+        // NOTE(review): dyn_cast / lookupSymbol results above are not null-checked.
+        SmallVector<Value, 4> loopIvs;
+
+        auto outerLoop =
+            rewriter.create<AffineForOp>(loc, 0, lhsMatrixType.getNumRows(), 1);
+        for (Operation &nested : *outerLoop.getBody()) {
+            rewriter.eraseOp(&nested);
+        }
+        loopIvs.push_back(outerLoop.getInductionVar());
+
+        // outer (row) loop body: the inner (column) loop plus an explicit yield
+        rewriter.setInsertionPointToStart(outerLoop.getBody());
+        auto innerLoop =
+            rewriter.create<AffineForOp>(loc, 0, lhsMatrixType.getNumCols(), 1);
+        for (Operation &nested : *innerLoop.getBody()) {
+            rewriter.eraseOp(&nested);
+        }
+        loopIvs.push_back(innerLoop.getInductionVar());
+        rewriter.create<AffineYieldOp>(loc);
+        rewriter.setInsertionPointToStart(innerLoop.getBody());
+
+        // inner loop body: lhs[i][j] = udf(lhs[i][j]); the result overwrites lhs
+        mlir::Value lhsValue = rewriter.create<AffineLoadOp>(loc, lhs, loopIvs);
+        mlir::Value res =
+            rewriter.create<func::CallOp>(loc, udfFuncOp, ValueRange{lhsValue})
+                ->getResult(0);
+        rewriter.create<AffineStoreOp>(loc, res, lhs, loopIvs);
+        rewriter.create<AffineYieldOp>(loc);
+
+        rewriter.setInsertionPointAfter(outerLoop);
+        mlir::Value output = convertMemRefToDenseMatrix(op->getLoc(), rewriter,
+                                                        lhs, op.getType());
+        rewriter.replaceOp(op, output);
+        return mlir::success();
+    }
+};
+
+namespace {
+/**
+ * @brief The MapOpLoweringPass rewrites the daphne::MapOp operator
+ * to a set of perfectly nested affine loops and inserts for each element a call
+ * to the UDF assigned to the daphne::MapOp.
+ *
+ * This rewrite enables a subsequent inlining pass to completely replace
+ * the daphne::MapOp by inlining the CallOps produced by this pass.
+ */
+struct MapOpLoweringPass
+    : public mlir::PassWrapper<MapOpLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit MapOpLoweringPass() {}
+
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry.insert<mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                        mlir::memref::MemRefDialect,
+                        mlir::daphne::DaphneDialect, mlir::func::FuncDialect>();
+    }
+    void runOnOperation() final;
+
+    StringRef getArgument() const final { return "lower-map"; }
+    StringRef getDescription() const final {
+        return "Lowers the daphne.mapOp operation to "
+               "a set of affine loops, directly calling the UDF. "
+               "Subsequent use of the inlining pass may inline the call to the "
+               "UDF.";
+    }
+};
+}  // end anonymous namespace
+
+void MapOpLoweringPass::runOnOperation() {
+    mlir::ConversionTarget target(getContext());
+    mlir::RewritePatternSet patterns(&getContext());
+    mlir::LowerToLLVMOptions llvmOptions(&getContext());
+    mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions);
+
+    target.addLegalDialect<mlir::AffineDialect, arith::ArithDialect,
+                           memref::MemRefDialect, mlir::daphne::DaphneDialect,
+                           mlir::func::FuncDialect>();
+
+    target.addIllegalOp<mlir::daphne::MapOp>();
+
+    patterns.insert<InlineMapOpLowering>(&getContext());
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
+        signalPassFailure();
+    }
+}
+
+std::unique_ptr<mlir::Pass> mlir::daphne::createMapOpLoweringPass() {
+    return std::make_unique<MapOpLoweringPass>();
+}
diff --git a/src/compiler/lowering/MatMulOpLowering.cpp b/src/compiler/lowering/MatMulOpLowering.cpp
new file mode 100644
index 000000000..6c401e266
--- /dev/null
+++ b/src/compiler/lowering/MatMulOpLowering.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/UseDefLists.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+static constexpr int ROW = 0;
+static constexpr int COL = 1;
+// Emits affine loops computing output[i][j] += lhs[i][k] * rhs[k][j].
+void affineMatMul(mlir::Value &lhs, mlir::Value &rhs, mlir::Value &output,
+                  ConversionPatternRewriter &rewriter, mlir::Location loc,
+                  ArrayRef<int64_t> lhsShape, ArrayRef<int64_t> rhsShape,
+                  mlir::MLIRContext *ctx) {
+    SmallVector<Value, 4> loopIvs;
+
+    // row loop (i): outermost, runs over lhsShape[ROW]
+    auto rowLoop = rewriter.create<AffineForOp>(loc, 0, lhsShape[ROW], 1);
+    for (Operation &nested : *rowLoop.getBody()) {
+        rewriter.eraseOp(&nested);
+    }
+
+    // row loop body
+    rewriter.setInsertionPointToStart(rowLoop.getBody());
+
+    // fma loop (k): middle loop, runs over rhsShape[ROW] (the reduction index)
+    auto innerLoop = rewriter.create<AffineForOp>(loc, 0, rhsShape[ROW], 1);
+    for (Operation &nested : *innerLoop.getBody()) {
+        rewriter.eraseOp(&nested);
+    }
+    rewriter.setInsertionPointToStart(innerLoop.getBody());
+
+    // col loop (j): innermost, runs over rhsShape[COL]
+    auto colLoop = rewriter.create<AffineForOp>(loc, 0, rhsShape[COL], 1);
+    for (Operation &nested : *colLoop.getBody()) {
+        rewriter.eraseOp(&nested);
+    }
+
+    // col loop body
+    rewriter.setInsertionPointToStart(colLoop.getBody());
+
+    loopIvs.push_back(rowLoop.getInductionVar());
+    loopIvs.push_back(colLoop.getInductionVar());
+    loopIvs.push_back(innerLoop.getInductionVar());
+
+    // load a = lhs[i][k], b = rhs[k][j], c = output[i][j]
+    mlir::Value a = rewriter.create<memref::LoadOp>(
+        loc, lhs, ValueRange{loopIvs[0], loopIvs[2]});
+    mlir::Value b = rewriter.create<memref::LoadOp>(
+        loc, rhs, ValueRange{loopIvs[2], loopIvs[1]});
+    mlir::Value c = rewriter.create<memref::LoadOp>(
+        loc, output, ValueRange{loopIvs[0], loopIvs[1]});
+
+    // fma: a * b + c
+    mlir::Value fma = rewriter.create<LLVM::FMAOp>(loc, a, b, c);
+
+    // store the accumulated value back to output[i][j]
+    rewriter.create<memref::StoreOp>(loc, fma, output,
+                                     ValueRange{loopIvs[0], loopIvs[1]});
+
+    // The loop bodies were emptied above, so add an AffineYieldOp to each.
+    rewriter.setInsertionPointToEnd(rowLoop.getBody());
+    rewriter.create<AffineYieldOp>(loc);
+    rewriter.setInsertionPointToEnd(colLoop.getBody());
+    rewriter.create<AffineYieldOp>(loc);
+    rewriter.setInsertionPointToEnd(innerLoop.getBody());
+    rewriter.create<AffineYieldOp>(loc);
+    rewriter.setInsertionPointAfter(rowLoop);
+}
+
+class MatMulLowering : public OpConversionPattern<daphne::MatMulOp> {
+   public:
+    using OpConversionPattern::OpConversionPattern;
+    // Lowers MatMulOp via memrefs and a hand-built affine loop nest.
+    LogicalResult matchAndRewrite(
+        daphne::MatMulOp op, OpAdaptor adaptor,
+        ConversionPatternRewriter &rewriter) const override {
+        auto loc = op->getLoc();
+        mlir::daphne::MatrixType lhsMatrixType =
+            adaptor.getLhs().getType().dyn_cast<mlir::daphne::MatrixType>();
+        mlir::daphne::MatrixType rhsMatrixType =
+            adaptor.getRhs().getType().dyn_cast<mlir::daphne::MatrixType>();
+        // NOTE(review): the dyn_cast results above are used without a null check.
+        auto lhsRows = lhsMatrixType.getNumRows();
+        auto lhsCols = lhsMatrixType.getNumCols();
+
+        auto rhsRows = rhsMatrixType.getNumRows();
+        auto rhsCols = rhsMatrixType.getNumCols();
+
+        auto matrixElementType = lhsMatrixType.getElementType();
+
+        // TODO(phil): if shape is unknown, e.g., row/col = -1 we currently
+        // can't create a MemRefType
+        auto lhsMemRefType =
+            mlir::MemRefType::get({lhsRows, lhsCols}, matrixElementType);
+        auto rhsMemRefType =
+            mlir::MemRefType::get({rhsRows, rhsCols}, matrixElementType);
+
+        mlir::MemRefType outputMemRefType =
+            mlir::MemRefType::get({lhsRows, rhsCols}, matrixElementType);
+
+        // daphne::Matrix -> memref (both operands use the lhs element type)
+        mlir::Value lhs =
+            rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                op->getLoc(), lhsMemRefType, adaptor.getLhs());
+        mlir::Value rhs =
+            rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                op->getLoc(), rhsMemRefType, adaptor.getRhs());
+
+        // Alloc output memref of shape lhsRows x rhsCols
+        mlir::Value outputMemRef =
+            insertMemRefAlloc(outputMemRefType, loc, rewriter);
+
+        // Fill the output MemRef (0.0 is passed as the value argument)
+        affineFillMemRef(0.0, rewriter, loc, outputMemRefType.getShape(),
+                         op->getContext(), outputMemRef, matrixElementType);
+        // Do the actual MatMul with hand built codegen (accumulates into output)
+        affineMatMul(lhs, rhs, outputMemRef, rewriter, loc,
+                     lhsMemRefType.getShape(), rhsMemRefType.getShape(),
+                     op->getContext());
+
+        mlir::Value DM = convertMemRefToDenseMatrix(loc, rewriter, outputMemRef,
+                                                    op.getType());
+
+        rewriter.replaceOp(op, DM);
+        return success();
+    }
+};
+
+namespace {
+/**
+ * @brief The MatMulLoweringPass rewrites the MatMulOp from the DaphneDialect
+ * to an affine loop structure implementing a naive iterative matrix
+ * multiplication.
+ *
+ * The naive iterative algorithm is simply a perfectly nested
+ * loop algorithm running in O(n^3) that performs 3 load operations in its
+ * inner loop body, calculates an FMA and stores the result in the output
+ * matrix.
+ */
+struct MatMulLoweringPass
+    : public mlir::PassWrapper<MatMulLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit MatMulLoweringPass() {}
+
+    StringRef getArgument() const final { return "lower-mm"; }
+    StringRef getDescription() const final {
+        return "This pass lowers the MatMulOp to an affine loop structure "
+               "performing a naive iterative matrix multiplication.";
+    }
+
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry.insert<mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                        mlir::memref::MemRefDialect>();
+    }
+    void runOnOperation() final;
+};
+}  // end anonymous namespace
+
+void MatMulLoweringPass::runOnOperation() {
+    mlir::ConversionTarget target(getContext());
+    mlir::RewritePatternSet patterns(&getContext());
+    LowerToLLVMOptions llvmOptions(&getContext());
+    LLVMTypeConverter typeConverter(&getContext(), llvmOptions);
+
+    target.addLegalDialect<mlir::memref::MemRefDialect>();
+    target.addLegalDialect<mlir::arith::ArithDialect>();
+    target.addLegalDialect<mlir::scf::SCFDialect>();
+    target.addLegalDialect<mlir::AffineDialect>();
+    target.addLegalDialect<mlir::linalg::LinalgDialect>();
+    target.addLegalDialect<mlir::LLVM::LLVMDialect>();
+
+    target.addLegalOp<mlir::daphne::ConvertDenseMatrixToMemRef>();
+    target.addLegalOp<mlir::daphne::ConvertMemRefToDenseMatrix>();
+    target.addLegalOp<mlir::daphne::DecRefOp>();
+
+    target.addIllegalOp<mlir::daphne::MatMulOp>();
+
+    patterns.insert<MatMulLowering>(&getContext());
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
+        signalPassFailure();
+    }
+}
+
+std::unique_ptr<mlir::Pass> mlir::daphne::createMatMulOpLoweringPass() {
+    return std::make_unique<MatMulLoweringPass>();
+}
diff --git a/src/compiler/lowering/ModOpLowering.cpp b/src/compiler/lowering/ModOpLowering.cpp
new file mode 100644
index 000000000..05fdf7ea4
--- /dev/null
+++ b/src/compiler/lowering/ModOpLowering.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/utils/CompilerUtils.h"
+#include "compiler/utils/LoweringUtils.h"
+#include "ir/daphneir/Daphne.h"
+#include "ir/daphneir/Passes.h"
+#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+class EwModOpLowering
+    : public mlir::OpConversionPattern<mlir::daphne::EwModOp> {
+   public:
+    using OpConversionPattern::OpConversionPattern;
+    // True iff divisor is a compile-time constant positive power of two.
+    [[nodiscard]] bool optimization_viable(mlir::Value divisor) const {
+        auto [isConst, value] = CompilerUtils::isConstant<int64_t>(divisor);
+        // Require value > 0: 0 would pass the bit test and yield mask -1.
+        return isConst && value > 0 && (value & (value - 1)) == 0;
+    }
+    // Power-of-two fast path: rewrites each element x to x & (divisor - 1).
+    void optimizeEwModOp(mlir::Value memRef, mlir::Value divisor,
+                         ArrayRef<int64_t> shape,
+                         ConversionPatternRewriter &rewriter,
+                         Location loc) const {
+        // mask = divisor - 1. NOTE(review): x & mask != remsi for x < 0; confirm.
+        mlir::Value cst_one = rewriter.create<mlir::arith::ConstantOp>(
+            loc, rewriter.getI64Type(), rewriter.getI64IntegerAttr(1));
+
+        auto casted_divisor = typeConverter->materializeTargetConversion(
+            rewriter, loc, rewriter.getI64Type(), ValueRange{divisor});
+
+        mlir::Value rhs =
+            rewriter.create<mlir::arith::SubIOp>(loc, casted_divisor, cst_one);
+
+        SmallVector<int64_t, 4> lowerBounds(/*Rank=*/2, /*Value=*/0);
+        SmallVector<int64_t, 4> steps(/*Rank=*/2, /*Value=*/1);
+        buildAffineLoopNest(
+            rewriter, loc, lowerBounds, shape, steps,
+            [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) {
+                mlir::Value load =
+                    nestedBuilder.create<AffineLoadOp>(loc, memRef, ivs);
+                mlir::Value res{};
+
+                Value castedLhs =
+                    this->typeConverter->materializeTargetConversion(
+                        nestedBuilder, loc,
+                        nestedBuilder.getIntegerType(
+                            divisor.getType().getIntOrFloatBitWidth()),
+                        ValueRange{load});
+
+                res = nestedBuilder.create<arith::AndIOp>(loc, castedLhs, rhs);
+                Value castedRes =
+                    this->typeConverter->materializeSourceConversion(
+                        nestedBuilder, loc, divisor.getType(), ValueRange{res});
+
+                nestedBuilder.create<AffineStoreOp>(loc, castedRes, memRef,
+                                                    ivs);
+            });
+    }
+    // General path: element-wise arith.remf (float divisor) or arith.remsi.
+    void lowerEwModOp(mlir::Value memRef, mlir::Value divisor,
+                      ArrayRef<int64_t> shape,
+                      ConversionPatternRewriter &rewriter, Location loc) const {
+        SmallVector<int64_t, 4> lowerBounds(/*Rank=*/2, /*Value=*/0);
+        SmallVector<int64_t, 4> steps(/*Rank=*/2, /*Value=*/1);
+        buildAffineLoopNest(
+            rewriter, loc, lowerBounds, shape, steps,
+            [&](OpBuilder &nestedBuilder, Location loc, ValueRange ivs) {
+                mlir::Value load =
+                    nestedBuilder.create<AffineLoadOp>(loc, memRef, ivs);
+                mlir::Value res{};
+
+                // this is enough since divisor will be casted to float if
+                // matrix is float
+                if (divisor.getType().isa<mlir::FloatType>()) {
+                    res =
+                        nestedBuilder.create<arith::RemFOp>(loc, load, divisor);
+                    nestedBuilder.create<AffineStoreOp>(loc, res, memRef, ivs);
+                    return;
+                }
+
+                Value castedLhs =
+                    this->typeConverter->materializeTargetConversion(
+                        nestedBuilder, loc,
+                        nestedBuilder.getIntegerType(
+                            divisor.getType().getIntOrFloatBitWidth()),
+                        ValueRange{load});
+
+                Value castedRhs =
+                    this->typeConverter->materializeTargetConversion(
+                        nestedBuilder, loc,
+                        nestedBuilder.getIntegerType(
+                            divisor.getType().getIntOrFloatBitWidth()),
+                        ValueRange{divisor});
+
+                res = nestedBuilder.create<arith::RemSIOp>(loc, castedLhs,
+                                                           castedRhs);
+                Value castedRes =
+                    this->typeConverter->materializeSourceConversion(
+                        nestedBuilder, loc, divisor.getType(), ValueRange{res});
+
+                nestedBuilder.create<AffineStoreOp>(loc, castedRes, memRef,
+                                                    ivs);
+            });
+    }
+    // Converts lhs to a memref, applies the modulo in place, converts it back.
+    mlir::LogicalResult matchAndRewrite(
+        mlir::daphne::EwModOp op, OpAdaptor adaptor,
+        mlir::ConversionPatternRewriter &rewriter) const override {
+        mlir::daphne::MatrixType lhsTensor =
+            adaptor.getLhs().getType().dyn_cast<mlir::daphne::MatrixType>();
+        auto lhsRows = lhsTensor.getNumRows();
+        auto lhsCols = lhsTensor.getNumCols();
+
+        auto lhsMemRefType = mlir::MemRefType::get({lhsRows, lhsCols},
+                                                   lhsTensor.getElementType());
+
+        // daphne::Matrix -> memref
+        mlir::Value lhs =
+            rewriter.create<mlir::daphne::ConvertDenseMatrixToMemRef>(
+                op->getLoc(), lhsMemRefType, adaptor.getLhs());
+        mlir::Value rhs = adaptor.getRhs();
+
+        if (optimization_viable(rhs))
+            optimizeEwModOp(lhs, rhs,
+                            {lhsTensor.getNumRows(), lhsTensor.getNumCols()},
+                            rewriter, op->getLoc());
+        else
+            lowerEwModOp(lhs, rhs,
+                         {lhsTensor.getNumRows(), lhsTensor.getNumCols()},
+                         rewriter, op->getLoc());
+
+        mlir::Value output = convertMemRefToDenseMatrix(op->getLoc(), rewriter,
+                                                        lhs, op.getType());
+        rewriter.replaceOp(op, output);
+        return success();
+    }
+};
+
+namespace {
+/**
+ * @brief Performs an integer mod optimization on the EwModOp operator by
+ * lowering to an affine loop structure and performing the mod op on values
+ * loaded from a MemRef.
+ *
+ * If possible, we additionally perform the integer modulo optimization by
+ * replacing the modulo with a bitwise AND and a subtraction.
+ */
+struct ModOpLoweringPass
+    : public mlir::PassWrapper<ModOpLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+    explicit ModOpLoweringPass() {}
+
+    void getDependentDialects(mlir::DialectRegistry &registry) const override {
+        registry
+            .insert<mlir::LLVM::LLVMDialect, mlir::AffineDialect,
+                    mlir::memref::MemRefDialect, mlir::daphne::DaphneDialect>();
+    }
+    void runOnOperation() final;
+
+    StringRef getArgument() const final { return "lower-mod"; }
+    StringRef getDescription() const final {
+        return "Performs an integer mod optimization on the EwModOp operator "
+               "by lowering to an affine loop structure "
+               "and performing the mod op on values loaded from a MemRef.";
+    }
+};
+}  // end anonymous namespace
+
+void ModOpLoweringPass::runOnOperation() {
+    mlir::ConversionTarget target(getContext());
+    mlir::RewritePatternSet patterns(&getContext());
+    mlir::LowerToLLVMOptions llvmOptions(&getContext());
+    mlir::LLVMTypeConverter typeConverter(&getContext(), llvmOptions);
+
+    typeConverter.addConversion(convertInteger);
+    typeConverter.addConversion(convertFloat);
+    typeConverter.addConversion([](Type type) { return type; });
+    typeConverter.addArgumentMaterialization(materializeCastFromIllegal);
+    typeConverter.addSourceMaterialization(materializeCastToIllegal);
+    typeConverter.addTargetMaterialization(materializeCastFromIllegal);
+
+    target.addLegalDialect<mlir::memref::MemRefDialect>();
+    target.addLegalDialect<mlir::arith::ArithDialect>();
+    target.addLegalDialect<mlir::AffineDialect>();
+    target.addLegalDialect<mlir::LLVM::LLVMDialect>();
+    target.addLegalDialect<mlir::BuiltinDialect>();
+    target.addLegalDialect<mlir::daphne::DaphneDialect>();
+
+    target.addIllegalOp<mlir::daphne::EwModOp>();
+
+    patterns.insert<EwModOpLowering>(typeConverter, &getContext());
+    auto module = getOperation();
+    if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
+        signalPassFailure();
+    }
+}
+
+std::unique_ptr<mlir::Pass> mlir::daphne::createModOpLoweringPass() {
+    return std::make_unique<ModOpLoweringPass>();
+}
diff --git a/src/compiler/lowering/RewriteSqlOpPass.cpp b/src/compiler/lowering/RewriteSqlOpPass.cpp
index 401544ac3..9c3d2d32d 100644
--- a/src/compiler/lowering/RewriteSqlOpPass.cpp
+++ b/src/compiler/lowering/RewriteSqlOpPass.cpp
@@ -85,6 +85,9 @@ namespace
     : public PassWrapper <RewriteSqlOpPass, OperationPass<ModuleOp>>
     {
         void runOnOperation() final;
+
+    StringRef getArgument() const final { return "rewrite-sqlop"; }
+    StringRef getDescription() const final { return "TODO"; }
     };
 }
 
diff --git a/src/compiler/lowering/RewriteToCallKernelOpPass.cpp b/src/compiler/lowering/RewriteToCallKernelOpPass.cpp
index b9e78f319..4454aaec8 100644
--- a/src/compiler/lowering/RewriteToCallKernelOpPass.cpp
+++ b/src/compiler/lowering/RewriteToCallKernelOpPass.cpp
@@ -18,9 +18,14 @@
 #include "ir/daphneir/Daphne.h"
 #include "ir/daphneir/Passes.h"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/BuiltinDialect.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/IR/IRMapping.h"
 
@@ -364,6 +369,7 @@ namespace
 
             // Inject the current DaphneContext as the last input parameter to
             // all kernel calls, unless it's a CreateDaphneContextOp.
+
             if(!llvm::isa<daphne::CreateDaphneContextOp>(op))
                 newOperands.push_back(dctx);
 
@@ -494,8 +500,12 @@ void RewriteToCallKernelOpPass::runOnOperation()
     // Specification of (il)legal dialects/operations. All DaphneIR operations
     // but those explicitly marked as legal will be replaced by CallKernelOp.
     ConversionTarget target(getContext());
-    target.addLegalDialect<arith::ArithDialect, LLVM::LLVMDialect, scf::SCFDialect>();
-    target.addLegalOp<ModuleOp, func::FuncOp>();
+    target.addLegalDialect<mlir::AffineDialect, LLVM::LLVMDialect,
+                           scf::SCFDialect, memref::MemRefDialect,
+                           mlir::linalg::LinalgDialect,
+                           mlir::arith::ArithDialect, mlir::BuiltinDialect>();
+
+    target.addLegalOp<ModuleOp, func::FuncOp, func::CallOp, func::ReturnOp>();
     target.addIllegalDialect<daphne::DaphneDialect>();
     target.addLegalOp<
             daphne::ConstantOp,
@@ -504,6 +514,8 @@ void RewriteToCallKernelOpPass::runOnOperation()
             daphne::CreateVariadicPackOp,
             daphne::StoreVariadicPackOp,
             daphne::VectorizedPipelineOp,
+            scf::ForOp,
+            memref::LoadOp,
             daphne::GenericCallOp,
             daphne::MapOp
     >();
diff --git a/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp b/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp
index 453f65525..15ebd9b03 100644
--- a/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp
+++ b/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp
@@ -387,6 +387,9 @@ namespace {
 
     public:
         void runOnOperation() final;
+
+    StringRef getArgument() const final { return "specialize-generic-funcs"; }
+    StringRef getDescription() const final { return "TODO"; }
     };
 }
 
diff --git a/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp b/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp
index 58c042af0..8e933155e 100644
--- a/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp
+++ b/src/compiler/lowering/WhileLoopInvariantCodeMotionPass.cpp
@@ -36,6 +36,9 @@ using namespace mlir;
 struct WhileLoopInvariantCodeMotionPass
 : public PassWrapper <WhileLoopInvariantCodeMotionPass, OperationPass<func::FuncOp>> {
     void runOnOperation() final;
+
+    StringRef getArgument() const final { return "while-loop-invariant-code-motion"; }
+    StringRef getDescription() const final { return "TODO"; }
 };
 
 void WhileLoopInvariantCodeMotionPass::runOnOperation() {
diff --git a/src/compiler/utils/CMakeLists.txt b/src/compiler/utils/CMakeLists.txt
index a7acd88bf..73e8cd7c0 100644
--- a/src/compiler/utils/CMakeLists.txt
+++ b/src/compiler/utils/CMakeLists.txt
@@ -14,9 +14,10 @@
 
 add_library(CompilerUtils STATIC
         CompilerUtils.cpp
+        LoweringUtils.cpp
         TypePrinting.cpp
 )
 
 target_link_libraries(CompilerUtils PUBLIC
         DaphneMetaDataParser
-)
\ No newline at end of file
+)
diff --git a/src/compiler/utils/CompilerUtils.cpp b/src/compiler/utils/CompilerUtils.cpp
index 9ec231f55..43fb800f1 100644
--- a/src/compiler/utils/CompilerUtils.cpp
+++ b/src/compiler/utils/CompilerUtils.cpp
@@ -57,6 +57,14 @@ std::pair<bool, int64_t> CompilerUtils::isConstant<int64_t>(mlir::Value v) {
     );
 }
 
+// uint64_t specialization: reads an IntegerAttr through APInt::getLimitedValue().
+template<>
+std::pair<bool, uint64_t> CompilerUtils::isConstant<uint64_t>(mlir::Value v) {
+    return isConstantHelper<uint64_t, mlir::IntegerAttr>(
+            v, [](mlir::IntegerAttr attr){return attr.getValue().getLimitedValue();}
+    );
+}
+
 template<>
 std::pair<bool, float> CompilerUtils::isConstant<float>(mlir::Value v) {
     return isConstantHelper<float, mlir::FloatAttr>(
diff --git a/src/compiler/utils/CompilerUtils.h b/src/compiler/utils/CompilerUtils.h
index b934f55ea..13e4973b6 100644
--- a/src/compiler/utils/CompilerUtils.h
+++ b/src/compiler/utils/CompilerUtils.h
@@ -178,6 +178,9 @@ struct CompilerUtils {
             return "Descriptor";
         else if(t.isa<mlir::daphne::TargetType>())
             return "Target";
+        else if(auto memRefType = t.dyn_cast<mlir::MemRefType>()) {
+            return "StridedMemRefType_" + mlirTypeToCppTypeName(memRefType.getElementType(), false) + "_2";
+        }
 
         std::string typeName;
         llvm::raw_string_ostream rsos(typeName);
@@ -261,4 +264,4 @@ struct CompilerUtils {
             return vt;
     }
 
-};
\ No newline at end of file
+};
diff --git a/src/compiler/utils/LoweringUtils.cpp b/src/compiler/utils/LoweringUtils.cpp
new file mode 100644
index 000000000..943dbd304
--- /dev/null
+++ b/src/compiler/utils/LoweringUtils.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LoweringUtils.h"
+
+#include <ir/daphneir/Passes.h>
+
+#include "ir/daphneir/Daphne.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Transforms/Passes.h"
+
+/// Creates a memref.alloc of `type` and moves it to the front of its block.
+mlir::Value insertMemRefAlloc(mlir::MemRefType type, mlir::Location loc,
+                              mlir::PatternRewriter &rewriter) {
+    mlir::memref::AllocOp allocOp =
+        rewriter.create<mlir::memref::AllocOp>(loc, type);
+    // Hoist the allocation ahead of the op that currently leads the block.
+    allocOp->moveBefore(&allocOp->getBlock()->front());
+    return allocOp;
+}
+
+/// Creates a memref.dealloc of `memref` and moves it directly ahead of the last
+/// operation of the block returned by memref.getParentBlock().
+void insertMemRefDealloc(mlir::Value memref, mlir::Location loc,
+                         mlir::PatternRewriter &rewriter) {
+    mlir::Operation *deallocOp = rewriter.create<mlir::memref::DeallocOp>(loc, memref);
+    deallocOp->moveBefore(&memref.getParentBlock()->back());
+}
+
+// Emits a row/column affine.for nest that stores `value`, materialised as an
+// i64 constant, to memRef[i, j] for 0 <= i < shape[0] and 0 <= j < shape[1].
+// NOTE(review): `ctx` and `elemType` are not read by this function.
+// TODO(phil) try to provide function templates to remove duplication
+void affineFillMemRefInt(int value, mlir::ConversionPatternRewriter &rewriter,
+                         mlir::Location loc, mlir::ArrayRef<int64_t> shape,
+                         mlir::MLIRContext *ctx, mlir::Value memRef,
+                         mlir::Type elemType) {
+    constexpr int ROW = 0, COL = 1;
+    mlir::Value constant = rewriter.create<mlir::arith::ConstantOp>(
+        loc, rewriter.getI64Type(), rewriter.getI64IntegerAttr(value));
+    llvm::SmallVector<mlir::Value, 4> indices;
+
+    // Row loop: erase whatever its body currently holds and remember its IV.
+    auto rowLoop = rewriter.create<mlir::AffineForOp>(loc, 0, shape[ROW], 1);
+    for (mlir::Operation &bodyOp : *rowLoop.getBody())
+        rewriter.eraseOp(&bodyOp);
+    indices.push_back(rowLoop.getInductionVar());
+
+    // Column loop, created inside the row loop and emptied the same way.
+    rewriter.setInsertionPointToStart(rowLoop.getBody());
+    auto colLoop = rewriter.create<mlir::AffineForOp>(loc, 0, shape[COL], 1);
+    for (mlir::Operation &bodyOp : *colLoop.getBody())
+        rewriter.eraseOp(&bodyOp);
+    indices.push_back(colLoop.getInductionVar());
+    rewriter.create<mlir::AffineYieldOp>(loc);  // lands in the row loop body
+    // Column loop body: one store at (row IV, column IV), then the yield.
+    rewriter.setInsertionPointToStart(colLoop.getBody());
+    rewriter.create<mlir::AffineStoreOp>(loc, constant, memRef, indices);
+    rewriter.create<mlir::AffineYieldOp>(loc);
+    rewriter.setInsertionPointAfter(rowLoop);
+}
+
+// Emits a row/column affine.for nest that stores `value`, materialised as a
+// float constant of type `elemType`, to memRef[i, j] for 0 <= i < shape[0] and
+// 0 <= j < shape[1]. NOTE(review): `ctx` is not read by this function.
+void affineFillMemRef(double value, mlir::ConversionPatternRewriter &rewriter,
+                      mlir::Location loc, mlir::ArrayRef<int64_t> shape,
+                      mlir::MLIRContext *ctx, mlir::Value memRef,
+                      mlir::Type elemType) {
+    constexpr int ROW = 0, COL = 1;
+    mlir::Value constant = rewriter.create<mlir::arith::ConstantOp>(
+        loc, elemType, rewriter.getFloatAttr(elemType, value));
+    llvm::SmallVector<mlir::Value, 4> indices;
+
+    // Row loop: erase whatever its body currently holds and remember its IV.
+    auto rowLoop = rewriter.create<mlir::AffineForOp>(loc, 0, shape[ROW], 1);
+    for (mlir::Operation &bodyOp : *rowLoop.getBody())
+        rewriter.eraseOp(&bodyOp);
+    indices.push_back(rowLoop.getInductionVar());
+
+    // Column loop, created inside the row loop and emptied the same way.
+    rewriter.setInsertionPointToStart(rowLoop.getBody());
+    auto colLoop = rewriter.create<mlir::AffineForOp>(loc, 0, shape[COL], 1);
+    for (mlir::Operation &bodyOp : *colLoop.getBody())
+        rewriter.eraseOp(&bodyOp);
+    indices.push_back(colLoop.getInductionVar());
+    rewriter.create<mlir::AffineYieldOp>(loc);  // lands in the row loop body
+    // Column loop body: one store at (row IV, column IV), then the yield.
+    rewriter.setInsertionPointToStart(colLoop.getBody());
+    rewriter.create<mlir::AffineStoreOp>(loc, constant, memRef, indices);
+    rewriter.create<mlir::AffineYieldOp>(loc);
+    rewriter.setInsertionPointAfter(rowLoop);
+}
+
+/// Creates a daphne ConvertMemRefToDenseMatrix op with result type `type` whose
+/// operands are the aligned pointer (as index), the offset, sizes[0..1] and
+/// strides[0..1] extracted from `memRef`; returns the op's result.
+mlir::Value convertMemRefToDenseMatrix(
+    mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+    mlir::Value memRef, mlir::Type type) {
+    // Offset, sizes and strides all come from one metadata op.
+    auto metadata =
+        rewriter.create<mlir::memref::ExtractStridedMetadataOp>(loc, memRef);
+    // Aligned data pointer of the memref, as an index value.
+    mlir::Value basePtr =
+        rewriter.create<mlir::memref::ExtractAlignedPointerAsIndexOp>(loc, memRef);
+
+    mlir::Value offset = metadata.getOffset();
+    mlir::ResultRange dims = metadata.getSizes();
+    mlir::ResultRange steps = metadata.getStrides();
+
+    return rewriter.create<mlir::daphne::ConvertMemRefToDenseMatrix>(
+        loc, type, basePtr, offset, dims[0], dims[1], steps[0], steps[1]);
+}
+
+/// Signless integer type whose bit width equals that of `floatType`.
+mlir::Type convertFloat(mlir::FloatType floatType) {
+    return mlir::IntegerType::get(floatType.getContext(), floatType.getWidth());
+}
+
+/// Signless integer type whose bit width equals that of `intType`.
+mlir::Type convertInteger(mlir::IntegerType intType) {
+    return mlir::IntegerType::get(intType.getContext(), intType.getWidth());
+}
+
+/// Casts inputs[0], whose (element) type is a signed or unsigned integer, to
+/// the signless `type` via an UnrealizedConversionCastOp; else std::nullopt.
+llvm::Optional<mlir::Value> materializeCastFromIllegal(mlir::OpBuilder &builder,
+                                                       mlir::Type type,
+                                                       mlir::ValueRange inputs,
+                                                       mlir::Location loc) {
+    mlir::Value source = inputs[0];
+    mlir::Type srcElem = getElementTypeOrSelf(source.getType());
+    const bool srcHasSign = srcElem.isSignedInteger() || srcElem.isUnsignedInteger();
+    if (!srcHasSign || !getElementTypeOrSelf(type).isSignlessInteger())
+        return std::nullopt;
+
+    return builder.create<mlir::UnrealizedConversionCastOp>(loc, type, source)
+        ->getResult(0);
+}
+
+/// Casts inputs[0], whose (element) type is a signless integer, to the signed
+/// or unsigned `type` via an UnrealizedConversionCastOp; else std::nullopt.
+llvm::Optional<mlir::Value> materializeCastToIllegal(mlir::OpBuilder &builder,
+                                                     mlir::Type type,
+                                                     mlir::ValueRange inputs,
+                                                     mlir::Location loc) {
+    mlir::Value source = inputs[0];
+    mlir::Type dstElem = getElementTypeOrSelf(type);
+    const bool dstHasSign = dstElem.isSignedInteger() || dstElem.isUnsignedInteger();
+    if (!getElementTypeOrSelf(source.getType()).isSignlessInteger() || !dstHasSign)
+        return std::nullopt;
+
+    return builder.create<mlir::UnrealizedConversionCastOp>(loc, type, source)
+        ->getResult(0);
+}
+
+/// Returns the op in v's parent block that holds the last use of v (the user
+/// itself or its ancestor in that block), or nullptr if v has no uses.
+/// NOTE(review): assumes every user is nested inside v's parent block.
+mlir::Operation *findLastUseOfSSAValue(mlir::Value &v) {
+    mlir::Block *homeBlock = v.getParentBlock();
+    mlir::Operation *latest = nullptr;
+    for (mlir::Operation *user : v.getUsers()) {
+        // Climb to the ancestor of the user that lives in homeBlock.
+        while (user->getBlock() != homeBlock)
+            user = user->getParentOp();
+        if (latest == nullptr || latest->isBeforeInBlock(user))
+            latest = user;
+    }
+    return latest;
+}
diff --git a/src/compiler/utils/LoweringUtils.h b/src/compiler/utils/LoweringUtils.h
new file mode 100644
index 000000000..5555b1324
--- /dev/null
+++ b/src/compiler/utils/LoweringUtils.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "llvm/ADT/ArrayRef.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Visitors.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+mlir::Value insertMemRefAlloc(mlir::MemRefType type, mlir::Location loc,
+                              mlir::PatternRewriter &rewriter);
+
+void insertMemRefDealloc(mlir::Value memref, mlir::Location loc,
+                         mlir::PatternRewriter &rewriter);
+
+void affineFillMemRefInt(int value, mlir::ConversionPatternRewriter &rewriter,
+                         mlir::Location loc, mlir::ArrayRef<int64_t> shape,
+                         mlir::MLIRContext *ctx, mlir::Value memRef,
+                         mlir::Type elemType);
+
+void affineFillMemRef(double value, mlir::ConversionPatternRewriter &rewriter,
+                      mlir::Location loc, mlir::ArrayRef<int64_t> shape,
+                      mlir::MLIRContext *ctx, mlir::Value memRef,
+                      mlir::Type elemType);
+
+mlir::Value convertMemRefToDenseMatrix(mlir::Location,
+                                       mlir::ConversionPatternRewriter &,
+                                       mlir::Value memRef, mlir::Type);
+
+llvm::Optional<mlir::Value> materializeCastFromIllegal(mlir::OpBuilder &builder,
+                                                       mlir::Type type,
+                                                       mlir::ValueRange inputs,
+                                                       mlir::Location loc);
+
+llvm::Optional<mlir::Value> materializeCastToIllegal(mlir::OpBuilder &builder,
+                                                     mlir::Type type,
+                                                     mlir::ValueRange inputs,
+                                                     mlir::Location loc);
+
+mlir::Type convertFloat(mlir::FloatType floatType);
+
+mlir::Type convertInteger(mlir::IntegerType intType);
+
+mlir::Operation *findLastUseOfSSAValue(mlir::Value &v);
diff --git a/src/ir/daphneir/CMakeLists.txt b/src/ir/daphneir/CMakeLists.txt
index c6ef724e3..6036aefcd 100644
--- a/src/ir/daphneir/CMakeLists.txt
+++ b/src/ir/daphneir/CMakeLists.txt
@@ -52,4 +52,4 @@ add_mlir_dialect_library(MLIRDaphne
 
     LINK_LIBS PUBLIC
     CompilerUtils
-)
\ No newline at end of file
+)
diff --git a/src/ir/daphneir/Daphne.h b/src/ir/daphneir/Daphne.h
index f5cd35985..73a2e6b23 100644
--- a/src/ir/daphneir/Daphne.h
+++ b/src/ir/daphneir/Daphne.h
@@ -33,6 +33,7 @@
 #include "mlir/IR/AttrTypeSubElements.h"
 #pragma GCC diagnostic pop
 
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
diff --git a/src/ir/daphneir/DaphneDialect.cpp b/src/ir/daphneir/DaphneDialect.cpp
index ded725c4f..205e7c4e9 100644
--- a/src/ir/daphneir/DaphneDialect.cpp
+++ b/src/ir/daphneir/DaphneDialect.cpp
@@ -16,24 +16,33 @@
 
 #include <compiler/utils/CompilerUtils.h>
 #include <ir/daphneir/Daphne.h>
+
 #include <ir/daphneir/DaphneOpsEnums.cpp.inc>
+
+#include "mlir/Support/LogicalResult.h"
 #define GET_OP_CLASSES
 #include <ir/daphneir/DaphneOps.cpp.inc>
 #define GET_TYPEDEF_CLASSES
-#include <ir/daphneir/DaphneOpsTypes.cpp.inc>
+#include <llvm/ADT/APInt.h>
+#include <llvm/ADT/APSInt.h>
+#include <llvm/ADT/BitVector.h>
+
 #include <ir/daphneir/DaphneOpsDialect.cpp.inc>
+#include <ir/daphneir/DaphneOpsTypes.cpp.inc>
 
+#include "llvm/ADT/ArrayRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/FunctionImplementation.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/SymbolTable.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/Interfaces/CallInterfaces.h"
 #include "mlir/Interfaces/CastInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
@@ -41,12 +50,46 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Interfaces/VectorInterfaces.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
+#include "mlir/Transforms/InliningUtils.h"
 
 #include <llvm/ADT/BitVector.h>
 #include <llvm/ADT/APInt.h>
 #include <llvm/ADT/APSInt.h>
 #include <llvm/ADT/DenseMap.h>
 
+/// Inliner hooks of the Daphne dialect; every legality query answers "yes".
+struct DaphneInlinerInterface : public mlir::DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+
+  /// Any callable may be inlined at any call site.
+  bool isLegalToInline(mlir::Operation *call, mlir::Operation *callable,
+                       bool wouldBeCloned) const final { return true; }
+  /// Any operation may be inlined into any region.
+  bool isLegalToInline(mlir::Operation *, mlir::Region *, bool,
+                       mlir::IRMapping &) const final { return true; }
+  /// Any region may be inlined into any other region.
+  bool isLegalToInline(mlir::Region *, mlir::Region *, bool,
+                       mlir::IRMapping &) const final { return true; }
+
+  /// Replaces each value in `valuesToRepl` with the matching operand of the
+  /// inlined daphne ReturnOp `op`.
+  void handleTerminator(mlir::Operation *op,
+                        mlir::ArrayRef<mlir::Value> valuesToRepl) const final {
+    // cast<> asserts on a foreign terminator; dyn_cast<> would hand back null.
+    auto returnOp = mlir::cast<mlir::daphne::ReturnOp>(op);
+    assert(returnOp.getNumOperands() == valuesToRepl.size());
+    for (const auto &it : llvm::enumerate(returnOp.getOperands()))
+      valuesToRepl[it.index()].replaceAllUsesWith(it.value());
+  }
+
+  /// Bridges a caller/callee type mismatch with a daphne CastOp.
+  mlir::Operation *materializeCallConversion(mlir::OpBuilder &builder, mlir::Value input,
+                                       mlir::Type resultType,
+                                       mlir::Location conversionLoc) const final {
+    return builder.create<mlir::daphne::CastOp>(conversionLoc, resultType, input);
+  }
+};
+
 void mlir::daphne::DaphneDialect::initialize()
 {
     addOperations<
@@ -57,6 +100,7 @@ void mlir::daphne::DaphneDialect::initialize()
         #define GET_TYPEDEF_LIST
         #include <ir/daphneir/DaphneOpsTypes.cpp.inc>
     >();
+    addInterfaces<DaphneInlinerInterface>();
 }
 
 mlir::Operation *mlir::daphne::DaphneDialect::materializeConstant(OpBuilder &builder,
@@ -179,6 +223,9 @@ mlir::Type mlir::daphne::DaphneDialect::parseType(mlir::DialectAsmParser &parser
     else if (keyword == "String") {
         return StringType::get(parser.getBuilder().getContext());
     }
+    else if (keyword == "DaphneContext") {
+        return mlir::daphne::DaphneContextType::get(parser.getBuilder().getContext());
+    }
     else {
         parser.emitError(parser.getCurrentLocation()) << "Parsing failed, keyword `" << keyword << "` not recognized!";
         return nullptr;
@@ -363,6 +410,7 @@ ::mlir::LogicalResult mlir::daphne::MatrixType::verify(
             // Value type is known.
             || elementType.isSignedInteger(64)
             || elementType.isUnsignedInteger(8)
+            || elementType.isUnsignedInteger(64)
             || elementType.isF32()
             || elementType.isF64()
             || elementType.isIndex()
@@ -783,6 +831,10 @@ mlir::OpFoldResult mlir::daphne::EwAndOp::fold(FoldAdaptor adaptor) {
     return {};
 }
 
+mlir::OpFoldResult mlir::daphne::EwBitwiseAndOp::fold(FoldAdaptor adaptor) {
+    return {};  // empty result: no folding is performed for this op
+}
+
 mlir::OpFoldResult mlir::daphne::EwOrOp::fold(FoldAdaptor adaptor) {
     ArrayRef<Attribute> operands = adaptor.getOperands();
     auto boolOp = [](const bool &a, const bool &b) { return a || b; };
@@ -1323,4 +1375,35 @@ mlir::LogicalResult mlir::daphne::CondOp::canonicalize(mlir::daphne::CondOp op,
 
         return mlir::success();
     }
-}
\ No newline at end of file
+}
+
+// Folds MemRef -> DenseMatrix -> MemRef round trips back to the source memref.
+mlir::LogicalResult mlir::daphne::ConvertDenseMatrixToMemRef::canonicalize(
+    mlir::daphne::ConvertDenseMatrixToMemRef op,
+    mlir::PatternRewriter &rewriter) {
+    // The operand may be a block argument, i.e. have no defining op at all.
+    auto toMatrix = op->getOperand(0).getDefiningOp<mlir::daphne::ConvertMemRefToDenseMatrix>();
+    if (!toMatrix) return mlir::failure();
+    // Operand 0 of the DM conversion is the pointer extracted from the source
+    // memref; follow that def-use edge instead of relying on op order.
+    mlir::Operation *extractPtr = toMatrix->getOperand(0).getDefiningOp();
+    if (!extractPtr || extractPtr->getNumOperands() != 1) return mlir::failure();
+    mlir::Value srcMemRef = extractPtr->getOperand(0);
+    if (srcMemRef.getType() != op->getResult(0).getType()) return mlir::failure();
+
+    rewriter.replaceOp(op, srcMemRef);
+    if (toMatrix->use_empty()) rewriter.eraseOp(toMatrix);
+    return mlir::success();
+}
+
+// Hoists the preceding op and this op to right behind the def of that op's operand 0.
+mlir::LogicalResult mlir::daphne::ConvertMemRefToDenseMatrix::canonicalize(
+    mlir::daphne::ConvertMemRefToDenseMatrix op, mlir::PatternRewriter &rewriter) {
+    mlir::Operation *extractPtr = op->getPrevNode();  // null if op leads its block
+    if (!extractPtr || extractPtr->getNumOperands() == 0) return mlir::failure();
+    mlir::Operation *srcMemRef = extractPtr->getOperand(0).getDefiningOp();  // null for block args
+    if (!srcMemRef || extractPtr->getPrevNode() == srcMemRef) return mlir::failure();  // nothing to move
+    extractPtr->moveAfter(srcMemRef);
+    op->moveAfter(extractPtr);
+    return mlir::success();
+}
diff --git a/src/ir/daphneir/DaphneDistributableOpInterface.cpp b/src/ir/daphneir/DaphneDistributableOpInterface.cpp
index 20ee390a0..416179a6c 100644
--- a/src/ir/daphneir/DaphneDistributableOpInterface.cpp
+++ b/src/ir/daphneir/DaphneDistributableOpInterface.cpp
@@ -134,6 +134,9 @@ IMPL_EWBINARYOP(EwAndOp)
 IMPL_EWBINARYOP(EwOrOp)
 IMPL_EWBINARYOP(EwXorOp)
 
+// Bitwise
+IMPL_EWBINARYOP(EwBitwiseAndOp)
+
 // Strings
 IMPL_EWBINARYOP(EwConcatOp)
 
@@ -170,4 +173,4 @@ std::vector<mlir::Value> daphne::RowAggMaxOp::createEquivalentDistributedDAG(
 
 std::vector<bool> daphne::RowAggMaxOp::getOperandDistrPrimitives() {
     return {false};
-}
\ No newline at end of file
+}
diff --git a/src/ir/daphneir/DaphneOps.td b/src/ir/daphneir/DaphneOps.td
index b16310932..e4cd6a96b 100644
--- a/src/ir/daphneir/DaphneOps.td
+++ b/src/ir/daphneir/DaphneOps.td
@@ -34,8 +34,11 @@ include "ir/daphneir/DaphneTypeInferenceTraits.td"
 include "ir/daphneir/CUDASupport.td"
 include "ir/daphneir/FPGAOPENCLSupport.td"
 
+include "mlir/Dialect/LLVMIR/LLVMTypes.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/CallInterfaces.td"
+include "mlir/IR/AttrTypeBase.td"
 
 // ****************************************************************************
 // Custom constraints
@@ -66,6 +69,28 @@ class TypesMatchOrOneIsMatrixOfOther<string a, string b> : PredOpTrait<
 class Daphne_Op<string mnemonic, list<Trait> traits = []> :
         Op<Daphne_Dialect, mnemonic, traits>;
 
+// ****************************************************************************
+// DAPHNE Runtime Interoperability
+// ****************************************************************************
+
+def Daphne_ConvertMemRefToDenseMatrix : Daphne_Op<"convertMemRefToDenseMatrix"> {
+    let summary = "Return a DenseMatrix.";
+    let description = [{ Constructs a DenseMatrix given a rank 2 StridedMemRefType. }];
+
+    // The memref is passed decomposed (base, offset, sizes, strides) instead of as one AnyMemRef operand.
+    let hasCanonicalizeMethod = 1;
+    let arguments = (ins Size:$base, Size:$offset, Size:$size0, Size:$size1, Size:$stride0, Size:$stride1);
+    let results = (outs MatrixOrU:$res);
+}
+
+def Daphne_ConvertDenseMatrixToMemRef : Daphne_Op<"convertDenseMatrixToMemRef", [Pure]> {
+    let summary = "Given a DenseMatrix, return a StridedMemRefType.";
+    let description = [{ Constructs a StridedMemRefType with rank 2 from a DenseMatrix* with already allocated memory. }];
+    let hasCanonicalizeMethod = 1;
+    let arguments = (ins MatrixOrU:$arg);
+    let results = (outs AnyMemRef:$output);
+}
+
 // ****************************************************************************
 // Data generation
 // ****************************************************************************
@@ -110,7 +135,6 @@ def Daphne_RandMatrixOp : Daphne_Op<"randMatrix", [
     NumRowsFromIthScalar<0>, NumColsFromIthScalar<1>, DeclareOpInterfaceMethods<InferTypesOpInterface>,
     SparsityFromIthScalar<4>, CastArgsToResTypeRandMatrixOp
 ]> {
-    //let arguments = (ins Size:$numRows, Size:$numCols, AnyScalar:$min, AnyScalar:$max, F64:$sparsity, Seed:$seed, StrScalar:$pdf);
     let arguments = (ins Size:$numRows, Size:$numCols, AnyScalar:$min, AnyScalar:$max, F64:$sparsity, IntScalar:$seed);
     let results = (outs MatrixOrU:$res);
 }
@@ -173,7 +197,8 @@ def Daphne_MatMulOp : Daphne_Op<"matMul", [
 class Daphne_EwUnaryOp<string name, Type scalarType, list<Trait> traits = []> : Daphne_Op<name, !listconcat(traits, [
     DataTypeFromFirstArg,
     ShapeFromArg,
-    CastArgsToResType
+    CastArgsToResType,
+    NoMemoryEffect
 ])> {
     let arguments = (ins AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$arg);
     let results = (outs AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$res);
@@ -228,7 +253,8 @@ class Daphne_EwBinaryOp<string name, Type scalarType, list<Trait> traits = []>
     DeclareOpInterfaceMethods<DistributableOpInterface>,
     DeclareOpInterfaceMethods<VectorizableOpInterface>,
     ShapeEwBinary,
-    CastArgsToResType
+    CastArgsToResType,
+    NoMemoryEffect
 ])> {
     let arguments = (ins AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$lhs, AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$rhs);
     let results = (outs AnyTypeOf<[MatrixOf<[scalarType]>, scalarType, Unknown]>:$res);
@@ -279,6 +305,12 @@ def Daphne_EwAndOp    : Daphne_EwBinaryOp<"ewAnd", NumScalar, [Commutative, Valu
 def Daphne_EwOrOp     : Daphne_EwBinaryOp<"ewOr" , NumScalar, [Commutative, ValueTypeFromArgsInt]>;
 def Daphne_EwXorOp    : Daphne_EwBinaryOp<"ewXor", NumScalar, [Commutative, ValueTypeFromArgsInt]>;
 
+// ----------------------------------------------------------------------------
+// Bitwise
+// ----------------------------------------------------------------------------
+
+def Daphne_EwBitwiseAndOp    : Daphne_EwBinaryOp<"ewBitwiseAnd", NumScalar, [Commutative, ValueTypeFromArgsInt]>;
+
 // ----------------------------------------------------------------------------
 // Strings
 // ----------------------------------------------------------------------------
@@ -1288,7 +1320,7 @@ def Daphne_PrintOp : Daphne_Op<"print"> {
     // TODO We might change it to only accept scalars here and enforce toString
     // for matrices and frames. But currently, we need it like that for the
     // rest of the program.
-    let arguments = (ins AnyTypeOf<[AnyScalar, MatrixOrFrame, Unknown]>:$arg, BoolScalar:$newline, BoolScalar:$err);
+    let arguments = (ins AnyTypeOf<[AnyScalar, MatrixOrFrame, AnyMemRef, Unknown]>:$arg, BoolScalar:$newline, BoolScalar:$err);
     let results = (outs); // no results
 }
 
diff --git a/src/ir/daphneir/DaphneVectorizableOpInterface.cpp b/src/ir/daphneir/DaphneVectorizableOpInterface.cpp
index 0ab264dc6..9785c9ba3 100644
--- a/src/ir/daphneir/DaphneVectorizableOpInterface.cpp
+++ b/src/ir/daphneir/DaphneVectorizableOpInterface.cpp
@@ -189,6 +189,9 @@ IMPL_SPLIT_COMBINE_EWBINARYOP(EwAndOp)
 IMPL_SPLIT_COMBINE_EWBINARYOP(EwOrOp)
 IMPL_SPLIT_COMBINE_EWBINARYOP(EwXorOp)
 
+// Bitwise
+IMPL_SPLIT_COMBINE_EWBINARYOP(EwBitwiseAndOp)
+
 // Strings
 IMPL_SPLIT_COMBINE_EWBINARYOP(EwConcatOp)
 
diff --git a/src/ir/daphneir/Passes.h b/src/ir/daphneir/Passes.h
index 3f7d7ef18..ec9c5f45a 100644
--- a/src/ir/daphneir/Passes.h
+++ b/src/ir/daphneir/Passes.h
@@ -43,8 +43,15 @@ namespace mlir::daphne {
     std::unique_ptr<Pass> createAdaptTypesToKernelsPass();
     std::unique_ptr<Pass> createDistributeComputationsPass();
     std::unique_ptr<Pass> createDistributePipelinesPass();
+    std::unique_ptr<Pass> createMapOpLoweringPass();
+    std::unique_ptr<Pass> createEwOpLoweringPass();
+    std::unique_ptr<Pass> createModOpLoweringPass();
     std::unique_ptr<Pass> createInferencePass(InferenceConfig cfg = {false, true, true, true, true});
     std::unique_ptr<Pass> createInsertDaphneContextPass(const DaphneUserConfig& cfg);
+    std::unique_ptr<Pass> createDaphneOptPass();
+    std::unique_ptr<Pass> createMatMulOpLoweringPass();
+    std::unique_ptr<Pass> createAggAllOpLoweringPass();
+    std::unique_ptr<Pass> createMemRefTestPass();
     std::unique_ptr<Pass> createProfilingPass();
     std::unique_ptr<Pass> createLowerToLLVMPass(const DaphneUserConfig& cfg);
     std::unique_ptr<Pass> createManageObjRefsPass();
diff --git a/src/ir/daphneir/Passes.td b/src/ir/daphneir/Passes.td
index 20fc2a5ee..39725a131 100644
--- a/src/ir/daphneir/Passes.td
+++ b/src/ir/daphneir/Passes.td
@@ -55,4 +55,25 @@ def WhileLoopInvariantCodeMotionPass : Pass<"while-loop-invariant-code-motion",
     let constructor = "mlir::daphne::createWhileLoopInvariantCodeMotionPass()";
 }
 
-#endif // SRC_IR_DAPHNEIR_PASSES_TD
\ No newline at end of file
+def AggAllLoweringPass : Pass<"lower-agg", "::mlir::func::FuncOp"> {
+    let constructor = "mlir::daphne::createAggAllOpLoweringPass()";
+}
+
+def MatMulOpLoweringPass : Pass<"lower-mm", "::mlir::func::FuncOp"> {
+    let constructor = "mlir::daphne::createMatMulOpLoweringPass()";
+}
+
+def DaphneOpsOptPass : Pass<"opt-daphne", "::mlir::func::FuncOp"> {
+    let constructor = "mlir::daphne::createDaphneOptPass()";
+}
+
+def MapOpLoweringPass: Pass<"lower-map", "::mlir::func::FuncOp"> {
+    let constructor = "mlir::daphne::createMapOpLoweringPass()";
+}
+
+def LowerEwOpPass: Pass<"lower-ew", "::mlir::func::FuncOp"> {
+    let constructor = "mlir::daphne::createEwOpLoweringPass()";
+}
+
+
+#endif // SRC_IR_DAPHNEIR_PASSES_TD
diff --git a/src/parser/config/ConfigParser.cpp b/src/parser/config/ConfigParser.cpp
index d34e446a1..3debbab4d 100644
--- a/src/parser/config/ConfigParser.cpp
+++ b/src/parser/config/ConfigParser.cpp
@@ -51,6 +51,8 @@ void ConfigParser::readUserConfig(const std::string& filename, DaphneUserConfig&
         config.use_ipa_const_propa = jf.at(DaphneConfigJsonParams::USE_IPA_CONST_PROPA).get<bool>();
     if (keyExists(jf, DaphneConfigJsonParams::USE_PHY_OP_SELECTION))
         config.use_phy_op_selection = jf.at(DaphneConfigJsonParams::USE_PHY_OP_SELECTION).get<bool>();
+    if (keyExists(jf, DaphneConfigJsonParams::USE_MLIR_CODEGEN))
+        config.use_mlir_codegen = jf.at(DaphneConfigJsonParams::USE_MLIR_CODEGEN).get<bool>();
     if (keyExists(jf, DaphneConfigJsonParams::CUDA_FUSE_ANY))
         config.cuda_fuse_any = jf.at(DaphneConfigJsonParams::CUDA_FUSE_ANY).get<bool>();
     if (keyExists(jf, DaphneConfigJsonParams::VECTORIZED_SINGLE_QUEUE))
@@ -79,6 +81,8 @@ void ConfigParser::readUserConfig(const std::string& filename, DaphneUserConfig&
         config.explain_vectorized = jf.at(DaphneConfigJsonParams::EXPLAIN_VECTORIZED).get<bool>();
     if (keyExists(jf, DaphneConfigJsonParams::EXPLAIN_OBJ_REF_MGNT))
         config.explain_obj_ref_mgnt = jf.at(DaphneConfigJsonParams::EXPLAIN_OBJ_REF_MGNT).get<bool>();
+    if (keyExists(jf, DaphneConfigJsonParams::EXPLAIN_MLIR_CODEGEN))
+        config.explain_mlir_codegen = jf.at(DaphneConfigJsonParams::EXPLAIN_MLIR_CODEGEN).get<bool>();
     if (keyExists(jf, DaphneConfigJsonParams::TASK_PARTITIONING_SCHEME)) {
         config.taskPartitioningScheme = jf.at(DaphneConfigJsonParams::TASK_PARTITIONING_SCHEME).get<SelfSchedulingScheme>();
         if (config.taskPartitioningScheme == SelfSchedulingScheme::INVALID) {
@@ -142,4 +146,4 @@ void ConfigParser::checkAnyUnexpectedKeys(const nlohmann::basic_json<>& j, const
                 .append("' file"));
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/parser/config/JsonParams.h b/src/parser/config/JsonParams.h
index 8e9c1c257..172143258 100644
--- a/src/parser/config/JsonParams.h
+++ b/src/parser/config/JsonParams.h
@@ -30,6 +30,7 @@ struct DaphneConfigJsonParams {
     inline static const std::string USE_OBJ_REF_MGNT = "use_obj_ref_mgnt";
     inline static const std::string USE_IPA_CONST_PROPA = "use_ipa_const_propa";
     inline static const std::string USE_PHY_OP_SELECTION = "use_phy_op_selection";
+    inline static const std::string USE_MLIR_CODEGEN = "use_mlir_codegen";
     inline static const std::string CUDA_FUSE_ANY = "cuda_fuse_any";
     inline static const std::string VECTORIZED_SINGLE_QUEUE = "vectorized_single_queue";
 
@@ -45,6 +46,7 @@ struct DaphneConfigJsonParams {
     inline static const std::string EXPLAIN_TYPE_ADAPTATION = "explain_type_adaptation";
     inline static const std::string EXPLAIN_VECTORIZED = "explain_vectorized";
     inline static const std::string EXPLAIN_OBJ_REF_MGNT = "explain_obj_ref_mgnt";
+    inline static const std::string EXPLAIN_MLIR_CODEGEN = "explain_mlir_codegen";
     inline static const std::string TASK_PARTITIONING_SCHEME = "taskPartitioningScheme";
     inline static const std::string NUMBER_OF_THREADS = "numberOfThreads";
     inline static const std::string MINIMUM_TASK_SIZE = "minimumTaskSize";
@@ -53,13 +55,14 @@ struct DaphneConfigJsonParams {
     inline static const std::string LIBRARY_PATHS = "library_paths";
     inline static const std::string DAPHNEDSL_IMPORT_PATHS = "daphnedsl_import_paths";
     inline static const std::string LOGGING = "logging";
-    
+
     inline static const std::string JSON_PARAMS[] = {
             USE_CUDA_,
             USE_VECTORIZED_EXEC,
             USE_OBJ_REF_MGNT,
             USE_IPA_CONST_PROPA,
             USE_PHY_OP_SELECTION,
+            USE_MLIR_CODEGEN,
             CUDA_FUSE_ANY,
             VECTORIZED_SINGLE_QUEUE,
             DEBUG_LLVM,
@@ -73,6 +76,7 @@ struct DaphneConfigJsonParams {
             EXPLAIN_PHY_OP_SELECTION,
             EXPLAIN_TYPE_ADAPTATION,
             EXPLAIN_VECTORIZED,
+            EXPLAIN_MLIR_CODEGEN,
             EXPLAIN_OBJ_REF_MGNT,
             TASK_PARTITIONING_SCHEME,
             NUMBER_OF_THREADS,
diff --git a/src/runtime/local/kernels/BinaryOpCode.h b/src/runtime/local/kernels/BinaryOpCode.h
index 7d61dc0eb..54d878b4c 100644
--- a/src/runtime/local/kernels/BinaryOpCode.h
+++ b/src/runtime/local/kernels/BinaryOpCode.h
@@ -18,29 +18,33 @@
 
 enum class BinaryOpCode {
     // Arithmetic.
-    ADD, // addition
-    SUB, // subtraction
-    MUL, // multiplication
-    DIV, // division
-    POW, // to the power of
-    MOD, // modulus
-    LOG, // logarithm
+    ADD,  // addition
+    SUB,  // subtraction
+    MUL,  // multiplication
+    DIV,  // division
+    POW,  // to the power of
+    MOD,  // modulus
+    LOG,  // logarithm
+
     // Comparisons.
-    EQ,  // equal
-    NEQ, // not equal
-    LT,  // less than
-    LE,  // less equal
-    GT,  // greater than
-    GE,  // greater equal
-    
+    EQ,   // equal
+    NEQ,  // not equal
+    LT,   // less than
+    LE,   // less equal
+    GT,   // greater than
+    GE,   // greater equal
+
     // Min/max.
     MIN,
     MAX,
-    
+
     // Logical.
     AND,
     OR,
+
+    // Bitwise.
+    BITWISE_AND,
 };
 
 static std::string_view binary_op_codes[] = {"ADD", "SUB", "MUL", "DIV", "POW", "MOD", "LOG", "EQ", "NEQ", "LT", "LE",
-        "GT", "GE", "MIN", "MAX", "AND", "OR"};
\ No newline at end of file
+        "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"};
diff --git a/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h b/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h
new file mode 100644
index 000000000..c281db96c
--- /dev/null
+++ b/src/runtime/local/kernels/ConvertDenseMatrixToMemRef.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "mlir/ExecutionEngine/CRunnerUtils.h"
+#include "runtime/local/datastructures/DenseMatrix.h"
+
+template <typename T>
+inline StridedMemRefType<T, 2> convertDenseMatrixToMemRef(
+    const DenseMatrix<T> *input, DCTX(ctx)) {
+    // Describe the matrix's value buffer as a rank-2 memref; nothing is copied.
+    T *const values = input->getValuesSharedPtr().get();
+    StridedMemRefType<T, 2> view{};
+    view.basePtr = values;
+    view.data = values;
+    view.offset = 0;
+    view.sizes[0] = input->getNumRows();
+    view.sizes[1] = input->getNumCols();
+    // TODO(phil): needs to be calculated for non row-major memory layouts
+    view.strides[0] = input->getNumCols();
+    view.strides[1] = 1;
+    input->increaseRefCounter();
+    return view;
+}
diff --git a/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h b/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h
new file mode 100644
index 000000000..96779ea70
--- /dev/null
+++ b/src/runtime/local/kernels/ConvertMemRefToDenseMatrix.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "runtime/local/context/DaphneContext.h"
+#include "runtime/local/datastructures/DenseMatrix.h"
+
+template <typename T>
+inline void convertMemRefToDenseMatrix(DenseMatrix<T>*& result, size_t basePtr,
+                                       size_t offset, size_t size0,
+                                       size_t size1, size_t stride0,
+                                       size_t stride1, DCTX(ctx)) {
+    // Non-owning handle: the no-op deleter never frees the memref's buffer.
+    // offset, stride0 and stride1 are accepted but not used in this function.
+    std::shared_ptr<T[]> values(reinterpret_cast<T*>(basePtr), [](T*) {});
+    result = DataObjectFactory::create<DenseMatrix<T>>(size0, size1, values);
+}
+
diff --git a/src/runtime/local/kernels/MatMul.h b/src/runtime/local/kernels/MatMul.h
index d0cfefbeb..fd5ff9e19 100644
--- a/src/runtime/local/kernels/MatMul.h
+++ b/src/runtime/local/kernels/MatMul.h
@@ -51,4 +51,3 @@ void matMul(DTRes *& res, const DTLhs * lhs, const DTRhs * rhs, bool transa, boo
 
 
 
-
diff --git a/src/runtime/local/kernels/genKernelInst.py b/src/runtime/local/kernels/genKernelInst.py
index fedbe87ee..d2b2c2a44 100755
--- a/src/runtime/local/kernels/genKernelInst.py
+++ b/src/runtime/local/kernels/genKernelInst.py
@@ -118,7 +118,10 @@ def generateKernelInstantiation(kernelTemplateInfo, templateValues, opCodes, out
             .replace(" **", "" if rp["isOutput"] else "_variadic")
             .replace(" *", "_variadic" if "isVariadic" in rp and rp["isVariadic"] else "")
             .replace("& ", "")
-            .replace("<", "_").replace(">", "")
+            .replace("<", "_")
+            .replace(">", "")
+            .replace(",", "_")
+            .replace(" ", "_")
         for rp in extendedRuntimeParams
     ])
     if typesForName != "":
diff --git a/src/runtime/local/kernels/kernels.json b/src/runtime/local/kernels/kernels.json
index c7129ec5f..1ffd4d625 100644
--- a/src/runtime/local/kernels/kernels.json
+++ b/src/runtime/local/kernels/kernels.json
@@ -784,6 +784,80 @@
             []
         ]
     },
+    {
+        "kernelTemplate": {
+            "header": "ConvertMemRefToDenseMatrix.h",
+            "opName": "convertMemRefToDenseMatrix",
+            "returnType": "void",
+            "templateParams": [
+                {
+                    "name": "VT",
+                    "isDataType": false
+                }
+            ],
+            "runtimeParams": [
+                {
+                    "type": "DenseMatrix<VT> *&",
+                    "name": "result"
+                },
+                {
+                    "type": "size_t",
+                    "name": "basePtr"
+                },
+                {
+                    "type": "size_t",
+                    "name": "offset"
+                },
+                {
+                    "type": "size_t",
+                    "name": "size0"
+                },
+                {
+                    "type": "size_t",
+                    "name": "size1"
+                },
+                {
+                    "type": "size_t",
+                    "name": "stride0"
+                },
+                {
+                    "type": "size_t",
+                    "name": "stride1"
+                }
+            ]
+        },
+        "instantiations": [
+            ["int64_t"],
+            ["uint64_t"],
+            ["float"],
+            ["double"]
+        ]
+    },
+    {
+        "kernelTemplate": {
+            "header": "ConvertDenseMatrixToMemRef.h",
+            "opName": "convertDenseMatrixToMemRef",
+            "returnType": "StridedMemRefType<VT,2>",
+            "templateParams": [
+                {
+                    "name": "VT",
+                    "isDataType": false
+                }
+            ],
+            "runtimeParams": [
+                {
+                    "type": "DenseMatrix<VT> *",
+                    "name": "input"
+                }
+            ]
+        },
+        "instantiations": [
+            ["int64_t"],
+            ["uint64_t"],
+            ["float"],
+            ["double"]
+        ]
+    },
     {
         "kernelTemplate": {
             "header": "CreateFrame.h",
@@ -1086,7 +1160,7 @@
                     [["DenseMatrix", "double"], ["DenseMatrix", "double"], "double"],
                     [["DenseMatrix", "int64_t"], ["DenseMatrix", "int64_t"], "int64_t"]
                 ],
-                "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"]
+                "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"]
             },
             {
                 "name":  ["CPP"],
@@ -1097,7 +1171,7 @@
                     ["Frame", "Frame", "double"],
                     ["Frame", "Frame", "int64_t"]
                 ],
-                "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"]
+                "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"]
             }
         ]
     },
@@ -1143,7 +1217,7 @@
             ["uint32_t", "uint32_t", "uint32_t"],
             ["size_t", "size_t", "size_t"]
         ],
-        "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR"]
+        "opCodes": ["ADD", "SUB", "MUL", "DIV", "POW", "LOG", "MOD", "EQ", "NEQ", "LT", "LE", "GT", "GE", "MIN", "MAX", "AND", "OR", "BITWISE_AND"]
     },
     {
         "kernelTemplate": {
@@ -1492,6 +1566,7 @@
                     [["DenseMatrix", "float"], "float"],
                     [["DenseMatrix", "double"], "double"],
                     [["DenseMatrix", "int64_t"], "int64_t"],
+                    [["DenseMatrix", "uint64_t"], "uint64_t"],
                     [["DenseMatrix", "uint8_t"], "uint8_t"]]
             }
         ]
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e266dc80c..7d63d5976 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -47,15 +47,22 @@ set(TEST_SOURCES
         api/cli/vectorized/MultiThreadedOpsTest.cpp
         api/cli/vectorized/VectorizedPipelineTest.cpp
         api/cli/Utils.cpp
-    
+
         api/python/DaphneLibTest.cpp
-        
+
+        api/cli/codegen/EwBinaryScalarTest.cpp
+        api/cli/codegen/MatMulTest.cpp
+        api/cli/codegen/EwOpLoopFusionTest.cpp
+        api/cli/codegen/AggAllTest.cpp
+        api/cli/codegen/MapOpTest.cpp
+        codegen/CodegenTest.cpp
+
         ir/daphneir/InferTypesTest.cpp
-        
+
         parser/config/ConfigParserTest.cpp
-    
+
         runtime/distributed/worker/WorkerTest.cpp
-    
+
         runtime/local/datastructures/CSRMatrixTest.cpp
         runtime/local/datastructures/DenseMatrixTest.cpp
         runtime/local/datastructures/FrameTest.cpp
@@ -142,7 +149,7 @@ endif()
 
 add_executable(run_tests ${TEST_SOURCES})
 set_target_properties(run_tests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
-add_dependencies(run_tests daphne daphnelib DistributedWorker)
+add_dependencies(run_tests daphne daphnelib DistributedWorker daphne-opt)
 
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 set(LIBS AllKernels ${dialect_libs} DataStructures DaphneDSLParser MLIRDaphne WorkerImpl Proto DaphneConfigParser
diff --git a/test/api/cli/Utils.h b/test/api/cli/Utils.h
index 3768a18cd..1c2a29891 100644
--- a/test/api/cli/Utils.h
+++ b/test/api/cli/Utils.h
@@ -163,6 +163,31 @@ pid_t runProgramInBackground(int &out, int &err, const char * execPath, Args ...
     }
 }
 
+/**
+ * @brief Runs the "run-lit.py" python script found in a directory and
+ * captures `stdout`, `stderr`, and the status code.
+ *
+ * "run-lit.py" is required to run the LLVM tool llvm-lit in order to
+ * test "*.mlir" files in the directory using the llvm-lit command RUN:
+ * in each file.
+ *
+ * @param out The stream where to direct the program's standard output.
+ * @param err The stream where to direct the program's standard error.
+ * @param dirPath The path to the directory containing the "run-lit.py" script
+ * and the "*.mlir" test cases; it must end with a path separator.
+ * @param args The arguments to pass in addition to the script's path. Despite
+ * the variadic template, each element should be of type `char *`. The last one
+ * does *not* need to be a null pointer.
+ * @return The status code returned by the process, or `-1` if it did not exit
+ * normally.
+ */
+template <typename... Args>
+int runLIT(std::stringstream &out, std::stringstream &err, std::string dirPath,
+           Args... args) {
+    const std::string litScript = dirPath + "run-lit.py";
+    return runProgram(out, err, "/bin/python3", "python3", litScript.c_str(),
+                      "-v", dirPath.c_str(), args...);
+}
 
 /**
  * @brief Executes DAPHNE's command line interface with the given arguments and
@@ -465,4 +490,4 @@ void compareDaphneToSomeRefSimple(const std::string & dirPath, const std::string
  */
 std::string generalizeDataTypes(const std::string& str);
 
-#endif //TEST_API_CLI_UTILS_H
\ No newline at end of file
+#endif //TEST_API_CLI_UTILS_H
diff --git a/test/api/cli/codegen/AggAllTest.cpp b/test/api/cli/codegen/AggAllTest.cpp
new file mode 100644
index 000000000..f0c383c00
--- /dev/null
+++ b/test/api/cli/codegen/AggAllTest.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <api/cli/Utils.h>
+#include <tags.h>
+
+#include <catch.hpp>
+#include <sstream>
+#include <string>
+
+#include "api/cli/StatusCode.h"
+
+const std::string dirPath = "test/api/cli/codegen/";
+
+TEST_CASE("aggAll", TAG_CODEGEN) {
+    std::string expected = "100\n";  // same output with and without codegen
+    const std::string script = dirPath + "sum_aggall.daphne";
+    compareDaphneToStr(expected, script);
+    compareDaphneToStr(expected, script, "--mlir-codegen");
+}
diff --git a/test/api/cli/codegen/EwBinaryScalarTest.cpp b/test/api/cli/codegen/EwBinaryScalarTest.cpp
new file mode 100644
index 000000000..224d566c3
--- /dev/null
+++ b/test/api/cli/codegen/EwBinaryScalarTest.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <api/cli/Utils.h>
+#include <tags.h>
+
+#include <catch.hpp>
+#include <sstream>
+#include <string>
+
+#include "api/cli/StatusCode.h"
+
+const std::string dirPath = "test/api/cli/codegen/";
+
+void test_binary_lowering(const std::string op,
+                          const std::string kernel_call,
+                          const std::string lowering,
+                          const std::string result) {
+    // Both runs execute the same script; only --mlir-codegen differs.
+    const std::string script = dirPath + op + ".daphne";
+    std::stringstream out, err;
+
+    // Without codegen: `err` must show the kernel call and not the lowered op.
+    int status = runDaphne(out, err, "--explain", "llvm", script.c_str());
+    CHECK(status == StatusCode::SUCCESS);
+    CHECK_THAT(err.str(), Catch::Contains(kernel_call));
+    CHECK_THAT(err.str(), !Catch::Contains(lowering));
+    CHECK(out.str() == result);
+
+    out.str(std::string());
+    err.str(std::string());
+    // With codegen: `err` must show the lowered op and not the kernel call.
+    status = runDaphne(out, err, "--explain", "llvm", "--mlir-codegen", script.c_str());
+    CHECK(status == StatusCode::SUCCESS);
+    CHECK_THAT(err.str(), !Catch::Contains(kernel_call));
+    CHECK_THAT(err.str(), Catch::Contains(lowering));
+    CHECK(out.str() == result);
+}
+
+TEST_CASE("ewBinaryAddScalar", TAG_CODEGEN) {
+    test_binary_lowering("add", "llvm.call @_ewAdd__", "llvm.add", "3\n");  // add.daphne: 1 + 2
+}
+
+TEST_CASE("ewBinarySubScalar", TAG_CODEGEN) {
+    test_binary_lowering("sub", "llvm.call @_ewSub__", "llvm.sub", "-1\n");  // sub.daphne: 1 - 2
+}
+
+TEST_CASE("ewBinaryMulScalar", TAG_CODEGEN) {
+    test_binary_lowering("mul", "llvm.call @_ewMul__", "llvm.mul", "2\n");  // mul.daphne: 1 * 2
+}
+
+TEST_CASE("ewBinaryDivScalar", TAG_CODEGEN) {
+    test_binary_lowering("div", "llvm.call @_ewDiv__", "llvm.fdiv", "1.5\n");  // div.daphne: 3.0 / 2.0
+}
+
+TEST_CASE("ewBinaryPowScalar", TAG_CODEGEN) {
+    test_binary_lowering("pow", "llvm.call @_ewPow__", "llvm.intr.pow", "9\n");  // pow.daphne: pow(3.0, 2.0)
+}
+
+TEST_CASE("ewBinaryAbsScalar", TAG_CODEGEN) {
+    test_binary_lowering("abs", "llvm.call @_ewAbs__", "llvm.intr.fabs", "4\n");  // abs.daphne: abs(-4.0), a unary op reusing this helper
+}
diff --git a/test/api/cli/codegen/EwOpLoopFusionTest.cpp b/test/api/cli/codegen/EwOpLoopFusionTest.cpp
new file mode 100644
index 000000000..46f91b7cb
--- /dev/null
+++ b/test/api/cli/codegen/EwOpLoopFusionTest.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <api/cli/Utils.h>
+#include <tags.h>
+
+#include <catch.hpp>
+#include <sstream>
+#include <string>
+
+#include "api/cli/StatusCode.h"
+
+const std::string dirPath = "test/api/cli/codegen/";
+
+TEST_CASE("ewloopfusion", TAG_CODEGEN) {
+    const std::string script = dirPath + "fusion.daphne";
+    // One 2x2 matrix per print, in the order the script prints X, Y and Z.
+    std::string expected = "DenseMatrix(2x2, double)\n"
+                           "8 8\n"
+                           "8 8\n"
+                           "DenseMatrix(2x2, double)\n"
+                           "10 10\n"
+                           "10 10\n"
+                           "DenseMatrix(2x2, double)\n"
+                           "9 9\n"
+                           "9 9\n";
+    compareDaphneToStr(expected, script);
+    compareDaphneToStr(expected, script, "--mlir-codegen");
+}
diff --git a/test/api/cli/codegen/MapOpTest.cpp b/test/api/cli/codegen/MapOpTest.cpp
new file mode 100644
index 000000000..a7ccf56d0
--- /dev/null
+++ b/test/api/cli/codegen/MapOpTest.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <api/cli/Utils.h>
+#include <tags.h>
+
+#include <catch.hpp>
+#include <sstream>
+#include <string>
+
+#include "api/cli/StatusCode.h"
+
+const std::string dirPath = "test/api/cli/codegen/";
+
+TEST_CASE("mapOp", TAG_CODEGEN) {
+    const std::string script = dirPath + "map.daphne";
+    // Each element of the script's 2x2 input, incremented by one.
+    std::string expected = "DenseMatrix(2x2, double)\n"
+                           "2.1 1\n"
+                           "6.5 -1.2\n";
+    compareDaphneToStr(expected, script);
+    compareDaphneToStr(expected, script, "--mlir-codegen", "--no-obj-ref-mgnt");
+}
+
diff --git a/test/api/cli/codegen/MatMulTest.cpp b/test/api/cli/codegen/MatMulTest.cpp
new file mode 100644
index 000000000..6ae2f324a
--- /dev/null
+++ b/test/api/cli/codegen/MatMulTest.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <api/cli/Utils.h>
+#include <tags.h>
+
+#include <catch.hpp>
+#include <sstream>
+#include <string>
+
+#include "api/cli/StatusCode.h"
+
+const std::string dirPath = "test/api/cli/codegen/";
+
+TEST_CASE("matmul", TAG_CODEGEN) {
+    const std::string script = dirPath + "matmul.daphne";
+    // 3x3 filled with 5.0 times 3x3 filled with 3.0: each entry is 3 * 15 = 45.
+    std::string expected = "DenseMatrix(3x3, double)\n"
+                           "45 45 45\n"
+                           "45 45 45\n"
+                           "45 45 45\n";
+    compareDaphneToStr(expected, script);
+    compareDaphneToStr(expected, script, "--mlir-codegen");
+}
+
+
+TEST_CASE("matvec", TAG_CODEGEN) {
+    const std::string script = dirPath + "matvec.daphne";
+    // 3x3 filled with 5.0 times 3x1 filled with 3.0: each entry is 3 * 15 = 45.
+    std::string expected = "DenseMatrix(3x1, double)\n"
+                           "45\n"
+                           "45\n"
+                           "45\n";
+    compareDaphneToStr(expected, script);
+    compareDaphneToStr(expected, script, "--mlir-codegen");
+}
diff --git a/test/api/cli/codegen/abs.daphne b/test/api/cli/codegen/abs.daphne
new file mode 100644
index 000000000..1cf002ecc
--- /dev/null
+++ b/test/api/cli/codegen/abs.daphne
@@ -0,0 +1,7 @@
+// Performs an AbsOp. Used to compare precompiled kernel with codegen. Value
+// extracted as scalar to avoid it being optimized out of the calculation with
+// constant folding or similar.
+
+X = [1.0, -2.0, -4.0];
+a = as.scalar(X[2:3, 0:1]);
+print(abs(a));
diff --git a/test/api/cli/codegen/add.daphne b/test/api/cli/codegen/add.daphne
new file mode 100644
index 000000000..fd33984cc
--- /dev/null
+++ b/test/api/cli/codegen/add.daphne
@@ -0,0 +1,8 @@
+// Performs an AddOp. Used to compare precompiled kernel with codegen.
+// Values extracted as scalar to avoid them being optimized out of
+// the calculation with constant folding or similar.
+
+X = [1, 2, 3];
+a = as.scalar(X[0:1, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(a + b);
diff --git a/test/api/cli/codegen/div.daphne b/test/api/cli/codegen/div.daphne
new file mode 100644
index 000000000..a934b84b7
--- /dev/null
+++ b/test/api/cli/codegen/div.daphne
@@ -0,0 +1,8 @@
+// Performs a DivOp. Used to compare precompiled kernel with codegen. Values
+// extracted as scalar to avoid them being optimized out of the calculation
+// with constant folding or similar.
+
+X = [1.0, 2.0, 3.0];
+a = as.scalar(X[2:3, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(a / b);
diff --git a/test/api/cli/codegen/fusion.daphne b/test/api/cli/codegen/fusion.daphne
new file mode 100644
index 000000000..e4b81f68e
--- /dev/null
+++ b/test/api/cli/codegen/fusion.daphne
@@ -0,0 +1,11 @@
+// Performs loop fusion on multiple EwBinaryOps. Used to compare precompiled
+// kernel with codegen.
+
+X = fill(4.0, 2, 2);
+X = X * 2.0;
+Y = X + 2.0;
+Z = X + 1.0;
+
+print(X);
+print(Y);
+print(Z);
diff --git a/test/api/cli/codegen/log.daphne b/test/api/cli/codegen/log.daphne
new file mode 100644
index 000000000..b9f86d44b
--- /dev/null
+++ b/test/api/cli/codegen/log.daphne
@@ -0,0 +1,8 @@
+// Performs a LogOp. Used to compare precompiled kernel with codegen. Values
+// extracted as scalar to avoid them being optimized out of the calculation
+// with constant folding or similar.
+
+X = [1, 2, 3];
+a = as.scalar(X[0:1, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(ln(a, b));
diff --git a/test/api/cli/codegen/map.daphne b/test/api/cli/codegen/map.daphne
new file mode 100644
index 000000000..6c9d203eb
--- /dev/null
+++ b/test/api/cli/codegen/map.daphne
@@ -0,0 +1,10 @@
+// Performs a MapOp with the UDF `increment`. Used to compare precompiled
+// kernel with codegen.
+
+def increment(x) {
+    return x + 1;
+}
+
+X = reshape([1.1, 0.0, 5.5, -2.2], 2, 2);
+
+print(map(X, increment));
diff --git a/test/api/cli/codegen/matmul.daphne b/test/api/cli/codegen/matmul.daphne
new file mode 100644
index 000000000..af5b46ae9
--- /dev/null
+++ b/test/api/cli/codegen/matmul.daphne
@@ -0,0 +1,9 @@
+// Performs a MatMulOp. Used to compare precompiled kernel with codegen.
+
+N = 3;
+A = fill(5.0, N, N);
+B = fill(3.0, N, N);
+
+C = A@B;
+
+print(C); // for small matrices
diff --git a/test/api/cli/codegen/matvec.daphne b/test/api/cli/codegen/matvec.daphne
new file mode 100644
index 000000000..7aba59805
--- /dev/null
+++ b/test/api/cli/codegen/matvec.daphne
@@ -0,0 +1,9 @@
+// Performs a matrix-vector MatMulOp. Used to compare precompiled kernel with codegen.
+
+N = 3;
+A = fill(5.0, N, N);
+B = fill(3.0, N, 1);
+
+C = A@B;
+
+print(C); // for small matrices
diff --git a/test/api/cli/codegen/mul.daphne b/test/api/cli/codegen/mul.daphne
new file mode 100644
index 000000000..17ea31d5c
--- /dev/null
+++ b/test/api/cli/codegen/mul.daphne
@@ -0,0 +1,8 @@
+// Performs a MulOp. Used to compare precompiled kernel with codegen. Values
+// extracted as scalar to avoid them being optimized out of the calculation
+// with constant folding or similar.
+
+X = [1, 2, 3];
+a = as.scalar(X[0:1, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(a * b);
diff --git a/test/api/cli/codegen/pow.daphne b/test/api/cli/codegen/pow.daphne
new file mode 100644
index 000000000..ff13b1b23
--- /dev/null
+++ b/test/api/cli/codegen/pow.daphne
@@ -0,0 +1,8 @@
+// Performs a PowOp. Used to compare precompiled kernel with codegen. Values
+// extracted as scalar to avoid them being optimized out of the calculation
+// with constant folding or similar.
+
+X = [1.0, 2.0, 3.0];
+a = as.scalar(X[2:3, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(pow(a, b));
diff --git a/test/api/cli/codegen/sub.daphne b/test/api/cli/codegen/sub.daphne
new file mode 100644
index 000000000..a230024c1
--- /dev/null
+++ b/test/api/cli/codegen/sub.daphne
@@ -0,0 +1,8 @@
+// Compare precompiled kernel with codegen generated for the SubOp. Value
+// extracted as scalar to avoid it being optimized out of the calculation with
+// constant folding or similar.
+
+X = [1, 2, 3];
+a = as.scalar(X[0:1, 0:1]);
+b = as.scalar(X[1:2, 0:1]);
+print(a - b);
diff --git a/test/api/cli/codegen/sum_aggall.daphne b/test/api/cli/codegen/sum_aggall.daphne
new file mode 100644
index 000000000..77578e7c6
--- /dev/null
+++ b/test/api/cli/codegen/sum_aggall.daphne
@@ -0,0 +1,5 @@
+// Compare precompiled kernel with codegen generated for the AggAllOp.
+
+X = fill(1.0, 10, 10);
+a = sum(X);
+print(a);
diff --git a/test/codegen/.gitignore b/test/codegen/.gitignore
new file mode 100644
index 000000000..a6e9662a3
--- /dev/null
+++ b/test/codegen/.gitignore
@@ -0,0 +1,2 @@
+Output/**
+.lit_test_times.txt
diff --git a/test/codegen/CodegenTest.cpp b/test/codegen/CodegenTest.cpp
new file mode 100644
index 000000000..fce4d03c9
--- /dev/null
+++ b/test/codegen/CodegenTest.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2023 The DAPHNE Consortium
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "run_tests.h"
+
+#include "api/cli/StatusCode.h"
+#include "api/cli/Utils.h"
+
+#include <tags.h>
+
+const std::string dirPath = "test/codegen/";
+
+// All test files with FileCheck directives belong in dirPath; LIT tests every
+// *.mlir file in that directory.
+TEST_CASE("codegen", TAG_CODEGEN) {
+    std::stringstream out, err;
+    const int status = runLIT(out, err, dirPath);
+
+#ifndef NDEBUG
+    // Only when NDEBUG is not defined: log what the LIT run reported.
+    spdlog::info("runLIT return status: " + std::to_string(status));
+    spdlog::info("runLIT out:\n" + out.str());
+    spdlog::info("runLIT err:\n" + err.str());
+#endif
+
+    CHECK(status == StatusCode::SUCCESS);
+}
diff --git a/test/codegen/daphne_opt.mlir b/test/codegen/daphne_opt.mlir
new file mode 100644
index 000000000..25a02d813
--- /dev/null
+++ b/test/codegen/daphne_opt.mlir
@@ -0,0 +1,18 @@
+// RUN: daphne-opt --opt-daphne %s | FileCheck %s
+
+module {
+  func.func @main() {
+    %0 = "daphne.constant"() {value = 2 : ui64} : () -> ui64
+    %1 = "daphne.constant"() {value = 2 : index} : () -> index
+    %2 = "daphne.constant"() {value = 4 : ui64} : () -> ui64
+    %3 = "daphne.constant"() {value = false} : () -> i1
+    %4 = "daphne.constant"() {value = true} : () -> i1
+    %5 = "daphne.fill"(%2, %1, %1) : (ui64, index, index) -> !daphne.Matrix<2x2xui64>
+    // CHECK-NOT: daphne.ewMod
+    // CHECK: daphne.ewSub
+    // CHECK-NEXT: daphne.ewBitwiseAnd
+    %6 = "daphne.ewMod"(%5, %0) : (!daphne.Matrix<2x2xui64>, ui64) -> !daphne.Matrix<2x2xui64>
+    "daphne.print"(%6, %4, %3) : (!daphne.Matrix<2x2xui64>, i1, i1) -> ()
+    "daphne.return"() : () -> ()
+  }
+}
diff --git a/test/codegen/ew.mlir b/test/codegen/ew.mlir
new file mode 100644
index 000000000..d94cf32cd
--- /dev/null
+++ b/test/codegen/ew.mlir
@@ -0,0 +1,105 @@
+// RUN: daphne-opt --lower-ew %s | FileCheck %s
+
+func.func @add() {
+  %0 = "daphne.constant"() {value = 2 : index} : () -> index
+  %1 = "daphne.constant"() {value = false} : () -> i1
+  %2 = "daphne.constant"() {value = true} : () -> i1
+  %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  // CHECK-NOT: daphne.ewAdd
+  // CHECK: arith.addf
+  %5 = "daphne.ewAdd"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64>
+  "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @sub() {
+  %0 = "daphne.constant"() {value = 2 : index} : () -> index
+  %1 = "daphne.constant"() {value = false} : () -> i1
+  %2 = "daphne.constant"() {value = true} : () -> i1
+  %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  // CHECK-NOT: daphne.ewSub
+  // CHECK: arith.subf
+  %5 = "daphne.ewSub"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64>
+  "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @mul() {
+  %0 = "daphne.constant"() {value = 2 : index} : () -> index
+  %1 = "daphne.constant"() {value = false} : () -> i1
+  %2 = "daphne.constant"() {value = true} : () -> i1
+  %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  // CHECK-NOT: daphne.ewMul
+  // CHECK: arith.mulf
+  %5 = "daphne.ewMul"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64>
+  "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @div() {
+  %0 = "daphne.constant"() {value = 2 : index} : () -> index
+  %1 = "daphne.constant"() {value = false} : () -> i1
+  %2 = "daphne.constant"() {value = true} : () -> i1
+  %3 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %4 = "daphne.fill"(%3, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  // CHECK-NOT: daphne.ewDiv
+  // CHECK: arith.divf
+  %5 = "daphne.ewDiv"(%4, %4) : (!daphne.Matrix<2x2xf64>, !daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64>
+  "daphne.print"(%5, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @sqrt() {
+  %0 = "daphne.constant"() {value = 0 : index} : () -> index
+  %1 = "daphne.constant"() {value = 1 : index} : () -> index
+  %2 = "daphne.constant"() {value = 2 : index} : () -> index
+  %3 = "daphne.constant"() {value = false} : () -> i1
+  %4 = "daphne.constant"() {value = true} : () -> i1
+  %5 = "daphne.constant"() {value = 4 : si64} : () -> si64
+  %6 = "daphne.fill"(%5, %2, %2) : (si64, index, index) -> !daphne.Matrix<2x2xsi64>
+  %7 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xsi64>, index, index) -> !daphne.Matrix<?x?xsi64>
+  %8 = "daphne.sliceCol"(%7, %0, %1) : (!daphne.Matrix<?x?xsi64>, index, index) -> !daphne.Matrix<?x?xsi64>
+  %9 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xsi64>, index, index) -> !daphne.Matrix<?x?xsi64>
+  %10 = "daphne.sliceCol"(%9, %0, %1) : (!daphne.Matrix<?x?xsi64>, index, index) -> !daphne.Matrix<?x?xsi64>
+  %11 = "daphne.cast"(%10) : (!daphne.Matrix<?x?xsi64>) -> si64
+  %12 = "daphne.cast"(%11) : (si64) -> f64
+  // CHECK-NOT: daphne.ewSqrt
+  // CHECK: math.sqrt
+  %13 = "daphne.ewSqrt"(%12) : (f64) -> f64
+  "daphne.print"(%13, %4, %3) : (f64, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @abs() {
+  %0 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %3 = "daphne.constant"() {value = false} : () -> i1
+  %4 = "daphne.constant"() {value = true} : () -> i1
+  // CHECK-NOT: daphne.ewAbs
+  // CHECK: math.absf
+  %12 = "daphne.ewAbs"(%0) : (f64) -> f64
+  "daphne.print"(%12, %4, %3) : (f64, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
+
+func.func @pow() {
+  %0 = "daphne.constant"() {value = 0 : index} : () -> index
+  %1 = "daphne.constant"() {value = 1 : index} : () -> index
+  %2 = "daphne.constant"() {value = 2 : index} : () -> index
+  %3 = "daphne.constant"() {value = false} : () -> i1
+  %4 = "daphne.constant"() {value = true} : () -> i1
+  %5 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %6 = "daphne.fill"(%5, %2, %2) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  %7 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xf64>, index, index) -> !daphne.Matrix<?x?xf64>
+  %8 = "daphne.sliceCol"(%7, %0, %1) : (!daphne.Matrix<?x?xf64>, index, index) -> !daphne.Matrix<?x?xf64>
+  %9 = "daphne.sliceRow"(%6, %0, %1) : (!daphne.Matrix<2x2xf64>, index, index) -> !daphne.Matrix<?x?xf64>
+  %10 = "daphne.sliceCol"(%9, %0, %1) : (!daphne.Matrix<?x?xf64>, index, index) -> !daphne.Matrix<?x?xf64>
+  %11 = "daphne.cast"(%10) : (!daphne.Matrix<?x?xf64>) -> f64
+  // CHECK-NOT: daphne.ewPow
+  // CHECK: math.powf
+  %12 = "daphne.ewPow"(%11, %11) : (f64, f64) -> f64
+  "daphne.print"(%12, %4, %3) : (f64, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
diff --git a/test/codegen/fusion.mlir b/test/codegen/fusion.mlir
new file mode 100644
index 000000000..43107ad22
--- /dev/null
+++ b/test/codegen/fusion.mlir
@@ -0,0 +1,34 @@
+// RUN: daphne-opt -pass-pipeline="builtin.module(lower-ew, canonicalize, func.func(affine-loop-fusion))" %s | FileCheck %s
+
+// Three element-wise matrix-scalar ops on a 2x2 f64 matrix: one ewMul whose
+// result feeds two ewAdd ops. The directives inside @main expect two nested
+// affine.for lines followed directly, on consecutive output lines, by the
+// three load/compute/store triples (mulf, addf, addf).
+
+func.func @main() {
+  %0 = "daphne.constant"() {value = 2 : index} : () -> index
+  %1 = "daphne.constant"() {value = false} : () -> i1
+  %2 = "daphne.constant"() {value = true} : () -> i1
+  %3 = "daphne.constant"() {value = 1.000000e+00 : f64} : () -> f64
+  %4 = "daphne.constant"() {value = 2.000000e+00 : f64} : () -> f64
+  %5 = "daphne.constant"() {value = 4.000000e+00 : f64} : () -> f64
+  %6 = "daphne.fill"(%5, %0, %0) : (f64, index, index) -> !daphne.Matrix<2x2xf64>
+  // CHECK: affine.for
+  // CHECK-NEXT: affine.for
+  // CHECK-NEXT: affine.load
+  // CHECK-NEXT: arith.mulf
+  // CHECK-NEXT: affine.store
+  // CHECK-NEXT: affine.load
+  // CHECK-NEXT: arith.addf
+  // CHECK-NEXT: affine.store
+  // CHECK-NEXT: affine.load
+  // CHECK-NEXT: arith.addf
+  // CHECK-NEXT: affine.store
+  %7 = "daphne.ewMul"(%6, %4) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64>
+  %8 = "daphne.ewAdd"(%7, %4) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64>
+  %9 = "daphne.ewAdd"(%7, %3) : (!daphne.Matrix<2x2xf64>, f64) -> !daphne.Matrix<2x2xf64>
+  "daphne.print"(%7, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.print"(%8, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.print"(%9, %2, %1) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+  "daphne.return"() : () -> ()
+}
diff --git a/test/codegen/lit.cfg b/test/codegen/lit.cfg
new file mode 100644
index 000000000..fab502252
--- /dev/null
+++ b/test/codegen/lit.cfg
@@ -0,0 +1,39 @@
+# lit configuration for the DAPHNE codegen tests (the *.mlir files in this
+# directory). The `config` object is not created here; lit provides it when it
+# loads this file.
+import lit.formats
+import os
+
+config.name = "DAPHNE LIT config"
+# The positional True is presumably ShTest's execute_external switch (run the
+# test commands through an external shell) -- confirm for the lit version used.
+config.test_format = lit.formats.ShTest(True)
+
+# Only files with this suffix are collected as tests.
+config.suffixes = [".mlir"]
+
+config.test_source_root = os.path.dirname(__file__)
+
+# Directories to prepend to PATH so the test commands can find the DAPHNE tools
+# and the LLVM tools built under thirdparty/. They are resolved first against
+# the current working directory (the previous behaviour, which only works when
+# lit is started from the repository root) and then against the repository
+# root derived from this file's location (<root>/test/codegen/lit.cfg), so the
+# tests also work when lit is started from another directory.
+_repo_root = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+)
+_tool_subdirs = (
+    "bin",
+    os.path.join("thirdparty", "build", "llvm-project", "bin"),
+)
+_tool_dirs = []
+for _base in (os.getcwd(), _repo_root):
+    for _subdir in _tool_subdirs:
+        _candidate = os.path.abspath(os.path.join(_base, _subdir))
+        if _candidate not in _tool_dirs:
+            _tool_dirs.append(_candidate)
+
+config.environment["PATH"] = os.path.pathsep.join(
+    _tool_dirs + [config.environment["PATH"]]
+)
diff --git a/test/codegen/mapop.mlir b/test/codegen/mapop.mlir
new file mode 100644
index 000000000..ff8825989
--- /dev/null
+++ b/test/codegen/mapop.mlir
@@ -0,0 +1,31 @@
+// RUN: daphne-opt --lower-map --inline %s | FileCheck %s
+
+// Applies the scalar function @"increment-1-1" to a 2x2 f64 matrix through
+// "daphne.map". The directives inside @main expect the map op to be gone from
+// the tool output, replaced by a nested affine.for pair that loads an element
+// and applies daphne.ewExp to it without a func.call.
+
+module {
+  func.func @"increment-1-1"(%elem: f64) -> f64 {
+    %exp = "daphne.ewExp"(%elem) : (f64) -> f64
+    "daphne.return"(%exp) : (f64) -> ()
+  }
+  func.func @main() {
+    %dim = "daphne.constant"() {value = 2 : index} : () -> index
+    %false = "daphne.constant"() {value = false} : () -> i1
+    %true = "daphne.constant"() {value = true} : () -> i1
+    %src = "daphne.constant"() {value = 93985655361872 : ui64} : () -> ui64
+    %flat = "daphne.matrixConstant"(%src) : (ui64) -> !daphne.Matrix<?x?xf64>
+    %input = "daphne.reshape"(%flat, %dim, %dim) : (!daphne.Matrix<?x?xf64>, index, index) -> !daphne.Matrix<2x2xf64>
+    // CHECK-NOT: daphne.map
+    // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}}
+    // CHECK: affine.for
+    // CHECK-NEXT: affine.for
+    // CHECK-NOT: func.call
+    // CHECK: affine.load
+    // CHECK-NEXT: daphne.ewExp
+    %mapped = "daphne.map"(%input) {func = "increment-1-1"} : (!daphne.Matrix<2x2xf64>) -> !daphne.Matrix<2x2xf64>
+    "daphne.print"(%mapped, %true, %false) : (!daphne.Matrix<2x2xf64>, i1, i1) -> ()
+    "daphne.return"() : () -> ()
+  }
+}
diff --git a/test/codegen/matmul.mlir b/test/codegen/matmul.mlir
new file mode 100644
index 000000000..6f3672be5
--- /dev/null
+++ b/test/codegen/matmul.mlir
@@ -0,0 +1,36 @@
+// RUN: daphne-opt --lower-mm %s | FileCheck %s
+
+// Multiplies two 10x10 f64 matrices (filled from the constants 5.0 and 3.0)
+// with "daphne.matMul". The directives inside @main list what the tool output
+// is expected to contain.
+
+module {
+  func.func @main() {
+    // CHECK: {{.*}}memref.alloc
+    %n = "daphne.constant"() {value = 10 : index} : () -> index
+    %false = "daphne.constant"() {value = false} : () -> i1
+    %three = "daphne.constant"() {value = 3.000000e+00 : f64} : () -> f64
+    %five = "daphne.constant"() {value = 5.000000e+00 : f64} : () -> f64
+    %lhs = "daphne.fill"(%five, %n, %n) : (f64, index, index) -> !daphne.Matrix<10x10xf64>
+    %rhs = "daphne.fill"(%three, %n, %n) : (f64, index, index) -> !daphne.Matrix<10x10xf64>
+    // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}}
+    // CHECK-NEXT: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}}
+
+    // First loop nest: sets the allocated memref to 0.
+    // CHECK: affine.for
+    // CHECK-NEXT: {{ *}}affine.for
+    // CHECK-NEXT: {{ *}}affine.store
+
+    // Second loop nest: the matrix multiplication itself.
+    // CHECK: affine.for
+    // CHECK-NEXT: affine.for
+    // CHECK-NEXT: affine.for
+    // CHECK-NEXT: {{.*}}memref.load
+    // CHECK-NEXT: {{.*}}memref.load
+    // CHECK-NEXT: {{.*}}memref.load
+    // CHECK-NEXT: {{.*}}llvm.intr.fma
+    // CHECK-NEXT: {{.*}}memref.store
+    %prod = "daphne.matMul"(%lhs, %rhs, %false, %false) : (!daphne.Matrix<10x10xf64>, !daphne.Matrix<10x10xf64>, i1, i1) -> !daphne.Matrix<10x10xf64>
+    "daphne.return"() : () -> ()
+  }
+}
diff --git a/test/codegen/run-lit.py b/test/codegen/run-lit.py
new file mode 100644
index 000000000..39898435c
--- /dev/null
+++ b/test/codegen/run-lit.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+# Launcher script: hands control to the main() entry point of the `lit`
+# Python package, which is called here without explicit arguments.
+import lit.main
+
+lit.main.main()
diff --git a/test/codegen/sum_agg.mlir b/test/codegen/sum_agg.mlir
new file mode 100644
index 000000000..d0df6ea7e
--- /dev/null
+++ b/test/codegen/sum_agg.mlir
@@ -0,0 +1,31 @@
+// RUN: daphne-opt --lower-agg %s | FileCheck %s
+
+// Sums a 10x10 f64 matrix with "daphne.sumAll"; the op is bracketed by two
+// "daphne.now" ops whose difference, divided by the constant 1000000, is
+// printed after the sum. The directives inside @main expect sumAll to be
+// absent from the tool output and a nested affine.for pair to appear.
+
+module {
+  func.func @main() {
+    %true = "daphne.constant"() {value = true} : () -> i1
+    %n = "daphne.constant"() {value = 10 : index} : () -> index
+    %scale = "daphne.constant"() {value = 1000000 : si64} : () -> si64
+    %false = "daphne.constant"() {value = false} : () -> i1
+    %one = "daphne.constant"() {value = 1.000000e+00 : f64} : () -> f64
+    %mat = "daphne.fill"(%one, %n, %n) : (f64, index, index) -> !daphne.Matrix<10x10xf64>
+    %t_start = "daphne.now"() : () -> si64
+    // CHECK-NOT: sumAll
+    // CHECK: {{.*}}"daphne.convertDenseMatrixToMemRef"{{.*}}
+    // CHECK: affine.for
+    // CHECK-NEXT: arith.constant
+    // CHECK-NEXT: affine.for
+    // CHECK-NEXT: memref.load
+    %sum = "daphne.sumAll"(%mat) : (!daphne.Matrix<10x10xf64>) -> f64
+    %t_stop = "daphne.now"() : () -> si64
+    "daphne.print"(%sum, %true, %false) : (f64, i1, i1) -> ()
+    %elapsed = "daphne.ewSub"(%t_stop, %t_start) : (si64, si64) -> si64
+    %elapsed_scaled = "daphne.ewDiv"(%elapsed, %scale) : (si64, si64) -> si64
+    "daphne.print"(%elapsed_scaled, %true, %false) : (si64, i1, i1) -> ()
+    "daphne.return"() : () -> ()
+  }
+}
diff --git a/test/tags.h b/test/tags.h
index 9d977a61c..14c490cbc 100644
--- a/test/tags.h
+++ b/test/tags.h
@@ -24,6 +24,7 @@
 
 #define TAG_ALGORITHMS "[algorithms]"
 #define TAG_CAST "[cast]"
+#define TAG_CODEGEN "[codegen]" // NOTE(review): presumably labels the codegen test cases (cf. test/codegen/) -- confirm
 #define TAG_CONFIG "[config]"
 #define TAG_CONTROLFLOW "[controlflow]"
 #define TAG_DATASTRUCTURES "[datastructures]"