From 5414c2017cefc0acedb339c3cce6ed4f9eebb99a Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Thu, 13 Feb 2025 16:38:46 -0700 Subject: [PATCH] Rename aie-translate's aie-npu-instgen flag to aie-npu-to-binary to be more descriptive (#2041) --- include/aie-c/Translation.h | 3 +- include/aie/Targets/AIETargets.h | 11 +-- lib/CAPI/Translation.cpp | 6 +- lib/Targets/AIETargetNPU.cpp | 14 ++-- lib/Targets/AIETargets.cpp | 8 +-- python/AIEMLIRModule.cpp | 9 +-- python/compiler/aiecc/main.py | 69 +++++++++++-------- python/dialects/aie.py | 2 +- test/Targets/NPU/npu_blockwrite_instgen.mlir | 2 +- test/Targets/NPU/npu_instgen.mlir | 2 +- test/aiecc/buffers_xclbin.mlir | 12 ++++ test/npu-xrt/add_one_two_txn/run.lit | 2 +- test/npu-xrt/ctrl_packet_reconfig/run.lit | 2 +- .../ctrl_packet_reconfig_1x4_cores/run.lit | 2 +- .../ctrl_packet_reconfig_4x1_cores/run.lit | 2 +- test/txn2mlir/roundtrip_npu1_1col.mlir | 2 +- test/txn2mlir/roundtrip_npu1_4col.mlir | 2 +- 17 files changed, 91 insertions(+), 59 deletions(-) diff --git a/include/aie-c/Translation.h b/include/aie-c/Translation.h index 651e729d59..928642d545 100644 --- a/include/aie-c/Translation.h +++ b/include/aie-c/Translation.h @@ -21,7 +21,8 @@ extern "C" { MLIR_CAPI_EXPORTED MlirStringRef aieTranslateAIEVecToCpp(MlirOperation op, bool aie2); MLIR_CAPI_EXPORTED MlirStringRef aieTranslateModuleToLLVMIR(MlirOperation op); -MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToNPU(MlirOperation op); +MLIR_CAPI_EXPORTED MlirStringRef aieTranslateNpuToBinary(MlirOperation op, + MlirStringRef name); MLIR_CAPI_EXPORTED MlirStringRef aieTranslateControlPacketsToUI32Vec(MlirOperation op); MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToXAIEV2(MlirOperation op); diff --git a/include/aie/Targets/AIETargets.h b/include/aie/Targets/AIETargets.h index 53e8b914a4..9deaf3caf2 100644 --- a/include/aie/Targets/AIETargets.h +++ b/include/aie/Targets/AIETargets.h @@ -34,11 +34,12 @@ mlir::LogicalResult AIETranslateShimSolution(mlir::ModuleOp module, 
llvm::raw_ostream &); mlir::LogicalResult AIETranslateGraphXPE(mlir::ModuleOp module, llvm::raw_ostream &); -mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp module, - llvm::raw_ostream &output, - llvm::StringRef sequenceName = ""); -mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp, std::vector &, - llvm::StringRef sequenceName = ""); +mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp module, + llvm::raw_ostream &output, + llvm::StringRef sequenceName = ""); +mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp, + std::vector &, + llvm::StringRef sequenceName = ""); mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp module, llvm::raw_ostream &output, diff --git a/lib/CAPI/Translation.cpp b/lib/CAPI/Translation.cpp index a29c76afbf..7d566a7f9f 100644 --- a/lib/CAPI/Translation.cpp +++ b/lib/CAPI/Translation.cpp @@ -92,11 +92,13 @@ MlirOperation aieTranslateBinaryToTxn(MlirContext ctx, MlirStringRef binary) { return wrap(mod->getOperation()); } -MlirStringRef aieTranslateToNPU(MlirOperation moduleOp) { +MlirStringRef aieTranslateNpuToBinary(MlirOperation moduleOp, + MlirStringRef sequenceName) { std::string npu; llvm::raw_string_ostream os(npu); ModuleOp mod = llvm::cast(unwrap(moduleOp)); - if (failed(AIETranslateToNPU(mod, os))) + llvm::StringRef name(sequenceName.data, sequenceName.length); + if (failed(AIETranslateNpuToBinary(mod, os, name))) return mlirStringRefCreate(nullptr, 0); char *cStr = static_cast(malloc(npu.size())); npu.copy(cStr, npu.size()); diff --git a/lib/Targets/AIETargetNPU.cpp b/lib/Targets/AIETargetNPU.cpp index bf62b73497..b5b69c6d6f 100644 --- a/lib/Targets/AIETargetNPU.cpp +++ b/lib/Targets/AIETargetNPU.cpp @@ -186,9 +186,9 @@ void appendBlockWrite(std::vector &instructions, NpuBlockWriteOp op) { } // namespace LogicalResult -xilinx::AIE::AIETranslateToNPU(ModuleOp module, - std::vector &instructions, - StringRef sequenceName) { +xilinx::AIE::AIETranslateNpuToBinary(ModuleOp module, + std::vector 
&instructions, + StringRef sequenceName) { auto words = reserveAndGetTail(instructions, 4); @@ -242,11 +242,11 @@ xilinx::AIE::AIETranslateToNPU(ModuleOp module, return success(); } -LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module, - raw_ostream &output, - StringRef sequenceName) { +LogicalResult xilinx::AIE::AIETranslateNpuToBinary(ModuleOp module, + raw_ostream &output, + StringRef sequenceName) { std::vector instructions; - auto r = AIETranslateToNPU(module, instructions, sequenceName); + auto r = AIETranslateNpuToBinary(module, instructions, sequenceName); if (failed(r)) return r; for (auto w : instructions) diff --git a/lib/Targets/AIETargets.cpp b/lib/Targets/AIETargets.cpp index 625f3c1a03..4cba7bd521 100644 --- a/lib/Targets/AIETargets.cpp +++ b/lib/Targets/AIETargets.cpp @@ -157,7 +157,7 @@ void registerAIETranslations() { "aie-output-binary", llvm::cl::init(false), llvm::cl::desc( "Select binary (true) or text (false) output for supported " - "translations. e.g. aie-npu-instgen, aie-ctrlpkt-to-bin")); + "translations. e.g. 
aie-npu-to-binary, aie-ctrlpkt-to-bin")); static llvm::cl::opt sequenceName( "aie-sequence-name", llvm::cl::init(""), @@ -344,18 +344,18 @@ void registerAIETranslations() { }, registerDialects); TranslateFromMLIRRegistration registrationNPU( - "aie-npu-instgen", "Translate npu instructions to binary", + "aie-npu-to-binary", "Translate npu instructions to binary", [](ModuleOp module, raw_ostream &output) { if (outputBinary == true) { std::vector instructions; - auto r = AIETranslateToNPU(module, instructions, sequenceName); + auto r = AIETranslateNpuToBinary(module, instructions, sequenceName); if (failed(r)) return r; output.write(reinterpret_cast(instructions.data()), instructions.size() * sizeof(uint32_t)); return success(); } - return AIETranslateToNPU(module, output, sequenceName); + return AIETranslateNpuToBinary(module, output, sequenceName); }, registerDialects); TranslateFromMLIRRegistration registrationCtrlPkt( diff --git a/python/AIEMLIRModule.cpp b/python/AIEMLIRModule.cpp index 51c8f0441b..4e1dc5cd85 100644 --- a/python/AIEMLIRModule.cpp +++ b/python/AIEMLIRModule.cpp @@ -122,16 +122,17 @@ NB_MODULE(_aie, m) { "ctx"_a, "binary"_a); m.def( - "npu_instgen", - [&stealCStr](MlirOperation op) { - nb::str npuInstructions = stealCStr(aieTranslateToNPU(op)); + "translate_npu_to_binary", + [&stealCStr](MlirOperation op, const std::string &sequence_name) { + nb::str npuInstructions = stealCStr(aieTranslateNpuToBinary( + op, {sequence_name.data(), sequence_name.size()})); auto individualInstructions = nb::cast(npuInstructions.attr("split")()); for (size_t i = 0; i < individualInstructions.size(); ++i) individualInstructions[i] = individualInstructions[i].attr("strip")(); return individualInstructions; }, - "module"_a); + "module"_a, "sequence_name"_a = ""); m.def( "generate_control_packets", diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py index 947dc15c9d..62de57cca3 100644 --- a/python/compiler/aiecc/main.py +++ 
b/python/compiler/aiecc/main.py @@ -93,11 +93,8 @@ + LOWER_TO_LLVM_PIPELINE ) -CREATE_PATH_FINDER_FLOWS = Pipeline().Nested( - "aie.device", Pipeline().add_pass("aie-create-pathfinder-flows") -) - -DMA_TO_NPU = Pipeline().Nested( +# pipeline to lower and legalize runtime sequence for NPU +NPU_LOWERING_PIPELINE = Pipeline().Nested( "aie.device", Pipeline() .add_pass("aie-materialize-bd-chains") @@ -335,7 +332,7 @@ def do_run(command, verbose=False): def run_passes(pass_pipeline, mlir_module_str, outputfile=None, verbose=False): if verbose: print("Running:", pass_pipeline) - with Context() as ctx, Location.unknown(): + with Context(), Location.unknown(): module = Module.parse(mlir_module_str) pm = PassManager.parse(pass_pipeline) try: @@ -350,6 +347,23 @@ def run_passes(pass_pipeline, mlir_module_str, outputfile=None, verbose=False): return mlir_module_str +def run_passes_module(pass_pipeline, mlir_module, outputfile=None, verbose=False): + if verbose: + print("Running:", pass_pipeline) + with mlir_module.context, Location.unknown(): + pm = PassManager.parse(pass_pipeline) + try: + pm.run(mlir_module.operation) + except Exception as e: + print("Error running pass pipeline: ", pass_pipeline, e) + raise e + if outputfile: + mlir_module_str = str(mlir_module) + with open(outputfile, "w") as g: + g.write(mlir_module_str) + return mlir_module + + def corefile(dirname, core, ext): col, row, _ = core return os.path.join(dirname, f"core_{col}_{row}.{ext}") @@ -1102,27 +1116,28 @@ async def run_flow(self): # Optionally generate insts.txt for NPU instruction stream if opts.npu or opts.only_npu: - generated_insts_mlir = self.prepend_tmp("generated_npu_insts.mlir") - await self.do_call( - progress_bar.task, - [ - "aie-opt", - f"--pass-pipeline={DMA_TO_NPU}", - file_with_addresses, - "-o", - generated_insts_mlir, - ], - ) - await self.do_call( - progress_bar.task, - [ - "aie-translate", - "--aie-npu-instgen", - generated_insts_mlir, - "-o", - opts.insts_name, - ], - ) + with 
Context(), Location.unknown(): + file_with_addresses_module = Module.parse( + await read_file_async(file_with_addresses) + ) + pass_pipeline = NPU_LOWERING_PIPELINE.materialize(module=True) + npu_insts_file = ( + self.prepend_tmp("npu_insts.mlir") + if self.opts.verbose + else None + ) + npu_insts_module = run_passes_module( + pass_pipeline, + file_with_addresses_module, + npu_insts_file, + self.opts.verbose, + ) + npu_insts = aiedialect.translate_npu_to_binary( + npu_insts_module.operation + ) + with open(opts.insts_name, "w") as f: + for inst in npu_insts: + f.write(f"{inst}\n") if opts.only_npu: return diff --git a/python/dialects/aie.py b/python/dialects/aie.py index 1e70673480..996f10d31a 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -33,7 +33,7 @@ generate_cdo, generate_xaie, generate_control_packets, - npu_instgen, + translate_npu_to_binary, register_dialect, translate_aie_vec_to_cpp, translate_mlir_to_llvmir, diff --git a/test/Targets/NPU/npu_blockwrite_instgen.mlir b/test/Targets/NPU/npu_blockwrite_instgen.mlir index 9ca60fc63d..7b8a65810d 100644 --- a/test/Targets/NPU/npu_blockwrite_instgen.mlir +++ b/test/Targets/NPU/npu_blockwrite_instgen.mlir @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -// RUN: aie-opt --aie-dma-to-npu %s | aie-translate --aie-npu-instgen | FileCheck %s +// RUN: aie-opt --aie-dma-to-npu %s | aie-translate --aie-npu-to-binary | FileCheck %s module { aie.device(npu1_4col) { aiex.runtime_sequence(%arg0: memref<16xf32>, %arg1: memref<16xf32>) { diff --git a/test/Targets/NPU/npu_instgen.mlir b/test/Targets/NPU/npu_instgen.mlir index 1eadf3d7eb..10c01649f2 100644 --- a/test/Targets/NPU/npu_instgen.mlir +++ b/test/Targets/NPU/npu_instgen.mlir @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -// RUN: aie-translate --aie-npu-instgen %s | FileCheck %s +// RUN: aie-translate --aie-npu-to-binary %s | FileCheck %s module 
{ aie.device(npu1) { memref.global "private" constant @write_data : memref<8xi32> = dense<[100, 101, 102, 103, 104 ,105, 106, 107]> diff --git a/test/aiecc/buffers_xclbin.mlir b/test/aiecc/buffers_xclbin.mlir index 24e3bf1642..7af1fc330c 100644 --- a/test/aiecc/buffers_xclbin.mlir +++ b/test/aiecc/buffers_xclbin.mlir @@ -93,6 +93,18 @@ module { %02 = aie.tile(0, 2) %12 = aie.tile(1, 2) %22 = aie.tile(2, 2) + memref.global "public" @in0 : memref<1024xi32> + memref.global "public" @in1 : memref<1024xi32> + memref.global "public" @in2 : memref<1024xi32> + memref.global "public" @out0 : memref<1024xi32> + memref.global "public" @out1 : memref<1024xi32> + memref.global "public" @out2 : memref<1024xi32> + aie.shim_dma_allocation @in0 (S2MM, 0, 0) + aie.shim_dma_allocation @in1(S2MM, 1, 0) + aie.shim_dma_allocation @in2(S2MM, 2, 0) + aie.shim_dma_allocation @out0(MM2S, 0, 0) + aie.shim_dma_allocation @out1(MM2S, 1, 0) + aie.shim_dma_allocation @out2(MM2S, 2, 0) aiex.runtime_sequence(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>, %arg3: memref<1024xi32>, %arg4: memref<1024xi32>, %arg5: memref<1024xi32>) { aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 0 : i64, metadata = @in0} : memref<1024xi32> aiex.npu.dma_memcpy_nd (0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 1 : i64, metadata = @out0} : memref<1024xi32> diff --git a/test/npu-xrt/add_one_two_txn/run.lit b/test/npu-xrt/add_one_two_txn/run.lit index 80bb55f222..642a83e6b9 100644 --- a/test/npu-xrt/add_one_two_txn/run.lit +++ b/test/npu-xrt/add_one_two_txn/run.lit @@ -6,5 +6,5 @@ // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags // RUN: %python aiecc.py --xclbin-kernel-name=ADDONE --xclbin-kernel-id=0x901 --xclbin-instance-name=ADDONEINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_one.xclbin --npu-insts-name=add_one_insts.txt %S/aie1.mlir // RUN: %python 
aiecc.py --no-aiesim --aie-generate-txn --aie-generate-npu --no-compile-host --npu-insts-name=add_two_insts.txt %S/aie2.mlir -// RUN: aie-translate -aie-npu-instgen -aie-output-binary=true -aie-sequence-name=configure aie2.mlir.prj/txn.mlir -o add_two_cfg.bin +// RUN: aie-translate -aie-npu-to-binary -aie-output-binary=true -aie-sequence-name=configure aie2.mlir.prj/txn.mlir -o add_two_cfg.bin // RUN: %run_on_npu ./test.exe -x add_one.xclbin -i add_one_insts.txt -c add_two_cfg.bin -j add_two_insts.txt diff --git a/test/npu-xrt/ctrl_packet_reconfig/run.lit b/test/npu-xrt/ctrl_packet_reconfig/run.lit index ad7148a41c..201498248d 100644 --- a/test/npu-xrt/ctrl_packet_reconfig/run.lit +++ b/test/npu-xrt/ctrl_packet_reconfig/run.lit @@ -12,7 +12,7 @@ // RUN: aie-translate -aie-ctrlpkt-to-bin -aie-sequence-name=configure aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt.txt // // RUN: aie-opt -aie-ctrl-packet-to-dma -aie-dma-to-npu aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt_dma_seq.mlir -// RUN: aie-translate -aie-npu-instgen -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt +// RUN: aie-translate -aie-npu-to-binary -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt // // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags // RUN: %run_on_npu ./test.exe diff --git a/test/npu-xrt/ctrl_packet_reconfig_1x4_cores/run.lit b/test/npu-xrt/ctrl_packet_reconfig_1x4_cores/run.lit index ad7148a41c..201498248d 100644 --- a/test/npu-xrt/ctrl_packet_reconfig_1x4_cores/run.lit +++ b/test/npu-xrt/ctrl_packet_reconfig_1x4_cores/run.lit @@ -12,7 +12,7 @@ // RUN: aie-translate -aie-ctrlpkt-to-bin -aie-sequence-name=configure aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt.txt // // RUN: aie-opt -aie-ctrl-packet-to-dma -aie-dma-to-npu aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt_dma_seq.mlir -// RUN: aie-translate -aie-npu-instgen -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt +// 
RUN: aie-translate -aie-npu-to-binary -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt // // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags // RUN: %run_on_npu ./test.exe diff --git a/test/npu-xrt/ctrl_packet_reconfig_4x1_cores/run.lit b/test/npu-xrt/ctrl_packet_reconfig_4x1_cores/run.lit index ad7148a41c..201498248d 100644 --- a/test/npu-xrt/ctrl_packet_reconfig_4x1_cores/run.lit +++ b/test/npu-xrt/ctrl_packet_reconfig_4x1_cores/run.lit @@ -12,7 +12,7 @@ // RUN: aie-translate -aie-ctrlpkt-to-bin -aie-sequence-name=configure aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt.txt // // RUN: aie-opt -aie-ctrl-packet-to-dma -aie-dma-to-npu aie2_overlay.mlir.prj/ctrlpkt.mlir -o ctrlpkt_dma_seq.mlir -// RUN: aie-translate -aie-npu-instgen -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt +// RUN: aie-translate -aie-npu-to-binary -aie-sequence-name=configure ctrlpkt_dma_seq.mlir -o ctrlpkt_dma_seq.txt // // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags // RUN: %run_on_npu ./test.exe diff --git a/test/txn2mlir/roundtrip_npu1_1col.mlir b/test/txn2mlir/roundtrip_npu1_1col.mlir index b251f115d1..9a4a6b3b38 100644 --- a/test/txn2mlir/roundtrip_npu1_1col.mlir +++ b/test/txn2mlir/roundtrip_npu1_1col.mlir @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -// RUN: aie-translate -aie-npu-instgen -aie-output-binary=true %s -o ./roundtrip_npu1_1col_cfg.bin +// RUN: aie-translate -aie-npu-to-binary -aie-output-binary=true %s -o ./roundtrip_npu1_1col_cfg.bin // RUN: %python txn2mlir.py -f ./roundtrip_npu1_1col_cfg.bin | FileCheck %s // CHECK: aie.device(npu1_1col) diff --git a/test/txn2mlir/roundtrip_npu1_4col.mlir b/test/txn2mlir/roundtrip_npu1_4col.mlir index 0059e3e4a4..d1d2674c5a 100644 --- a/test/txn2mlir/roundtrip_npu1_4col.mlir +++ b/test/txn2mlir/roundtrip_npu1_4col.mlir @@ -8,7 +8,7 @@ // 
//===----------------------------------------------------------------------===// -// RUN: aie-translate -aie-npu-instgen -aie-output-binary=true %s -o ./roundtrip_npu1_4col_cfg.bin +// RUN: aie-translate -aie-npu-to-binary -aie-output-binary=true %s -o ./roundtrip_npu1_4col_cfg.bin // RUN: %python txn2mlir.py -f ./roundtrip_npu1_4col_cfg.bin | FileCheck %s // CHECK: aie.device(npu1_4col)