Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit c2c791923d0070c143c71ce9e2866d7d5562f7f7
Author: Mason Remy <[email protected]>
Date:   Wed Jan 11 03:21:45 2023 +0000

    Merged PR 3018: Use VS 17.4.3-built binaries. This is in a separate channel to allow older ve...

    Use VS 17.4.3-built binaries. This is in a separate channel to allow older versions to keep working

commit ef0552571e492745f0c3e53d7d1e77f452b3f08e
Author: Denny Sun <[email protected]>
Date:   Mon Jan 9 03:54:54 2023 +0000

    Merged PR 3012: Correctness check for output array support for range node

    Successful correctness check means output array support can work end to end.

commit 06d392fef06e5a340a8e6c26ba7d1c312f779964
Author: Denny Sun <[email protected]>
Date:   Sat Jan 7 00:53:57 2023 +0000

    Merged PR 3015: Update hatlib version to support floating type as function arg

    Update hatlib version to support floating type as function arg

commit 622fd7a73e56f55747f1c86e46dcaa93b14585e9
Author: Captain Jack Sparrow <[email protected]>
Date:   Thu Jan 5 18:19:47 2023 +0000

    Merged PR 3010: Disable BinOp simplification for floating types

    Disable BinOp simplification for floating types

commit 989f1785d080e82c802dc046e84e84cfdb5717bf
Author: Lisa Ong <[email protected]>
Date:   Thu Jan 5 07:39:28 2023 +0000

    Merged PR 3013: Apply major version in docs

    Removes the need to update docs versions every time we release

commit c24f4dcfd26588733b5c59cd568cb9beb03ac902
Author: Captain Jack Sparrow <[email protected]>
Date:   Tue Jan 3 18:02:52 2023 +0000

    Merged PR 2981: Prologue and Epilogue op support with tensorization and caching

    - Add optional prologue and epilogue support for tensorization
    - Supported gemm parameters with fragment ops are: {alpha: 1, beta: any} and {alpha: >1, beta: 0}
    - ReLU, SET, SCALE added as standard fragment ops

    Related work items: #3704
  • Loading branch information
Jubi Taneja committed Jan 11, 2023
1 parent c35ce22 commit 5f5de21
Show file tree
Hide file tree
Showing 125 changed files with 1,249 additions and 619 deletions.
2 changes: 2 additions & 0 deletions .azure/cuda/cuda-benchmark-baseline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ jobs:
export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8
python gpu_benchmark_tool.py --type h --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch CUDA --input gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv
python gpu_benchmark_tool.py --type s --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch CUDA --input gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv
python gpu_benchmark_tool.py --type h --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch CUDA --input gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv --relu
python gpu_benchmark_tool.py --type s --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch CUDA --input gemm_rectangle_A6000.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv --relu
displayName: Run Pytorch benchmarks
workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
env:
Expand Down
2 changes: 1 addition & 1 deletion .azure/cuda/cuda-benchmark-fp16-bert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ trigger: none

jobs:
- job: "CUDA_Benchmarking_FP16_BERT"
timeoutInMinutes: 600
timeoutInMinutes: 720

pool:
name: LinuxNVGPUPool
Expand Down
2 changes: 1 addition & 1 deletion .azure/cuda/cuda-benchmark-fp32.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- bash: |
export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8
python gpu_benchmark_tool.py --type s --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small.csv --category cube
python gpu_benchmark_tool.py --type s --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small.csv --category cube --relu
python gpu_benchmark_tool.py --type s --target 'NVidia RTX A6000' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small_A6000.csv --category device
displayName: Run fp32 benchmarks A6000
workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
Expand Down
2 changes: 1 addition & 1 deletion .azure/linux-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ steps:
- bash: |
python -m pip install -r $(Build.SourcesDirectory)/accera/onnx-emitter/test/requirements.txt
python -m pip install pytest-azurepipelines
ctest -C Debug -T test -VV -LE benchmark -j $(PARALLEL) --progress
ctest -C Debug -T test -VV -LE benchmark -E "onnx" -j $(PARALLEL) --progress
displayName: Run all ctest targets
workingDirectory: "$(Build.SourcesDirectory)/build"
Expand Down
2 changes: 1 addition & 1 deletion .azure/macos-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ steps:
- bash: |
python -m pip install -r $(Build.SourcesDirectory)/accera/onnx-emitter/test/requirements.txt
python -m pip install pytest-azurepipelines
ctest -C Release -T test -VV -LE benchmark -E "dsl|mfma" --progress
ctest -C Release -T test -VV -LE benchmark -E "dsl|mfma|onnx" --progress
displayName: Run all ctest targets
continueOnError: false
workingDirectory: "$(Build.SourcesDirectory)/build"
Expand Down
2 changes: 2 additions & 0 deletions .azure/rocm/rocm-benchmark-baseline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ jobs:
export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8
python gpu_benchmark_tool.py --type h --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch ROCM --input gemm_rectangle_MI100.csv gemm_square.csv gemm_bert_assorted.csv
python gpu_benchmark_tool.py --type s --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch ROCM --input gemm_rectangle_MI100.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv
python gpu_benchmark_tool.py --type h --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch ROCM --input gemm_rectangle_MI100.csv gemm_square.csv gemm_bert_assorted.csv --relu
python gpu_benchmark_tool.py --type s --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --pytorch ROCM --input gemm_rectangle_MI100.csv gemm_square.csv gemm_bert_assorted.csv gemm_resnet_inception.csv --relu
displayName: Run Pytorch benchmarks
workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
env:
Expand Down
2 changes: 1 addition & 1 deletion .azure/rocm/rocm-benchmark-fp16-bert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ trigger: none

jobs:
- job: "ROCM_Benchmarking_FP16_BERT"
timeoutInMinutes: 600
timeoutInMinutes: 720

pool: LinuxAMDGPUPool

Expand Down
2 changes: 1 addition & 1 deletion .azure/rocm/rocm-benchmark-fp32.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
export PYTHONPATH=$(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8
export LD_LIBRARY_PATH=${ROCM_PATH}/lib
echo "LD_LIBRARY_PATH" ${LD_LIBRARY_PATH}
python gpu_benchmark_tool.py --type s --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small.csv --category cube
python gpu_benchmark_tool.py --type s --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small.csv --category cube --relu
python gpu_benchmark_tool.py --type s --target 'AMD MI100' --branch $(Build.SourceBranch) --output $(Build.SourcesDirectory)/build/lib.linux-x86_64-3.8/accera_benchmarks/results --upload official_build_container_DO_NOT_UPLOAD_HERE --verbose --check --input gemm_small_MI100.csv --category device
displayName: Run fp32 benchmarks MI100
workingDirectory: "$(Build.SourcesDirectory)/tools/benchmarkers"
Expand Down
2 changes: 1 addition & 1 deletion .azure/win-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ steps:
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
python -m pip install -r $(Build.SourcesDirectory)/accera/onnx-emitter/test/requirements.txt
python -m pip install pytest-azurepipelines
ctest -C Release -T test -VV -LE "benchmark" -E "mfma" --progress
ctest -C Release -T test -VV -LE "benchmark" -E "mfma|onnx" --progress
displayName: Run ctest targets with smoke tests
workingDirectory: "$(Build.SourcesDirectory)/build/Release"
Expand Down
48 changes: 24 additions & 24 deletions accera/acc-translate/src/Target/Cpp/AcceraDialectCppPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ namespace cpp_printer
os << "__builtin_amdgcn_readfirstlane(" << tid << ")";

// ROCDL threadID ops will have a cast from i32 to index type, so navigate appropriately
assert((gpu::Dimension{ warpIdOp.dimension() } == gpu::Dimension::x && warpIdOp.threadId().getDefiningOp<arith::IndexCastOp>().getIn().getDefiningOp<ROCDL::ThreadIdXOp>())
|| (gpu::Dimension{ warpIdOp.dimension() } == gpu::Dimension::y && warpIdOp.threadId().getDefiningOp<arith::IndexCastOp>().getIn().getDefiningOp<ROCDL::ThreadIdYOp>()));
assert((gpu::Dimension{ warpIdOp.dimension() } == gpu::Dimension::x && warpIdOp.threadId().getDefiningOp<arith::IndexCastOp>().getIn().getDefiningOp<ROCDL::ThreadIdXOp>()) || (gpu::Dimension{ warpIdOp.dimension() } == gpu::Dimension::y && warpIdOp.threadId().getDefiningOp<arith::IndexCastOp>().getIn().getDefiningOp<ROCDL::ThreadIdYOp>()));
}
else
{
Expand Down Expand Up @@ -117,17 +116,15 @@ namespace cpp_printer
return printMMAMatrixOp(state, printer, memRefType.getElementType(), shape, allocMatrixOp.result(), opType, mfmaOpType.getNumBlocks(), allocMatrixOp.blocks(), rowMajor);
}

LogicalResult AcceraDialectCppPrinter::printOp(vir::MMAFillSyncOp constantMatrixOp)
{
if (!state.hasRuntime(Runtime::CUDA))
{
return constantMatrixOp.emitError("non-cuda version is not supported.");
}
// LogicalResult AcceraDialectCppPrinter::printOp(vir::MMAFillSyncOp constantMatrixOp)
// {
// if (!state.hasRuntime(Runtime::CUDA))
// {
// return constantMatrixOp.emitError("non-cuda version is not supported.");
// }

const vir::MMAOp mfmaOpType{ static_cast<vir::MMAShape>(constantMatrixOp.mmaShapeType()) };
const auto cShape = std::make_tuple(mfmaOpType.getM(), mfmaOpType.getN(), mfmaOpType.getK());
return printConstantMatrixOp(state, printer, cShape, constantMatrixOp.dest(), constantMatrixOp.value());
}
// return printConstantMatrixOp(state, printer, constantMatrixOp.dest(), constantMatrixOp.value());
// }

LogicalResult AcceraDialectCppPrinter::printOp(vir::MMALoadSyncOp loadMatrixOp)
{
Expand All @@ -136,11 +133,9 @@ namespace cpp_printer
return loadMatrixOp.emitError("non-cuda version is not supported.");
}

const vir::MMAOp mfmaOpType{ static_cast<vir::MMAShape>(loadMatrixOp.mmaShapeType()) };
const auto operandType = static_cast<vir::MMAOperandType>(loadMatrixOp.operandType());
const auto memrefShape = std::make_tuple(mfmaOpType.getM(), mfmaOpType.getN(), mfmaOpType.getK());

return printLoadMatrixOp(state, printer, memrefShape, loadMatrixOp.memref(), loadMatrixOp.dest(), operandType, loadMatrixOp.indices(), loadMatrixOp.rowMajor(), loadMatrixOp.blockThreadId(), loadMatrixOp.staticOffsets());
return printLoadMatrixOp(state, printer, loadMatrixOp.memref(), loadMatrixOp.dest(), operandType, loadMatrixOp.indices(), loadMatrixOp.rowMajor(), loadMatrixOp.blockThreadId(), loadMatrixOp.staticOffsets(), static_cast<vir::MMAFragmentOp>(loadMatrixOp.mmaPrologueOp()), loadMatrixOp.mmaPrologueArg());
}

LogicalResult AcceraDialectCppPrinter::printOp(vir::MMAComputeSyncOp computeMatrixOp)
Expand All @@ -150,9 +145,7 @@ namespace cpp_printer
return computeMatrixOp.emitError("non-cuda version is not supported.");
}

const vir::MMAOp mfmaOpType{ static_cast<vir::MMAShape>(computeMatrixOp.mmaShapeType()) };
const auto cShape = std::make_tuple(mfmaOpType.getM(), mfmaOpType.getN(), mfmaOpType.getK());
return printComputeMatrixOp(state, printer, cShape, computeMatrixOp.opA(), computeMatrixOp.opB(), computeMatrixOp.opC(), computeMatrixOp.opC(), computeMatrixOp.cbsz(), computeMatrixOp.abid(), computeMatrixOp.blgp());
return printComputeMatrixOp(state, printer, computeMatrixOp.opA(), computeMatrixOp.opB(), computeMatrixOp.opC(), computeMatrixOp.opC(), computeMatrixOp.cbsz(), computeMatrixOp.abid(), computeMatrixOp.blgp());
}

LogicalResult AcceraDialectCppPrinter::printOp(vir::MMAStoreSyncOp storeMatrixOp)
Expand All @@ -162,7 +155,7 @@ namespace cpp_printer
return storeMatrixOp.emitError("non-cuda version is not supported.");
}

return printStoreMatrixOp(state, printer, storeMatrixOp.src(), storeMatrixOp.memref(), storeMatrixOp.indices(), storeMatrixOp.blockThreadId(), storeMatrixOp.staticOffsets());
return printStoreMatrixOp(state, printer, storeMatrixOp.src(), storeMatrixOp.memref(), storeMatrixOp.indices(), storeMatrixOp.blockThreadId(), storeMatrixOp.staticOffsets(), static_cast<vir::MMAFragmentOp>(storeMatrixOp.mmaEpilogueOp()), storeMatrixOp.mmaEpilogueArg());
}

LogicalResult AcceraDialectCppPrinter::printVectorType(mlir::Type elementType, const uint32_t stride) const
Expand Down Expand Up @@ -200,14 +193,15 @@ namespace cpp_printer

auto srcMemref = blockLoadOp.memref();
auto srcMemrefType = srcMemref.getType().cast<MemRefType>();
const auto srcMemSpace = srcMemrefType.getMemorySpaceAsInt();
auto srcMemSpace = srcMemrefType.getMemorySpaceAsInt();
auto elementType = srcMemrefType.getElementType();
AffineDialectCppPrinter* affineDialectPrinter = dynamic_cast<AffineDialectCppPrinter*>(printer->getDialectPrinter("Affine"));
auto srcMap = mlir::getStridedLinearLayoutMap(srcMemrefType);
const auto srcRowMajor = mlir::canonicalizeStridedLayout(srcMemrefType).getLayout().isIdentity();
auto srcRowMajor = mlir::canonicalizeStridedLayout(srcMemrefType).getLayout().isIdentity();

auto dstMemrefType = blockLoadOp.dest().getType().cast<MemRefType>();
const auto destMemSpace = dstMemrefType.getMemorySpaceAsInt();
auto destMemSpace = dstMemrefType.getMemorySpaceAsInt();
auto dstRowMajor = blockLoadOp.dstRowMajor();

const auto tileShape = accera::ir::util::ConvertArrayAttrToIntVector(blockLoadOp.tileShape());
const auto var = SSANameState::SSANameKind::Variable;
Expand All @@ -222,7 +216,13 @@ namespace cpp_printer
const auto stride = std::min(wpt, vecWidth);
const auto strategy = stringifyCacheStrategy(blockLoadOp.strategy());

os << "block_copy<CopyMode::" << strategy << ", " << srcRowMajor << ", /*DST_ROW_MAJOR*/ " << blockLoadOp.dstRowMajor() << ", /*STRIDE*/ " << stride << ", /*WPT*/ " << wpt;
if (!blockLoadOp.srcToDst())
{
std::swap(srcMemSpace, destMemSpace);
std::swap(srcRowMajor, dstRowMajor);
}

os << "block_copy<CopyMode::" << strategy << ", /*SRC_ROW_MAJOR*/ " << srcRowMajor << ", /*DST_ROW_MAJOR*/ " << dstRowMajor << ", /*STRIDE*/ " << stride << ", /*WPT*/ " << wpt;
os << ", /*TILE_R,C*/" << tileShape[0] << ", " << tileShape[1] << ", /*BLOCK_DIM_X,Y,Z*/ " << blockLoadOp.blockDimX() << ", " << blockLoadOp.blockDimY();
os << ", " << blockLoadOp.blockDimZ() << ", " << getMemSpaceEnum(srcMemSpace) << ", " << getMemSpaceEnum(destMemSpace) << ", ";
RETURN_IF_FAILED(printVectorType(elementType, stride));
Expand All @@ -248,7 +248,7 @@ namespace cpp_printer

TypeSwitch<Operation*>(op)
.Case<vir::MMAAllocSyncOp>(handler)
.Case<vir::MMAFillSyncOp>(handler)
//.Case<vir::MMAFillSyncOp>(handler)
.Case<vir::MMALoadSyncOp>(handler)
.Case<vir::MMAComputeSyncOp>(handler)
.Case<vir::MMAStoreSyncOp>(handler)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace cpp_printer
std::string getName() override { return "Accera"; }

LogicalResult printOp(accera::ir::value::MMAAllocSyncOp op);
LogicalResult printOp(accera::ir::value::MMAFillSyncOp op);
//LogicalResult printOp(accera::ir::value::MMAFillSyncOp op);
LogicalResult printOp(accera::ir::value::MMALoadSyncOp op);
LogicalResult printOp(accera::ir::value::MMAComputeSyncOp op);
LogicalResult printOp(accera::ir::value::MMAStoreSyncOp op);
Expand Down
15 changes: 9 additions & 6 deletions accera/acc-translate/src/Target/Cpp/CppPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace cpp_printer
{
SmallString<128> nameStr("");
llvm::raw_svector_ostream strm(nameStr);
CppPrinter cppPrinter(strm, -1);
CppPrinter cppPrinter(strm, parentState);
(void)cppPrinter.printAttribute(constant->second);
return StringRef(nameStr).copy(nameAllocator);
}
Expand Down Expand Up @@ -150,7 +150,7 @@ namespace cpp_printer

// The function taken from AsmPrinter
// TODO: decouple this function from CppPrinter
LogicalResult CppPrinter::printFloatValue(const APFloat& apValue)
LogicalResult CppPrinter::printFloatValue(const APFloat& apValue, mlir::Type floatType)
{
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
Expand All @@ -163,6 +163,7 @@ namespace cpp_printer
// For 0 values just print something like: "{}"
if (apValue.isZero())
{
RETURN_IF_FAILED(printType(floatType));
os << "{}";
return success();
}
Expand All @@ -183,7 +184,8 @@ namespace cpp_printer
// (i.e., there is no precision loss).
if (APFloat(apValue.getSemantics(), strValue).bitwiseIsEqual(apValue))
{
os << strValue;
RETURN_IF_FAILED(printType(floatType));
os << "{" << strValue << "}";
return success();
}

Expand All @@ -195,7 +197,8 @@ namespace cpp_printer
// Make sure that we can parse the default form as a float.
if (StringRef(strValue).contains('.'))
{
os << strValue;
RETURN_IF_FAILED(printType(floatType));
os << "{" << strValue << "}";
return success();
}
}
Expand Down Expand Up @@ -257,7 +260,7 @@ namespace cpp_printer
}
else if (auto floatAttr = attr.dyn_cast<FloatAttr>())
{
(void)printFloatValue(floatAttr.getValue());
(void)printFloatValue(floatAttr.getValue(), floatAttr.getType());
}
else if (auto intAttr = attr.dyn_cast<IntegerAttr>())
{
Expand Down Expand Up @@ -644,7 +647,7 @@ namespace cpp_printer
{
SmallString<128> nameStr("");
llvm::raw_svector_ostream strm(nameStr);
CppPrinter cppPrinter(strm, getPrinterState().indexBitwidth);
CppPrinter cppPrinter(strm, getPrinterState());
(void)cppPrinter.printType(targetOrSrc.getType());
vectorTypeName = strm.str().str();
}
Expand Down
Loading

0 comments on commit 5f5de21

Please sign in to comment.