From 7d72e418a40dc53f057a23e7b39a6b820e2c812d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 10 Jan 2025 23:54:14 +0100 Subject: [PATCH] [HIPIFY][#1776][perl][feature][fix] Tuned the `hipify-perl` performance up to `450%` + [IMP] Changing `foreach` to `while (...) = each` gave 450% performance improvement; for example, 58 sec vs 13 sec + [FIX] Eliminated erroneous warnings like `unsupported MIOpen identifier: cudnnOpTensor` when the actual identifier is `cudnnOpTensor_t` + Decreased the generated file size by 3% + Updated the regenerated `hipify-perl` script --- bin/hipify-perl | 10540 ++++++++++++++++++++-------------------- src/CUDA2HIP_Perl.cpp | 67 +- 2 files changed, 5318 insertions(+), 5289 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 75b98742..82c0923c 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -9040,546 +9040,548 @@ sub transformHostFunctions { return $k; } +@SupportedDeviceFunctions = ( + "ynf", + "yn", + "y1f", + "y1", + "y0f", + "y0", + "truncf", + "trunc", + "tgammaf", + "tgamma", + "tanhf", + "tanh", + "tanf", + "tan", + "sqrtf", + "sqrt", + "sinpif", + "sinpi", + "sinhf", + "sinh", + "sinf", + "sincospif", + "sincospi", + "sincosf", + "sincos", + "sin", + "signbit", + "scalbnf", + "scalbn", + "scalblnf", + "scalbln", + "rsqrtf", + "rsqrt", + "roundf", + "round", + "rnormf", + "rnorm4df", + "rnorm4d", + "rnorm3df", + "rnorm3d", + "rnorm", + "rintf", + "rint", + "rhypotf", + "rhypot", + "remquof", + "remquo", + "remainderf", + "remainder", + "rcbrtf", + "rcbrt", + "powf", + "pow", + "normf", + "normcdfinvf", + "normcdfinv", + "normcdff", + "normcdf", + "norm4df", + "norm4d", + "norm3df", + "norm3d", + "norm", + "nextafterf", + "nextafter", + "nearbyintf", + "nearbyint", + "nanf", + "nan", + "modff", + "modf", + "min", + "max", + "lroundf", + "lround", + "lrintf", + "lrint", + "logf", + "logbf", + "logb", + "log2f", + "log2", + "log1pf", + "log1p", + "log10f", + "log10", + "log", + "llroundf", + "llround", + 
"llrintf", + "llrint", + "llabs", + "lgammaf", + "lgamma", + "ldexpf", + "ldexp", + "labs", + "jnf", + "jn", + "j1f", + "j1", + "j0f", + "j0", + "isnan", + "isinf", + "isfinite", + "ilogbf", + "ilogb", + "hypotf", + "hypot", + "htrunc", + "hsqrt", + "hsin", + "hrsqrt", + "hrint", + "hrcp", + "hlog2", + "hlog10", + "hlog", + "hfloor", + "hexp2", + "hexp10", + "hexp", + "hcos", + "hceil", + "h2trunc", + "h2sqrt", + "h2sin", + "h2rsqrt", + "h2rint", + "h2rcp", + "h2log2", + "h2log10", + "h2log", + "h2floor", + "h2exp2", + "h2exp10", + "h2exp", + "h2cos", + "h2ceil", + "frexpf", + "frexp", + "fmodf", + "fmod", + "fminf", + "fmin", + "fmaxf", + "fmax", + "fmaf", + "fma", + "floorf", + "floor", + "fdividef", + "fdimf", + "fdim", + "fabsf", + "fabs", + "expm1f", + "expm1", + "expf", + "exp2f", + "exp2", + "exp10f", + "exp10", + "exp", + "erfinvf", + "erfinv", + "erff", + "erfcxf", + "erfcx", + "erfcinvf", + "erfcinv", + "erfcf", + "erfc", + "erf", + "cyl_bessel_i1f", + "cyl_bessel_i1", + "cyl_bessel_i0f", + "cyl_bessel_i0", + "cospif", + "cospi", + "coshf", + "cosh", + "cosf", + "cos", + "copysignf", + "copysign", + "clock64", + "clock", + "ceilf", + "ceil", + "cbrtf", + "cbrt", + "atomicXor_system", + "atomicXor", + "atomicSub_system", + "atomicSub", + "atomicOr_system", + "atomicOr", + "atomicMin_system", + "atomicMin", + "atomicMax_system", + "atomicMax", + "atomicInc", + "atomicExch_system", + "atomicExch", + "atomicDec", + "atomicCAS_system", + "atomicCAS", + "atomicAnd_system", + "atomicAnd", + "atomicAdd_system", + "atomicAdd", + "atanhf", + "atanh", + "atanf", + "atan2f", + "atan2", + "atan", + "asinhf", + "asinh", + "asinf", + "asin", + "acoshf", + "acosh", + "acosf", + "acos", + "abs", + "__ushort_as_half", + "__ushort2half_rz", + "__ushort2half_ru", + "__ushort2half_rn", + "__ushort2half_rd", + "__usad", + "__urhadd", + "__umulhi", + "__umul64hi", + "__umul24", + "__ull2half_rz", + "__ull2half_ru", + "__ull2half_rn", + "__ull2half_rd", + "__ull2float_rz", + 
"__ull2float_ru", + "__ull2float_rn", + "__ull2float_rd", + "__ull2double_rz", + "__ull2double_ru", + "__ull2double_rn", + "__ull2double_rd", + "__uint_as_float", + "__uint2half_rz", + "__uint2half_ru", + "__uint2half_rn", + "__uint2half_rd", + "__uint2float_rz", + "__uint2float_ru", + "__uint2float_rn", + "__uint2float_rd", + "__uint2double_rn", + "__uhadd", + "__threadfence_system", + "__threadfence_block", + "__threadfence", + "__tanf", + "__syncthreads_or", + "__syncthreads_count", + "__syncthreads_and", + "__syncthreads", + "__sinf", + "__sincosf", + "__short_as_half", + "__short2half_rz", + "__short2half_ru", + "__short2half_rn", + "__short2half_rd", + "__shfl_xor_sync", + "__shfl_xor", + "__shfl_up_sync", + "__shfl_up", + "__shfl_sync", + "__shfl_down_sync", + "__shfl_down", + "__shfl", + "__saturatef", + "__sad", + "__rhadd", + "__powf", + "__popcll", + "__popc", + "__nv_cvt_halfraw_to_fp8", + "__nv_cvt_halfraw2_to_fp8x2", + "__nv_cvt_fp8x2_to_halfraw2", + "__nv_cvt_fp8_to_halfraw", + "__nv_cvt_float_to_fp8", + "__nv_cvt_float2_to_fp8x2", + "__nv_cvt_double_to_fp8", + "__nv_cvt_double2_to_fp8x2", + "__nv_cvt_bfloat16raw_to_fp8", + "__nv_cvt_bfloat16raw2_to_fp8x2", + "__mulhi", + "__mul64hi", + "__mul24", + "__match_any_sync", + "__match_all_sync", + "__lows2half2", + "__lows2bfloat162", + "__lowhigh2highlow", + "__low2half2", + "__low2half", + "__low2float", + "__longlong_as_double", + "__logf", + "__log2f", + "__log10f", + "__ll2half_rz", + "__ll2half_ru", + "__ll2half_rn", + "__ll2half_rd", + "__ll2float_rz", + "__ll2float_ru", + "__ll2float_rn", + "__ll2float_rd", + "__ll2double_rz", + "__ll2double_ru", + "__ll2double_rn", + "__ll2double_rd", + "__ldg", + "__ldcs", + "__ldcg", + "__ldca", + "__int_as_float", + "__int2half_rz", + "__int2half_ru", + "__int2half_rn", + "__int2half_rd", + "__int2float_rz", + "__int2float_ru", + "__int2float_rn", + "__int2float_rd", + "__int2double_rn", + "__hsub_sat", + "__hsub2_sat", + "__hsub2", + "__hsub", + "__hneu2", + 
"__hneu", + "__hneg2", + "__hneg", + "__hne2", + "__hne", + "__hmul_sat", + "__hmul2_sat", + "__hmul2", + "__hmul", + "__hmin_nan", + "__hmin", + "__hmax_nan", + "__hmax", + "__hltu2", + "__hltu", + "__hlt2", + "__hlt", + "__hleu2", + "__hleu", + "__hle2", + "__hle", + "__hisnan2", + "__hisnan", + "__hisinf", + "__hiloint2double", + "__highs2half2", + "__highs2bfloat162", + "__high2half2", + "__high2half", + "__high2float", + "__high2bfloat16", + "__hgtu2", + "__hgtu", + "__hgt2", + "__hgt", + "__hgeu2", + "__hgeu", + "__hge2", + "__hge", + "__hfma_sat", + "__hfma2_sat", + "__hfma2", + "__hfma", + "__hequ2", + "__hequ", + "__heq2", + "__heq", + "__hdiv", + "__hbneu2", + "__hbne2", + "__hbltu2", + "__hblt2", + "__hbleu2", + "__hble2", + "__hbgtu2", + "__hbgt2", + "__hbgeu2", + "__hbge2", + "__hbequ2", + "__hbeq2", + "__halves2half2", + "__half_as_ushort", + "__half_as_short", + "__half2ushort_rz", + "__half2ushort_ru", + "__half2ushort_rn", + "__half2ushort_rd", + "__half2ull_rz", + "__half2ull_ru", + "__half2ull_rn", + "__half2ull_rd", + "__half2uint_rz", + "__half2uint_ru", + "__half2uint_rn", + "__half2uint_rd", + "__half2short_rz", + "__half2short_ru", + "__half2short_rn", + "__half2short_rd", + "__half2ll_rz", + "__half2ll_ru", + "__half2ll_rn", + "__half2ll_rd", + "__half2int_rz", + "__half2int_ru", + "__half2int_rn", + "__half2int_rd", + "__half2half2", + "__half2float", + "__half22float2", + "__hadd_sat", + "__hadd2_sat", + "__hadd2", + "__hadd", + "__habs2", + "__habs", + "__h2div", + "__funnelshift_rc", + "__funnelshift_r", + "__funnelshift_lc", + "__funnelshift_l", + "__fsub_rn", + "__fsqrt_rn", + "__frsqrt_rn", + "__frcp_rn", + "__fmul_rn", + "__fmaf_rn", + "__fma_rn", + "__floats2half2_rn", + "__float_as_uint", + "__float_as_int", + "__float2ull_rz", + "__float2ull_ru", + "__float2ull_rn", + "__float2ull_rd", + "__float2uint_rz", + "__float2uint_ru", + "__float2uint_rn", + "__float2uint_rd", + "__float2ll_rz", + "__float2ll_ru", + "__float2ll_rn", + 
"__float2ll_rd", + "__float2int_rz", + "__float2int_ru", + "__float2int_rn", + "__float2int_rd", + "__float2half_rz", + "__float2half_ru", + "__float2half_rn", + "__float2half_rd", + "__float2half2_rn", + "__float2half", + "__float2bfloat16", + "__float22half2_rn", + "__ffsll", + "__ffs", + "__fdividef", + "__fdiv_rn", + "__fadd_rn", + "__expf", + "__exp10f", + "__dsub_rn", + "__dsqrt_rn", + "__drcp_rn", + "__double_as_longlong", + "__double2ull_rz", + "__double2ull_ru", + "__double2ull_rn", + "__double2ull_rd", + "__double2uint_rz", + "__double2uint_ru", + "__double2uint_rn", + "__double2uint_rd", + "__double2loint", + "__double2ll_rz", + "__double2ll_ru", + "__double2ll_rn", + "__double2ll_rd", + "__double2int_rz", + "__double2int_ru", + "__double2int_rn", + "__double2int_rd", + "__double2hiint", + "__double2float_rz", + "__double2float_ru", + "__double2float_rn", + "__double2float_rd", + "__double2bfloat16", + "__dmul_rn", + "__ddiv_rn", + "__dadd_rn", + "__cosf", + "__clzll", + "__clz", + "__byte_perm", + "__brevll", + "__brev", + "__bfloat162float", + "__bfloat162bfloat162", + "__bfloat1622float2", + "__ballot_sync", + "__ballot", + "__assertfail", + "__assert_fail", + "__any_sync", + "__any", + "__all_sync", + "__all", + "__activemask" + ); + sub countSupportedDeviceFunctions { my $k = 0; - foreach $func ( - "ynf", - "yn", - "y1f", - "y1", - "y0f", - "y0", - "truncf", - "trunc", - "tgammaf", - "tgamma", - "tanhf", - "tanh", - "tanf", - "tan", - "sqrtf", - "sqrt", - "sinpif", - "sinpi", - "sinhf", - "sinh", - "sinf", - "sincospif", - "sincospi", - "sincosf", - "sincos", - "sin", - "signbit", - "scalbnf", - "scalbn", - "scalblnf", - "scalbln", - "rsqrtf", - "rsqrt", - "roundf", - "round", - "rnormf", - "rnorm4df", - "rnorm4d", - "rnorm3df", - "rnorm3d", - "rnorm", - "rintf", - "rint", - "rhypotf", - "rhypot", - "remquof", - "remquo", - "remainderf", - "remainder", - "rcbrtf", - "rcbrt", - "powf", - "pow", - "normf", - "normcdfinvf", - "normcdfinv", - 
"normcdff", - "normcdf", - "norm4df", - "norm4d", - "norm3df", - "norm3d", - "norm", - "nextafterf", - "nextafter", - "nearbyintf", - "nearbyint", - "nanf", - "nan", - "modff", - "modf", - "min", - "max", - "lroundf", - "lround", - "lrintf", - "lrint", - "logf", - "logbf", - "logb", - "log2f", - "log2", - "log1pf", - "log1p", - "log10f", - "log10", - "log", - "llroundf", - "llround", - "llrintf", - "llrint", - "llabs", - "lgammaf", - "lgamma", - "ldexpf", - "ldexp", - "labs", - "jnf", - "jn", - "j1f", - "j1", - "j0f", - "j0", - "isnan", - "isinf", - "isfinite", - "ilogbf", - "ilogb", - "hypotf", - "hypot", - "htrunc", - "hsqrt", - "hsin", - "hrsqrt", - "hrint", - "hrcp", - "hlog2", - "hlog10", - "hlog", - "hfloor", - "hexp2", - "hexp10", - "hexp", - "hcos", - "hceil", - "h2trunc", - "h2sqrt", - "h2sin", - "h2rsqrt", - "h2rint", - "h2rcp", - "h2log2", - "h2log10", - "h2log", - "h2floor", - "h2exp2", - "h2exp10", - "h2exp", - "h2cos", - "h2ceil", - "frexpf", - "frexp", - "fmodf", - "fmod", - "fminf", - "fmin", - "fmaxf", - "fmax", - "fmaf", - "fma", - "floorf", - "floor", - "fdividef", - "fdimf", - "fdim", - "fabsf", - "fabs", - "expm1f", - "expm1", - "expf", - "exp2f", - "exp2", - "exp10f", - "exp10", - "exp", - "erfinvf", - "erfinv", - "erff", - "erfcxf", - "erfcx", - "erfcinvf", - "erfcinv", - "erfcf", - "erfc", - "erf", - "cyl_bessel_i1f", - "cyl_bessel_i1", - "cyl_bessel_i0f", - "cyl_bessel_i0", - "cospif", - "cospi", - "coshf", - "cosh", - "cosf", - "cos", - "copysignf", - "copysign", - "clock64", - "clock", - "ceilf", - "ceil", - "cbrtf", - "cbrt", - "atomicXor_system", - "atomicXor", - "atomicSub_system", - "atomicSub", - "atomicOr_system", - "atomicOr", - "atomicMin_system", - "atomicMin", - "atomicMax_system", - "atomicMax", - "atomicInc", - "atomicExch_system", - "atomicExch", - "atomicDec", - "atomicCAS_system", - "atomicCAS", - "atomicAnd_system", - "atomicAnd", - "atomicAdd_system", - "atomicAdd", - "atanhf", - "atanh", - "atanf", - "atan2f", - "atan2", 
- "atan", - "asinhf", - "asinh", - "asinf", - "asin", - "acoshf", - "acosh", - "acosf", - "acos", - "abs", - "__ushort_as_half", - "__ushort2half_rz", - "__ushort2half_ru", - "__ushort2half_rn", - "__ushort2half_rd", - "__usad", - "__urhadd", - "__umulhi", - "__umul64hi", - "__umul24", - "__ull2half_rz", - "__ull2half_ru", - "__ull2half_rn", - "__ull2half_rd", - "__ull2float_rz", - "__ull2float_ru", - "__ull2float_rn", - "__ull2float_rd", - "__ull2double_rz", - "__ull2double_ru", - "__ull2double_rn", - "__ull2double_rd", - "__uint_as_float", - "__uint2half_rz", - "__uint2half_ru", - "__uint2half_rn", - "__uint2half_rd", - "__uint2float_rz", - "__uint2float_ru", - "__uint2float_rn", - "__uint2float_rd", - "__uint2double_rn", - "__uhadd", - "__threadfence_system", - "__threadfence_block", - "__threadfence", - "__tanf", - "__syncthreads_or", - "__syncthreads_count", - "__syncthreads_and", - "__syncthreads", - "__sinf", - "__sincosf", - "__short_as_half", - "__short2half_rz", - "__short2half_ru", - "__short2half_rn", - "__short2half_rd", - "__shfl_xor_sync", - "__shfl_xor", - "__shfl_up_sync", - "__shfl_up", - "__shfl_sync", - "__shfl_down_sync", - "__shfl_down", - "__shfl", - "__saturatef", - "__sad", - "__rhadd", - "__powf", - "__popcll", - "__popc", - "__nv_cvt_halfraw_to_fp8", - "__nv_cvt_halfraw2_to_fp8x2", - "__nv_cvt_fp8x2_to_halfraw2", - "__nv_cvt_fp8_to_halfraw", - "__nv_cvt_float_to_fp8", - "__nv_cvt_float2_to_fp8x2", - "__nv_cvt_double_to_fp8", - "__nv_cvt_double2_to_fp8x2", - "__nv_cvt_bfloat16raw_to_fp8", - "__nv_cvt_bfloat16raw2_to_fp8x2", - "__mulhi", - "__mul64hi", - "__mul24", - "__match_any_sync", - "__match_all_sync", - "__lows2half2", - "__lows2bfloat162", - "__lowhigh2highlow", - "__low2half2", - "__low2half", - "__low2float", - "__longlong_as_double", - "__logf", - "__log2f", - "__log10f", - "__ll2half_rz", - "__ll2half_ru", - "__ll2half_rn", - "__ll2half_rd", - "__ll2float_rz", - "__ll2float_ru", - "__ll2float_rn", - "__ll2float_rd", - 
"__ll2double_rz", - "__ll2double_ru", - "__ll2double_rn", - "__ll2double_rd", - "__ldg", - "__ldcs", - "__ldcg", - "__ldca", - "__int_as_float", - "__int2half_rz", - "__int2half_ru", - "__int2half_rn", - "__int2half_rd", - "__int2float_rz", - "__int2float_ru", - "__int2float_rn", - "__int2float_rd", - "__int2double_rn", - "__hsub_sat", - "__hsub2_sat", - "__hsub2", - "__hsub", - "__hneu2", - "__hneu", - "__hneg2", - "__hneg", - "__hne2", - "__hne", - "__hmul_sat", - "__hmul2_sat", - "__hmul2", - "__hmul", - "__hmin_nan", - "__hmin", - "__hmax_nan", - "__hmax", - "__hltu2", - "__hltu", - "__hlt2", - "__hlt", - "__hleu2", - "__hleu", - "__hle2", - "__hle", - "__hisnan2", - "__hisnan", - "__hisinf", - "__hiloint2double", - "__highs2half2", - "__highs2bfloat162", - "__high2half2", - "__high2half", - "__high2float", - "__high2bfloat16", - "__hgtu2", - "__hgtu", - "__hgt2", - "__hgt", - "__hgeu2", - "__hgeu", - "__hge2", - "__hge", - "__hfma_sat", - "__hfma2_sat", - "__hfma2", - "__hfma", - "__hequ2", - "__hequ", - "__heq2", - "__heq", - "__hdiv", - "__hbneu2", - "__hbne2", - "__hbltu2", - "__hblt2", - "__hbleu2", - "__hble2", - "__hbgtu2", - "__hbgt2", - "__hbgeu2", - "__hbge2", - "__hbequ2", - "__hbeq2", - "__halves2half2", - "__half_as_ushort", - "__half_as_short", - "__half2ushort_rz", - "__half2ushort_ru", - "__half2ushort_rn", - "__half2ushort_rd", - "__half2ull_rz", - "__half2ull_ru", - "__half2ull_rn", - "__half2ull_rd", - "__half2uint_rz", - "__half2uint_ru", - "__half2uint_rn", - "__half2uint_rd", - "__half2short_rz", - "__half2short_ru", - "__half2short_rn", - "__half2short_rd", - "__half2ll_rz", - "__half2ll_ru", - "__half2ll_rn", - "__half2ll_rd", - "__half2int_rz", - "__half2int_ru", - "__half2int_rn", - "__half2int_rd", - "__half2half2", - "__half2float", - "__half22float2", - "__hadd_sat", - "__hadd2_sat", - "__hadd2", - "__hadd", - "__habs2", - "__habs", - "__h2div", - "__funnelshift_rc", - "__funnelshift_r", - "__funnelshift_lc", - "__funnelshift_l", - 
"__fsub_rn", - "__fsqrt_rn", - "__frsqrt_rn", - "__frcp_rn", - "__fmul_rn", - "__fmaf_rn", - "__fma_rn", - "__floats2half2_rn", - "__float_as_uint", - "__float_as_int", - "__float2ull_rz", - "__float2ull_ru", - "__float2ull_rn", - "__float2ull_rd", - "__float2uint_rz", - "__float2uint_ru", - "__float2uint_rn", - "__float2uint_rd", - "__float2ll_rz", - "__float2ll_ru", - "__float2ll_rn", - "__float2ll_rd", - "__float2int_rz", - "__float2int_ru", - "__float2int_rn", - "__float2int_rd", - "__float2half_rz", - "__float2half_ru", - "__float2half_rn", - "__float2half_rd", - "__float2half2_rn", - "__float2half", - "__float2bfloat16", - "__float22half2_rn", - "__ffsll", - "__ffs", - "__fdividef", - "__fdiv_rn", - "__fadd_rn", - "__expf", - "__exp10f", - "__dsub_rn", - "__dsqrt_rn", - "__drcp_rn", - "__double_as_longlong", - "__double2ull_rz", - "__double2ull_ru", - "__double2ull_rn", - "__double2ull_rd", - "__double2uint_rz", - "__double2uint_ru", - "__double2uint_rn", - "__double2uint_rd", - "__double2loint", - "__double2ll_rz", - "__double2ll_ru", - "__double2ll_rn", - "__double2ll_rd", - "__double2int_rz", - "__double2int_ru", - "__double2int_rn", - "__double2int_rd", - "__double2hiint", - "__double2float_rz", - "__double2float_ru", - "__double2float_rn", - "__double2float_rd", - "__double2bfloat16", - "__dmul_rn", - "__ddiv_rn", - "__dadd_rn", - "__cosf", - "__clzll", - "__clz", - "__byte_perm", - "__brevll", - "__brev", - "__bfloat162float", - "__bfloat162bfloat162", - "__bfloat1622float2", - "__ballot_sync", - "__ballot", - "__assertfail", - "__assert_fail", - "__any_sync", - "__any", - "__all_sync", - "__all", - "__activemask" - ) + foreach my $func (@SupportedDeviceFunctions) { # match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...)); # function with only global namespace qualifier '::' (aka ::umin(...)) should be treated as a device function (and warned as well as without such qualifier); @@ -9592,275 
+9594,277 @@ sub countSupportedDeviceFunctions { return $k; } +@UnsupportedDeviceFunctions = ( + "umul24", + "umin", + "umax", + "ullmin", + "ullmax", + "uint_as_float", + "uint2float", + "saturate", + "mulhi", + "mul64hi", + "mul24", + "make_half2", + "make_bfloat162", + "llmin", + "llmax", + "int_as_float", + "int2float", + "float_as_uint", + "float_as_int", + "float2int", + "fdivide", + "_ldsign", + "_fdsign", + "__vsubus4", + "__vsubus2", + "__vsubss4", + "__vsubss2", + "__vsub4", + "__vsub2", + "__vsetne4", + "__vsetne2", + "__vsetltu4", + "__vsetltu2", + "__vsetlts4", + "__vsetlts2", + "__vsetleu4", + "__vsetleu2", + "__vsetles4", + "__vsetles2", + "__vsetgtu4", + "__vsetgts4", + "__vsetgts2", + "__vsetgeu4", + "__vsetgeu2", + "__vsetges4", + "__vsetges2", + "__vseteq4", + "__vseteq2", + "__vsadu4", + "__vsadu2", + "__vsads4", + "__vsads2", + "__vnegss4", + "__vnegss2", + "__vneg4", + "__vneg2", + "__vminu4", + "__vminu2", + "__vmins4", + "__vmins2", + "__vmaxu4", + "__vmaxu2", + "__vmaxs4", + "__vmaxs2", + "__vhaddu4", + "__vhaddu2", + "__vcmpne4", + "__vcmpne2", + "__vcmpltu4", + "__vcmpltu2", + "__vcmplts4", + "__vcmplts2", + "__vcmpleu4", + "__vcmples4", + "__vcmples2", + "__vcmpgtu4", + "__vcmpgtu2", + "__vcmpgts4", + "__vcmpgts2", + "__vcmpgeu4", + "__vcmpgeu2", + "__vcmpges4", + "__vcmpges2", + "__vcmpeq4", + "__vcmpeq2", + "__vavgu4", + "__vavgu2", + "__vavgs4", + "__vavgs2", + "__vaddus4", + "__vaddus2", + "__vaddss4", + "__vaddss2", + "__vadd4", + "__vadd2", + "__vabsss4", + "__vabsss2", + "__vabsdiffu4", + "__vabsdiffu2", + "__vabsdiffs4", + "__vabsdiffs2", + "__vabs4", + "__vabs2", + "__ushort_as_bfloat16", + "__ushort2bfloat16_rz", + "__ushort2bfloat16_ru", + "__ushort2bfloat16_rn", + "__ushort2bfloat16_rd", + "__ull2bfloat16_rz", + "__ull2bfloat16_ru", + "__ull2bfloat16_rn", + "__ull2bfloat16_rd", + "__uint2bfloat16_rz", + "__uint2bfloat16_ru", + "__uint2bfloat16_rn", + "__uint2bfloat16_rd", + "__trap", + "__stwt", + "__stwb", + "__stcs", + 
"__stcg", + "__signbitl", + "__signbitf", + "__signbit", + "__short_as_bfloat16", + "__short2bfloat16_rz", + "__short2bfloat16_ru", + "__short2bfloat16_rn", + "__short2bfloat16_rd", + "__prof_trigger", + "__pm3", + "__pm2", + "__pm1", + "__pm0", + "__low2bfloat162", + "__low2bfloat16", + "__ll2bfloat16_rz", + "__ll2bfloat16_ru", + "__ll2bfloat16_rn", + "__ll2bfloat16_rd", + "__ldlu", + "__ldcv", + "__isnanl", + "__isnanf", + "__isnan", + "__isinfl", + "__isinff", + "__isinf", + "__int2bfloat16_rz", + "__int2bfloat16_ru", + "__int2bfloat16_rn", + "__int2bfloat16_rd", + "__hsub_rn", + "__hsub2_rn", + "__hneu2_mask", + "__hne2_mask", + "__hmul_rn", + "__hmul2_rn", + "__hmin2_nan", + "__hmin2", + "__hmax2_nan", + "__hmax2", + "__hltu2_mask", + "__hlt2_mask", + "__hleu2_mask", + "__hle2_mask", + "__high2bfloat162", + "__hgtu2_mask", + "__hgt2_mask", + "__hgeu2_mask", + "__hge2_mask", + "__hfma_relu", + "__hfma2_relu", + "__hequ2_mask", + "__heq2_mask", + "__hcmadd", + "__halves2bfloat162", + "__half2uchar_rz", + "__half2char_rz", + "__hadd_rn", + "__hadd2_rn", + "__fsub_rz", + "__fsub_ru", + "__fsub_rd", + "__fsqrt_rz", + "__fsqrt_ru", + "__fsqrt_rd", + "__frcp_rz", + "__frcp_ru", + "__frcp_rd", + "__fmul_rz", + "__fmul_ru", + "__fmul_rd", + "__fmaf_rz", + "__fmaf_ru", + "__fmaf_rd", + "__fma_rz", + "__fma_ru", + "__fma_rd", + "__floats2bfloat162_rn", + "__float2bfloat16_rz", + "__float2bfloat16_ru", + "__float2bfloat16_rn", + "__float2bfloat16_rd", + "__float2bfloat162_rn", + "__float22bfloat162_rn", + "__finitel", + "__finitef", + "__finite", + "__fdiv_rz", + "__fdiv_ru", + "__fdiv_rd", + "__fadd_rz", + "__fadd_ru", + "__fadd_rd", + "__dsub_rz", + "__dsub_ru", + "__dsub_rd", + "__dsqrt_rz", + "__dsqrt_ru", + "__dsqrt_rd", + "__drcp_rz", + "__drcp_ru", + "__drcp_rd", + "__double2half", + "__dmul_rz", + "__dmul_ru", + "__dmul_rd", + "__ddiv_rz", + "__ddiv_ru", + "__ddiv_rd", + "__dadd_rz", + "__dadd_ru", + "__dadd_rd", + "__brkpt", + "__bfloat16_as_ushort", + 
"__bfloat16_as_short", + "__bfloat162ushort_rz", + "__bfloat162ushort_ru", + "__bfloat162ushort_rn", + "__bfloat162ushort_rd", + "__bfloat162ull_rz", + "__bfloat162ull_ru", + "__bfloat162ull_rn", + "__bfloat162ull_rd", + "__bfloat162uint_rz", + "__bfloat162uint_ru", + "__bfloat162uint_rn", + "__bfloat162uint_rd", + "__bfloat162uchar_rz", + "__bfloat162short_rz", + "__bfloat162short_ru", + "__bfloat162short_rn", + "__bfloat162short_rd", + "__bfloat162ll_rz", + "__bfloat162ll_ru", + "__bfloat162ll_rn", + "__bfloat162ll_rd", + "__bfloat162int_rz", + "__bfloat162int_ru", + "__bfloat162int_rn", + "__bfloat162int_rd", + "__bfloat162char_rz", + "_Pow_int" + ); + sub warnUnsupportedDeviceFunctions { my $line_num = shift; my $k = 0; - foreach $func ( - "umul24", - "umin", - "umax", - "ullmin", - "ullmax", - "uint_as_float", - "uint2float", - "saturate", - "mulhi", - "mul64hi", - "mul24", - "make_half2", - "make_bfloat162", - "llmin", - "llmax", - "int_as_float", - "int2float", - "float_as_uint", - "float_as_int", - "float2int", - "fdivide", - "_ldsign", - "_fdsign", - "__vsubus4", - "__vsubus2", - "__vsubss4", - "__vsubss2", - "__vsub4", - "__vsub2", - "__vsetne4", - "__vsetne2", - "__vsetltu4", - "__vsetltu2", - "__vsetlts4", - "__vsetlts2", - "__vsetleu4", - "__vsetleu2", - "__vsetles4", - "__vsetles2", - "__vsetgtu4", - "__vsetgts4", - "__vsetgts2", - "__vsetgeu4", - "__vsetgeu2", - "__vsetges4", - "__vsetges2", - "__vseteq4", - "__vseteq2", - "__vsadu4", - "__vsadu2", - "__vsads4", - "__vsads2", - "__vnegss4", - "__vnegss2", - "__vneg4", - "__vneg2", - "__vminu4", - "__vminu2", - "__vmins4", - "__vmins2", - "__vmaxu4", - "__vmaxu2", - "__vmaxs4", - "__vmaxs2", - "__vhaddu4", - "__vhaddu2", - "__vcmpne4", - "__vcmpne2", - "__vcmpltu4", - "__vcmpltu2", - "__vcmplts4", - "__vcmplts2", - "__vcmpleu4", - "__vcmples4", - "__vcmples2", - "__vcmpgtu4", - "__vcmpgtu2", - "__vcmpgts4", - "__vcmpgts2", - "__vcmpgeu4", - "__vcmpgeu2", - "__vcmpges4", - "__vcmpges2", - "__vcmpeq4", 
- "__vcmpeq2", - "__vavgu4", - "__vavgu2", - "__vavgs4", - "__vavgs2", - "__vaddus4", - "__vaddus2", - "__vaddss4", - "__vaddss2", - "__vadd4", - "__vadd2", - "__vabsss4", - "__vabsss2", - "__vabsdiffu4", - "__vabsdiffu2", - "__vabsdiffs4", - "__vabsdiffs2", - "__vabs4", - "__vabs2", - "__ushort_as_bfloat16", - "__ushort2bfloat16_rz", - "__ushort2bfloat16_ru", - "__ushort2bfloat16_rn", - "__ushort2bfloat16_rd", - "__ull2bfloat16_rz", - "__ull2bfloat16_ru", - "__ull2bfloat16_rn", - "__ull2bfloat16_rd", - "__uint2bfloat16_rz", - "__uint2bfloat16_ru", - "__uint2bfloat16_rn", - "__uint2bfloat16_rd", - "__trap", - "__stwt", - "__stwb", - "__stcs", - "__stcg", - "__signbitl", - "__signbitf", - "__signbit", - "__short_as_bfloat16", - "__short2bfloat16_rz", - "__short2bfloat16_ru", - "__short2bfloat16_rn", - "__short2bfloat16_rd", - "__prof_trigger", - "__pm3", - "__pm2", - "__pm1", - "__pm0", - "__low2bfloat162", - "__low2bfloat16", - "__ll2bfloat16_rz", - "__ll2bfloat16_ru", - "__ll2bfloat16_rn", - "__ll2bfloat16_rd", - "__ldlu", - "__ldcv", - "__isnanl", - "__isnanf", - "__isnan", - "__isinfl", - "__isinff", - "__isinf", - "__int2bfloat16_rz", - "__int2bfloat16_ru", - "__int2bfloat16_rn", - "__int2bfloat16_rd", - "__hsub_rn", - "__hsub2_rn", - "__hneu2_mask", - "__hne2_mask", - "__hmul_rn", - "__hmul2_rn", - "__hmin2_nan", - "__hmin2", - "__hmax2_nan", - "__hmax2", - "__hltu2_mask", - "__hlt2_mask", - "__hleu2_mask", - "__hle2_mask", - "__high2bfloat162", - "__hgtu2_mask", - "__hgt2_mask", - "__hgeu2_mask", - "__hge2_mask", - "__hfma_relu", - "__hfma2_relu", - "__hequ2_mask", - "__heq2_mask", - "__hcmadd", - "__halves2bfloat162", - "__half2uchar_rz", - "__half2char_rz", - "__hadd_rn", - "__hadd2_rn", - "__fsub_rz", - "__fsub_ru", - "__fsub_rd", - "__fsqrt_rz", - "__fsqrt_ru", - "__fsqrt_rd", - "__frcp_rz", - "__frcp_ru", - "__frcp_rd", - "__fmul_rz", - "__fmul_ru", - "__fmul_rd", - "__fmaf_rz", - "__fmaf_ru", - "__fmaf_rd", - "__fma_rz", - "__fma_ru", - "__fma_rd", - 
"__floats2bfloat162_rn", - "__float2bfloat16_rz", - "__float2bfloat16_ru", - "__float2bfloat16_rn", - "__float2bfloat16_rd", - "__float2bfloat162_rn", - "__float22bfloat162_rn", - "__finitel", - "__finitef", - "__finite", - "__fdiv_rz", - "__fdiv_ru", - "__fdiv_rd", - "__fadd_rz", - "__fadd_ru", - "__fadd_rd", - "__dsub_rz", - "__dsub_ru", - "__dsub_rd", - "__dsqrt_rz", - "__dsqrt_ru", - "__dsqrt_rd", - "__drcp_rz", - "__drcp_ru", - "__drcp_rd", - "__double2half", - "__dmul_rz", - "__dmul_ru", - "__dmul_rd", - "__ddiv_rz", - "__ddiv_ru", - "__ddiv_rd", - "__dadd_rz", - "__dadd_ru", - "__dadd_rd", - "__brkpt", - "__bfloat16_as_ushort", - "__bfloat16_as_short", - "__bfloat162ushort_rz", - "__bfloat162ushort_ru", - "__bfloat162ushort_rn", - "__bfloat162ushort_rd", - "__bfloat162ull_rz", - "__bfloat162ull_ru", - "__bfloat162ull_rn", - "__bfloat162ull_rd", - "__bfloat162uint_rz", - "__bfloat162uint_ru", - "__bfloat162uint_rn", - "__bfloat162uint_rd", - "__bfloat162uchar_rz", - "__bfloat162short_rz", - "__bfloat162short_ru", - "__bfloat162short_rn", - "__bfloat162short_rd", - "__bfloat162ll_rz", - "__bfloat162ll_ru", - "__bfloat162ll_rn", - "__bfloat162ll_rd", - "__bfloat162int_rz", - "__bfloat162int_ru", - "__bfloat162int_rn", - "__bfloat162int_rd", - "__bfloat162char_rz", - "_Pow_int" - ) + foreach my $func (@UnsupportedDeviceFunctions) { # match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...)); # function with only global namespace qualifier '::' (aka ::umin(...)) should be treated as a device function (and warned as well as without such qualifier); @@ -9926,1401 +9930,1403 @@ sub warnRemovedFunctions { return $k; } +@HipOnlyUnsupportedFunctions = ( + "cutensorWriteKernelCacheToFile", + "cutensorTensorDescriptor", + "cutensorReduce", + "cutensorReadKernelCacheFromFile", + "cutensorPlanPreferenceSetAttribute", + "cutensorPlanPreferenceAttribute_t", + "cutensorPlanGetAttribute", + "cutensorPlanAttribute_t", 
+ "cutensorPlan", + "cutensorPermute", + "cutensorOperationDescriptorSetAttribute", + "cutensorOperationDescriptorGetAttribute", + "cutensorOperationDescriptorAttribute_t", + "cutensorMgTensorDescriptor_t", + "cutensorMgTensorDescriptor_s", + "cutensorMgHostDevice_t", + "cutensorMgHandle_t", + "cutensorMgHandle_s", + "cutensorMgDestroyTensorDescriptor", + "cutensorMgDestroyCopyPlan", + "cutensorMgDestroyCopyDescriptor", + "cutensorMgDestroyContractionPlan", + "cutensorMgDestroyContractionFind", + "cutensorMgDestroyContractionDescriptor", + "cutensorMgDestroy", + "cutensorMgCreateTensorDescriptor", + "cutensorMgCreateCopyPlan", + "cutensorMgCreateCopyDescriptor", + "cutensorMgCreateContractionPlan", + "cutensorMgCreateContractionFind", + "cutensorMgCreateContractionDescriptor", + "cutensorMgCreate", + "cutensorMgCopyPlan_t", + "cutensorMgCopyPlan_s", + "cutensorMgCopyGetWorkspace", + "cutensorMgCopyDescriptor_t", + "cutensorMgCopyDescriptor_s", + "cutensorMgCopy", + "cutensorMgContractionPlan_t", + "cutensorMgContractionPlan_s", + "cutensorMgContractionGetWorkspace", + "cutensorMgContractionFind_t", + "cutensorMgContractionFind_s", + "cutensorMgContractionFindSetAttribute", + "cutensorMgContractionFindAttribute_t", + "cutensorMgContractionDescriptor_t", + "cutensorMgContractionDescriptor_s", + "cutensorMgContraction", + "cutensorMgAlgo_t", + "cutensorJitMode_t", + "cutensorHandleWritePlanCacheToFile", + "cutensorHandleResizePlanCache", + "cutensorHandleReadPlanCacheFromFile", + "cutensorHandle", + "cutensorGetVersion", + "cutensorEstimateWorkspaceSize", + "cutensorElementwiseTrinaryExecute", + "cutensorElementwiseBinaryExecute", + "cutensorDestroyTensorDescriptor", + "cutensorDestroyPlanPreference", + "cutensorDestroyPlan", + "cutensorDestroyOperationDescriptor", + "cutensorCreateTensorDescriptor", + "cutensorCreateReduction", + "cutensorCreatePlanPreference", + "cutensorCreatePlan", + "cutensorCreatePermutation", + "cutensorCreateElementwiseTrinary", + 
"cutensorCreateElementwiseBinary", + "cutensorCreateContraction", + "cutensorCacheMode_t", + "cutensorAutotuneMode_t", + "cusparseZhybsv_solve", + "cusparseZhybsv_analysis", + "cusparseZhyb2dense", + "cusparseZhyb2csc", + "cusparseZgtsv_nopivot", + "cusparseZgtsvStridedBatch", + "cusparseZgtsv", + "cusparseZgebsr2gebsr_bufferSizeExt", + "cusparseZgebsr2gebsc_bufferSizeExt", + "cusparseZdense2hyb", + "cusparseZcsrsv_solve", + "cusparseZcsrsv_analysis", + "cusparseZcsrsm_solve", + "cusparseZcsrsm_analysis", + "cusparseZcsrmv_mp", + "cusparseZcsrilu0", + "cusparseZcsric0", + "cusparseZcsr2gebsr_bufferSizeExt", + "cusparseZcsc2hyb", + "cusparseZbsrsm2_bufferSizeExt", + "cusparseZbsrilu02_bufferSizeExt", + "cusparseZbsric02_bufferSizeExt", + "cusparseXgebsr2csr", + "cusparseSpVecDescr", + "cusparseSpSV_updateMatrix", + "cusparseSpSVUpdate_t", + "cusparseSpSM_updateMatrix", + "cusparseSpSMUpdate_t", + "cusparseSpMatSetNumBatches", + "cusparseSpMatGetNumBatches", + "cusparseSpMatDescr", + "cusparseSpMMOp_destroyPlan", + "cusparseSpMMOp_createPlan", + "cusparseSpMMOpPlan_t", + "cusparseSpMMOpPlan", + "cusparseSpMMOpAlg_t", + "cusparseSpMMOp", + "cusparseSpGEMM_getNumProducts", + "cusparseSpGEMM_estimateMemory", + "cusparseSolveAnalysisInfo_t", + "cusparseSolveAnalysisInfo", + "cusparseSideMode_t", + "cusparseShybsv_solve", + "cusparseShybsv_analysis", + "cusparseShyb2dense", + "cusparseShyb2csc", + "cusparseSgtsv_nopivot", + "cusparseSgtsvStridedBatch", + "cusparseSgtsv", + "cusparseSgebsr2gebsr_bufferSizeExt", + "cusparseSgebsr2gebsc_bufferSizeExt", + "cusparseSdense2hyb", + "cusparseScsrsv_solve", + "cusparseScsrsv_analysis", + "cusparseScsrsm_solve", + "cusparseScsrsm_analysis", + "cusparseScsrmv_mp", + "cusparseScsrilu0", + "cusparseScsric0", + "cusparseScsr2gebsr_bufferSizeExt", + "cusparseScsc2hyb", + "cusparseSbsrsm2_bufferSizeExt", + "cusparseSbsrilu02_bufferSizeExt", + "cusparseSbsric02_bufferSizeExt", + "cusparseMatDescr", + "cusparseLoggerSetMask", + 
"cusparseLoggerSetLevel", + "cusparseLoggerSetFile", + "cusparseLoggerSetCallback", + "cusparseLoggerOpenFile", + "cusparseLoggerForceDisable", + "cusparseLoggerCallback_t", + "cusparseHybMat", + "cusparseHpruneDense2csr_bufferSizeExt", + "cusparseHpruneDense2csrNnzByPercentage", + "cusparseHpruneDense2csrNnz", + "cusparseHpruneDense2csrByPercentage_bufferSizeExt", + "cusparseHpruneDense2csrByPercentage", + "cusparseHpruneDense2csr", + "cusparseHpruneCsr2csr_bufferSizeExt", + "cusparseHpruneCsr2csrNnzByPercentage", + "cusparseHpruneCsr2csrNnz", + "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", + "cusparseHpruneCsr2csrByPercentage", + "cusparseHpruneCsr2csr", + "cusparseGetLevelInfo", + "cusparseDnVecDescr", + "cusparseDnMatDescr", + "cusparseDhybsv_solve", + "cusparseDhybsv_analysis", + "cusparseDhyb2dense", + "cusparseDhyb2csc", + "cusparseDgtsv_nopivot", + "cusparseDgtsvStridedBatch", + "cusparseDgtsv", + "cusparseDgebsr2gebsr_bufferSizeExt", + "cusparseDgebsr2gebsc_bufferSizeExt", + "cusparseDestroySolveAnalysisInfo", + "cusparseDdense2hyb", + "cusparseDcsrsv_solve", + "cusparseDcsrsv_analysis", + "cusparseDcsrsm_solve", + "cusparseDcsrsm_analysis", + "cusparseDcsrmv_mp", + "cusparseDcsrilu0", + "cusparseDcsric0", + "cusparseDcsr2gebsr_bufferSizeExt", + "cusparseDcsc2hyb", + "cusparseDbsrsm2_bufferSizeExt", + "cusparseDbsrilu02_bufferSizeExt", + "cusparseDbsric02_bufferSizeExt", + "cusparseCsrsv_solveEx", + "cusparseCsrsv_analysisEx", + "cusparseCsrmvEx_bufferSize", + "cusparseCsrmvEx", + "cusparseCsrilu0Ex", + "cusparseCsr2cscEx", + "cusparseCreateSolveAnalysisInfo", + "cusparseCreateSlicedEll", + "cusparseCreateConstSlicedEll", + "cusparseCreateConstBsr", + "cusparseCreateBsr", + "cusparseContext", + "cusparseConstrainedGeMM_bufferSize", + "cusparseConstrainedGeMM", + "cusparseColorInfo", + "cusparseColorAlg_t", + "cusparseChybsv_solve", + "cusparseChybsv_analysis", + "cusparseChyb2dense", + "cusparseChyb2csc", + "cusparseCgtsv_nopivot", + 
"cusparseCgtsvStridedBatch", + "cusparseCgtsv", + "cusparseCgebsr2gebsr_bufferSizeExt", + "cusparseCgebsr2gebsc_bufferSizeExt", + "cusparseCdense2hyb", + "cusparseCcsrsv_solve", + "cusparseCcsrsv_analysis", + "cusparseCcsrsm_solve", + "cusparseCcsrsm_analysis", + "cusparseCcsrmv_mp", + "cusparseCcsrilu0", + "cusparseCcsric0", + "cusparseCcsr2gebsr_bufferSizeExt", + "cusparseCcsc2hyb", + "cusparseCbsrsm2_bufferSizeExt", + "cusparseCbsrilu02_bufferSizeExt", + "cusparseCbsric02_bufferSizeExt", + "cusparseBsrSetStridedBatch", + "cusparseAlgMode_t", + "curand_mtgp32_specific", + "curand_mtgp32_single_specific", + "curand_mtgp32_single", + "curand_Philox4x32_10", + "curandMethod_t", + "curandMethod", + "curandHistogramM2_t", + "curandHistogramM2_st", + "curandHistogramM2V_t", + "curandHistogramM2V_st", + "curandHistogramM2K_t", + "curandHistogramM2K_st", + "curandGetProperty", + "curandDistribution_t", + "curandDistribution_st", + "curandDistributionShift_t", + "curandDistributionShift_st", + "curandDistributionM2Shift_t", + "curandDistributionM2Shift_st", + "cublasZtrttp", + "cublasZtpttr", + "cublasZmatinvBatched", + "cublasZgemm3m_64", + "cublasZgemm3m", + "cublasXerbla", + "cublasUint8gemmBias", + "cublasTSTgemvStridedBatched_64", + "cublasTSTgemvStridedBatched", + "cublasTSTgemvBatched_64", + "cublasTSTgemvBatched", + "cublasTSSgemvStridedBatched_64", + "cublasTSSgemvStridedBatched", + "cublasTSSgemvBatched_64", + "cublasTSSgemvBatched", + "cublasSwapEx_64", + "cublasSwapEx", + "cublasStrttp", + "cublasStpttr", + "cublasSmatinvBatched", + "cublasShutdown", + "cublasSgemmGroupedBatched_64", + "cublasSgemmGroupedBatched", + "cublasSgemmEx_64", + "cublasSgemmEx", + "cublasSetVector_64", + "cublasSetVectorAsync_64", + "cublasSetSmCountTarget", + "cublasSetMatrix_64", + "cublasSetMatrixAsync_64", + "cublasSetLoggerCallback", + "cublasSetKernelStream", + "cublasRotmgEx", + "cublasRotmEx_64", + "cublasRotmEx", + "cublasRotgEx", + "cublasMigrateComputeType", + 
"cublasLtReductionScheme_t", + "cublasLtPointerModeMask_t", + "cublasLtNumericalImplFlags_t", + "cublasLtMatrixTransformDescInit", + "cublasLtMatrixLayoutInit", + "cublasLtMatmulTile_t", + "cublasLtMatmulStages_t", + "cublasLtMatmulSearch_t", + "cublasLtMatmulPreferenceInit", + "cublasLtMatmulInnerShape_t", + "cublasLtMatmulDescInit", + "cublasLtMatmulAlgoInit", + "cublasLtMatmulAlgoGetIds", + "cublasLtMatmulAlgoConfigSetAttribute", + "cublasLtMatmulAlgoConfigGetAttribute", + "cublasLtMatmulAlgoConfigAttributes_t", + "cublasLtMatmulAlgoCheck", + "cublasLtMatmulAlgoCapGetAttribute", + "cublasLtMatmulAlgoCapAttributes_t", + "cublasLtLoggerSetMask", + "cublasLtLoggerSetLevel", + "cublasLtLoggerSetFile", + "cublasLtLoggerSetCallback", + "cublasLtLoggerOpenFile", + "cublasLtLoggerForceDisable", + "cublasLtLoggerCallback_t", + "cublasLtHeuristicsCacheSetCapacity", + "cublasLtHeuristicsCacheGetCapacity", + "cublasLtGetVersion", + "cublasLtGetStatusString", + "cublasLtGetStatusName", + "cublasLtGetProperty", + "cublasLtGetCudartVersion", + "cublasLtDisableCpuInstructionsSetMask", + "cublasLtContext", + "cublasLtClusterShape_t", + "cublasLoggerConfigure", + "cublasLogCallback", + "cublasInit", + "cublasIaminEx_64", + "cublasIaminEx", + "cublasIamaxEx_64", + "cublasIamaxEx", + "cublasHSSgemvStridedBatched_64", + "cublasHSSgemvStridedBatched", + "cublasHSSgemvBatched_64", + "cublasHSSgemvBatched", + "cublasHSHgemvStridedBatched_64", + "cublasHSHgemvStridedBatched", + "cublasHSHgemvBatched_64", + "cublasHSHgemvBatched", + "cublasGetVersion_v2", + "cublasGetVersion", + "cublasGetVector_64", + "cublasGetVectorAsync_64", + "cublasGetStatusString", + "cublasGetStatusName", + "cublasGetSmCountTarget", + "cublasGetProperty", + "cublasGetMatrix_64", + "cublasGetMatrixAsync_64", + "cublasGetLoggerCallback", + "cublasGetError", + "cublasGetCudartVersion", + "cublasGemmGroupedBatchedEx_64", + "cublasGemmGroupedBatchedEx", + "cublasFree", + "cublasDtrttp", + "cublasDtpttr", + 
"cublasDmatinvBatched", + "cublasDgemmGroupedBatched_64", + "cublasDgemmGroupedBatched", + "cublasCtrttp", + "cublasCtpttr", + "cublasCsyrkEx_64", + "cublasCsyrkEx", + "cublasCsyrk3mEx_64", + "cublasCsyrk3mEx", + "cublasCopyEx_64", + "cublasCopyEx", + "cublasContext", + "cublasCmatinvBatched", + "cublasCherkEx_64", + "cublasCherkEx", + "cublasCherk3mEx_64", + "cublasCherk3mEx", + "cublasCgemmEx_64", + "cublasCgemmEx", + "cublasCgemm3m_64", + "cublasCgemm3mStridedBatched_64", + "cublasCgemm3mStridedBatched", + "cublasCgemm3mEx_64", + "cublasCgemm3mEx", + "cublasCgemm3mBatched_64", + "cublasCgemm3mBatched", + "cublasCgemm3m", + "cublasAsumEx_64", + "cublasAsumEx", + "cublasAlloc", + "csrsv2Info", + "csrsm2Info", + "__curand_umul", + "CUTENSOR_WORKSPACE_DEFAULT", + "CUTENSOR_STATUS_MAPPING_ERROR", + "CUTENSOR_STATUS_LICENSE_ERROR", + "CUTENSOR_STATUS_CUDA_ERROR", + "CUTENSOR_STATUS_CUBLAS_ERROR", + "CUTENSOR_R_MIN_TF32", + "CUTENSOR_R_MIN_8U", + "CUTENSOR_R_MIN_8I", + "CUTENSOR_R_MIN_64F", + "CUTENSOR_R_MIN_32U", + "CUTENSOR_R_MIN_32I", + "CUTENSOR_R_MIN_32F", + "CUTENSOR_R_MIN_16F", + "CUTENSOR_R_MIN_16BF", + "CUTENSOR_R_64U", + "CUTENSOR_R_64I", + "CUTENSOR_R_4U", + "CUTENSOR_R_4I", + "CUTENSOR_R_16U", + "CUTENSOR_R_16I", + "CUTENSOR_PLAN_REQUIRED_WORKSPACE", + "CUTENSOR_PLAN_PREFERENCE_KERNEL_RANK", + "CUTENSOR_PLAN_PREFERENCE_JIT", + "CUTENSOR_PLAN_PREFERENCE_INCREMENTAL_COUNT", + "CUTENSOR_PLAN_PREFERENCE_CACHE_MODE", + "CUTENSOR_PLAN_PREFERENCE_AUTOTUNE_MODE", + "CUTENSOR_PLAN_PREFERENCE_ALGO", + "CUTENSOR_OP_TANH", + "CUTENSOR_OP_TAN", + "CUTENSOR_OP_SWISH", + "CUTENSOR_OP_SOFT_SIGN", + "CUTENSOR_OP_SOFT_PLUS", + "CUTENSOR_OP_SINH", + "CUTENSOR_OP_SIN", + "CUTENSOR_OP_SIGMOID", + "CUTENSOR_OP_RELU", + "CUTENSOR_OP_RCP", + "CUTENSOR_OP_NEG", + "CUTENSOR_OP_MISH", + "CUTENSOR_OP_LOG", + "CUTENSOR_OP_FLOOR", + "CUTENSOR_OP_EXP", + "CUTENSOR_OP_COSH", + "CUTENSOR_OP_COS", + "CUTENSOR_OP_CONJ", + "CUTENSOR_OP_CEIL", + "CUTENSOR_OP_ATANH", + "CUTENSOR_OP_ATAN", + 
"CUTENSOR_OP_ASINH", + "CUTENSOR_OP_ASIN", + "CUTENSOR_OP_ACOSH", + "CUTENSOR_OP_ACOS", + "CUTENSOR_OP_ABS", + "CUTENSOR_OPERATION_DESCRIPTOR_TAG", + "CUTENSOR_OPERATION_DESCRIPTOR_SCALAR_TYPE", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_VALUE", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_RIGHT", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_LEFT", + "CUTENSOR_OPERATION_DESCRIPTOR_MOVED_BYTES", + "CUTENSOR_OPERATION_DESCRIPTOR_FLOPS", + "CUTENSOR_MG_DEVICE_HOST_PINNED", + "CUTENSOR_MG_DEVICE_HOST", + "CUTENSOR_JIT_MODE_NONE", + "CUTENSOR_JIT_MODE_DEFAULT", + "CUTENSOR_C_MIN_TF32", + "CUTENSOR_C_MIN_64F", + "CUTENSOR_C_MIN_32F", + "CUTENSOR_C_MIN_16F", + "CUTENSOR_C_8U", + "CUTENSOR_C_8I", + "CUTENSOR_C_64U", + "CUTENSOR_C_64I", + "CUTENSOR_C_4U", + "CUTENSOR_C_4I", + "CUTENSOR_C_32U", + "CUTENSOR_C_32I", + "CUTENSOR_C_16U", + "CUTENSOR_C_16I", + "CUTENSOR_C_16F", + "CUTENSOR_C_16BF", + "CUTENSOR_COMPUTE_TF32", + "CUTENSOR_COMPUTE_3XTF32", + "CUTENSOR_CACHE_MODE_PEDANTIC", + "CUTENSOR_CACHE_MODE_NONE", + "CUTENSOR_AUTOTUNE_MODE_NONE", + "CUTENSOR_AUTOTUNE_MODE_INCREMENTAL", + "CUTENSOR_ALGO_TTGT", + "CUTENSOR_ALGO_TGETT", + "CUTENSOR_ALGO_GETT", + "CUTENSORMG_CONTRACTION_FIND_ATTRIBUTE_MAX", + "CUTENSORMG_ALGO_DEFAULT", + "CUSPARSE_SPSV_UPDATE_GENERAL", + "CUSPARSE_SPSV_UPDATE_DIAGONAL", + "CUSPARSE_SPSM_UPDATE_GENERAL", + "CUSPARSE_SPSM_UPDATE_DIAGONAL", + "CUSPARSE_SPMV_SELL_ALG1", + "CUSPARSE_SPMM_OP_ALG_DEFAULT", + "CUSPARSE_SPMM_BSR_ALG1", + "CUSPARSE_SPMMA_PREPROCESS", + "CUSPARSE_SPMMA_ALG4", + "CUSPARSE_SPMMA_ALG3", + "CUSPARSE_SPMMA_ALG2", + "CUSPARSE_SPMMA_ALG1", + "CUSPARSE_SIDE_RIGHT", + "CUSPARSE_SIDE_LEFT", + "CUSPARSE_FORMAT_SLICED_ELLPACK", + "CUSPARSE_FORMAT_BSR", + "CUSPARSE_COLOR_ALG1", + "CUSPARSE_COLOR_ALG0", + "CUSPARSE_ALG_NAIVE", + "CUSPARSE_ALG_MERGE_PATH", + "CUSPARSE_ALG1", + "CUSPARSE_ALG0", + "CURAND_REJECTION", + "CURAND_POISSON", + "CURAND_M2", + "CURAND_M1", + "CURAND_KNUTH", + "CURAND_ITR", + "CURAND_HITR", + "CURAND_FAST_REJECTION", + 
"CURAND_DISCRETE_GAUSS", + "CURAND_DEVICE_API", + "CURAND_DEFINITION", + "CURAND_CHOOSE_BEST", + "CURAND_BINARY_SEARCH", + "CURAND_3RD", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + "CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", + "CUBLAS_OP_CONJG", + "CUBLAS_GEMM_DFALT_TENSOR_OP", + "CUBLAS_GEMM_DEFAULT_TENSOR_OP", + "CUBLAS_GEMM_ALGO9_TENSOR_OP", + "CUBLAS_GEMM_ALGO9", + "CUBLAS_GEMM_ALGO8_TENSOR_OP", + "CUBLAS_GEMM_ALGO8", + "CUBLAS_GEMM_ALGO7_TENSOR_OP", + "CUBLAS_GEMM_ALGO7", + "CUBLAS_GEMM_ALGO6_TENSOR_OP", + "CUBLAS_GEMM_ALGO6", + "CUBLAS_GEMM_ALGO5_TENSOR_OP", + "CUBLAS_GEMM_ALGO5", + "CUBLAS_GEMM_ALGO4_TENSOR_OP", + "CUBLAS_GEMM_ALGO4", + "CUBLAS_GEMM_ALGO3_TENSOR_OP", + "CUBLAS_GEMM_ALGO3", + "CUBLAS_GEMM_ALGO2_TENSOR_OP", + "CUBLAS_GEMM_ALGO23", + "CUBLAS_GEMM_ALGO22", + "CUBLAS_GEMM_ALGO21", + "CUBLAS_GEMM_ALGO20", + "CUBLAS_GEMM_ALGO2", + "CUBLAS_GEMM_ALGO1_TENSOR_OP", + "CUBLAS_GEMM_ALGO19", + "CUBLAS_GEMM_ALGO18", + "CUBLAS_GEMM_ALGO17", + "CUBLAS_GEMM_ALGO16", + "CUBLAS_GEMM_ALGO15_TENSOR_OP", + "CUBLAS_GEMM_ALGO15", + "CUBLAS_GEMM_ALGO14_TENSOR_OP", + "CUBLAS_GEMM_ALGO14", + "CUBLAS_GEMM_ALGO13_TENSOR_OP", + "CUBLAS_GEMM_ALGO13", + "CUBLAS_GEMM_ALGO12_TENSOR_OP", + "CUBLAS_GEMM_ALGO12", + "CUBLAS_GEMM_ALGO11_TENSOR_OP", + "CUBLAS_GEMM_ALGO11", + "CUBLAS_GEMM_ALGO10_TENSOR_OP", + "CUBLAS_GEMM_ALGO10", + "CUBLAS_GEMM_ALGO1", + "CUBLAS_GEMM_ALGO0_TENSOR_OP", + "CUBLAS_GEMM_ALGO0", + "CUBLASLT_SEARCH_RESERVED_09", + "CUBLASLT_SEARCH_RESERVED_08", + "CUBLASLT_SEARCH_RESERVED_07", + "CUBLASLT_SEARCH_RESERVED_06", + "CUBLASLT_SEARCH_RESERVED_05", + "CUBLASLT_SEARCH_RESERVED_04", + "CUBLASLT_SEARCH_RESERVED_03", + "CUBLASLT_SEARCH_RESERVED_02", + "CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID", + "CUBLASLT_SEARCH_BEST_FIT", + "CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE", + "CUBLASLT_REDUCTION_SCHEME_NONE", + 
"CUBLASLT_REDUCTION_SCHEME_MASK", + "CUBLASLT_REDUCTION_SCHEME_INPLACE", + "CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE", + "CUBLASLT_POINTER_MODE_MASK_HOST", + "CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR", + "CUBLASLT_POINTER_MODE_MASK_DEVICE", + "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO", + "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST", + "CUBLASLT_POINTER_MODE_DEVICE_VECTOR", + "CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO", + "CUBLASLT_ORDER_COL4_4R2_8C", + "CUBLASLT_ORDER_COL32_2R_4R4", + "CUBLASLT_ORDER_COL32", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F", + "CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET", + "CUBLASLT_MATMUL_TILE_UNDEFINED", + "CUBLASLT_MATMUL_TILE_END", + "CUBLASLT_MATMUL_TILE_96x64", + "CUBLASLT_MATMUL_TILE_96x512", + "CUBLASLT_MATMUL_TILE_96x448", + "CUBLASLT_MATMUL_TILE_96x384", + "CUBLASLT_MATMUL_TILE_96x320", + "CUBLASLT_MATMUL_TILE_96x256", + "CUBLASLT_MATMUL_TILE_96x192", + "CUBLASLT_MATMUL_TILE_96x128", + "CUBLASLT_MATMUL_TILE_8x8", + "CUBLASLT_MATMUL_TILE_8x768", + 
"CUBLASLT_MATMUL_TILE_8x704", + "CUBLASLT_MATMUL_TILE_8x640", + "CUBLASLT_MATMUL_TILE_8x64", + "CUBLASLT_MATMUL_TILE_8x576", + "CUBLASLT_MATMUL_TILE_8x512", + "CUBLASLT_MATMUL_TILE_8x448", + "CUBLASLT_MATMUL_TILE_8x384", + "CUBLASLT_MATMUL_TILE_8x320", + "CUBLASLT_MATMUL_TILE_8x32", + "CUBLASLT_MATMUL_TILE_8x256", + "CUBLASLT_MATMUL_TILE_8x192", + "CUBLASLT_MATMUL_TILE_8x16", + "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_88x64", + "CUBLASLT_MATMUL_TILE_88x512", + "CUBLASLT_MATMUL_TILE_88x448", + "CUBLASLT_MATMUL_TILE_88x384", + "CUBLASLT_MATMUL_TILE_88x320", + "CUBLASLT_MATMUL_TILE_88x256", + "CUBLASLT_MATMUL_TILE_88x192", + "CUBLASLT_MATMUL_TILE_88x128", + "CUBLASLT_MATMUL_TILE_80x64", + "CUBLASLT_MATMUL_TILE_80x576", + "CUBLASLT_MATMUL_TILE_80x512", + "CUBLASLT_MATMUL_TILE_80x448", + "CUBLASLT_MATMUL_TILE_80x384", + "CUBLASLT_MATMUL_TILE_80x320", + "CUBLASLT_MATMUL_TILE_80x256", + "CUBLASLT_MATMUL_TILE_80x192", + "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x80", + "CUBLASLT_MATMUL_TILE_768x8", + "CUBLASLT_MATMUL_TILE_768x72", + "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_768x56", + "CUBLASLT_MATMUL_TILE_768x48", + "CUBLASLT_MATMUL_TILE_768x40", + "CUBLASLT_MATMUL_TILE_768x32", + "CUBLASLT_MATMUL_TILE_768x24", + "CUBLASLT_MATMUL_TILE_768x16", + "CUBLASLT_MATMUL_TILE_760x64", + "CUBLASLT_MATMUL_TILE_752x64", + "CUBLASLT_MATMUL_TILE_744x64", + "CUBLASLT_MATMUL_TILE_736x64", + "CUBLASLT_MATMUL_TILE_72x640", + "CUBLASLT_MATMUL_TILE_72x64", + "CUBLASLT_MATMUL_TILE_72x576", + "CUBLASLT_MATMUL_TILE_72x512", + "CUBLASLT_MATMUL_TILE_72x448", + "CUBLASLT_MATMUL_TILE_72x384", + "CUBLASLT_MATMUL_TILE_72x320", + "CUBLASLT_MATMUL_TILE_72x256", + "CUBLASLT_MATMUL_TILE_72x192", + "CUBLASLT_MATMUL_TILE_72x128", + "CUBLASLT_MATMUL_TILE_728x64", + "CUBLASLT_MATMUL_TILE_720x64", + "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x88", + "CUBLASLT_MATMUL_TILE_704x80", + "CUBLASLT_MATMUL_TILE_704x8", + "CUBLASLT_MATMUL_TILE_704x72", + 
"CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_704x56", + "CUBLASLT_MATMUL_TILE_704x48", + "CUBLASLT_MATMUL_TILE_704x40", + "CUBLASLT_MATMUL_TILE_704x32", + "CUBLASLT_MATMUL_TILE_704x24", + "CUBLASLT_MATMUL_TILE_704x16", + "CUBLASLT_MATMUL_TILE_696x64", + "CUBLASLT_MATMUL_TILE_688x64", + "CUBLASLT_MATMUL_TILE_680x64", + "CUBLASLT_MATMUL_TILE_672x64", + "CUBLASLT_MATMUL_TILE_664x64", + "CUBLASLT_MATMUL_TILE_656x64", + "CUBLASLT_MATMUL_TILE_64x96", + "CUBLASLT_MATMUL_TILE_64x88", + "CUBLASLT_MATMUL_TILE_64x80", + "CUBLASLT_MATMUL_TILE_64x8", + "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x760", + "CUBLASLT_MATMUL_TILE_64x752", + "CUBLASLT_MATMUL_TILE_64x744", + "CUBLASLT_MATMUL_TILE_64x736", + "CUBLASLT_MATMUL_TILE_64x728", + "CUBLASLT_MATMUL_TILE_64x720", + "CUBLASLT_MATMUL_TILE_64x72", + "CUBLASLT_MATMUL_TILE_64x712", + "CUBLASLT_MATMUL_TILE_64x704", + "CUBLASLT_MATMUL_TILE_64x696", + "CUBLASLT_MATMUL_TILE_64x688", + "CUBLASLT_MATMUL_TILE_64x680", + "CUBLASLT_MATMUL_TILE_64x672", + "CUBLASLT_MATMUL_TILE_64x664", + "CUBLASLT_MATMUL_TILE_64x656", + "CUBLASLT_MATMUL_TILE_64x648", + "CUBLASLT_MATMUL_TILE_64x640", + "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x632", + "CUBLASLT_MATMUL_TILE_64x624", + "CUBLASLT_MATMUL_TILE_64x616", + "CUBLASLT_MATMUL_TILE_64x608", + "CUBLASLT_MATMUL_TILE_64x600", + "CUBLASLT_MATMUL_TILE_64x592", + "CUBLASLT_MATMUL_TILE_64x584", + "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x568", + "CUBLASLT_MATMUL_TILE_64x560", + "CUBLASLT_MATMUL_TILE_64x56", + "CUBLASLT_MATMUL_TILE_64x552", + "CUBLASLT_MATMUL_TILE_64x544", + "CUBLASLT_MATMUL_TILE_64x536", + "CUBLASLT_MATMUL_TILE_64x528", + "CUBLASLT_MATMUL_TILE_64x520", + "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x504", + "CUBLASLT_MATMUL_TILE_64x496", + "CUBLASLT_MATMUL_TILE_64x488", + "CUBLASLT_MATMUL_TILE_64x480", + "CUBLASLT_MATMUL_TILE_64x48", + "CUBLASLT_MATMUL_TILE_64x472", + "CUBLASLT_MATMUL_TILE_64x464", + 
"CUBLASLT_MATMUL_TILE_64x456", + "CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x440", + "CUBLASLT_MATMUL_TILE_64x432", + "CUBLASLT_MATMUL_TILE_64x424", + "CUBLASLT_MATMUL_TILE_64x416", + "CUBLASLT_MATMUL_TILE_64x408", + "CUBLASLT_MATMUL_TILE_64x400", + "CUBLASLT_MATMUL_TILE_64x40", + "CUBLASLT_MATMUL_TILE_64x392", + "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x376", + "CUBLASLT_MATMUL_TILE_64x368", + "CUBLASLT_MATMUL_TILE_64x360", + "CUBLASLT_MATMUL_TILE_64x352", + "CUBLASLT_MATMUL_TILE_64x344", + "CUBLASLT_MATMUL_TILE_64x336", + "CUBLASLT_MATMUL_TILE_64x328", + "CUBLASLT_MATMUL_TILE_64x320", + "CUBLASLT_MATMUL_TILE_64x32", + "CUBLASLT_MATMUL_TILE_64x312", + "CUBLASLT_MATMUL_TILE_64x304", + "CUBLASLT_MATMUL_TILE_64x296", + "CUBLASLT_MATMUL_TILE_64x288", + "CUBLASLT_MATMUL_TILE_64x280", + "CUBLASLT_MATMUL_TILE_64x272", + "CUBLASLT_MATMUL_TILE_64x264", + "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x248", + "CUBLASLT_MATMUL_TILE_64x240", + "CUBLASLT_MATMUL_TILE_64x24", + "CUBLASLT_MATMUL_TILE_64x232", + "CUBLASLT_MATMUL_TILE_64x224", + "CUBLASLT_MATMUL_TILE_64x216", + "CUBLASLT_MATMUL_TILE_64x208", + "CUBLASLT_MATMUL_TILE_64x200", + "CUBLASLT_MATMUL_TILE_64x192", + "CUBLASLT_MATMUL_TILE_64x184", + "CUBLASLT_MATMUL_TILE_64x176", + "CUBLASLT_MATMUL_TILE_64x168", + "CUBLASLT_MATMUL_TILE_64x160", + "CUBLASLT_MATMUL_TILE_64x16", + "CUBLASLT_MATMUL_TILE_64x152", + "CUBLASLT_MATMUL_TILE_64x144", + "CUBLASLT_MATMUL_TILE_64x136", + "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_64x120", + "CUBLASLT_MATMUL_TILE_64x112", + "CUBLASLT_MATMUL_TILE_64x104", + "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x96", + "CUBLASLT_MATMUL_TILE_640x88", + "CUBLASLT_MATMUL_TILE_640x80", + "CUBLASLT_MATMUL_TILE_640x8", + "CUBLASLT_MATMUL_TILE_640x72", + "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_640x56", + "CUBLASLT_MATMUL_TILE_640x48", + "CUBLASLT_MATMUL_TILE_640x40", + "CUBLASLT_MATMUL_TILE_640x32", + 
"CUBLASLT_MATMUL_TILE_640x24", + "CUBLASLT_MATMUL_TILE_640x16", + "CUBLASLT_MATMUL_TILE_632x64", + "CUBLASLT_MATMUL_TILE_624x64", + "CUBLASLT_MATMUL_TILE_616x64", + "CUBLASLT_MATMUL_TILE_608x64", + "CUBLASLT_MATMUL_TILE_600x64", + "CUBLASLT_MATMUL_TILE_592x64", + "CUBLASLT_MATMUL_TILE_584x64", + "CUBLASLT_MATMUL_TILE_576x96", + "CUBLASLT_MATMUL_TILE_576x88", + "CUBLASLT_MATMUL_TILE_576x80", + "CUBLASLT_MATMUL_TILE_576x8", + "CUBLASLT_MATMUL_TILE_576x72", + "CUBLASLT_MATMUL_TILE_576x64", + "CUBLASLT_MATMUL_TILE_576x56", + "CUBLASLT_MATMUL_TILE_576x48", + "CUBLASLT_MATMUL_TILE_576x40", + "CUBLASLT_MATMUL_TILE_576x32", + "CUBLASLT_MATMUL_TILE_576x24", + "CUBLASLT_MATMUL_TILE_576x16", + "CUBLASLT_MATMUL_TILE_576x112", + "CUBLASLT_MATMUL_TILE_576x104", + "CUBLASLT_MATMUL_TILE_56x768", + "CUBLASLT_MATMUL_TILE_56x704", + "CUBLASLT_MATMUL_TILE_56x640", + "CUBLASLT_MATMUL_TILE_56x576", + "CUBLASLT_MATMUL_TILE_56x512", + "CUBLASLT_MATMUL_TILE_56x448", + "CUBLASLT_MATMUL_TILE_56x384", + "CUBLASLT_MATMUL_TILE_56x320", + "CUBLASLT_MATMUL_TILE_56x256", + "CUBLASLT_MATMUL_TILE_56x192", + "CUBLASLT_MATMUL_TILE_56x128", + "CUBLASLT_MATMUL_TILE_568x64", + "CUBLASLT_MATMUL_TILE_560x64", + "CUBLASLT_MATMUL_TILE_552x64", + "CUBLASLT_MATMUL_TILE_544x64", + "CUBLASLT_MATMUL_TILE_536x64", + "CUBLASLT_MATMUL_TILE_528x64", + "CUBLASLT_MATMUL_TILE_520x64", + "CUBLASLT_MATMUL_TILE_512x96", + "CUBLASLT_MATMUL_TILE_512x88", + "CUBLASLT_MATMUL_TILE_512x80", + "CUBLASLT_MATMUL_TILE_512x8", + "CUBLASLT_MATMUL_TILE_512x72", + "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_512x56", + "CUBLASLT_MATMUL_TILE_512x48", + "CUBLASLT_MATMUL_TILE_512x40", + "CUBLASLT_MATMUL_TILE_512x32", + "CUBLASLT_MATMUL_TILE_512x24", + "CUBLASLT_MATMUL_TILE_512x16", + "CUBLASLT_MATMUL_TILE_512x128", + "CUBLASLT_MATMUL_TILE_512x120", + "CUBLASLT_MATMUL_TILE_512x112", + "CUBLASLT_MATMUL_TILE_512x104", + "CUBLASLT_MATMUL_TILE_504x64", + "CUBLASLT_MATMUL_TILE_496x64", + "CUBLASLT_MATMUL_TILE_48x768", + 
"CUBLASLT_MATMUL_TILE_48x704", + "CUBLASLT_MATMUL_TILE_48x640", + "CUBLASLT_MATMUL_TILE_48x64", + "CUBLASLT_MATMUL_TILE_48x576", + "CUBLASLT_MATMUL_TILE_48x512", + "CUBLASLT_MATMUL_TILE_48x448", + "CUBLASLT_MATMUL_TILE_48x384", + "CUBLASLT_MATMUL_TILE_48x320", + "CUBLASLT_MATMUL_TILE_48x256", + "CUBLASLT_MATMUL_TILE_48x192", + "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_488x64", + "CUBLASLT_MATMUL_TILE_480x64", + "CUBLASLT_MATMUL_TILE_472x64", + "CUBLASLT_MATMUL_TILE_464x64", + "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x96", + "CUBLASLT_MATMUL_TILE_448x88", + "CUBLASLT_MATMUL_TILE_448x80", + "CUBLASLT_MATMUL_TILE_448x8", + "CUBLASLT_MATMUL_TILE_448x72", + "CUBLASLT_MATMUL_TILE_448x64", + "CUBLASLT_MATMUL_TILE_448x56", + "CUBLASLT_MATMUL_TILE_448x48", + "CUBLASLT_MATMUL_TILE_448x40", + "CUBLASLT_MATMUL_TILE_448x32", + "CUBLASLT_MATMUL_TILE_448x24", + "CUBLASLT_MATMUL_TILE_448x16", + "CUBLASLT_MATMUL_TILE_448x144", + "CUBLASLT_MATMUL_TILE_448x136", + "CUBLASLT_MATMUL_TILE_448x128", + "CUBLASLT_MATMUL_TILE_448x120", + "CUBLASLT_MATMUL_TILE_448x112", + "CUBLASLT_MATMUL_TILE_448x104", + "CUBLASLT_MATMUL_TILE_440x64", + "CUBLASLT_MATMUL_TILE_432x64", + "CUBLASLT_MATMUL_TILE_424x64", + "CUBLASLT_MATMUL_TILE_416x64", + "CUBLASLT_MATMUL_TILE_40x768", + "CUBLASLT_MATMUL_TILE_40x704", + "CUBLASLT_MATMUL_TILE_40x640", + "CUBLASLT_MATMUL_TILE_40x64", + "CUBLASLT_MATMUL_TILE_40x576", + "CUBLASLT_MATMUL_TILE_40x512", + "CUBLASLT_MATMUL_TILE_40x448", + "CUBLASLT_MATMUL_TILE_40x384", + "CUBLASLT_MATMUL_TILE_40x320", + "CUBLASLT_MATMUL_TILE_40x256", + "CUBLASLT_MATMUL_TILE_40x192", + "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_408x64", + "CUBLASLT_MATMUL_TILE_400x64", + "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x96", + "CUBLASLT_MATMUL_TILE_384x88", + "CUBLASLT_MATMUL_TILE_384x80", + "CUBLASLT_MATMUL_TILE_384x8", + "CUBLASLT_MATMUL_TILE_384x72", + "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x56", + 
"CUBLASLT_MATMUL_TILE_384x48", + "CUBLASLT_MATMUL_TILE_384x40", + "CUBLASLT_MATMUL_TILE_384x32", + "CUBLASLT_MATMUL_TILE_384x24", + "CUBLASLT_MATMUL_TILE_384x168", + "CUBLASLT_MATMUL_TILE_384x160", + "CUBLASLT_MATMUL_TILE_384x16", + "CUBLASLT_MATMUL_TILE_384x152", + "CUBLASLT_MATMUL_TILE_384x144", + "CUBLASLT_MATMUL_TILE_384x136", + "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_384x120", + "CUBLASLT_MATMUL_TILE_384x112", + "CUBLASLT_MATMUL_TILE_384x104", + "CUBLASLT_MATMUL_TILE_376x64", + "CUBLASLT_MATMUL_TILE_376x128", + "CUBLASLT_MATMUL_TILE_368x64", + "CUBLASLT_MATMUL_TILE_368x128", + "CUBLASLT_MATMUL_TILE_360x64", + "CUBLASLT_MATMUL_TILE_360x128", + "CUBLASLT_MATMUL_TILE_352x64", + "CUBLASLT_MATMUL_TILE_352x128", + "CUBLASLT_MATMUL_TILE_344x64", + "CUBLASLT_MATMUL_TILE_344x128", + "CUBLASLT_MATMUL_TILE_336x64", + "CUBLASLT_MATMUL_TILE_336x128", + "CUBLASLT_MATMUL_TILE_32x8", + "CUBLASLT_MATMUL_TILE_32x768", + "CUBLASLT_MATMUL_TILE_32x704", + "CUBLASLT_MATMUL_TILE_32x640", + "CUBLASLT_MATMUL_TILE_32x64", + "CUBLASLT_MATMUL_TILE_32x576", + "CUBLASLT_MATMUL_TILE_32x512", + "CUBLASLT_MATMUL_TILE_32x448", + "CUBLASLT_MATMUL_TILE_32x384", + "CUBLASLT_MATMUL_TILE_32x320", + "CUBLASLT_MATMUL_TILE_32x32", + "CUBLASLT_MATMUL_TILE_32x256", + "CUBLASLT_MATMUL_TILE_32x192", + "CUBLASLT_MATMUL_TILE_32x16", + "CUBLASLT_MATMUL_TILE_32x128", + "CUBLASLT_MATMUL_TILE_328x64", + "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x96", + "CUBLASLT_MATMUL_TILE_320x88", + "CUBLASLT_MATMUL_TILE_320x80", + "CUBLASLT_MATMUL_TILE_320x8", + "CUBLASLT_MATMUL_TILE_320x72", + "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x56", + "CUBLASLT_MATMUL_TILE_320x48", + "CUBLASLT_MATMUL_TILE_320x40", + "CUBLASLT_MATMUL_TILE_320x32", + "CUBLASLT_MATMUL_TILE_320x24", + "CUBLASLT_MATMUL_TILE_320x200", + "CUBLASLT_MATMUL_TILE_320x192", + "CUBLASLT_MATMUL_TILE_320x184", + "CUBLASLT_MATMUL_TILE_320x176", + "CUBLASLT_MATMUL_TILE_320x168", + "CUBLASLT_MATMUL_TILE_320x160", + 
"CUBLASLT_MATMUL_TILE_320x16", + "CUBLASLT_MATMUL_TILE_320x152", + "CUBLASLT_MATMUL_TILE_320x144", + "CUBLASLT_MATMUL_TILE_320x136", + "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_320x120", + "CUBLASLT_MATMUL_TILE_320x112", + "CUBLASLT_MATMUL_TILE_320x104", + "CUBLASLT_MATMUL_TILE_312x64", + "CUBLASLT_MATMUL_TILE_312x128", + "CUBLASLT_MATMUL_TILE_304x64", + "CUBLASLT_MATMUL_TILE_304x128", + "CUBLASLT_MATMUL_TILE_296x64", + "CUBLASLT_MATMUL_TILE_296x128", + "CUBLASLT_MATMUL_TILE_288x64", + "CUBLASLT_MATMUL_TILE_288x128", + "CUBLASLT_MATMUL_TILE_280x64", + "CUBLASLT_MATMUL_TILE_280x128", + "CUBLASLT_MATMUL_TILE_272x64", + "CUBLASLT_MATMUL_TILE_272x128", + "CUBLASLT_MATMUL_TILE_264x64", + "CUBLASLT_MATMUL_TILE_264x128", + "CUBLASLT_MATMUL_TILE_256x96", + "CUBLASLT_MATMUL_TILE_256x88", + "CUBLASLT_MATMUL_TILE_256x80", + "CUBLASLT_MATMUL_TILE_256x8", + "CUBLASLT_MATMUL_TILE_256x72", + "CUBLASLT_MATMUL_TILE_256x64", + "CUBLASLT_MATMUL_TILE_256x56", + "CUBLASLT_MATMUL_TILE_256x48", + "CUBLASLT_MATMUL_TILE_256x40", + "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x256", + "CUBLASLT_MATMUL_TILE_256x248", + "CUBLASLT_MATMUL_TILE_256x240", + "CUBLASLT_MATMUL_TILE_256x24", + "CUBLASLT_MATMUL_TILE_256x232", + "CUBLASLT_MATMUL_TILE_256x224", + "CUBLASLT_MATMUL_TILE_256x216", + "CUBLASLT_MATMUL_TILE_256x208", + "CUBLASLT_MATMUL_TILE_256x200", + "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x184", + "CUBLASLT_MATMUL_TILE_256x176", + "CUBLASLT_MATMUL_TILE_256x168", + "CUBLASLT_MATMUL_TILE_256x160", + "CUBLASLT_MATMUL_TILE_256x16", + "CUBLASLT_MATMUL_TILE_256x152", + "CUBLASLT_MATMUL_TILE_256x144", + "CUBLASLT_MATMUL_TILE_256x136", + "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_256x120", + "CUBLASLT_MATMUL_TILE_256x112", + "CUBLASLT_MATMUL_TILE_256x104", + "CUBLASLT_MATMUL_TILE_24x768", + "CUBLASLT_MATMUL_TILE_24x704", + "CUBLASLT_MATMUL_TILE_24x640", + "CUBLASLT_MATMUL_TILE_24x64", + "CUBLASLT_MATMUL_TILE_24x576", + 
"CUBLASLT_MATMUL_TILE_24x512", + "CUBLASLT_MATMUL_TILE_24x448", + "CUBLASLT_MATMUL_TILE_24x384", + "CUBLASLT_MATMUL_TILE_24x320", + "CUBLASLT_MATMUL_TILE_24x256", + "CUBLASLT_MATMUL_TILE_24x192", + "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_248x64", + "CUBLASLT_MATMUL_TILE_248x192", + "CUBLASLT_MATMUL_TILE_248x128", + "CUBLASLT_MATMUL_TILE_240x64", + "CUBLASLT_MATMUL_TILE_240x192", + "CUBLASLT_MATMUL_TILE_240x128", + "CUBLASLT_MATMUL_TILE_232x64", + "CUBLASLT_MATMUL_TILE_232x192", + "CUBLASLT_MATMUL_TILE_232x128", + "CUBLASLT_MATMUL_TILE_224x64", + "CUBLASLT_MATMUL_TILE_224x192", + "CUBLASLT_MATMUL_TILE_224x128", + "CUBLASLT_MATMUL_TILE_216x64", + "CUBLASLT_MATMUL_TILE_216x192", + "CUBLASLT_MATMUL_TILE_216x128", + "CUBLASLT_MATMUL_TILE_208x64", + "CUBLASLT_MATMUL_TILE_208x192", + "CUBLASLT_MATMUL_TILE_208x128", + "CUBLASLT_MATMUL_TILE_200x64", + "CUBLASLT_MATMUL_TILE_200x192", + "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x96", + "CUBLASLT_MATMUL_TILE_192x88", + "CUBLASLT_MATMUL_TILE_192x80", + "CUBLASLT_MATMUL_TILE_192x8", + "CUBLASLT_MATMUL_TILE_192x72", + "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x56", + "CUBLASLT_MATMUL_TILE_192x48", + "CUBLASLT_MATMUL_TILE_192x40", + "CUBLASLT_MATMUL_TILE_192x336", + "CUBLASLT_MATMUL_TILE_192x328", + "CUBLASLT_MATMUL_TILE_192x320", + "CUBLASLT_MATMUL_TILE_192x32", + "CUBLASLT_MATMUL_TILE_192x312", + "CUBLASLT_MATMUL_TILE_192x304", + "CUBLASLT_MATMUL_TILE_192x296", + "CUBLASLT_MATMUL_TILE_192x288", + "CUBLASLT_MATMUL_TILE_192x280", + "CUBLASLT_MATMUL_TILE_192x272", + "CUBLASLT_MATMUL_TILE_192x264", + "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x248", + "CUBLASLT_MATMUL_TILE_192x240", + "CUBLASLT_MATMUL_TILE_192x24", + "CUBLASLT_MATMUL_TILE_192x232", + "CUBLASLT_MATMUL_TILE_192x224", + "CUBLASLT_MATMUL_TILE_192x216", + "CUBLASLT_MATMUL_TILE_192x208", + "CUBLASLT_MATMUL_TILE_192x200", + "CUBLASLT_MATMUL_TILE_192x192", + "CUBLASLT_MATMUL_TILE_192x184", + 
"CUBLASLT_MATMUL_TILE_192x176", + "CUBLASLT_MATMUL_TILE_192x168", + "CUBLASLT_MATMUL_TILE_192x160", + "CUBLASLT_MATMUL_TILE_192x16", + "CUBLASLT_MATMUL_TILE_192x152", + "CUBLASLT_MATMUL_TILE_192x144", + "CUBLASLT_MATMUL_TILE_192x136", + "CUBLASLT_MATMUL_TILE_192x128", + "CUBLASLT_MATMUL_TILE_192x120", + "CUBLASLT_MATMUL_TILE_192x112", + "CUBLASLT_MATMUL_TILE_192x104", + "CUBLASLT_MATMUL_TILE_184x64", + "CUBLASLT_MATMUL_TILE_184x256", + "CUBLASLT_MATMUL_TILE_184x192", + "CUBLASLT_MATMUL_TILE_184x128", + "CUBLASLT_MATMUL_TILE_176x64", + "CUBLASLT_MATMUL_TILE_176x256", + "CUBLASLT_MATMUL_TILE_176x192", + "CUBLASLT_MATMUL_TILE_176x128", + "CUBLASLT_MATMUL_TILE_16x8", + "CUBLASLT_MATMUL_TILE_16x768", + "CUBLASLT_MATMUL_TILE_16x704", + "CUBLASLT_MATMUL_TILE_16x640", + "CUBLASLT_MATMUL_TILE_16x64", + "CUBLASLT_MATMUL_TILE_16x576", + "CUBLASLT_MATMUL_TILE_16x512", + "CUBLASLT_MATMUL_TILE_16x448", + "CUBLASLT_MATMUL_TILE_16x384", + "CUBLASLT_MATMUL_TILE_16x320", + "CUBLASLT_MATMUL_TILE_16x32", + "CUBLASLT_MATMUL_TILE_16x256", + "CUBLASLT_MATMUL_TILE_16x192", + "CUBLASLT_MATMUL_TILE_16x16", + "CUBLASLT_MATMUL_TILE_16x128", + "CUBLASLT_MATMUL_TILE_168x64", + "CUBLASLT_MATMUL_TILE_168x256", + "CUBLASLT_MATMUL_TILE_168x192", + "CUBLASLT_MATMUL_TILE_168x128", + "CUBLASLT_MATMUL_TILE_160x64", + "CUBLASLT_MATMUL_TILE_160x256", + "CUBLASLT_MATMUL_TILE_160x192", + "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_152x64", + "CUBLASLT_MATMUL_TILE_152x320", + "CUBLASLT_MATMUL_TILE_152x256", + "CUBLASLT_MATMUL_TILE_152x192", + "CUBLASLT_MATMUL_TILE_152x128", + "CUBLASLT_MATMUL_TILE_144x64", + "CUBLASLT_MATMUL_TILE_144x320", + "CUBLASLT_MATMUL_TILE_144x256", + "CUBLASLT_MATMUL_TILE_144x192", + "CUBLASLT_MATMUL_TILE_144x128", + "CUBLASLT_MATMUL_TILE_136x64", + "CUBLASLT_MATMUL_TILE_136x320", + "CUBLASLT_MATMUL_TILE_136x256", + "CUBLASLT_MATMUL_TILE_136x192", + "CUBLASLT_MATMUL_TILE_136x128", + "CUBLASLT_MATMUL_TILE_128x96", + "CUBLASLT_MATMUL_TILE_128x88", + 
"CUBLASLT_MATMUL_TILE_128x80", + "CUBLASLT_MATMUL_TILE_128x8", + "CUBLASLT_MATMUL_TILE_128x72", + "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x512", + "CUBLASLT_MATMUL_TILE_128x504", + "CUBLASLT_MATMUL_TILE_128x496", + "CUBLASLT_MATMUL_TILE_128x488", + "CUBLASLT_MATMUL_TILE_128x480", + "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x472", + "CUBLASLT_MATMUL_TILE_128x464", + "CUBLASLT_MATMUL_TILE_128x456", + "CUBLASLT_MATMUL_TILE_128x448", + "CUBLASLT_MATMUL_TILE_128x440", + "CUBLASLT_MATMUL_TILE_128x432", + "CUBLASLT_MATMUL_TILE_128x424", + "CUBLASLT_MATMUL_TILE_128x416", + "CUBLASLT_MATMUL_TILE_128x408", + "CUBLASLT_MATMUL_TILE_128x400", + "CUBLASLT_MATMUL_TILE_128x40", + "CUBLASLT_MATMUL_TILE_128x392", + "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x376", + "CUBLASLT_MATMUL_TILE_128x368", + "CUBLASLT_MATMUL_TILE_128x360", + "CUBLASLT_MATMUL_TILE_128x352", + "CUBLASLT_MATMUL_TILE_128x344", + "CUBLASLT_MATMUL_TILE_128x336", + "CUBLASLT_MATMUL_TILE_128x328", + "CUBLASLT_MATMUL_TILE_128x320", + "CUBLASLT_MATMUL_TILE_128x32", + "CUBLASLT_MATMUL_TILE_128x312", + "CUBLASLT_MATMUL_TILE_128x304", + "CUBLASLT_MATMUL_TILE_128x296", + "CUBLASLT_MATMUL_TILE_128x288", + "CUBLASLT_MATMUL_TILE_128x280", + "CUBLASLT_MATMUL_TILE_128x272", + "CUBLASLT_MATMUL_TILE_128x264", + "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x248", + "CUBLASLT_MATMUL_TILE_128x240", + "CUBLASLT_MATMUL_TILE_128x24", + "CUBLASLT_MATMUL_TILE_128x232", + "CUBLASLT_MATMUL_TILE_128x224", + "CUBLASLT_MATMUL_TILE_128x216", + "CUBLASLT_MATMUL_TILE_128x208", + "CUBLASLT_MATMUL_TILE_128x200", + "CUBLASLT_MATMUL_TILE_128x192", + "CUBLASLT_MATMUL_TILE_128x184", + "CUBLASLT_MATMUL_TILE_128x176", + "CUBLASLT_MATMUL_TILE_128x168", + "CUBLASLT_MATMUL_TILE_128x160", + "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x152", + "CUBLASLT_MATMUL_TILE_128x144", + "CUBLASLT_MATMUL_TILE_128x136", + "CUBLASLT_MATMUL_TILE_128x128", + 
"CUBLASLT_MATMUL_TILE_128x120", + "CUBLASLT_MATMUL_TILE_128x112", + "CUBLASLT_MATMUL_TILE_128x104", + "CUBLASLT_MATMUL_TILE_120x64", + "CUBLASLT_MATMUL_TILE_120x384", + "CUBLASLT_MATMUL_TILE_120x320", + "CUBLASLT_MATMUL_TILE_120x256", + "CUBLASLT_MATMUL_TILE_120x192", + "CUBLASLT_MATMUL_TILE_120x128", + "CUBLASLT_MATMUL_TILE_112x64", + "CUBLASLT_MATMUL_TILE_112x384", + "CUBLASLT_MATMUL_TILE_112x320", + "CUBLASLT_MATMUL_TILE_112x256", + "CUBLASLT_MATMUL_TILE_112x192", + "CUBLASLT_MATMUL_TILE_112x128", + "CUBLASLT_MATMUL_TILE_104x64", + "CUBLASLT_MATMUL_TILE_104x448", + "CUBLASLT_MATMUL_TILE_104x384", + "CUBLASLT_MATMUL_TILE_104x320", + "CUBLASLT_MATMUL_TILE_104x256", + "CUBLASLT_MATMUL_TILE_104x192", + "CUBLASLT_MATMUL_TILE_104x128", + "CUBLASLT_MATMUL_STAGES_UNDEFINED", + "CUBLASLT_MATMUL_STAGES_END", + "CUBLASLT_MATMUL_STAGES_8xAUTO", + "CUBLASLT_MATMUL_STAGES_8x5", + "CUBLASLT_MATMUL_STAGES_8x4", + "CUBLASLT_MATMUL_STAGES_8x3", + "CUBLASLT_MATMUL_STAGES_64xAUTO", + "CUBLASLT_MATMUL_STAGES_64x6", + "CUBLASLT_MATMUL_STAGES_64x5", + "CUBLASLT_MATMUL_STAGES_64x4", + "CUBLASLT_MATMUL_STAGES_64x3", + "CUBLASLT_MATMUL_STAGES_64x2", + "CUBLASLT_MATMUL_STAGES_64x1", + "CUBLASLT_MATMUL_STAGES_32xAUTO", + "CUBLASLT_MATMUL_STAGES_32x6", + "CUBLASLT_MATMUL_STAGES_32x5", + "CUBLASLT_MATMUL_STAGES_32x4", + "CUBLASLT_MATMUL_STAGES_32x3", + "CUBLASLT_MATMUL_STAGES_32x2", + "CUBLASLT_MATMUL_STAGES_32x10", + "CUBLASLT_MATMUL_STAGES_32x1", + "CUBLASLT_MATMUL_STAGES_16xAUTO", + "CUBLASLT_MATMUL_STAGES_16x6", + "CUBLASLT_MATMUL_STAGES_16x5", + "CUBLASLT_MATMUL_STAGES_16x4", + "CUBLASLT_MATMUL_STAGES_16x3", + "CUBLASLT_MATMUL_STAGES_16x2", + "CUBLASLT_MATMUL_STAGES_16x10", + "CUBLASLT_MATMUL_STAGES_16x1", + "CUBLASLT_MATMUL_STAGES_128xAUTO", + "CUBLASLT_MATMUL_STAGES_128x6", + "CUBLASLT_MATMUL_STAGES_128x5", + "CUBLASLT_MATMUL_STAGES_128x4", + "CUBLASLT_MATMUL_STAGES_128x3", + "CUBLASLT_MATMUL_STAGES_128x2", + "CUBLASLT_MATMUL_STAGES_128x1", + 
"CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES", + "CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT", + "CUBLASLT_MATMUL_PREF_IMPL_MASK", + "CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA884", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA1688", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA1684", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA16816", + "CUBLASLT_MATMUL_INNER_SHAPE_END", + "CUBLASLT_MATMUL_DESC_TRANSC", + "CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET", + "CUBLASLT_MATMUL_DESC_SCALE_TYPE", + "CUBLASLT_MATMUL_DESC_FILL_MODE", + "CUBLASLT_MATMUL_DESC_FAST_ACCUM", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER", + "CUBLASLT_MATMUL_DESC_COMPUTE_TYPE", + "CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_OUT_COUNTERS_POINTER", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_ROWS", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_COLS", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_IN_COUNTERS_POINTER", + "CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE", + "CUBLASLT_EPILOGUE_RELU_AUX_BIAS", + "CUBLASLT_EPILOGUE_RELU_AUX", + "CUBLASLT_EPILOGUE_DRELU_BGRAD", + "CUBLASLT_EPILOGUE_DRELU", + "CUBLASLT_CLUSTER_SHAPE_END", + "CUBLASLT_CLUSTER_SHAPE_AUTO", + "CUBLASLT_CLUSTER_SHAPE_9x1x1", + "CUBLASLT_CLUSTER_SHAPE_8x2x1", + "CUBLASLT_CLUSTER_SHAPE_8x1x1", + "CUBLASLT_CLUSTER_SHAPE_7x2x1", + "CUBLASLT_CLUSTER_SHAPE_7x1x1", + "CUBLASLT_CLUSTER_SHAPE_6x2x1", + "CUBLASLT_CLUSTER_SHAPE_6x1x1", + "CUBLASLT_CLUSTER_SHAPE_5x3x1", + "CUBLASLT_CLUSTER_SHAPE_5x2x1", + "CUBLASLT_CLUSTER_SHAPE_5x1x1", + "CUBLASLT_CLUSTER_SHAPE_4x4x1", + "CUBLASLT_CLUSTER_SHAPE_4x3x1", + "CUBLASLT_CLUSTER_SHAPE_4x2x1", + "CUBLASLT_CLUSTER_SHAPE_4x1x1", + "CUBLASLT_CLUSTER_SHAPE_3x5x1", + "CUBLASLT_CLUSTER_SHAPE_3x4x1", + "CUBLASLT_CLUSTER_SHAPE_3x3x1", + 
"CUBLASLT_CLUSTER_SHAPE_3x2x1", + "CUBLASLT_CLUSTER_SHAPE_3x1x1", + "CUBLASLT_CLUSTER_SHAPE_2x8x1", + "CUBLASLT_CLUSTER_SHAPE_2x7x1", + "CUBLASLT_CLUSTER_SHAPE_2x6x1", + "CUBLASLT_CLUSTER_SHAPE_2x5x1", + "CUBLASLT_CLUSTER_SHAPE_2x4x1", + "CUBLASLT_CLUSTER_SHAPE_2x3x1", + "CUBLASLT_CLUSTER_SHAPE_2x2x1", + "CUBLASLT_CLUSTER_SHAPE_2x1x1", + "CUBLASLT_CLUSTER_SHAPE_1x9x1", + "CUBLASLT_CLUSTER_SHAPE_1x8x1", + "CUBLASLT_CLUSTER_SHAPE_1x7x1", + "CUBLASLT_CLUSTER_SHAPE_1x6x1", + "CUBLASLT_CLUSTER_SHAPE_1x5x1", + "CUBLASLT_CLUSTER_SHAPE_1x4x1", + "CUBLASLT_CLUSTER_SHAPE_1x3x1", + "CUBLASLT_CLUSTER_SHAPE_1x2x1", + "CUBLASLT_CLUSTER_SHAPE_1x1x1", + "CUBLASLT_CLUSTER_SHAPE_1x16x1", + "CUBLASLT_CLUSTER_SHAPE_1x15x1", + "CUBLASLT_CLUSTER_SHAPE_1x14x1", + "CUBLASLT_CLUSTER_SHAPE_1x13x1", + "CUBLASLT_CLUSTER_SHAPE_1x12x1", + "CUBLASLT_CLUSTER_SHAPE_1x11x1", + "CUBLASLT_CLUSTER_SHAPE_1x10x1", + "CUBLASLT_CLUSTER_SHAPE_16x1x1", + "CUBLASLT_CLUSTER_SHAPE_15x1x1", + "CUBLASLT_CLUSTER_SHAPE_14x1x1", + "CUBLASLT_CLUSTER_SHAPE_13x1x1", + "CUBLASLT_CLUSTER_SHAPE_12x1x1", + "CUBLASLT_CLUSTER_SHAPE_11x1x1", + "CUBLASLT_CLUSTER_SHAPE_10x1x1", + "CUBLASLT_ALGO_CONFIG_TILE_ID", + "CUBLASLT_ALGO_CONFIG_STAGES_ID", + "CUBLASLT_ALGO_CONFIG_SPLITK_NUM", + "CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME", + "CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID", + "CUBLASLT_ALGO_CONFIG_ID", + "CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION", + "CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING", + "CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID", + "CUBLASLT_ALGO_CAP_UPLO_SUPPORT", + "CUBLASLT_ALGO_CAP_TILE_IDS", + "CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT", + "CUBLASLT_ALGO_CAP_STAGES_IDS", + "CUBLASLT_ALGO_CAP_SPLITK_SUPPORT", + "CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK", + "CUBLASLT_ALGO_CAP_POINTER_MODE_MASK", + "CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT", + "CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES", + 
"CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES", + "CUBLASLT_ALGO_CAP_LD_NEGATIVE", + "CUBLASLT_ALGO_CAP_EPILOGUE_MASK", + "CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX", + "CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER", + "CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT", + "CUBLASLT_ALGO_CAP_ATOMIC_SYNC" +); + sub warnHipOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; - foreach $func ( - "cutensorWriteKernelCacheToFile", - "cutensorTensorDescriptor", - "cutensorReduce", - "cutensorReadKernelCacheFromFile", - "cutensorPlanPreferenceSetAttribute", - "cutensorPlanPreferenceAttribute_t", - "cutensorPlanGetAttribute", - "cutensorPlanAttribute_t", - "cutensorPlan", - "cutensorPermute", - "cutensorOperationDescriptorSetAttribute", - "cutensorOperationDescriptorGetAttribute", - "cutensorOperationDescriptorAttribute_t", - "cutensorMgTensorDescriptor_t", - "cutensorMgTensorDescriptor_s", - "cutensorMgHostDevice_t", - "cutensorMgHandle_t", - "cutensorMgHandle_s", - "cutensorMgDestroyTensorDescriptor", - "cutensorMgDestroyCopyPlan", - "cutensorMgDestroyCopyDescriptor", - "cutensorMgDestroyContractionPlan", - "cutensorMgDestroyContractionFind", - "cutensorMgDestroyContractionDescriptor", - "cutensorMgDestroy", - "cutensorMgCreateTensorDescriptor", - "cutensorMgCreateCopyPlan", - "cutensorMgCreateCopyDescriptor", - "cutensorMgCreateContractionPlan", - "cutensorMgCreateContractionFind", - "cutensorMgCreateContractionDescriptor", - "cutensorMgCreate", - "cutensorMgCopyPlan_t", - "cutensorMgCopyPlan_s", - "cutensorMgCopyGetWorkspace", - "cutensorMgCopyDescriptor_t", - "cutensorMgCopyDescriptor_s", - "cutensorMgCopy", - "cutensorMgContractionPlan_t", - "cutensorMgContractionPlan_s", - "cutensorMgContractionGetWorkspace", - "cutensorMgContractionFind_t", - "cutensorMgContractionFind_s", - "cutensorMgContractionFindSetAttribute", - "cutensorMgContractionFindAttribute_t", - "cutensorMgContractionDescriptor_t", - "cutensorMgContractionDescriptor_s", - "cutensorMgContraction", - "cutensorMgAlgo_t", - 
"cutensorJitMode_t", - "cutensorHandleWritePlanCacheToFile", - "cutensorHandleResizePlanCache", - "cutensorHandleReadPlanCacheFromFile", - "cutensorHandle", - "cutensorGetVersion", - "cutensorEstimateWorkspaceSize", - "cutensorElementwiseTrinaryExecute", - "cutensorElementwiseBinaryExecute", - "cutensorDestroyTensorDescriptor", - "cutensorDestroyPlanPreference", - "cutensorDestroyPlan", - "cutensorDestroyOperationDescriptor", - "cutensorCreateTensorDescriptor", - "cutensorCreateReduction", - "cutensorCreatePlanPreference", - "cutensorCreatePlan", - "cutensorCreatePermutation", - "cutensorCreateElementwiseTrinary", - "cutensorCreateElementwiseBinary", - "cutensorCreateContraction", - "cutensorCacheMode_t", - "cutensorAutotuneMode_t", - "cusparseZhybsv_solve", - "cusparseZhybsv_analysis", - "cusparseZhyb2dense", - "cusparseZhyb2csc", - "cusparseZgtsv_nopivot", - "cusparseZgtsvStridedBatch", - "cusparseZgtsv", - "cusparseZgebsr2gebsr_bufferSizeExt", - "cusparseZgebsr2gebsc_bufferSizeExt", - "cusparseZdense2hyb", - "cusparseZcsrsv_solve", - "cusparseZcsrsv_analysis", - "cusparseZcsrsm_solve", - "cusparseZcsrsm_analysis", - "cusparseZcsrmv_mp", - "cusparseZcsrilu0", - "cusparseZcsric0", - "cusparseZcsr2gebsr_bufferSizeExt", - "cusparseZcsc2hyb", - "cusparseZbsrsm2_bufferSizeExt", - "cusparseZbsrilu02_bufferSizeExt", - "cusparseZbsric02_bufferSizeExt", - "cusparseXgebsr2csr", - "cusparseSpVecDescr", - "cusparseSpSV_updateMatrix", - "cusparseSpSVUpdate_t", - "cusparseSpSM_updateMatrix", - "cusparseSpSMUpdate_t", - "cusparseSpMatSetNumBatches", - "cusparseSpMatGetNumBatches", - "cusparseSpMatDescr", - "cusparseSpMMOp_destroyPlan", - "cusparseSpMMOp_createPlan", - "cusparseSpMMOpPlan_t", - "cusparseSpMMOpPlan", - "cusparseSpMMOpAlg_t", - "cusparseSpMMOp", - "cusparseSpGEMM_getNumProducts", - "cusparseSpGEMM_estimateMemory", - "cusparseSolveAnalysisInfo_t", - "cusparseSolveAnalysisInfo", - "cusparseSideMode_t", - "cusparseShybsv_solve", - "cusparseShybsv_analysis", - 
"cusparseShyb2dense", - "cusparseShyb2csc", - "cusparseSgtsv_nopivot", - "cusparseSgtsvStridedBatch", - "cusparseSgtsv", - "cusparseSgebsr2gebsr_bufferSizeExt", - "cusparseSgebsr2gebsc_bufferSizeExt", - "cusparseSdense2hyb", - "cusparseScsrsv_solve", - "cusparseScsrsv_analysis", - "cusparseScsrsm_solve", - "cusparseScsrsm_analysis", - "cusparseScsrmv_mp", - "cusparseScsrilu0", - "cusparseScsric0", - "cusparseScsr2gebsr_bufferSizeExt", - "cusparseScsc2hyb", - "cusparseSbsrsm2_bufferSizeExt", - "cusparseSbsrilu02_bufferSizeExt", - "cusparseSbsric02_bufferSizeExt", - "cusparseMatDescr", - "cusparseLoggerSetMask", - "cusparseLoggerSetLevel", - "cusparseLoggerSetFile", - "cusparseLoggerSetCallback", - "cusparseLoggerOpenFile", - "cusparseLoggerForceDisable", - "cusparseLoggerCallback_t", - "cusparseHybMat", - "cusparseHpruneDense2csr_bufferSizeExt", - "cusparseHpruneDense2csrNnzByPercentage", - "cusparseHpruneDense2csrNnz", - "cusparseHpruneDense2csrByPercentage_bufferSizeExt", - "cusparseHpruneDense2csrByPercentage", - "cusparseHpruneDense2csr", - "cusparseHpruneCsr2csr_bufferSizeExt", - "cusparseHpruneCsr2csrNnzByPercentage", - "cusparseHpruneCsr2csrNnz", - "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", - "cusparseHpruneCsr2csrByPercentage", - "cusparseHpruneCsr2csr", - "cusparseGetLevelInfo", - "cusparseDnVecDescr", - "cusparseDnMatDescr", - "cusparseDhybsv_solve", - "cusparseDhybsv_analysis", - "cusparseDhyb2dense", - "cusparseDhyb2csc", - "cusparseDgtsv_nopivot", - "cusparseDgtsvStridedBatch", - "cusparseDgtsv", - "cusparseDgebsr2gebsr_bufferSizeExt", - "cusparseDgebsr2gebsc_bufferSizeExt", - "cusparseDestroySolveAnalysisInfo", - "cusparseDdense2hyb", - "cusparseDcsrsv_solve", - "cusparseDcsrsv_analysis", - "cusparseDcsrsm_solve", - "cusparseDcsrsm_analysis", - "cusparseDcsrmv_mp", - "cusparseDcsrilu0", - "cusparseDcsric0", - "cusparseDcsr2gebsr_bufferSizeExt", - "cusparseDcsc2hyb", - "cusparseDbsrsm2_bufferSizeExt", - "cusparseDbsrilu02_bufferSizeExt", - 
"cusparseDbsric02_bufferSizeExt", - "cusparseCsrsv_solveEx", - "cusparseCsrsv_analysisEx", - "cusparseCsrmvEx_bufferSize", - "cusparseCsrmvEx", - "cusparseCsrilu0Ex", - "cusparseCsr2cscEx", - "cusparseCreateSolveAnalysisInfo", - "cusparseCreateSlicedEll", - "cusparseCreateConstSlicedEll", - "cusparseCreateConstBsr", - "cusparseCreateBsr", - "cusparseContext", - "cusparseConstrainedGeMM_bufferSize", - "cusparseConstrainedGeMM", - "cusparseColorInfo", - "cusparseColorAlg_t", - "cusparseChybsv_solve", - "cusparseChybsv_analysis", - "cusparseChyb2dense", - "cusparseChyb2csc", - "cusparseCgtsv_nopivot", - "cusparseCgtsvStridedBatch", - "cusparseCgtsv", - "cusparseCgebsr2gebsr_bufferSizeExt", - "cusparseCgebsr2gebsc_bufferSizeExt", - "cusparseCdense2hyb", - "cusparseCcsrsv_solve", - "cusparseCcsrsv_analysis", - "cusparseCcsrsm_solve", - "cusparseCcsrsm_analysis", - "cusparseCcsrmv_mp", - "cusparseCcsrilu0", - "cusparseCcsric0", - "cusparseCcsr2gebsr_bufferSizeExt", - "cusparseCcsc2hyb", - "cusparseCbsrsm2_bufferSizeExt", - "cusparseCbsrilu02_bufferSizeExt", - "cusparseCbsric02_bufferSizeExt", - "cusparseBsrSetStridedBatch", - "cusparseAlgMode_t", - "curand_mtgp32_specific", - "curand_mtgp32_single_specific", - "curand_mtgp32_single", - "curand_Philox4x32_10", - "curandMethod_t", - "curandMethod", - "curandHistogramM2_t", - "curandHistogramM2_st", - "curandHistogramM2V_t", - "curandHistogramM2V_st", - "curandHistogramM2K_t", - "curandHistogramM2K_st", - "curandGetProperty", - "curandDistribution_t", - "curandDistribution_st", - "curandDistributionShift_t", - "curandDistributionShift_st", - "curandDistributionM2Shift_t", - "curandDistributionM2Shift_st", - "cublasZtrttp", - "cublasZtpttr", - "cublasZmatinvBatched", - "cublasZgemm3m_64", - "cublasZgemm3m", - "cublasXerbla", - "cublasUint8gemmBias", - "cublasTSTgemvStridedBatched_64", - "cublasTSTgemvStridedBatched", - "cublasTSTgemvBatched_64", - "cublasTSTgemvBatched", - "cublasTSSgemvStridedBatched_64", - 
"cublasTSSgemvStridedBatched", - "cublasTSSgemvBatched_64", - "cublasTSSgemvBatched", - "cublasSwapEx_64", - "cublasSwapEx", - "cublasStrttp", - "cublasStpttr", - "cublasSmatinvBatched", - "cublasShutdown", - "cublasSgemmGroupedBatched_64", - "cublasSgemmGroupedBatched", - "cublasSgemmEx_64", - "cublasSgemmEx", - "cublasSetVector_64", - "cublasSetVectorAsync_64", - "cublasSetSmCountTarget", - "cublasSetMatrix_64", - "cublasSetMatrixAsync_64", - "cublasSetLoggerCallback", - "cublasSetKernelStream", - "cublasRotmgEx", - "cublasRotmEx_64", - "cublasRotmEx", - "cublasRotgEx", - "cublasMigrateComputeType", - "cublasLtReductionScheme_t", - "cublasLtPointerModeMask_t", - "cublasLtNumericalImplFlags_t", - "cublasLtMatrixTransformDescInit", - "cublasLtMatrixLayoutInit", - "cublasLtMatmulTile_t", - "cublasLtMatmulStages_t", - "cublasLtMatmulSearch_t", - "cublasLtMatmulPreferenceInit", - "cublasLtMatmulInnerShape_t", - "cublasLtMatmulDescInit", - "cublasLtMatmulAlgoInit", - "cublasLtMatmulAlgoGetIds", - "cublasLtMatmulAlgoConfigSetAttribute", - "cublasLtMatmulAlgoConfigGetAttribute", - "cublasLtMatmulAlgoConfigAttributes_t", - "cublasLtMatmulAlgoCheck", - "cublasLtMatmulAlgoCapGetAttribute", - "cublasLtMatmulAlgoCapAttributes_t", - "cublasLtLoggerSetMask", - "cublasLtLoggerSetLevel", - "cublasLtLoggerSetFile", - "cublasLtLoggerSetCallback", - "cublasLtLoggerOpenFile", - "cublasLtLoggerForceDisable", - "cublasLtLoggerCallback_t", - "cublasLtHeuristicsCacheSetCapacity", - "cublasLtHeuristicsCacheGetCapacity", - "cublasLtGetVersion", - "cublasLtGetStatusString", - "cublasLtGetStatusName", - "cublasLtGetProperty", - "cublasLtGetCudartVersion", - "cublasLtDisableCpuInstructionsSetMask", - "cublasLtContext", - "cublasLtClusterShape_t", - "cublasLoggerConfigure", - "cublasLogCallback", - "cublasInit", - "cublasIaminEx_64", - "cublasIaminEx", - "cublasIamaxEx_64", - "cublasIamaxEx", - "cublasHSSgemvStridedBatched_64", - "cublasHSSgemvStridedBatched", - "cublasHSSgemvBatched_64", - 
"cublasHSSgemvBatched", - "cublasHSHgemvStridedBatched_64", - "cublasHSHgemvStridedBatched", - "cublasHSHgemvBatched_64", - "cublasHSHgemvBatched", - "cublasGetVersion_v2", - "cublasGetVersion", - "cublasGetVector_64", - "cublasGetVectorAsync_64", - "cublasGetStatusString", - "cublasGetStatusName", - "cublasGetSmCountTarget", - "cublasGetProperty", - "cublasGetMatrix_64", - "cublasGetMatrixAsync_64", - "cublasGetLoggerCallback", - "cublasGetError", - "cublasGetCudartVersion", - "cublasGemmGroupedBatchedEx_64", - "cublasGemmGroupedBatchedEx", - "cublasFree", - "cublasDtrttp", - "cublasDtpttr", - "cublasDmatinvBatched", - "cublasDgemmGroupedBatched_64", - "cublasDgemmGroupedBatched", - "cublasCtrttp", - "cublasCtpttr", - "cublasCsyrkEx_64", - "cublasCsyrkEx", - "cublasCsyrk3mEx_64", - "cublasCsyrk3mEx", - "cublasCopyEx_64", - "cublasCopyEx", - "cublasContext", - "cublasCmatinvBatched", - "cublasCherkEx_64", - "cublasCherkEx", - "cublasCherk3mEx_64", - "cublasCherk3mEx", - "cublasCgemmEx_64", - "cublasCgemmEx", - "cublasCgemm3m_64", - "cublasCgemm3mStridedBatched_64", - "cublasCgemm3mStridedBatched", - "cublasCgemm3mEx_64", - "cublasCgemm3mEx", - "cublasCgemm3mBatched_64", - "cublasCgemm3mBatched", - "cublasCgemm3m", - "cublasAsumEx_64", - "cublasAsumEx", - "cublasAlloc", - "csrsv2Info", - "csrsm2Info", - "__curand_umul", - "CUTENSOR_WORKSPACE_DEFAULT", - "CUTENSOR_STATUS_MAPPING_ERROR", - "CUTENSOR_STATUS_LICENSE_ERROR", - "CUTENSOR_STATUS_CUDA_ERROR", - "CUTENSOR_STATUS_CUBLAS_ERROR", - "CUTENSOR_R_MIN_TF32", - "CUTENSOR_R_MIN_8U", - "CUTENSOR_R_MIN_8I", - "CUTENSOR_R_MIN_64F", - "CUTENSOR_R_MIN_32U", - "CUTENSOR_R_MIN_32I", - "CUTENSOR_R_MIN_32F", - "CUTENSOR_R_MIN_16F", - "CUTENSOR_R_MIN_16BF", - "CUTENSOR_R_64U", - "CUTENSOR_R_64I", - "CUTENSOR_R_4U", - "CUTENSOR_R_4I", - "CUTENSOR_R_16U", - "CUTENSOR_R_16I", - "CUTENSOR_PLAN_REQUIRED_WORKSPACE", - "CUTENSOR_PLAN_PREFERENCE_KERNEL_RANK", - "CUTENSOR_PLAN_PREFERENCE_JIT", - 
"CUTENSOR_PLAN_PREFERENCE_INCREMENTAL_COUNT", - "CUTENSOR_PLAN_PREFERENCE_CACHE_MODE", - "CUTENSOR_PLAN_PREFERENCE_AUTOTUNE_MODE", - "CUTENSOR_PLAN_PREFERENCE_ALGO", - "CUTENSOR_OP_TANH", - "CUTENSOR_OP_TAN", - "CUTENSOR_OP_SWISH", - "CUTENSOR_OP_SOFT_SIGN", - "CUTENSOR_OP_SOFT_PLUS", - "CUTENSOR_OP_SINH", - "CUTENSOR_OP_SIN", - "CUTENSOR_OP_SIGMOID", - "CUTENSOR_OP_RELU", - "CUTENSOR_OP_RCP", - "CUTENSOR_OP_NEG", - "CUTENSOR_OP_MISH", - "CUTENSOR_OP_LOG", - "CUTENSOR_OP_FLOOR", - "CUTENSOR_OP_EXP", - "CUTENSOR_OP_COSH", - "CUTENSOR_OP_COS", - "CUTENSOR_OP_CONJ", - "CUTENSOR_OP_CEIL", - "CUTENSOR_OP_ATANH", - "CUTENSOR_OP_ATAN", - "CUTENSOR_OP_ASINH", - "CUTENSOR_OP_ASIN", - "CUTENSOR_OP_ACOSH", - "CUTENSOR_OP_ACOS", - "CUTENSOR_OP_ABS", - "CUTENSOR_OPERATION_DESCRIPTOR_TAG", - "CUTENSOR_OPERATION_DESCRIPTOR_SCALAR_TYPE", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_VALUE", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_RIGHT", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_LEFT", - "CUTENSOR_OPERATION_DESCRIPTOR_MOVED_BYTES", - "CUTENSOR_OPERATION_DESCRIPTOR_FLOPS", - "CUTENSOR_MG_DEVICE_HOST_PINNED", - "CUTENSOR_MG_DEVICE_HOST", - "CUTENSOR_JIT_MODE_NONE", - "CUTENSOR_JIT_MODE_DEFAULT", - "CUTENSOR_C_MIN_TF32", - "CUTENSOR_C_MIN_64F", - "CUTENSOR_C_MIN_32F", - "CUTENSOR_C_MIN_16F", - "CUTENSOR_C_8U", - "CUTENSOR_C_8I", - "CUTENSOR_C_64U", - "CUTENSOR_C_64I", - "CUTENSOR_C_4U", - "CUTENSOR_C_4I", - "CUTENSOR_C_32U", - "CUTENSOR_C_32I", - "CUTENSOR_C_16U", - "CUTENSOR_C_16I", - "CUTENSOR_C_16F", - "CUTENSOR_C_16BF", - "CUTENSOR_COMPUTE_TF32", - "CUTENSOR_COMPUTE_3XTF32", - "CUTENSOR_CACHE_MODE_PEDANTIC", - "CUTENSOR_CACHE_MODE_NONE", - "CUTENSOR_AUTOTUNE_MODE_NONE", - "CUTENSOR_AUTOTUNE_MODE_INCREMENTAL", - "CUTENSOR_ALGO_TTGT", - "CUTENSOR_ALGO_TGETT", - "CUTENSOR_ALGO_GETT", - "CUTENSORMG_CONTRACTION_FIND_ATTRIBUTE_MAX", - "CUTENSORMG_ALGO_DEFAULT", - "CUSPARSE_SPSV_UPDATE_GENERAL", - "CUSPARSE_SPSV_UPDATE_DIAGONAL", - "CUSPARSE_SPSM_UPDATE_GENERAL", - 
"CUSPARSE_SPSM_UPDATE_DIAGONAL", - "CUSPARSE_SPMV_SELL_ALG1", - "CUSPARSE_SPMM_OP_ALG_DEFAULT", - "CUSPARSE_SPMM_BSR_ALG1", - "CUSPARSE_SPMMA_PREPROCESS", - "CUSPARSE_SPMMA_ALG4", - "CUSPARSE_SPMMA_ALG3", - "CUSPARSE_SPMMA_ALG2", - "CUSPARSE_SPMMA_ALG1", - "CUSPARSE_SIDE_RIGHT", - "CUSPARSE_SIDE_LEFT", - "CUSPARSE_FORMAT_SLICED_ELLPACK", - "CUSPARSE_FORMAT_BSR", - "CUSPARSE_COLOR_ALG1", - "CUSPARSE_COLOR_ALG0", - "CUSPARSE_ALG_NAIVE", - "CUSPARSE_ALG_MERGE_PATH", - "CUSPARSE_ALG1", - "CUSPARSE_ALG0", - "CURAND_REJECTION", - "CURAND_POISSON", - "CURAND_M2", - "CURAND_M1", - "CURAND_KNUTH", - "CURAND_ITR", - "CURAND_HITR", - "CURAND_FAST_REJECTION", - "CURAND_DISCRETE_GAUSS", - "CURAND_DEVICE_API", - "CURAND_DEFINITION", - "CURAND_CHOOSE_BEST", - "CURAND_BINARY_SEARCH", - "CURAND_3RD", - "CUDA_R_8F_E5M2", - "CUDA_R_8F_E4M3", - "CUDA_R_64U", - "CUDA_R_64I", - "CUDA_R_4U", - "CUDA_R_4I", - "CUDA_R_16U", - "CUDA_R_16I", - "CUDA_C_64U", - "CUDA_C_64I", - "CUDA_C_4U", - "CUDA_C_4I", - "CUDA_C_16U", - "CUDA_C_16I", - "CUBLAS_OP_CONJG", - "CUBLAS_GEMM_DFALT_TENSOR_OP", - "CUBLAS_GEMM_DEFAULT_TENSOR_OP", - "CUBLAS_GEMM_ALGO9_TENSOR_OP", - "CUBLAS_GEMM_ALGO9", - "CUBLAS_GEMM_ALGO8_TENSOR_OP", - "CUBLAS_GEMM_ALGO8", - "CUBLAS_GEMM_ALGO7_TENSOR_OP", - "CUBLAS_GEMM_ALGO7", - "CUBLAS_GEMM_ALGO6_TENSOR_OP", - "CUBLAS_GEMM_ALGO6", - "CUBLAS_GEMM_ALGO5_TENSOR_OP", - "CUBLAS_GEMM_ALGO5", - "CUBLAS_GEMM_ALGO4_TENSOR_OP", - "CUBLAS_GEMM_ALGO4", - "CUBLAS_GEMM_ALGO3_TENSOR_OP", - "CUBLAS_GEMM_ALGO3", - "CUBLAS_GEMM_ALGO2_TENSOR_OP", - "CUBLAS_GEMM_ALGO23", - "CUBLAS_GEMM_ALGO22", - "CUBLAS_GEMM_ALGO21", - "CUBLAS_GEMM_ALGO20", - "CUBLAS_GEMM_ALGO2", - "CUBLAS_GEMM_ALGO1_TENSOR_OP", - "CUBLAS_GEMM_ALGO19", - "CUBLAS_GEMM_ALGO18", - "CUBLAS_GEMM_ALGO17", - "CUBLAS_GEMM_ALGO16", - "CUBLAS_GEMM_ALGO15_TENSOR_OP", - "CUBLAS_GEMM_ALGO15", - "CUBLAS_GEMM_ALGO14_TENSOR_OP", - "CUBLAS_GEMM_ALGO14", - "CUBLAS_GEMM_ALGO13_TENSOR_OP", - "CUBLAS_GEMM_ALGO13", - "CUBLAS_GEMM_ALGO12_TENSOR_OP", - 
"CUBLAS_GEMM_ALGO12", - "CUBLAS_GEMM_ALGO11_TENSOR_OP", - "CUBLAS_GEMM_ALGO11", - "CUBLAS_GEMM_ALGO10_TENSOR_OP", - "CUBLAS_GEMM_ALGO10", - "CUBLAS_GEMM_ALGO1", - "CUBLAS_GEMM_ALGO0_TENSOR_OP", - "CUBLAS_GEMM_ALGO0", - "CUBLASLT_SEARCH_RESERVED_09", - "CUBLASLT_SEARCH_RESERVED_08", - "CUBLASLT_SEARCH_RESERVED_07", - "CUBLASLT_SEARCH_RESERVED_06", - "CUBLASLT_SEARCH_RESERVED_05", - "CUBLASLT_SEARCH_RESERVED_04", - "CUBLASLT_SEARCH_RESERVED_03", - "CUBLASLT_SEARCH_RESERVED_02", - "CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID", - "CUBLASLT_SEARCH_BEST_FIT", - "CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE", - "CUBLASLT_REDUCTION_SCHEME_NONE", - "CUBLASLT_REDUCTION_SCHEME_MASK", - "CUBLASLT_REDUCTION_SCHEME_INPLACE", - "CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE", - "CUBLASLT_POINTER_MODE_MASK_HOST", - "CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR", - "CUBLASLT_POINTER_MODE_MASK_DEVICE", - "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO", - "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST", - "CUBLASLT_POINTER_MODE_DEVICE_VECTOR", - "CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO", - "CUBLASLT_ORDER_COL4_4R2_8C", - "CUBLASLT_ORDER_COL32_2R_4R4", - "CUBLASLT_ORDER_COL32", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK", - 
"CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F", - "CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET", - "CUBLASLT_MATMUL_TILE_UNDEFINED", - "CUBLASLT_MATMUL_TILE_END", - "CUBLASLT_MATMUL_TILE_96x64", - "CUBLASLT_MATMUL_TILE_96x512", - "CUBLASLT_MATMUL_TILE_96x448", - "CUBLASLT_MATMUL_TILE_96x384", - "CUBLASLT_MATMUL_TILE_96x320", - "CUBLASLT_MATMUL_TILE_96x256", - "CUBLASLT_MATMUL_TILE_96x192", - "CUBLASLT_MATMUL_TILE_96x128", - "CUBLASLT_MATMUL_TILE_8x8", - "CUBLASLT_MATMUL_TILE_8x768", - "CUBLASLT_MATMUL_TILE_8x704", - "CUBLASLT_MATMUL_TILE_8x640", - "CUBLASLT_MATMUL_TILE_8x64", - "CUBLASLT_MATMUL_TILE_8x576", - "CUBLASLT_MATMUL_TILE_8x512", - "CUBLASLT_MATMUL_TILE_8x448", - "CUBLASLT_MATMUL_TILE_8x384", - "CUBLASLT_MATMUL_TILE_8x320", - "CUBLASLT_MATMUL_TILE_8x32", - "CUBLASLT_MATMUL_TILE_8x256", - "CUBLASLT_MATMUL_TILE_8x192", - "CUBLASLT_MATMUL_TILE_8x16", - "CUBLASLT_MATMUL_TILE_8x128", - "CUBLASLT_MATMUL_TILE_88x64", - "CUBLASLT_MATMUL_TILE_88x512", - "CUBLASLT_MATMUL_TILE_88x448", - "CUBLASLT_MATMUL_TILE_88x384", - "CUBLASLT_MATMUL_TILE_88x320", - "CUBLASLT_MATMUL_TILE_88x256", - "CUBLASLT_MATMUL_TILE_88x192", - "CUBLASLT_MATMUL_TILE_88x128", - "CUBLASLT_MATMUL_TILE_80x64", - "CUBLASLT_MATMUL_TILE_80x576", - "CUBLASLT_MATMUL_TILE_80x512", - "CUBLASLT_MATMUL_TILE_80x448", - "CUBLASLT_MATMUL_TILE_80x384", - "CUBLASLT_MATMUL_TILE_80x320", - "CUBLASLT_MATMUL_TILE_80x256", - "CUBLASLT_MATMUL_TILE_80x192", - "CUBLASLT_MATMUL_TILE_80x128", - "CUBLASLT_MATMUL_TILE_768x80", - "CUBLASLT_MATMUL_TILE_768x8", - "CUBLASLT_MATMUL_TILE_768x72", - "CUBLASLT_MATMUL_TILE_768x64", - "CUBLASLT_MATMUL_TILE_768x56", - "CUBLASLT_MATMUL_TILE_768x48", - "CUBLASLT_MATMUL_TILE_768x40", - "CUBLASLT_MATMUL_TILE_768x32", - "CUBLASLT_MATMUL_TILE_768x24", - "CUBLASLT_MATMUL_TILE_768x16", - "CUBLASLT_MATMUL_TILE_760x64", - 
"CUBLASLT_MATMUL_TILE_752x64", - "CUBLASLT_MATMUL_TILE_744x64", - "CUBLASLT_MATMUL_TILE_736x64", - "CUBLASLT_MATMUL_TILE_72x640", - "CUBLASLT_MATMUL_TILE_72x64", - "CUBLASLT_MATMUL_TILE_72x576", - "CUBLASLT_MATMUL_TILE_72x512", - "CUBLASLT_MATMUL_TILE_72x448", - "CUBLASLT_MATMUL_TILE_72x384", - "CUBLASLT_MATMUL_TILE_72x320", - "CUBLASLT_MATMUL_TILE_72x256", - "CUBLASLT_MATMUL_TILE_72x192", - "CUBLASLT_MATMUL_TILE_72x128", - "CUBLASLT_MATMUL_TILE_728x64", - "CUBLASLT_MATMUL_TILE_720x64", - "CUBLASLT_MATMUL_TILE_712x64", - "CUBLASLT_MATMUL_TILE_704x88", - "CUBLASLT_MATMUL_TILE_704x80", - "CUBLASLT_MATMUL_TILE_704x8", - "CUBLASLT_MATMUL_TILE_704x72", - "CUBLASLT_MATMUL_TILE_704x64", - "CUBLASLT_MATMUL_TILE_704x56", - "CUBLASLT_MATMUL_TILE_704x48", - "CUBLASLT_MATMUL_TILE_704x40", - "CUBLASLT_MATMUL_TILE_704x32", - "CUBLASLT_MATMUL_TILE_704x24", - "CUBLASLT_MATMUL_TILE_704x16", - "CUBLASLT_MATMUL_TILE_696x64", - "CUBLASLT_MATMUL_TILE_688x64", - "CUBLASLT_MATMUL_TILE_680x64", - "CUBLASLT_MATMUL_TILE_672x64", - "CUBLASLT_MATMUL_TILE_664x64", - "CUBLASLT_MATMUL_TILE_656x64", - "CUBLASLT_MATMUL_TILE_64x96", - "CUBLASLT_MATMUL_TILE_64x88", - "CUBLASLT_MATMUL_TILE_64x80", - "CUBLASLT_MATMUL_TILE_64x8", - "CUBLASLT_MATMUL_TILE_64x768", - "CUBLASLT_MATMUL_TILE_64x760", - "CUBLASLT_MATMUL_TILE_64x752", - "CUBLASLT_MATMUL_TILE_64x744", - "CUBLASLT_MATMUL_TILE_64x736", - "CUBLASLT_MATMUL_TILE_64x728", - "CUBLASLT_MATMUL_TILE_64x720", - "CUBLASLT_MATMUL_TILE_64x72", - "CUBLASLT_MATMUL_TILE_64x712", - "CUBLASLT_MATMUL_TILE_64x704", - "CUBLASLT_MATMUL_TILE_64x696", - "CUBLASLT_MATMUL_TILE_64x688", - "CUBLASLT_MATMUL_TILE_64x680", - "CUBLASLT_MATMUL_TILE_64x672", - "CUBLASLT_MATMUL_TILE_64x664", - "CUBLASLT_MATMUL_TILE_64x656", - "CUBLASLT_MATMUL_TILE_64x648", - "CUBLASLT_MATMUL_TILE_64x640", - "CUBLASLT_MATMUL_TILE_64x64", - "CUBLASLT_MATMUL_TILE_64x632", - "CUBLASLT_MATMUL_TILE_64x624", - "CUBLASLT_MATMUL_TILE_64x616", - "CUBLASLT_MATMUL_TILE_64x608", - 
"CUBLASLT_MATMUL_TILE_64x600", - "CUBLASLT_MATMUL_TILE_64x592", - "CUBLASLT_MATMUL_TILE_64x584", - "CUBLASLT_MATMUL_TILE_64x576", - "CUBLASLT_MATMUL_TILE_64x568", - "CUBLASLT_MATMUL_TILE_64x560", - "CUBLASLT_MATMUL_TILE_64x56", - "CUBLASLT_MATMUL_TILE_64x552", - "CUBLASLT_MATMUL_TILE_64x544", - "CUBLASLT_MATMUL_TILE_64x536", - "CUBLASLT_MATMUL_TILE_64x528", - "CUBLASLT_MATMUL_TILE_64x520", - "CUBLASLT_MATMUL_TILE_64x512", - "CUBLASLT_MATMUL_TILE_64x504", - "CUBLASLT_MATMUL_TILE_64x496", - "CUBLASLT_MATMUL_TILE_64x488", - "CUBLASLT_MATMUL_TILE_64x480", - "CUBLASLT_MATMUL_TILE_64x48", - "CUBLASLT_MATMUL_TILE_64x472", - "CUBLASLT_MATMUL_TILE_64x464", - "CUBLASLT_MATMUL_TILE_64x456", - "CUBLASLT_MATMUL_TILE_64x448", - "CUBLASLT_MATMUL_TILE_64x440", - "CUBLASLT_MATMUL_TILE_64x432", - "CUBLASLT_MATMUL_TILE_64x424", - "CUBLASLT_MATMUL_TILE_64x416", - "CUBLASLT_MATMUL_TILE_64x408", - "CUBLASLT_MATMUL_TILE_64x400", - "CUBLASLT_MATMUL_TILE_64x40", - "CUBLASLT_MATMUL_TILE_64x392", - "CUBLASLT_MATMUL_TILE_64x384", - "CUBLASLT_MATMUL_TILE_64x376", - "CUBLASLT_MATMUL_TILE_64x368", - "CUBLASLT_MATMUL_TILE_64x360", - "CUBLASLT_MATMUL_TILE_64x352", - "CUBLASLT_MATMUL_TILE_64x344", - "CUBLASLT_MATMUL_TILE_64x336", - "CUBLASLT_MATMUL_TILE_64x328", - "CUBLASLT_MATMUL_TILE_64x320", - "CUBLASLT_MATMUL_TILE_64x32", - "CUBLASLT_MATMUL_TILE_64x312", - "CUBLASLT_MATMUL_TILE_64x304", - "CUBLASLT_MATMUL_TILE_64x296", - "CUBLASLT_MATMUL_TILE_64x288", - "CUBLASLT_MATMUL_TILE_64x280", - "CUBLASLT_MATMUL_TILE_64x272", - "CUBLASLT_MATMUL_TILE_64x264", - "CUBLASLT_MATMUL_TILE_64x256", - "CUBLASLT_MATMUL_TILE_64x248", - "CUBLASLT_MATMUL_TILE_64x240", - "CUBLASLT_MATMUL_TILE_64x24", - "CUBLASLT_MATMUL_TILE_64x232", - "CUBLASLT_MATMUL_TILE_64x224", - "CUBLASLT_MATMUL_TILE_64x216", - "CUBLASLT_MATMUL_TILE_64x208", - "CUBLASLT_MATMUL_TILE_64x200", - "CUBLASLT_MATMUL_TILE_64x192", - "CUBLASLT_MATMUL_TILE_64x184", - "CUBLASLT_MATMUL_TILE_64x176", - "CUBLASLT_MATMUL_TILE_64x168", - 
"CUBLASLT_MATMUL_TILE_64x160", - "CUBLASLT_MATMUL_TILE_64x16", - "CUBLASLT_MATMUL_TILE_64x152", - "CUBLASLT_MATMUL_TILE_64x144", - "CUBLASLT_MATMUL_TILE_64x136", - "CUBLASLT_MATMUL_TILE_64x128", - "CUBLASLT_MATMUL_TILE_64x120", - "CUBLASLT_MATMUL_TILE_64x112", - "CUBLASLT_MATMUL_TILE_64x104", - "CUBLASLT_MATMUL_TILE_648x64", - "CUBLASLT_MATMUL_TILE_640x96", - "CUBLASLT_MATMUL_TILE_640x88", - "CUBLASLT_MATMUL_TILE_640x80", - "CUBLASLT_MATMUL_TILE_640x8", - "CUBLASLT_MATMUL_TILE_640x72", - "CUBLASLT_MATMUL_TILE_640x64", - "CUBLASLT_MATMUL_TILE_640x56", - "CUBLASLT_MATMUL_TILE_640x48", - "CUBLASLT_MATMUL_TILE_640x40", - "CUBLASLT_MATMUL_TILE_640x32", - "CUBLASLT_MATMUL_TILE_640x24", - "CUBLASLT_MATMUL_TILE_640x16", - "CUBLASLT_MATMUL_TILE_632x64", - "CUBLASLT_MATMUL_TILE_624x64", - "CUBLASLT_MATMUL_TILE_616x64", - "CUBLASLT_MATMUL_TILE_608x64", - "CUBLASLT_MATMUL_TILE_600x64", - "CUBLASLT_MATMUL_TILE_592x64", - "CUBLASLT_MATMUL_TILE_584x64", - "CUBLASLT_MATMUL_TILE_576x96", - "CUBLASLT_MATMUL_TILE_576x88", - "CUBLASLT_MATMUL_TILE_576x80", - "CUBLASLT_MATMUL_TILE_576x8", - "CUBLASLT_MATMUL_TILE_576x72", - "CUBLASLT_MATMUL_TILE_576x64", - "CUBLASLT_MATMUL_TILE_576x56", - "CUBLASLT_MATMUL_TILE_576x48", - "CUBLASLT_MATMUL_TILE_576x40", - "CUBLASLT_MATMUL_TILE_576x32", - "CUBLASLT_MATMUL_TILE_576x24", - "CUBLASLT_MATMUL_TILE_576x16", - "CUBLASLT_MATMUL_TILE_576x112", - "CUBLASLT_MATMUL_TILE_576x104", - "CUBLASLT_MATMUL_TILE_56x768", - "CUBLASLT_MATMUL_TILE_56x704", - "CUBLASLT_MATMUL_TILE_56x640", - "CUBLASLT_MATMUL_TILE_56x576", - "CUBLASLT_MATMUL_TILE_56x512", - "CUBLASLT_MATMUL_TILE_56x448", - "CUBLASLT_MATMUL_TILE_56x384", - "CUBLASLT_MATMUL_TILE_56x320", - "CUBLASLT_MATMUL_TILE_56x256", - "CUBLASLT_MATMUL_TILE_56x192", - "CUBLASLT_MATMUL_TILE_56x128", - "CUBLASLT_MATMUL_TILE_568x64", - "CUBLASLT_MATMUL_TILE_560x64", - "CUBLASLT_MATMUL_TILE_552x64", - "CUBLASLT_MATMUL_TILE_544x64", - "CUBLASLT_MATMUL_TILE_536x64", - "CUBLASLT_MATMUL_TILE_528x64", - 
"CUBLASLT_MATMUL_TILE_520x64", - "CUBLASLT_MATMUL_TILE_512x96", - "CUBLASLT_MATMUL_TILE_512x88", - "CUBLASLT_MATMUL_TILE_512x80", - "CUBLASLT_MATMUL_TILE_512x8", - "CUBLASLT_MATMUL_TILE_512x72", - "CUBLASLT_MATMUL_TILE_512x64", - "CUBLASLT_MATMUL_TILE_512x56", - "CUBLASLT_MATMUL_TILE_512x48", - "CUBLASLT_MATMUL_TILE_512x40", - "CUBLASLT_MATMUL_TILE_512x32", - "CUBLASLT_MATMUL_TILE_512x24", - "CUBLASLT_MATMUL_TILE_512x16", - "CUBLASLT_MATMUL_TILE_512x128", - "CUBLASLT_MATMUL_TILE_512x120", - "CUBLASLT_MATMUL_TILE_512x112", - "CUBLASLT_MATMUL_TILE_512x104", - "CUBLASLT_MATMUL_TILE_504x64", - "CUBLASLT_MATMUL_TILE_496x64", - "CUBLASLT_MATMUL_TILE_48x768", - "CUBLASLT_MATMUL_TILE_48x704", - "CUBLASLT_MATMUL_TILE_48x640", - "CUBLASLT_MATMUL_TILE_48x64", - "CUBLASLT_MATMUL_TILE_48x576", - "CUBLASLT_MATMUL_TILE_48x512", - "CUBLASLT_MATMUL_TILE_48x448", - "CUBLASLT_MATMUL_TILE_48x384", - "CUBLASLT_MATMUL_TILE_48x320", - "CUBLASLT_MATMUL_TILE_48x256", - "CUBLASLT_MATMUL_TILE_48x192", - "CUBLASLT_MATMUL_TILE_48x128", - "CUBLASLT_MATMUL_TILE_488x64", - "CUBLASLT_MATMUL_TILE_480x64", - "CUBLASLT_MATMUL_TILE_472x64", - "CUBLASLT_MATMUL_TILE_464x64", - "CUBLASLT_MATMUL_TILE_456x64", - "CUBLASLT_MATMUL_TILE_448x96", - "CUBLASLT_MATMUL_TILE_448x88", - "CUBLASLT_MATMUL_TILE_448x80", - "CUBLASLT_MATMUL_TILE_448x8", - "CUBLASLT_MATMUL_TILE_448x72", - "CUBLASLT_MATMUL_TILE_448x64", - "CUBLASLT_MATMUL_TILE_448x56", - "CUBLASLT_MATMUL_TILE_448x48", - "CUBLASLT_MATMUL_TILE_448x40", - "CUBLASLT_MATMUL_TILE_448x32", - "CUBLASLT_MATMUL_TILE_448x24", - "CUBLASLT_MATMUL_TILE_448x16", - "CUBLASLT_MATMUL_TILE_448x144", - "CUBLASLT_MATMUL_TILE_448x136", - "CUBLASLT_MATMUL_TILE_448x128", - "CUBLASLT_MATMUL_TILE_448x120", - "CUBLASLT_MATMUL_TILE_448x112", - "CUBLASLT_MATMUL_TILE_448x104", - "CUBLASLT_MATMUL_TILE_440x64", - "CUBLASLT_MATMUL_TILE_432x64", - "CUBLASLT_MATMUL_TILE_424x64", - "CUBLASLT_MATMUL_TILE_416x64", - "CUBLASLT_MATMUL_TILE_40x768", - "CUBLASLT_MATMUL_TILE_40x704", - 
"CUBLASLT_MATMUL_TILE_40x640", - "CUBLASLT_MATMUL_TILE_40x64", - "CUBLASLT_MATMUL_TILE_40x576", - "CUBLASLT_MATMUL_TILE_40x512", - "CUBLASLT_MATMUL_TILE_40x448", - "CUBLASLT_MATMUL_TILE_40x384", - "CUBLASLT_MATMUL_TILE_40x320", - "CUBLASLT_MATMUL_TILE_40x256", - "CUBLASLT_MATMUL_TILE_40x192", - "CUBLASLT_MATMUL_TILE_40x128", - "CUBLASLT_MATMUL_TILE_408x64", - "CUBLASLT_MATMUL_TILE_400x64", - "CUBLASLT_MATMUL_TILE_392x64", - "CUBLASLT_MATMUL_TILE_384x96", - "CUBLASLT_MATMUL_TILE_384x88", - "CUBLASLT_MATMUL_TILE_384x80", - "CUBLASLT_MATMUL_TILE_384x8", - "CUBLASLT_MATMUL_TILE_384x72", - "CUBLASLT_MATMUL_TILE_384x64", - "CUBLASLT_MATMUL_TILE_384x56", - "CUBLASLT_MATMUL_TILE_384x48", - "CUBLASLT_MATMUL_TILE_384x40", - "CUBLASLT_MATMUL_TILE_384x32", - "CUBLASLT_MATMUL_TILE_384x24", - "CUBLASLT_MATMUL_TILE_384x168", - "CUBLASLT_MATMUL_TILE_384x160", - "CUBLASLT_MATMUL_TILE_384x16", - "CUBLASLT_MATMUL_TILE_384x152", - "CUBLASLT_MATMUL_TILE_384x144", - "CUBLASLT_MATMUL_TILE_384x136", - "CUBLASLT_MATMUL_TILE_384x128", - "CUBLASLT_MATMUL_TILE_384x120", - "CUBLASLT_MATMUL_TILE_384x112", - "CUBLASLT_MATMUL_TILE_384x104", - "CUBLASLT_MATMUL_TILE_376x64", - "CUBLASLT_MATMUL_TILE_376x128", - "CUBLASLT_MATMUL_TILE_368x64", - "CUBLASLT_MATMUL_TILE_368x128", - "CUBLASLT_MATMUL_TILE_360x64", - "CUBLASLT_MATMUL_TILE_360x128", - "CUBLASLT_MATMUL_TILE_352x64", - "CUBLASLT_MATMUL_TILE_352x128", - "CUBLASLT_MATMUL_TILE_344x64", - "CUBLASLT_MATMUL_TILE_344x128", - "CUBLASLT_MATMUL_TILE_336x64", - "CUBLASLT_MATMUL_TILE_336x128", - "CUBLASLT_MATMUL_TILE_32x8", - "CUBLASLT_MATMUL_TILE_32x768", - "CUBLASLT_MATMUL_TILE_32x704", - "CUBLASLT_MATMUL_TILE_32x640", - "CUBLASLT_MATMUL_TILE_32x64", - "CUBLASLT_MATMUL_TILE_32x576", - "CUBLASLT_MATMUL_TILE_32x512", - "CUBLASLT_MATMUL_TILE_32x448", - "CUBLASLT_MATMUL_TILE_32x384", - "CUBLASLT_MATMUL_TILE_32x320", - "CUBLASLT_MATMUL_TILE_32x32", - "CUBLASLT_MATMUL_TILE_32x256", - "CUBLASLT_MATMUL_TILE_32x192", - "CUBLASLT_MATMUL_TILE_32x16", - 
"CUBLASLT_MATMUL_TILE_32x128", - "CUBLASLT_MATMUL_TILE_328x64", - "CUBLASLT_MATMUL_TILE_328x128", - "CUBLASLT_MATMUL_TILE_320x96", - "CUBLASLT_MATMUL_TILE_320x88", - "CUBLASLT_MATMUL_TILE_320x80", - "CUBLASLT_MATMUL_TILE_320x8", - "CUBLASLT_MATMUL_TILE_320x72", - "CUBLASLT_MATMUL_TILE_320x64", - "CUBLASLT_MATMUL_TILE_320x56", - "CUBLASLT_MATMUL_TILE_320x48", - "CUBLASLT_MATMUL_TILE_320x40", - "CUBLASLT_MATMUL_TILE_320x32", - "CUBLASLT_MATMUL_TILE_320x24", - "CUBLASLT_MATMUL_TILE_320x200", - "CUBLASLT_MATMUL_TILE_320x192", - "CUBLASLT_MATMUL_TILE_320x184", - "CUBLASLT_MATMUL_TILE_320x176", - "CUBLASLT_MATMUL_TILE_320x168", - "CUBLASLT_MATMUL_TILE_320x160", - "CUBLASLT_MATMUL_TILE_320x16", - "CUBLASLT_MATMUL_TILE_320x152", - "CUBLASLT_MATMUL_TILE_320x144", - "CUBLASLT_MATMUL_TILE_320x136", - "CUBLASLT_MATMUL_TILE_320x128", - "CUBLASLT_MATMUL_TILE_320x120", - "CUBLASLT_MATMUL_TILE_320x112", - "CUBLASLT_MATMUL_TILE_320x104", - "CUBLASLT_MATMUL_TILE_312x64", - "CUBLASLT_MATMUL_TILE_312x128", - "CUBLASLT_MATMUL_TILE_304x64", - "CUBLASLT_MATMUL_TILE_304x128", - "CUBLASLT_MATMUL_TILE_296x64", - "CUBLASLT_MATMUL_TILE_296x128", - "CUBLASLT_MATMUL_TILE_288x64", - "CUBLASLT_MATMUL_TILE_288x128", - "CUBLASLT_MATMUL_TILE_280x64", - "CUBLASLT_MATMUL_TILE_280x128", - "CUBLASLT_MATMUL_TILE_272x64", - "CUBLASLT_MATMUL_TILE_272x128", - "CUBLASLT_MATMUL_TILE_264x64", - "CUBLASLT_MATMUL_TILE_264x128", - "CUBLASLT_MATMUL_TILE_256x96", - "CUBLASLT_MATMUL_TILE_256x88", - "CUBLASLT_MATMUL_TILE_256x80", - "CUBLASLT_MATMUL_TILE_256x8", - "CUBLASLT_MATMUL_TILE_256x72", - "CUBLASLT_MATMUL_TILE_256x64", - "CUBLASLT_MATMUL_TILE_256x56", - "CUBLASLT_MATMUL_TILE_256x48", - "CUBLASLT_MATMUL_TILE_256x40", - "CUBLASLT_MATMUL_TILE_256x32", - "CUBLASLT_MATMUL_TILE_256x256", - "CUBLASLT_MATMUL_TILE_256x248", - "CUBLASLT_MATMUL_TILE_256x240", - "CUBLASLT_MATMUL_TILE_256x24", - "CUBLASLT_MATMUL_TILE_256x232", - "CUBLASLT_MATMUL_TILE_256x224", - "CUBLASLT_MATMUL_TILE_256x216", - 
"CUBLASLT_MATMUL_TILE_256x208", - "CUBLASLT_MATMUL_TILE_256x200", - "CUBLASLT_MATMUL_TILE_256x192", - "CUBLASLT_MATMUL_TILE_256x184", - "CUBLASLT_MATMUL_TILE_256x176", - "CUBLASLT_MATMUL_TILE_256x168", - "CUBLASLT_MATMUL_TILE_256x160", - "CUBLASLT_MATMUL_TILE_256x16", - "CUBLASLT_MATMUL_TILE_256x152", - "CUBLASLT_MATMUL_TILE_256x144", - "CUBLASLT_MATMUL_TILE_256x136", - "CUBLASLT_MATMUL_TILE_256x128", - "CUBLASLT_MATMUL_TILE_256x120", - "CUBLASLT_MATMUL_TILE_256x112", - "CUBLASLT_MATMUL_TILE_256x104", - "CUBLASLT_MATMUL_TILE_24x768", - "CUBLASLT_MATMUL_TILE_24x704", - "CUBLASLT_MATMUL_TILE_24x640", - "CUBLASLT_MATMUL_TILE_24x64", - "CUBLASLT_MATMUL_TILE_24x576", - "CUBLASLT_MATMUL_TILE_24x512", - "CUBLASLT_MATMUL_TILE_24x448", - "CUBLASLT_MATMUL_TILE_24x384", - "CUBLASLT_MATMUL_TILE_24x320", - "CUBLASLT_MATMUL_TILE_24x256", - "CUBLASLT_MATMUL_TILE_24x192", - "CUBLASLT_MATMUL_TILE_24x128", - "CUBLASLT_MATMUL_TILE_248x64", - "CUBLASLT_MATMUL_TILE_248x192", - "CUBLASLT_MATMUL_TILE_248x128", - "CUBLASLT_MATMUL_TILE_240x64", - "CUBLASLT_MATMUL_TILE_240x192", - "CUBLASLT_MATMUL_TILE_240x128", - "CUBLASLT_MATMUL_TILE_232x64", - "CUBLASLT_MATMUL_TILE_232x192", - "CUBLASLT_MATMUL_TILE_232x128", - "CUBLASLT_MATMUL_TILE_224x64", - "CUBLASLT_MATMUL_TILE_224x192", - "CUBLASLT_MATMUL_TILE_224x128", - "CUBLASLT_MATMUL_TILE_216x64", - "CUBLASLT_MATMUL_TILE_216x192", - "CUBLASLT_MATMUL_TILE_216x128", - "CUBLASLT_MATMUL_TILE_208x64", - "CUBLASLT_MATMUL_TILE_208x192", - "CUBLASLT_MATMUL_TILE_208x128", - "CUBLASLT_MATMUL_TILE_200x64", - "CUBLASLT_MATMUL_TILE_200x192", - "CUBLASLT_MATMUL_TILE_200x128", - "CUBLASLT_MATMUL_TILE_192x96", - "CUBLASLT_MATMUL_TILE_192x88", - "CUBLASLT_MATMUL_TILE_192x80", - "CUBLASLT_MATMUL_TILE_192x8", - "CUBLASLT_MATMUL_TILE_192x72", - "CUBLASLT_MATMUL_TILE_192x64", - "CUBLASLT_MATMUL_TILE_192x56", - "CUBLASLT_MATMUL_TILE_192x48", - "CUBLASLT_MATMUL_TILE_192x40", - "CUBLASLT_MATMUL_TILE_192x336", - "CUBLASLT_MATMUL_TILE_192x328", - 
"CUBLASLT_MATMUL_TILE_192x320", - "CUBLASLT_MATMUL_TILE_192x32", - "CUBLASLT_MATMUL_TILE_192x312", - "CUBLASLT_MATMUL_TILE_192x304", - "CUBLASLT_MATMUL_TILE_192x296", - "CUBLASLT_MATMUL_TILE_192x288", - "CUBLASLT_MATMUL_TILE_192x280", - "CUBLASLT_MATMUL_TILE_192x272", - "CUBLASLT_MATMUL_TILE_192x264", - "CUBLASLT_MATMUL_TILE_192x256", - "CUBLASLT_MATMUL_TILE_192x248", - "CUBLASLT_MATMUL_TILE_192x240", - "CUBLASLT_MATMUL_TILE_192x24", - "CUBLASLT_MATMUL_TILE_192x232", - "CUBLASLT_MATMUL_TILE_192x224", - "CUBLASLT_MATMUL_TILE_192x216", - "CUBLASLT_MATMUL_TILE_192x208", - "CUBLASLT_MATMUL_TILE_192x200", - "CUBLASLT_MATMUL_TILE_192x192", - "CUBLASLT_MATMUL_TILE_192x184", - "CUBLASLT_MATMUL_TILE_192x176", - "CUBLASLT_MATMUL_TILE_192x168", - "CUBLASLT_MATMUL_TILE_192x160", - "CUBLASLT_MATMUL_TILE_192x16", - "CUBLASLT_MATMUL_TILE_192x152", - "CUBLASLT_MATMUL_TILE_192x144", - "CUBLASLT_MATMUL_TILE_192x136", - "CUBLASLT_MATMUL_TILE_192x128", - "CUBLASLT_MATMUL_TILE_192x120", - "CUBLASLT_MATMUL_TILE_192x112", - "CUBLASLT_MATMUL_TILE_192x104", - "CUBLASLT_MATMUL_TILE_184x64", - "CUBLASLT_MATMUL_TILE_184x256", - "CUBLASLT_MATMUL_TILE_184x192", - "CUBLASLT_MATMUL_TILE_184x128", - "CUBLASLT_MATMUL_TILE_176x64", - "CUBLASLT_MATMUL_TILE_176x256", - "CUBLASLT_MATMUL_TILE_176x192", - "CUBLASLT_MATMUL_TILE_176x128", - "CUBLASLT_MATMUL_TILE_16x8", - "CUBLASLT_MATMUL_TILE_16x768", - "CUBLASLT_MATMUL_TILE_16x704", - "CUBLASLT_MATMUL_TILE_16x640", - "CUBLASLT_MATMUL_TILE_16x64", - "CUBLASLT_MATMUL_TILE_16x576", - "CUBLASLT_MATMUL_TILE_16x512", - "CUBLASLT_MATMUL_TILE_16x448", - "CUBLASLT_MATMUL_TILE_16x384", - "CUBLASLT_MATMUL_TILE_16x320", - "CUBLASLT_MATMUL_TILE_16x32", - "CUBLASLT_MATMUL_TILE_16x256", - "CUBLASLT_MATMUL_TILE_16x192", - "CUBLASLT_MATMUL_TILE_16x16", - "CUBLASLT_MATMUL_TILE_16x128", - "CUBLASLT_MATMUL_TILE_168x64", - "CUBLASLT_MATMUL_TILE_168x256", - "CUBLASLT_MATMUL_TILE_168x192", - "CUBLASLT_MATMUL_TILE_168x128", - "CUBLASLT_MATMUL_TILE_160x64", - 
"CUBLASLT_MATMUL_TILE_160x256", - "CUBLASLT_MATMUL_TILE_160x192", - "CUBLASLT_MATMUL_TILE_160x128", - "CUBLASLT_MATMUL_TILE_152x64", - "CUBLASLT_MATMUL_TILE_152x320", - "CUBLASLT_MATMUL_TILE_152x256", - "CUBLASLT_MATMUL_TILE_152x192", - "CUBLASLT_MATMUL_TILE_152x128", - "CUBLASLT_MATMUL_TILE_144x64", - "CUBLASLT_MATMUL_TILE_144x320", - "CUBLASLT_MATMUL_TILE_144x256", - "CUBLASLT_MATMUL_TILE_144x192", - "CUBLASLT_MATMUL_TILE_144x128", - "CUBLASLT_MATMUL_TILE_136x64", - "CUBLASLT_MATMUL_TILE_136x320", - "CUBLASLT_MATMUL_TILE_136x256", - "CUBLASLT_MATMUL_TILE_136x192", - "CUBLASLT_MATMUL_TILE_136x128", - "CUBLASLT_MATMUL_TILE_128x96", - "CUBLASLT_MATMUL_TILE_128x88", - "CUBLASLT_MATMUL_TILE_128x80", - "CUBLASLT_MATMUL_TILE_128x8", - "CUBLASLT_MATMUL_TILE_128x72", - "CUBLASLT_MATMUL_TILE_128x64", - "CUBLASLT_MATMUL_TILE_128x56", - "CUBLASLT_MATMUL_TILE_128x512", - "CUBLASLT_MATMUL_TILE_128x504", - "CUBLASLT_MATMUL_TILE_128x496", - "CUBLASLT_MATMUL_TILE_128x488", - "CUBLASLT_MATMUL_TILE_128x480", - "CUBLASLT_MATMUL_TILE_128x48", - "CUBLASLT_MATMUL_TILE_128x472", - "CUBLASLT_MATMUL_TILE_128x464", - "CUBLASLT_MATMUL_TILE_128x456", - "CUBLASLT_MATMUL_TILE_128x448", - "CUBLASLT_MATMUL_TILE_128x440", - "CUBLASLT_MATMUL_TILE_128x432", - "CUBLASLT_MATMUL_TILE_128x424", - "CUBLASLT_MATMUL_TILE_128x416", - "CUBLASLT_MATMUL_TILE_128x408", - "CUBLASLT_MATMUL_TILE_128x400", - "CUBLASLT_MATMUL_TILE_128x40", - "CUBLASLT_MATMUL_TILE_128x392", - "CUBLASLT_MATMUL_TILE_128x384", - "CUBLASLT_MATMUL_TILE_128x376", - "CUBLASLT_MATMUL_TILE_128x368", - "CUBLASLT_MATMUL_TILE_128x360", - "CUBLASLT_MATMUL_TILE_128x352", - "CUBLASLT_MATMUL_TILE_128x344", - "CUBLASLT_MATMUL_TILE_128x336", - "CUBLASLT_MATMUL_TILE_128x328", - "CUBLASLT_MATMUL_TILE_128x320", - "CUBLASLT_MATMUL_TILE_128x32", - "CUBLASLT_MATMUL_TILE_128x312", - "CUBLASLT_MATMUL_TILE_128x304", - "CUBLASLT_MATMUL_TILE_128x296", - "CUBLASLT_MATMUL_TILE_128x288", - "CUBLASLT_MATMUL_TILE_128x280", - "CUBLASLT_MATMUL_TILE_128x272", - 
"CUBLASLT_MATMUL_TILE_128x264", - "CUBLASLT_MATMUL_TILE_128x256", - "CUBLASLT_MATMUL_TILE_128x248", - "CUBLASLT_MATMUL_TILE_128x240", - "CUBLASLT_MATMUL_TILE_128x24", - "CUBLASLT_MATMUL_TILE_128x232", - "CUBLASLT_MATMUL_TILE_128x224", - "CUBLASLT_MATMUL_TILE_128x216", - "CUBLASLT_MATMUL_TILE_128x208", - "CUBLASLT_MATMUL_TILE_128x200", - "CUBLASLT_MATMUL_TILE_128x192", - "CUBLASLT_MATMUL_TILE_128x184", - "CUBLASLT_MATMUL_TILE_128x176", - "CUBLASLT_MATMUL_TILE_128x168", - "CUBLASLT_MATMUL_TILE_128x160", - "CUBLASLT_MATMUL_TILE_128x16", - "CUBLASLT_MATMUL_TILE_128x152", - "CUBLASLT_MATMUL_TILE_128x144", - "CUBLASLT_MATMUL_TILE_128x136", - "CUBLASLT_MATMUL_TILE_128x128", - "CUBLASLT_MATMUL_TILE_128x120", - "CUBLASLT_MATMUL_TILE_128x112", - "CUBLASLT_MATMUL_TILE_128x104", - "CUBLASLT_MATMUL_TILE_120x64", - "CUBLASLT_MATMUL_TILE_120x384", - "CUBLASLT_MATMUL_TILE_120x320", - "CUBLASLT_MATMUL_TILE_120x256", - "CUBLASLT_MATMUL_TILE_120x192", - "CUBLASLT_MATMUL_TILE_120x128", - "CUBLASLT_MATMUL_TILE_112x64", - "CUBLASLT_MATMUL_TILE_112x384", - "CUBLASLT_MATMUL_TILE_112x320", - "CUBLASLT_MATMUL_TILE_112x256", - "CUBLASLT_MATMUL_TILE_112x192", - "CUBLASLT_MATMUL_TILE_112x128", - "CUBLASLT_MATMUL_TILE_104x64", - "CUBLASLT_MATMUL_TILE_104x448", - "CUBLASLT_MATMUL_TILE_104x384", - "CUBLASLT_MATMUL_TILE_104x320", - "CUBLASLT_MATMUL_TILE_104x256", - "CUBLASLT_MATMUL_TILE_104x192", - "CUBLASLT_MATMUL_TILE_104x128", - "CUBLASLT_MATMUL_STAGES_UNDEFINED", - "CUBLASLT_MATMUL_STAGES_END", - "CUBLASLT_MATMUL_STAGES_8xAUTO", - "CUBLASLT_MATMUL_STAGES_8x5", - "CUBLASLT_MATMUL_STAGES_8x4", - "CUBLASLT_MATMUL_STAGES_8x3", - "CUBLASLT_MATMUL_STAGES_64xAUTO", - "CUBLASLT_MATMUL_STAGES_64x6", - "CUBLASLT_MATMUL_STAGES_64x5", - "CUBLASLT_MATMUL_STAGES_64x4", - "CUBLASLT_MATMUL_STAGES_64x3", - "CUBLASLT_MATMUL_STAGES_64x2", - "CUBLASLT_MATMUL_STAGES_64x1", - "CUBLASLT_MATMUL_STAGES_32xAUTO", - "CUBLASLT_MATMUL_STAGES_32x6", - "CUBLASLT_MATMUL_STAGES_32x5", - "CUBLASLT_MATMUL_STAGES_32x4", - 
"CUBLASLT_MATMUL_STAGES_32x3", - "CUBLASLT_MATMUL_STAGES_32x2", - "CUBLASLT_MATMUL_STAGES_32x10", - "CUBLASLT_MATMUL_STAGES_32x1", - "CUBLASLT_MATMUL_STAGES_16xAUTO", - "CUBLASLT_MATMUL_STAGES_16x6", - "CUBLASLT_MATMUL_STAGES_16x5", - "CUBLASLT_MATMUL_STAGES_16x4", - "CUBLASLT_MATMUL_STAGES_16x3", - "CUBLASLT_MATMUL_STAGES_16x2", - "CUBLASLT_MATMUL_STAGES_16x10", - "CUBLASLT_MATMUL_STAGES_16x1", - "CUBLASLT_MATMUL_STAGES_128xAUTO", - "CUBLASLT_MATMUL_STAGES_128x6", - "CUBLASLT_MATMUL_STAGES_128x5", - "CUBLASLT_MATMUL_STAGES_128x4", - "CUBLASLT_MATMUL_STAGES_128x3", - "CUBLASLT_MATMUL_STAGES_128x2", - "CUBLASLT_MATMUL_STAGES_128x1", - "CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES", - "CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT", - "CUBLASLT_MATMUL_PREF_IMPL_MASK", - "CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA884", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA1688", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA1684", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA16816", - "CUBLASLT_MATMUL_INNER_SHAPE_END", - "CUBLASLT_MATMUL_DESC_TRANSC", - "CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET", - "CUBLASLT_MATMUL_DESC_SCALE_TYPE", - "CUBLASLT_MATMUL_DESC_FILL_MODE", - "CUBLASLT_MATMUL_DESC_FAST_ACCUM", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER", - "CUBLASLT_MATMUL_DESC_COMPUTE_TYPE", - "CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_OUT_COUNTERS_POINTER", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_ROWS", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_COLS", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_IN_COUNTERS_POINTER", - "CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE", - "CUBLASLT_EPILOGUE_RELU_AUX_BIAS", - "CUBLASLT_EPILOGUE_RELU_AUX", - "CUBLASLT_EPILOGUE_DRELU_BGRAD", - "CUBLASLT_EPILOGUE_DRELU", - 
"CUBLASLT_CLUSTER_SHAPE_END", - "CUBLASLT_CLUSTER_SHAPE_AUTO", - "CUBLASLT_CLUSTER_SHAPE_9x1x1", - "CUBLASLT_CLUSTER_SHAPE_8x2x1", - "CUBLASLT_CLUSTER_SHAPE_8x1x1", - "CUBLASLT_CLUSTER_SHAPE_7x2x1", - "CUBLASLT_CLUSTER_SHAPE_7x1x1", - "CUBLASLT_CLUSTER_SHAPE_6x2x1", - "CUBLASLT_CLUSTER_SHAPE_6x1x1", - "CUBLASLT_CLUSTER_SHAPE_5x3x1", - "CUBLASLT_CLUSTER_SHAPE_5x2x1", - "CUBLASLT_CLUSTER_SHAPE_5x1x1", - "CUBLASLT_CLUSTER_SHAPE_4x4x1", - "CUBLASLT_CLUSTER_SHAPE_4x3x1", - "CUBLASLT_CLUSTER_SHAPE_4x2x1", - "CUBLASLT_CLUSTER_SHAPE_4x1x1", - "CUBLASLT_CLUSTER_SHAPE_3x5x1", - "CUBLASLT_CLUSTER_SHAPE_3x4x1", - "CUBLASLT_CLUSTER_SHAPE_3x3x1", - "CUBLASLT_CLUSTER_SHAPE_3x2x1", - "CUBLASLT_CLUSTER_SHAPE_3x1x1", - "CUBLASLT_CLUSTER_SHAPE_2x8x1", - "CUBLASLT_CLUSTER_SHAPE_2x7x1", - "CUBLASLT_CLUSTER_SHAPE_2x6x1", - "CUBLASLT_CLUSTER_SHAPE_2x5x1", - "CUBLASLT_CLUSTER_SHAPE_2x4x1", - "CUBLASLT_CLUSTER_SHAPE_2x3x1", - "CUBLASLT_CLUSTER_SHAPE_2x2x1", - "CUBLASLT_CLUSTER_SHAPE_2x1x1", - "CUBLASLT_CLUSTER_SHAPE_1x9x1", - "CUBLASLT_CLUSTER_SHAPE_1x8x1", - "CUBLASLT_CLUSTER_SHAPE_1x7x1", - "CUBLASLT_CLUSTER_SHAPE_1x6x1", - "CUBLASLT_CLUSTER_SHAPE_1x5x1", - "CUBLASLT_CLUSTER_SHAPE_1x4x1", - "CUBLASLT_CLUSTER_SHAPE_1x3x1", - "CUBLASLT_CLUSTER_SHAPE_1x2x1", - "CUBLASLT_CLUSTER_SHAPE_1x1x1", - "CUBLASLT_CLUSTER_SHAPE_1x16x1", - "CUBLASLT_CLUSTER_SHAPE_1x15x1", - "CUBLASLT_CLUSTER_SHAPE_1x14x1", - "CUBLASLT_CLUSTER_SHAPE_1x13x1", - "CUBLASLT_CLUSTER_SHAPE_1x12x1", - "CUBLASLT_CLUSTER_SHAPE_1x11x1", - "CUBLASLT_CLUSTER_SHAPE_1x10x1", - "CUBLASLT_CLUSTER_SHAPE_16x1x1", - "CUBLASLT_CLUSTER_SHAPE_15x1x1", - "CUBLASLT_CLUSTER_SHAPE_14x1x1", - "CUBLASLT_CLUSTER_SHAPE_13x1x1", - "CUBLASLT_CLUSTER_SHAPE_12x1x1", - "CUBLASLT_CLUSTER_SHAPE_11x1x1", - "CUBLASLT_CLUSTER_SHAPE_10x1x1", - "CUBLASLT_ALGO_CONFIG_TILE_ID", - "CUBLASLT_ALGO_CONFIG_STAGES_ID", - "CUBLASLT_ALGO_CONFIG_SPLITK_NUM", - "CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME", - "CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID", - "CUBLASLT_ALGO_CONFIG_ID", - 
"CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION", - "CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING", - "CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID", - "CUBLASLT_ALGO_CAP_UPLO_SUPPORT", - "CUBLASLT_ALGO_CAP_TILE_IDS", - "CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT", - "CUBLASLT_ALGO_CAP_STAGES_IDS", - "CUBLASLT_ALGO_CAP_SPLITK_SUPPORT", - "CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK", - "CUBLASLT_ALGO_CAP_POINTER_MODE_MASK", - "CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT", - "CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES", - "CUBLASLT_ALGO_CAP_LD_NEGATIVE", - "CUBLASLT_ALGO_CAP_EPILOGUE_MASK", - "CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX", - "CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER", - "CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT", - "CUBLASLT_ALGO_CAP_ATOMIC_SYNC" - ) + while (my($func) = each %HipOnlyUnsupportedFunctions) { my $mt = m/($func)/g; if ($mt) { @@ -11331,1572 +11337,1574 @@ sub warnHipOnlyUnsupportedFunctions { return $k; } +@RocOnlyUnsupportedFunctions = ( + "cutensorWriteKernelCacheToFile", + "cutensorTensorDescriptor", + "cutensorReduce", + "cutensorReadKernelCacheFromFile", + "cutensorPlanPreferenceSetAttribute", + "cutensorPlanPreferenceAttribute_t", + "cutensorPlanGetAttribute", + "cutensorPlanAttribute_t", + "cutensorPlan", + "cutensorPermute", + "cutensorOperationDescriptorSetAttribute", + "cutensorOperationDescriptorGetAttribute", + "cutensorOperationDescriptorAttribute_t", + "cutensorMgTensorDescriptor_t", + "cutensorMgTensorDescriptor_s", + "cutensorMgHostDevice_t", + "cutensorMgHandle_t", + "cutensorMgHandle_s", + "cutensorMgDestroyTensorDescriptor", + "cutensorMgDestroyCopyPlan", + "cutensorMgDestroyCopyDescriptor", + "cutensorMgDestroyContractionPlan", + "cutensorMgDestroyContractionFind", + "cutensorMgDestroyContractionDescriptor", + "cutensorMgDestroy", + "cutensorMgCreateTensorDescriptor", + 
"cutensorMgCreateCopyPlan", + "cutensorMgCreateCopyDescriptor", + "cutensorMgCreateContractionPlan", + "cutensorMgCreateContractionFind", + "cutensorMgCreateContractionDescriptor", + "cutensorMgCreate", + "cutensorMgCopyPlan_t", + "cutensorMgCopyPlan_s", + "cutensorMgCopyGetWorkspace", + "cutensorMgCopyDescriptor_t", + "cutensorMgCopyDescriptor_s", + "cutensorMgCopy", + "cutensorMgContractionPlan_t", + "cutensorMgContractionPlan_s", + "cutensorMgContractionGetWorkspace", + "cutensorMgContractionFind_t", + "cutensorMgContractionFind_s", + "cutensorMgContractionFindSetAttribute", + "cutensorMgContractionFindAttribute_t", + "cutensorMgContractionDescriptor_t", + "cutensorMgContractionDescriptor_s", + "cutensorMgContraction", + "cutensorMgAlgo_t", + "cutensorJitMode_t", + "cutensorHandleWritePlanCacheToFile", + "cutensorHandleResizePlanCache", + "cutensorHandleReadPlanCacheFromFile", + "cutensorHandle", + "cutensorGetVersion", + "cutensorEstimateWorkspaceSize", + "cutensorElementwiseTrinaryExecute", + "cutensorElementwiseBinaryExecute", + "cutensorDestroyTensorDescriptor", + "cutensorDestroyPlanPreference", + "cutensorDestroyPlan", + "cutensorDestroyOperationDescriptor", + "cutensorCreateTensorDescriptor", + "cutensorCreateReduction", + "cutensorCreatePlanPreference", + "cutensorCreatePlan", + "cutensorCreatePermutation", + "cutensorCreateElementwiseTrinary", + "cutensorCreateElementwiseBinary", + "cutensorCreateContraction", + "cutensorCacheMode_t", + "cutensorAutotuneMode_t", + "cusparseZhybsv_solve", + "cusparseZhybsv_analysis", + "cusparseZhyb2dense", + "cusparseZhyb2csr", + "cusparseZhyb2csc", + "cusparseZgtsv_nopivot", + "cusparseZgtsvStridedBatch", + "cusparseZgtsv", + "cusparseZgemmi", + "cusparseZgebsr2gebsr_bufferSizeExt", + "cusparseZgebsr2gebsc_bufferSizeExt", + "cusparseZdense2hyb", + "cusparseZcsru2csr_bufferSizeExt", + "cusparseZcsru2csr", + "cusparseZcsrsv_solve", + "cusparseZcsrsv_analysis", + "cusparseZcsrsm_solve", + "cusparseZcsrsm_analysis", + 
"cusparseZcsrmv_mp", + "cusparseZcsrilu0", + "cusparseZcsric0", + "cusparseZcsrgemm", + "cusparseZcsrgeam2_bufferSizeExt", + "cusparseZcsr2gebsr_bufferSizeExt", + "cusparseZcsr2csru", + "cusparseZcsr2csc", + "cusparseZcsc2hyb", + "cusparseZbsrsm2_bufferSizeExt", + "cusparseZbsrilu02_bufferSizeExt", + "cusparseZbsric02_bufferSizeExt", + "cusparseXgebsr2csr", + "cusparseXcsrgemmNnz", + "cusparseSpSV_updateMatrix", + "cusparseSpSV_solve", + "cusparseSpSV_destroyDescr", + "cusparseSpSV_createDescr", + "cusparseSpSV_analysis", + "cusparseSpSVUpdate_t", + "cusparseSpSVDescr_t", + "cusparseSpSVDescr", + "cusparseSpSM_updateMatrix", + "cusparseSpSM_destroyDescr", + "cusparseSpSM_createDescr", + "cusparseSpSM_bufferSize", + "cusparseSpSMUpdate_t", + "cusparseSpSMDescr_t", + "cusparseSpSMDescr", + "cusparseSpMatSetNumBatches", + "cusparseSpMatGetNumBatches", + "cusparseSpMV_preprocess", + "cusparseSpMMOp_destroyPlan", + "cusparseSpMMOp_createPlan", + "cusparseSpMMOpPlan_t", + "cusparseSpMMOpPlan", + "cusparseSpMMOpAlg_t", + "cusparseSpMMOp", + "cusparseSpGEMMreuse_workEstimation", + "cusparseSpGEMMreuse_nnz", + "cusparseSpGEMMreuse_copy", + "cusparseSpGEMMreuse_compute", + "cusparseSpGEMM_workEstimation", + "cusparseSpGEMM_getNumProducts", + "cusparseSpGEMM_estimateMemory", + "cusparseSpGEMM_destroyDescr", + "cusparseSpGEMM_createDescr", + "cusparseSpGEMM_copy", + "cusparseSpGEMM_compute", + "cusparseSpGEMMDescr_t", + "cusparseSpGEMMDescr", + "cusparseSolveAnalysisInfo_t", + "cusparseSolveAnalysisInfo", + "cusparseSideMode_t", + "cusparseShybsv_solve", + "cusparseShybsv_analysis", + "cusparseShyb2dense", + "cusparseShyb2csr", + "cusparseShyb2csc", + "cusparseSgtsv_nopivot", + "cusparseSgtsvStridedBatch", + "cusparseSgtsv", + "cusparseSgemmi", + "cusparseSgebsr2gebsr_bufferSizeExt", + "cusparseSgebsr2gebsc_bufferSizeExt", + "cusparseSdense2hyb", + "cusparseScsru2csr_bufferSizeExt", + "cusparseScsru2csr", + "cusparseScsrsv_solve", + "cusparseScsrsv_analysis", + 
"cusparseScsrsm_solve", + "cusparseScsrsm_analysis", + "cusparseScsrmv_mp", + "cusparseScsrilu0", + "cusparseScsric0", + "cusparseScsrgemm", + "cusparseScsrgeam2_bufferSizeExt", + "cusparseScsr2gebsr_bufferSizeExt", + "cusparseScsr2csru", + "cusparseScsr2csc", + "cusparseScsc2hyb", + "cusparseSbsrsm2_bufferSizeExt", + "cusparseSbsrilu02_bufferSizeExt", + "cusparseSbsric02_bufferSizeExt", + "cusparseLoggerSetMask", + "cusparseLoggerSetLevel", + "cusparseLoggerSetFile", + "cusparseLoggerSetCallback", + "cusparseLoggerOpenFile", + "cusparseLoggerForceDisable", + "cusparseLoggerCallback_t", + "cusparseHpruneDense2csr_bufferSizeExt", + "cusparseHpruneDense2csrNnzByPercentage", + "cusparseHpruneDense2csrNnz", + "cusparseHpruneDense2csrByPercentage_bufferSizeExt", + "cusparseHpruneDense2csrByPercentage", + "cusparseHpruneDense2csr", + "cusparseHpruneCsr2csr_bufferSizeExt", + "cusparseHpruneCsr2csrNnzByPercentage", + "cusparseHpruneCsr2csrNnz", + "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", + "cusparseHpruneCsr2csrByPercentage", + "cusparseHpruneCsr2csr", + "cusparseGetLevelInfo", + "cusparseDhybsv_solve", + "cusparseDhybsv_analysis", + "cusparseDhyb2dense", + "cusparseDhyb2csr", + "cusparseDhyb2csc", + "cusparseDgtsv_nopivot", + "cusparseDgtsvStridedBatch", + "cusparseDgtsv", + "cusparseDgemmi", + "cusparseDgebsr2gebsr_bufferSizeExt", + "cusparseDgebsr2gebsc_bufferSizeExt", + "cusparseDestroySolveAnalysisInfo", + "cusparseDestroyCsru2csrInfo", + "cusparseDenseToSparse_convert", + "cusparseDdense2hyb", + "cusparseDcsru2csr_bufferSizeExt", + "cusparseDcsru2csr", + "cusparseDcsrsv_solve", + "cusparseDcsrsv_analysis", + "cusparseDcsrsm_solve", + "cusparseDcsrsm_analysis", + "cusparseDcsrmv_mp", + "cusparseDcsrilu0", + "cusparseDcsric0", + "cusparseDcsrgemm", + "cusparseDcsrgeam2_bufferSizeExt", + "cusparseDcsr2gebsr_bufferSizeExt", + "cusparseDcsr2csru", + "cusparseDcsr2csc", + "cusparseDcsc2hyb", + "cusparseDbsrsm2_bufferSizeExt", + "cusparseDbsrilu02_bufferSizeExt", + 
"cusparseDbsric02_bufferSizeExt", + "cusparseCsrsv_solveEx", + "cusparseCsrsv_analysisEx", + "cusparseCsrmvEx_bufferSize", + "cusparseCsrmvEx", + "cusparseCsrilu0Ex", + "cusparseCsr2cscEx2", + "cusparseCsr2cscEx", + "cusparseCsr2CscAlg_t", + "cusparseCreateSolveAnalysisInfo", + "cusparseCreateSlicedEll", + "cusparseCreateCsru2csrInfo", + "cusparseCreateConstSlicedEll", + "cusparseCreateConstBsr", + "cusparseCreateBsr", + "cusparseConstrainedGeMM_bufferSize", + "cusparseConstrainedGeMM", + "cusparseColorAlg_t", + "cusparseChybsv_solve", + "cusparseChybsv_analysis", + "cusparseChyb2dense", + "cusparseChyb2csr", + "cusparseChyb2csc", + "cusparseCgtsv_nopivot", + "cusparseCgtsvStridedBatch", + "cusparseCgtsv", + "cusparseCgemmi", + "cusparseCgebsr2gebsr_bufferSizeExt", + "cusparseCgebsr2gebsc_bufferSizeExt", + "cusparseCdense2hyb", + "cusparseCcsru2csr_bufferSizeExt", + "cusparseCcsru2csr", + "cusparseCcsrsv_solve", + "cusparseCcsrsv_analysis", + "cusparseCcsrsm_solve", + "cusparseCcsrsm_analysis", + "cusparseCcsrmv_mp", + "cusparseCcsrilu0", + "cusparseCcsric0", + "cusparseCcsrgemm", + "cusparseCcsrgeam2_bufferSizeExt", + "cusparseCcsr2gebsr_bufferSizeExt", + "cusparseCcsr2csru", + "cusparseCcsr2csc", + "cusparseCcsc2hyb", + "cusparseCbsrsm2_bufferSizeExt", + "cusparseCbsrilu02_bufferSizeExt", + "cusparseCbsric02_bufferSizeExt", + "cusparseBsrSetStridedBatch", + "cusparseAlgMode_t", + "curand_mtgp32_specific", + "curand_mtgp32_single_specific", + "curand_mtgp32_single", + "curand_Philox4x32_10", + "curandState_t", + "curandStateXORWOW_t", + "curandStateXORWOW", + "curandState", + "curandMethod_t", + "curandMethod", + "curandHistogramM2_t", + "curandHistogramM2_st", + "curandHistogramM2V_t", + "curandHistogramM2V_st", + "curandHistogramM2K_t", + "curandHistogramM2K_st", + "curandGetProperty", + "curandDistribution_t", + "curandDistribution_st", + "curandDistributionShift_t", + "curandDistributionShift_st", + "curandDistributionM2Shift_t", + 
"curandDistributionM2Shift_st", + "curandDirectionVectors64_t", + "curandDirectionVectors32_t", + "cublasZtrttp", + "cublasZtpttr", + "cublasZmatinvBatched", + "cublasZgetrsBatched", + "cublasZgetriBatched", + "cublasZgetrfBatched", + "cublasZgeqrfBatched", + "cublasZgemm3m_64", + "cublasZgemm3m", + "cublasZgelsBatched", + "cublasXerbla", + "cublasUint8gemmBias", + "cublasSwapEx_64", + "cublasSwapEx", + "cublasStrttp", + "cublasStpttr", + "cublasSmatinvBatched", + "cublasShutdown", + "cublasSgetrsBatched", + "cublasSgetriBatched", + "cublasSgetrfBatched", + "cublasSgeqrfBatched", + "cublasSgemmGroupedBatched_64", + "cublasSgemmGroupedBatched", + "cublasSgemmEx_64", + "cublasSgemmEx", + "cublasSgelsBatched", + "cublasSetVector_64", + "cublasSetVectorAsync_64", + "cublasSetSmCountTarget", + "cublasSetMatrix_64", + "cublasSetMatrixAsync_64", + "cublasSetLoggerCallback", + "cublasSetKernelStream", + "cublasRotmgEx", + "cublasRotmEx_64", + "cublasRotmEx", + "cublasRotgEx", + "cublasMigrateComputeType", + "cublasLtReductionScheme_t", + "cublasLtPointerMode_t", + "cublasLtPointerModeMask_t", + "cublasLtOrder_t", + "cublasLtNumericalImplFlags_t", + "cublasLtMatrixTransformDescSetAttribute", + "cublasLtMatrixTransformDescInit", + "cublasLtMatrixTransformDescGetAttribute", + "cublasLtMatrixTransformDescDestroy", + "cublasLtMatrixTransformDescCreate", + "cublasLtMatrixTransformDescAttributes_t", + "cublasLtMatrixTransform", + "cublasLtMatrixLayoutSetAttribute", + "cublasLtMatrixLayoutInit", + "cublasLtMatrixLayoutGetAttribute", + "cublasLtMatrixLayoutDestroy", + "cublasLtMatrixLayoutCreate", + "cublasLtMatrixLayoutAttribute_t", + "cublasLtMatmulTile_t", + "cublasLtMatmulStages_t", + "cublasLtMatmulSearch_t", + "cublasLtMatmulPreferenceSetAttribute", + "cublasLtMatmulPreferenceInit", + "cublasLtMatmulPreferenceGetAttribute", + "cublasLtMatmulPreferenceDestroy", + "cublasLtMatmulPreferenceCreate", + "cublasLtMatmulPreferenceAttributes_t", + "cublasLtMatmulInnerShape_t", + 
"cublasLtMatmulHeuristicResult_t", + "cublasLtMatmulDescSetAttribute", + "cublasLtMatmulDescInit", + "cublasLtMatmulDescGetAttribute", + "cublasLtMatmulDescDestroy", + "cublasLtMatmulDescCreate", + "cublasLtMatmulDescAttributes_t", + "cublasLtMatmulAlgoInit", + "cublasLtMatmulAlgoGetIds", + "cublasLtMatmulAlgoGetHeuristic", + "cublasLtMatmulAlgoConfigSetAttribute", + "cublasLtMatmulAlgoConfigGetAttribute", + "cublasLtMatmulAlgoConfigAttributes_t", + "cublasLtMatmulAlgoCheck", + "cublasLtMatmulAlgoCapGetAttribute", + "cublasLtMatmulAlgoCapAttributes_t", + "cublasLtMatmul", + "cublasLtLoggerSetMask", + "cublasLtLoggerSetLevel", + "cublasLtLoggerSetFile", + "cublasLtLoggerSetCallback", + "cublasLtLoggerOpenFile", + "cublasLtLoggerForceDisable", + "cublasLtLoggerCallback_t", + "cublasLtHeuristicsCacheSetCapacity", + "cublasLtHeuristicsCacheGetCapacity", + "cublasLtGetVersion", + "cublasLtGetStatusString", + "cublasLtGetStatusName", + "cublasLtGetProperty", + "cublasLtGetCudartVersion", + "cublasLtEpilogue_t", + "cublasLtDisableCpuInstructionsSetMask", + "cublasLtContext", + "cublasLtClusterShape_t", + "cublasLoggerConfigure", + "cublasLogCallback", + "cublasIaminEx_64", + "cublasIaminEx", + "cublasIamaxEx_64", + "cublasIamaxEx", + "cublasGetVersion_v2", + "cublasGetVersion", + "cublasGetVector_64", + "cublasGetVectorAsync_64", + "cublasGetStatusName", + "cublasGetSmCountTarget", + "cublasGetProperty", + "cublasGetMatrix_64", + "cublasGetMatrixAsync_64", + "cublasGetLoggerCallback", + "cublasGetError", + "cublasGetCudartVersion", + "cublasGemmStridedBatchedEx_64", + "cublasGemmGroupedBatchedEx_64", + "cublasGemmGroupedBatchedEx", + "cublasGemmEx_64", + "cublasGemmBatchedEx_64", + "cublasFree", + "cublasDtrttp", + "cublasDtpttr", + "cublasDmatinvBatched", + "cublasDgetrsBatched", + "cublasDgetriBatched", + "cublasDgetrfBatched", + "cublasDgeqrfBatched", + "cublasDgemmGroupedBatched_64", + "cublasDgemmGroupedBatched", + "cublasDgelsBatched", + "cublasCtrttp", + 
"cublasCtpttr", + "cublasCsyrkEx_64", + "cublasCsyrkEx", + "cublasCsyrk3mEx_64", + "cublasCsyrk3mEx", + "cublasCopyEx_64", + "cublasCopyEx", + "cublasCmatinvBatched", + "cublasCherkEx_64", + "cublasCherkEx", + "cublasCherk3mEx_64", + "cublasCherk3mEx", + "cublasCgetrsBatched", + "cublasCgetriBatched", + "cublasCgetrfBatched", + "cublasCgeqrfBatched", + "cublasCgemmEx_64", + "cublasCgemmEx", + "cublasCgemm3m_64", + "cublasCgemm3mStridedBatched_64", + "cublasCgemm3mStridedBatched", + "cublasCgemm3mEx_64", + "cublasCgemm3mEx", + "cublasCgemm3mBatched_64", + "cublasCgemm3mBatched", + "cublasCgemm3m", + "cublasCgelsBatched", + "cublasAsumEx_64", + "cublasAsumEx", + "cublasAlloc", + "csru2csrInfo_t", + "csru2csrInfo", + "__curand_umul", + "CUTENSOR_WORKSPACE_DEFAULT", + "CUTENSOR_STATUS_MAPPING_ERROR", + "CUTENSOR_STATUS_LICENSE_ERROR", + "CUTENSOR_STATUS_CUDA_ERROR", + "CUTENSOR_STATUS_CUBLAS_ERROR", + "CUTENSOR_R_MIN_TF32", + "CUTENSOR_R_MIN_8U", + "CUTENSOR_R_MIN_8I", + "CUTENSOR_R_MIN_64F", + "CUTENSOR_R_MIN_32U", + "CUTENSOR_R_MIN_32I", + "CUTENSOR_R_MIN_32F", + "CUTENSOR_R_MIN_16F", + "CUTENSOR_R_MIN_16BF", + "CUTENSOR_R_64U", + "CUTENSOR_R_64I", + "CUTENSOR_R_4U", + "CUTENSOR_R_4I", + "CUTENSOR_R_16U", + "CUTENSOR_R_16I", + "CUTENSOR_PLAN_REQUIRED_WORKSPACE", + "CUTENSOR_PLAN_PREFERENCE_KERNEL_RANK", + "CUTENSOR_PLAN_PREFERENCE_JIT", + "CUTENSOR_PLAN_PREFERENCE_INCREMENTAL_COUNT", + "CUTENSOR_PLAN_PREFERENCE_CACHE_MODE", + "CUTENSOR_PLAN_PREFERENCE_AUTOTUNE_MODE", + "CUTENSOR_PLAN_PREFERENCE_ALGO", + "CUTENSOR_OP_TANH", + "CUTENSOR_OP_TAN", + "CUTENSOR_OP_SWISH", + "CUTENSOR_OP_SOFT_SIGN", + "CUTENSOR_OP_SOFT_PLUS", + "CUTENSOR_OP_SINH", + "CUTENSOR_OP_SIN", + "CUTENSOR_OP_SIGMOID", + "CUTENSOR_OP_RELU", + "CUTENSOR_OP_RCP", + "CUTENSOR_OP_NEG", + "CUTENSOR_OP_MISH", + "CUTENSOR_OP_LOG", + "CUTENSOR_OP_FLOOR", + "CUTENSOR_OP_EXP", + "CUTENSOR_OP_COSH", + "CUTENSOR_OP_COS", + "CUTENSOR_OP_CONJ", + "CUTENSOR_OP_CEIL", + "CUTENSOR_OP_ATANH", + "CUTENSOR_OP_ATAN", + 
"CUTENSOR_OP_ASINH", + "CUTENSOR_OP_ASIN", + "CUTENSOR_OP_ACOSH", + "CUTENSOR_OP_ACOS", + "CUTENSOR_OP_ABS", + "CUTENSOR_OPERATION_DESCRIPTOR_TAG", + "CUTENSOR_OPERATION_DESCRIPTOR_SCALAR_TYPE", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_VALUE", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_RIGHT", + "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_LEFT", + "CUTENSOR_OPERATION_DESCRIPTOR_MOVED_BYTES", + "CUTENSOR_OPERATION_DESCRIPTOR_FLOPS", + "CUTENSOR_MG_DEVICE_HOST_PINNED", + "CUTENSOR_MG_DEVICE_HOST", + "CUTENSOR_JIT_MODE_NONE", + "CUTENSOR_JIT_MODE_DEFAULT", + "CUTENSOR_C_MIN_TF32", + "CUTENSOR_C_MIN_64F", + "CUTENSOR_C_MIN_32F", + "CUTENSOR_C_MIN_16F", + "CUTENSOR_C_8U", + "CUTENSOR_C_8I", + "CUTENSOR_C_64U", + "CUTENSOR_C_64I", + "CUTENSOR_C_4U", + "CUTENSOR_C_4I", + "CUTENSOR_C_32U", + "CUTENSOR_C_32I", + "CUTENSOR_C_16U", + "CUTENSOR_C_16I", + "CUTENSOR_C_16F", + "CUTENSOR_C_16BF", + "CUTENSOR_COMPUTE_TF32", + "CUTENSOR_COMPUTE_3XTF32", + "CUTENSOR_CACHE_MODE_PEDANTIC", + "CUTENSOR_CACHE_MODE_NONE", + "CUTENSOR_AUTOTUNE_MODE_NONE", + "CUTENSOR_AUTOTUNE_MODE_INCREMENTAL", + "CUTENSOR_ALGO_TTGT", + "CUTENSOR_ALGO_TGETT", + "CUTENSOR_ALGO_GETT", + "CUTENSORMG_CONTRACTION_FIND_ATTRIBUTE_MAX", + "CUTENSORMG_ALGO_DEFAULT", + "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", + "CUSPARSE_STATUS_MAPPING_ERROR", + "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES", + "CUSPARSE_STATUS_EXECUTION_FAILED", + "CUSPARSE_SPSV_UPDATE_GENERAL", + "CUSPARSE_SPSV_UPDATE_DIAGONAL", + "CUSPARSE_SPSM_UPDATE_GENERAL", + "CUSPARSE_SPSM_UPDATE_DIAGONAL", + "CUSPARSE_SPMM_OP_ALG_DEFAULT", + "CUSPARSE_SPMM_COO_ALG4", + "CUSPARSE_SPMM_BSR_ALG1", + "CUSPARSE_SPMMA_PREPROCESS", + "CUSPARSE_SPMMA_ALG4", + "CUSPARSE_SPMMA_ALG3", + "CUSPARSE_SPMMA_ALG2", + "CUSPARSE_SPMMA_ALG1", + "CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC", + "CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC", + "CUSPARSE_SPGEMM_ALG3", + "CUSPARSE_SPGEMM_ALG2", + "CUSPARSE_SPGEMM_ALG1", + "CUSPARSE_SIDE_RIGHT", + "CUSPARSE_SIDE_LEFT", + "CUSPARSE_MV_ALG_DEFAULT", + 
"CUSPARSE_MM_ALG_DEFAULT", + "CUSPARSE_CSRMV_ALG2", + "CUSPARSE_CSRMV_ALG1", + "CUSPARSE_CSRMM_ALG1", + "CUSPARSE_CSR2CSC_ALG_DEFAULT", + "CUSPARSE_CSR2CSC_ALG2", + "CUSPARSE_CSR2CSC_ALG1", + "CUSPARSE_COOMV_ALG", + "CUSPARSE_COOMM_ALG3", + "CUSPARSE_COOMM_ALG2", + "CUSPARSE_COOMM_ALG1", + "CUSPARSE_COLOR_ALG1", + "CUSPARSE_COLOR_ALG0", + "CUSPARSE_ALG_NAIVE", + "CUSPARSE_ALG_MERGE_PATH", + "CUSPARSE_ALG1", + "CUSPARSE_ALG0", + "CURAND_STATUS_PREEXISTING_FAILURE", + "CURAND_STATUS_INITIALIZATION_FAILED", + "CURAND_STATUS_ARCH_MISMATCH", + "CURAND_RNG_TEST", + "CURAND_REJECTION", + "CURAND_POISSON", + "CURAND_M2", + "CURAND_M1", + "CURAND_KNUTH", + "CURAND_ITR", + "CURAND_HITR", + "CURAND_FAST_REJECTION", + "CURAND_DISCRETE_GAUSS", + "CURAND_DEVICE_API", + "CURAND_DEFINITION", + "CURAND_CHOOSE_BEST", + "CURAND_BINARY_SEARCH", + "CURAND_3RD", + "CUDA_R_8F_E5M2", + "CUDA_R_8F_E4M3", + "CUDA_R_64U", + "CUDA_R_64I", + "CUDA_R_4U", + "CUDA_R_4I", + "CUDA_R_16U", + "CUDA_R_16I", + "CUDA_C_64U", + "CUDA_C_64I", + "CUDA_C_4U", + "CUDA_C_4I", + "CUDA_C_16U", + "CUDA_C_16I", + "CUBLAS_TF32_TENSOR_OP_MATH", + "CUBLAS_TENSOR_OP_MATH", + "CUBLAS_STATUS_LICENSE_ERROR", + "CUBLAS_PEDANTIC_MATH", + "CUBLAS_OP_CONJG", + "CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", + "CUBLAS_GEMM_DFALT_TENSOR_OP", + "CUBLAS_GEMM_DEFAULT_TENSOR_OP", + "CUBLAS_GEMM_ALGO9_TENSOR_OP", + "CUBLAS_GEMM_ALGO9", + "CUBLAS_GEMM_ALGO8_TENSOR_OP", + "CUBLAS_GEMM_ALGO8", + "CUBLAS_GEMM_ALGO7_TENSOR_OP", + "CUBLAS_GEMM_ALGO7", + "CUBLAS_GEMM_ALGO6_TENSOR_OP", + "CUBLAS_GEMM_ALGO6", + "CUBLAS_GEMM_ALGO5_TENSOR_OP", + "CUBLAS_GEMM_ALGO5", + "CUBLAS_GEMM_ALGO4_TENSOR_OP", + "CUBLAS_GEMM_ALGO4", + "CUBLAS_GEMM_ALGO3_TENSOR_OP", + "CUBLAS_GEMM_ALGO3", + "CUBLAS_GEMM_ALGO2_TENSOR_OP", + "CUBLAS_GEMM_ALGO23", + "CUBLAS_GEMM_ALGO22", + "CUBLAS_GEMM_ALGO21", + "CUBLAS_GEMM_ALGO20", + "CUBLAS_GEMM_ALGO2", + "CUBLAS_GEMM_ALGO1_TENSOR_OP", + "CUBLAS_GEMM_ALGO19", + "CUBLAS_GEMM_ALGO18", + "CUBLAS_GEMM_ALGO17", + 
"CUBLAS_GEMM_ALGO16", + "CUBLAS_GEMM_ALGO15_TENSOR_OP", + "CUBLAS_GEMM_ALGO15", + "CUBLAS_GEMM_ALGO14_TENSOR_OP", + "CUBLAS_GEMM_ALGO14", + "CUBLAS_GEMM_ALGO13_TENSOR_OP", + "CUBLAS_GEMM_ALGO13", + "CUBLAS_GEMM_ALGO12_TENSOR_OP", + "CUBLAS_GEMM_ALGO12", + "CUBLAS_GEMM_ALGO11_TENSOR_OP", + "CUBLAS_GEMM_ALGO11", + "CUBLAS_GEMM_ALGO10_TENSOR_OP", + "CUBLAS_GEMM_ALGO10", + "CUBLAS_GEMM_ALGO1", + "CUBLAS_GEMM_ALGO0_TENSOR_OP", + "CUBLAS_GEMM_ALGO0", + "CUBLAS_COMPUTE_64F_PEDANTIC", + "CUBLAS_COMPUTE_64F", + "CUBLAS_COMPUTE_32I_PEDANTIC", + "CUBLAS_COMPUTE_32I", + "CUBLAS_COMPUTE_32F_PEDANTIC", + "CUBLAS_COMPUTE_32F_FAST_TF32", + "CUBLAS_COMPUTE_32F_FAST_16F", + "CUBLAS_COMPUTE_32F_FAST_16BF", + "CUBLAS_COMPUTE_16F_PEDANTIC", + "CUBLAS_COMPUTE_16F", + "CUBLASLT_SEARCH_RESERVED_09", + "CUBLASLT_SEARCH_RESERVED_08", + "CUBLASLT_SEARCH_RESERVED_07", + "CUBLASLT_SEARCH_RESERVED_06", + "CUBLASLT_SEARCH_RESERVED_05", + "CUBLASLT_SEARCH_RESERVED_04", + "CUBLASLT_SEARCH_RESERVED_03", + "CUBLASLT_SEARCH_RESERVED_02", + "CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID", + "CUBLASLT_SEARCH_BEST_FIT", + "CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE", + "CUBLASLT_REDUCTION_SCHEME_NONE", + "CUBLASLT_REDUCTION_SCHEME_MASK", + "CUBLASLT_REDUCTION_SCHEME_INPLACE", + "CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE", + "CUBLASLT_POINTER_MODE_MASK_HOST", + "CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR", + "CUBLASLT_POINTER_MODE_MASK_DEVICE", + "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO", + "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST", + "CUBLASLT_POINTER_MODE_HOST", + "CUBLASLT_POINTER_MODE_DEVICE_VECTOR", + "CUBLASLT_POINTER_MODE_DEVICE", + "CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO", + "CUBLASLT_ORDER_ROW", + "CUBLASLT_ORDER_COL4_4R2_8C", + "CUBLASLT_ORDER_COL32_2R_4R4", + "CUBLASLT_ORDER_COL32", + "CUBLASLT_ORDER_COL", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK", + 
"CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F", + "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F", + "CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB", + "CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA", + "CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE", + "CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE", + "CUBLASLT_MATRIX_LAYOUT_TYPE", + "CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET", + "CUBLASLT_MATRIX_LAYOUT_ROWS", + "CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET", + "CUBLASLT_MATRIX_LAYOUT_ORDER", + "CUBLASLT_MATRIX_LAYOUT_LD", + "CUBLASLT_MATRIX_LAYOUT_COLS", + "CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT", + "CUBLASLT_MATMUL_TILE_UNDEFINED", + "CUBLASLT_MATMUL_TILE_END", + "CUBLASLT_MATMUL_TILE_96x64", + "CUBLASLT_MATMUL_TILE_96x512", + "CUBLASLT_MATMUL_TILE_96x448", + "CUBLASLT_MATMUL_TILE_96x384", + "CUBLASLT_MATMUL_TILE_96x320", + "CUBLASLT_MATMUL_TILE_96x256", + "CUBLASLT_MATMUL_TILE_96x192", + "CUBLASLT_MATMUL_TILE_96x128", + "CUBLASLT_MATMUL_TILE_8x8", + "CUBLASLT_MATMUL_TILE_8x768", + "CUBLASLT_MATMUL_TILE_8x704", + "CUBLASLT_MATMUL_TILE_8x640", + "CUBLASLT_MATMUL_TILE_8x64", + "CUBLASLT_MATMUL_TILE_8x576", + "CUBLASLT_MATMUL_TILE_8x512", + "CUBLASLT_MATMUL_TILE_8x448", + "CUBLASLT_MATMUL_TILE_8x384", + "CUBLASLT_MATMUL_TILE_8x320", + "CUBLASLT_MATMUL_TILE_8x32", + 
"CUBLASLT_MATMUL_TILE_8x256", + "CUBLASLT_MATMUL_TILE_8x192", + "CUBLASLT_MATMUL_TILE_8x16", + "CUBLASLT_MATMUL_TILE_8x128", + "CUBLASLT_MATMUL_TILE_88x64", + "CUBLASLT_MATMUL_TILE_88x512", + "CUBLASLT_MATMUL_TILE_88x448", + "CUBLASLT_MATMUL_TILE_88x384", + "CUBLASLT_MATMUL_TILE_88x320", + "CUBLASLT_MATMUL_TILE_88x256", + "CUBLASLT_MATMUL_TILE_88x192", + "CUBLASLT_MATMUL_TILE_88x128", + "CUBLASLT_MATMUL_TILE_80x64", + "CUBLASLT_MATMUL_TILE_80x576", + "CUBLASLT_MATMUL_TILE_80x512", + "CUBLASLT_MATMUL_TILE_80x448", + "CUBLASLT_MATMUL_TILE_80x384", + "CUBLASLT_MATMUL_TILE_80x320", + "CUBLASLT_MATMUL_TILE_80x256", + "CUBLASLT_MATMUL_TILE_80x192", + "CUBLASLT_MATMUL_TILE_80x128", + "CUBLASLT_MATMUL_TILE_768x80", + "CUBLASLT_MATMUL_TILE_768x8", + "CUBLASLT_MATMUL_TILE_768x72", + "CUBLASLT_MATMUL_TILE_768x64", + "CUBLASLT_MATMUL_TILE_768x56", + "CUBLASLT_MATMUL_TILE_768x48", + "CUBLASLT_MATMUL_TILE_768x40", + "CUBLASLT_MATMUL_TILE_768x32", + "CUBLASLT_MATMUL_TILE_768x24", + "CUBLASLT_MATMUL_TILE_768x16", + "CUBLASLT_MATMUL_TILE_760x64", + "CUBLASLT_MATMUL_TILE_752x64", + "CUBLASLT_MATMUL_TILE_744x64", + "CUBLASLT_MATMUL_TILE_736x64", + "CUBLASLT_MATMUL_TILE_72x640", + "CUBLASLT_MATMUL_TILE_72x64", + "CUBLASLT_MATMUL_TILE_72x576", + "CUBLASLT_MATMUL_TILE_72x512", + "CUBLASLT_MATMUL_TILE_72x448", + "CUBLASLT_MATMUL_TILE_72x384", + "CUBLASLT_MATMUL_TILE_72x320", + "CUBLASLT_MATMUL_TILE_72x256", + "CUBLASLT_MATMUL_TILE_72x192", + "CUBLASLT_MATMUL_TILE_72x128", + "CUBLASLT_MATMUL_TILE_728x64", + "CUBLASLT_MATMUL_TILE_720x64", + "CUBLASLT_MATMUL_TILE_712x64", + "CUBLASLT_MATMUL_TILE_704x88", + "CUBLASLT_MATMUL_TILE_704x80", + "CUBLASLT_MATMUL_TILE_704x8", + "CUBLASLT_MATMUL_TILE_704x72", + "CUBLASLT_MATMUL_TILE_704x64", + "CUBLASLT_MATMUL_TILE_704x56", + "CUBLASLT_MATMUL_TILE_704x48", + "CUBLASLT_MATMUL_TILE_704x40", + "CUBLASLT_MATMUL_TILE_704x32", + "CUBLASLT_MATMUL_TILE_704x24", + "CUBLASLT_MATMUL_TILE_704x16", + "CUBLASLT_MATMUL_TILE_696x64", + 
"CUBLASLT_MATMUL_TILE_688x64", + "CUBLASLT_MATMUL_TILE_680x64", + "CUBLASLT_MATMUL_TILE_672x64", + "CUBLASLT_MATMUL_TILE_664x64", + "CUBLASLT_MATMUL_TILE_656x64", + "CUBLASLT_MATMUL_TILE_64x96", + "CUBLASLT_MATMUL_TILE_64x88", + "CUBLASLT_MATMUL_TILE_64x80", + "CUBLASLT_MATMUL_TILE_64x8", + "CUBLASLT_MATMUL_TILE_64x768", + "CUBLASLT_MATMUL_TILE_64x760", + "CUBLASLT_MATMUL_TILE_64x752", + "CUBLASLT_MATMUL_TILE_64x744", + "CUBLASLT_MATMUL_TILE_64x736", + "CUBLASLT_MATMUL_TILE_64x728", + "CUBLASLT_MATMUL_TILE_64x720", + "CUBLASLT_MATMUL_TILE_64x72", + "CUBLASLT_MATMUL_TILE_64x712", + "CUBLASLT_MATMUL_TILE_64x704", + "CUBLASLT_MATMUL_TILE_64x696", + "CUBLASLT_MATMUL_TILE_64x688", + "CUBLASLT_MATMUL_TILE_64x680", + "CUBLASLT_MATMUL_TILE_64x672", + "CUBLASLT_MATMUL_TILE_64x664", + "CUBLASLT_MATMUL_TILE_64x656", + "CUBLASLT_MATMUL_TILE_64x648", + "CUBLASLT_MATMUL_TILE_64x640", + "CUBLASLT_MATMUL_TILE_64x64", + "CUBLASLT_MATMUL_TILE_64x632", + "CUBLASLT_MATMUL_TILE_64x624", + "CUBLASLT_MATMUL_TILE_64x616", + "CUBLASLT_MATMUL_TILE_64x608", + "CUBLASLT_MATMUL_TILE_64x600", + "CUBLASLT_MATMUL_TILE_64x592", + "CUBLASLT_MATMUL_TILE_64x584", + "CUBLASLT_MATMUL_TILE_64x576", + "CUBLASLT_MATMUL_TILE_64x568", + "CUBLASLT_MATMUL_TILE_64x560", + "CUBLASLT_MATMUL_TILE_64x56", + "CUBLASLT_MATMUL_TILE_64x552", + "CUBLASLT_MATMUL_TILE_64x544", + "CUBLASLT_MATMUL_TILE_64x536", + "CUBLASLT_MATMUL_TILE_64x528", + "CUBLASLT_MATMUL_TILE_64x520", + "CUBLASLT_MATMUL_TILE_64x512", + "CUBLASLT_MATMUL_TILE_64x504", + "CUBLASLT_MATMUL_TILE_64x496", + "CUBLASLT_MATMUL_TILE_64x488", + "CUBLASLT_MATMUL_TILE_64x480", + "CUBLASLT_MATMUL_TILE_64x48", + "CUBLASLT_MATMUL_TILE_64x472", + "CUBLASLT_MATMUL_TILE_64x464", + "CUBLASLT_MATMUL_TILE_64x456", + "CUBLASLT_MATMUL_TILE_64x448", + "CUBLASLT_MATMUL_TILE_64x440", + "CUBLASLT_MATMUL_TILE_64x432", + "CUBLASLT_MATMUL_TILE_64x424", + "CUBLASLT_MATMUL_TILE_64x416", + "CUBLASLT_MATMUL_TILE_64x408", + "CUBLASLT_MATMUL_TILE_64x400", + 
"CUBLASLT_MATMUL_TILE_64x40", + "CUBLASLT_MATMUL_TILE_64x392", + "CUBLASLT_MATMUL_TILE_64x384", + "CUBLASLT_MATMUL_TILE_64x376", + "CUBLASLT_MATMUL_TILE_64x368", + "CUBLASLT_MATMUL_TILE_64x360", + "CUBLASLT_MATMUL_TILE_64x352", + "CUBLASLT_MATMUL_TILE_64x344", + "CUBLASLT_MATMUL_TILE_64x336", + "CUBLASLT_MATMUL_TILE_64x328", + "CUBLASLT_MATMUL_TILE_64x320", + "CUBLASLT_MATMUL_TILE_64x32", + "CUBLASLT_MATMUL_TILE_64x312", + "CUBLASLT_MATMUL_TILE_64x304", + "CUBLASLT_MATMUL_TILE_64x296", + "CUBLASLT_MATMUL_TILE_64x288", + "CUBLASLT_MATMUL_TILE_64x280", + "CUBLASLT_MATMUL_TILE_64x272", + "CUBLASLT_MATMUL_TILE_64x264", + "CUBLASLT_MATMUL_TILE_64x256", + "CUBLASLT_MATMUL_TILE_64x248", + "CUBLASLT_MATMUL_TILE_64x240", + "CUBLASLT_MATMUL_TILE_64x24", + "CUBLASLT_MATMUL_TILE_64x232", + "CUBLASLT_MATMUL_TILE_64x224", + "CUBLASLT_MATMUL_TILE_64x216", + "CUBLASLT_MATMUL_TILE_64x208", + "CUBLASLT_MATMUL_TILE_64x200", + "CUBLASLT_MATMUL_TILE_64x192", + "CUBLASLT_MATMUL_TILE_64x184", + "CUBLASLT_MATMUL_TILE_64x176", + "CUBLASLT_MATMUL_TILE_64x168", + "CUBLASLT_MATMUL_TILE_64x160", + "CUBLASLT_MATMUL_TILE_64x16", + "CUBLASLT_MATMUL_TILE_64x152", + "CUBLASLT_MATMUL_TILE_64x144", + "CUBLASLT_MATMUL_TILE_64x136", + "CUBLASLT_MATMUL_TILE_64x128", + "CUBLASLT_MATMUL_TILE_64x120", + "CUBLASLT_MATMUL_TILE_64x112", + "CUBLASLT_MATMUL_TILE_64x104", + "CUBLASLT_MATMUL_TILE_648x64", + "CUBLASLT_MATMUL_TILE_640x96", + "CUBLASLT_MATMUL_TILE_640x88", + "CUBLASLT_MATMUL_TILE_640x80", + "CUBLASLT_MATMUL_TILE_640x8", + "CUBLASLT_MATMUL_TILE_640x72", + "CUBLASLT_MATMUL_TILE_640x64", + "CUBLASLT_MATMUL_TILE_640x56", + "CUBLASLT_MATMUL_TILE_640x48", + "CUBLASLT_MATMUL_TILE_640x40", + "CUBLASLT_MATMUL_TILE_640x32", + "CUBLASLT_MATMUL_TILE_640x24", + "CUBLASLT_MATMUL_TILE_640x16", + "CUBLASLT_MATMUL_TILE_632x64", + "CUBLASLT_MATMUL_TILE_624x64", + "CUBLASLT_MATMUL_TILE_616x64", + "CUBLASLT_MATMUL_TILE_608x64", + "CUBLASLT_MATMUL_TILE_600x64", + "CUBLASLT_MATMUL_TILE_592x64", + 
"CUBLASLT_MATMUL_TILE_584x64", + "CUBLASLT_MATMUL_TILE_576x96", + "CUBLASLT_MATMUL_TILE_576x88", + "CUBLASLT_MATMUL_TILE_576x80", + "CUBLASLT_MATMUL_TILE_576x8", + "CUBLASLT_MATMUL_TILE_576x72", + "CUBLASLT_MATMUL_TILE_576x64", + "CUBLASLT_MATMUL_TILE_576x56", + "CUBLASLT_MATMUL_TILE_576x48", + "CUBLASLT_MATMUL_TILE_576x40", + "CUBLASLT_MATMUL_TILE_576x32", + "CUBLASLT_MATMUL_TILE_576x24", + "CUBLASLT_MATMUL_TILE_576x16", + "CUBLASLT_MATMUL_TILE_576x112", + "CUBLASLT_MATMUL_TILE_576x104", + "CUBLASLT_MATMUL_TILE_56x768", + "CUBLASLT_MATMUL_TILE_56x704", + "CUBLASLT_MATMUL_TILE_56x640", + "CUBLASLT_MATMUL_TILE_56x576", + "CUBLASLT_MATMUL_TILE_56x512", + "CUBLASLT_MATMUL_TILE_56x448", + "CUBLASLT_MATMUL_TILE_56x384", + "CUBLASLT_MATMUL_TILE_56x320", + "CUBLASLT_MATMUL_TILE_56x256", + "CUBLASLT_MATMUL_TILE_56x192", + "CUBLASLT_MATMUL_TILE_56x128", + "CUBLASLT_MATMUL_TILE_568x64", + "CUBLASLT_MATMUL_TILE_560x64", + "CUBLASLT_MATMUL_TILE_552x64", + "CUBLASLT_MATMUL_TILE_544x64", + "CUBLASLT_MATMUL_TILE_536x64", + "CUBLASLT_MATMUL_TILE_528x64", + "CUBLASLT_MATMUL_TILE_520x64", + "CUBLASLT_MATMUL_TILE_512x96", + "CUBLASLT_MATMUL_TILE_512x88", + "CUBLASLT_MATMUL_TILE_512x80", + "CUBLASLT_MATMUL_TILE_512x8", + "CUBLASLT_MATMUL_TILE_512x72", + "CUBLASLT_MATMUL_TILE_512x64", + "CUBLASLT_MATMUL_TILE_512x56", + "CUBLASLT_MATMUL_TILE_512x48", + "CUBLASLT_MATMUL_TILE_512x40", + "CUBLASLT_MATMUL_TILE_512x32", + "CUBLASLT_MATMUL_TILE_512x24", + "CUBLASLT_MATMUL_TILE_512x16", + "CUBLASLT_MATMUL_TILE_512x128", + "CUBLASLT_MATMUL_TILE_512x120", + "CUBLASLT_MATMUL_TILE_512x112", + "CUBLASLT_MATMUL_TILE_512x104", + "CUBLASLT_MATMUL_TILE_504x64", + "CUBLASLT_MATMUL_TILE_496x64", + "CUBLASLT_MATMUL_TILE_48x768", + "CUBLASLT_MATMUL_TILE_48x704", + "CUBLASLT_MATMUL_TILE_48x640", + "CUBLASLT_MATMUL_TILE_48x64", + "CUBLASLT_MATMUL_TILE_48x576", + "CUBLASLT_MATMUL_TILE_48x512", + "CUBLASLT_MATMUL_TILE_48x448", + "CUBLASLT_MATMUL_TILE_48x384", + "CUBLASLT_MATMUL_TILE_48x320", + 
"CUBLASLT_MATMUL_TILE_48x256", + "CUBLASLT_MATMUL_TILE_48x192", + "CUBLASLT_MATMUL_TILE_48x128", + "CUBLASLT_MATMUL_TILE_488x64", + "CUBLASLT_MATMUL_TILE_480x64", + "CUBLASLT_MATMUL_TILE_472x64", + "CUBLASLT_MATMUL_TILE_464x64", + "CUBLASLT_MATMUL_TILE_456x64", + "CUBLASLT_MATMUL_TILE_448x96", + "CUBLASLT_MATMUL_TILE_448x88", + "CUBLASLT_MATMUL_TILE_448x80", + "CUBLASLT_MATMUL_TILE_448x8", + "CUBLASLT_MATMUL_TILE_448x72", + "CUBLASLT_MATMUL_TILE_448x64", + "CUBLASLT_MATMUL_TILE_448x56", + "CUBLASLT_MATMUL_TILE_448x48", + "CUBLASLT_MATMUL_TILE_448x40", + "CUBLASLT_MATMUL_TILE_448x32", + "CUBLASLT_MATMUL_TILE_448x24", + "CUBLASLT_MATMUL_TILE_448x16", + "CUBLASLT_MATMUL_TILE_448x144", + "CUBLASLT_MATMUL_TILE_448x136", + "CUBLASLT_MATMUL_TILE_448x128", + "CUBLASLT_MATMUL_TILE_448x120", + "CUBLASLT_MATMUL_TILE_448x112", + "CUBLASLT_MATMUL_TILE_448x104", + "CUBLASLT_MATMUL_TILE_440x64", + "CUBLASLT_MATMUL_TILE_432x64", + "CUBLASLT_MATMUL_TILE_424x64", + "CUBLASLT_MATMUL_TILE_416x64", + "CUBLASLT_MATMUL_TILE_40x768", + "CUBLASLT_MATMUL_TILE_40x704", + "CUBLASLT_MATMUL_TILE_40x640", + "CUBLASLT_MATMUL_TILE_40x64", + "CUBLASLT_MATMUL_TILE_40x576", + "CUBLASLT_MATMUL_TILE_40x512", + "CUBLASLT_MATMUL_TILE_40x448", + "CUBLASLT_MATMUL_TILE_40x384", + "CUBLASLT_MATMUL_TILE_40x320", + "CUBLASLT_MATMUL_TILE_40x256", + "CUBLASLT_MATMUL_TILE_40x192", + "CUBLASLT_MATMUL_TILE_40x128", + "CUBLASLT_MATMUL_TILE_408x64", + "CUBLASLT_MATMUL_TILE_400x64", + "CUBLASLT_MATMUL_TILE_392x64", + "CUBLASLT_MATMUL_TILE_384x96", + "CUBLASLT_MATMUL_TILE_384x88", + "CUBLASLT_MATMUL_TILE_384x80", + "CUBLASLT_MATMUL_TILE_384x8", + "CUBLASLT_MATMUL_TILE_384x72", + "CUBLASLT_MATMUL_TILE_384x64", + "CUBLASLT_MATMUL_TILE_384x56", + "CUBLASLT_MATMUL_TILE_384x48", + "CUBLASLT_MATMUL_TILE_384x40", + "CUBLASLT_MATMUL_TILE_384x32", + "CUBLASLT_MATMUL_TILE_384x24", + "CUBLASLT_MATMUL_TILE_384x168", + "CUBLASLT_MATMUL_TILE_384x160", + "CUBLASLT_MATMUL_TILE_384x16", + "CUBLASLT_MATMUL_TILE_384x152", + 
"CUBLASLT_MATMUL_TILE_384x144", + "CUBLASLT_MATMUL_TILE_384x136", + "CUBLASLT_MATMUL_TILE_384x128", + "CUBLASLT_MATMUL_TILE_384x120", + "CUBLASLT_MATMUL_TILE_384x112", + "CUBLASLT_MATMUL_TILE_384x104", + "CUBLASLT_MATMUL_TILE_376x64", + "CUBLASLT_MATMUL_TILE_376x128", + "CUBLASLT_MATMUL_TILE_368x64", + "CUBLASLT_MATMUL_TILE_368x128", + "CUBLASLT_MATMUL_TILE_360x64", + "CUBLASLT_MATMUL_TILE_360x128", + "CUBLASLT_MATMUL_TILE_352x64", + "CUBLASLT_MATMUL_TILE_352x128", + "CUBLASLT_MATMUL_TILE_344x64", + "CUBLASLT_MATMUL_TILE_344x128", + "CUBLASLT_MATMUL_TILE_336x64", + "CUBLASLT_MATMUL_TILE_336x128", + "CUBLASLT_MATMUL_TILE_32x8", + "CUBLASLT_MATMUL_TILE_32x768", + "CUBLASLT_MATMUL_TILE_32x704", + "CUBLASLT_MATMUL_TILE_32x640", + "CUBLASLT_MATMUL_TILE_32x64", + "CUBLASLT_MATMUL_TILE_32x576", + "CUBLASLT_MATMUL_TILE_32x512", + "CUBLASLT_MATMUL_TILE_32x448", + "CUBLASLT_MATMUL_TILE_32x384", + "CUBLASLT_MATMUL_TILE_32x320", + "CUBLASLT_MATMUL_TILE_32x32", + "CUBLASLT_MATMUL_TILE_32x256", + "CUBLASLT_MATMUL_TILE_32x192", + "CUBLASLT_MATMUL_TILE_32x16", + "CUBLASLT_MATMUL_TILE_32x128", + "CUBLASLT_MATMUL_TILE_328x64", + "CUBLASLT_MATMUL_TILE_328x128", + "CUBLASLT_MATMUL_TILE_320x96", + "CUBLASLT_MATMUL_TILE_320x88", + "CUBLASLT_MATMUL_TILE_320x80", + "CUBLASLT_MATMUL_TILE_320x8", + "CUBLASLT_MATMUL_TILE_320x72", + "CUBLASLT_MATMUL_TILE_320x64", + "CUBLASLT_MATMUL_TILE_320x56", + "CUBLASLT_MATMUL_TILE_320x48", + "CUBLASLT_MATMUL_TILE_320x40", + "CUBLASLT_MATMUL_TILE_320x32", + "CUBLASLT_MATMUL_TILE_320x24", + "CUBLASLT_MATMUL_TILE_320x200", + "CUBLASLT_MATMUL_TILE_320x192", + "CUBLASLT_MATMUL_TILE_320x184", + "CUBLASLT_MATMUL_TILE_320x176", + "CUBLASLT_MATMUL_TILE_320x168", + "CUBLASLT_MATMUL_TILE_320x160", + "CUBLASLT_MATMUL_TILE_320x16", + "CUBLASLT_MATMUL_TILE_320x152", + "CUBLASLT_MATMUL_TILE_320x144", + "CUBLASLT_MATMUL_TILE_320x136", + "CUBLASLT_MATMUL_TILE_320x128", + "CUBLASLT_MATMUL_TILE_320x120", + "CUBLASLT_MATMUL_TILE_320x112", + "CUBLASLT_MATMUL_TILE_320x104", + 
"CUBLASLT_MATMUL_TILE_312x64", + "CUBLASLT_MATMUL_TILE_312x128", + "CUBLASLT_MATMUL_TILE_304x64", + "CUBLASLT_MATMUL_TILE_304x128", + "CUBLASLT_MATMUL_TILE_296x64", + "CUBLASLT_MATMUL_TILE_296x128", + "CUBLASLT_MATMUL_TILE_288x64", + "CUBLASLT_MATMUL_TILE_288x128", + "CUBLASLT_MATMUL_TILE_280x64", + "CUBLASLT_MATMUL_TILE_280x128", + "CUBLASLT_MATMUL_TILE_272x64", + "CUBLASLT_MATMUL_TILE_272x128", + "CUBLASLT_MATMUL_TILE_264x64", + "CUBLASLT_MATMUL_TILE_264x128", + "CUBLASLT_MATMUL_TILE_256x96", + "CUBLASLT_MATMUL_TILE_256x88", + "CUBLASLT_MATMUL_TILE_256x80", + "CUBLASLT_MATMUL_TILE_256x8", + "CUBLASLT_MATMUL_TILE_256x72", + "CUBLASLT_MATMUL_TILE_256x64", + "CUBLASLT_MATMUL_TILE_256x56", + "CUBLASLT_MATMUL_TILE_256x48", + "CUBLASLT_MATMUL_TILE_256x40", + "CUBLASLT_MATMUL_TILE_256x32", + "CUBLASLT_MATMUL_TILE_256x256", + "CUBLASLT_MATMUL_TILE_256x248", + "CUBLASLT_MATMUL_TILE_256x240", + "CUBLASLT_MATMUL_TILE_256x24", + "CUBLASLT_MATMUL_TILE_256x232", + "CUBLASLT_MATMUL_TILE_256x224", + "CUBLASLT_MATMUL_TILE_256x216", + "CUBLASLT_MATMUL_TILE_256x208", + "CUBLASLT_MATMUL_TILE_256x200", + "CUBLASLT_MATMUL_TILE_256x192", + "CUBLASLT_MATMUL_TILE_256x184", + "CUBLASLT_MATMUL_TILE_256x176", + "CUBLASLT_MATMUL_TILE_256x168", + "CUBLASLT_MATMUL_TILE_256x160", + "CUBLASLT_MATMUL_TILE_256x16", + "CUBLASLT_MATMUL_TILE_256x152", + "CUBLASLT_MATMUL_TILE_256x144", + "CUBLASLT_MATMUL_TILE_256x136", + "CUBLASLT_MATMUL_TILE_256x128", + "CUBLASLT_MATMUL_TILE_256x120", + "CUBLASLT_MATMUL_TILE_256x112", + "CUBLASLT_MATMUL_TILE_256x104", + "CUBLASLT_MATMUL_TILE_24x768", + "CUBLASLT_MATMUL_TILE_24x704", + "CUBLASLT_MATMUL_TILE_24x640", + "CUBLASLT_MATMUL_TILE_24x64", + "CUBLASLT_MATMUL_TILE_24x576", + "CUBLASLT_MATMUL_TILE_24x512", + "CUBLASLT_MATMUL_TILE_24x448", + "CUBLASLT_MATMUL_TILE_24x384", + "CUBLASLT_MATMUL_TILE_24x320", + "CUBLASLT_MATMUL_TILE_24x256", + "CUBLASLT_MATMUL_TILE_24x192", + "CUBLASLT_MATMUL_TILE_24x128", + "CUBLASLT_MATMUL_TILE_248x64", + 
"CUBLASLT_MATMUL_TILE_248x192", + "CUBLASLT_MATMUL_TILE_248x128", + "CUBLASLT_MATMUL_TILE_240x64", + "CUBLASLT_MATMUL_TILE_240x192", + "CUBLASLT_MATMUL_TILE_240x128", + "CUBLASLT_MATMUL_TILE_232x64", + "CUBLASLT_MATMUL_TILE_232x192", + "CUBLASLT_MATMUL_TILE_232x128", + "CUBLASLT_MATMUL_TILE_224x64", + "CUBLASLT_MATMUL_TILE_224x192", + "CUBLASLT_MATMUL_TILE_224x128", + "CUBLASLT_MATMUL_TILE_216x64", + "CUBLASLT_MATMUL_TILE_216x192", + "CUBLASLT_MATMUL_TILE_216x128", + "CUBLASLT_MATMUL_TILE_208x64", + "CUBLASLT_MATMUL_TILE_208x192", + "CUBLASLT_MATMUL_TILE_208x128", + "CUBLASLT_MATMUL_TILE_200x64", + "CUBLASLT_MATMUL_TILE_200x192", + "CUBLASLT_MATMUL_TILE_200x128", + "CUBLASLT_MATMUL_TILE_192x96", + "CUBLASLT_MATMUL_TILE_192x88", + "CUBLASLT_MATMUL_TILE_192x80", + "CUBLASLT_MATMUL_TILE_192x8", + "CUBLASLT_MATMUL_TILE_192x72", + "CUBLASLT_MATMUL_TILE_192x64", + "CUBLASLT_MATMUL_TILE_192x56", + "CUBLASLT_MATMUL_TILE_192x48", + "CUBLASLT_MATMUL_TILE_192x40", + "CUBLASLT_MATMUL_TILE_192x336", + "CUBLASLT_MATMUL_TILE_192x328", + "CUBLASLT_MATMUL_TILE_192x320", + "CUBLASLT_MATMUL_TILE_192x32", + "CUBLASLT_MATMUL_TILE_192x312", + "CUBLASLT_MATMUL_TILE_192x304", + "CUBLASLT_MATMUL_TILE_192x296", + "CUBLASLT_MATMUL_TILE_192x288", + "CUBLASLT_MATMUL_TILE_192x280", + "CUBLASLT_MATMUL_TILE_192x272", + "CUBLASLT_MATMUL_TILE_192x264", + "CUBLASLT_MATMUL_TILE_192x256", + "CUBLASLT_MATMUL_TILE_192x248", + "CUBLASLT_MATMUL_TILE_192x240", + "CUBLASLT_MATMUL_TILE_192x24", + "CUBLASLT_MATMUL_TILE_192x232", + "CUBLASLT_MATMUL_TILE_192x224", + "CUBLASLT_MATMUL_TILE_192x216", + "CUBLASLT_MATMUL_TILE_192x208", + "CUBLASLT_MATMUL_TILE_192x200", + "CUBLASLT_MATMUL_TILE_192x192", + "CUBLASLT_MATMUL_TILE_192x184", + "CUBLASLT_MATMUL_TILE_192x176", + "CUBLASLT_MATMUL_TILE_192x168", + "CUBLASLT_MATMUL_TILE_192x160", + "CUBLASLT_MATMUL_TILE_192x16", + "CUBLASLT_MATMUL_TILE_192x152", + "CUBLASLT_MATMUL_TILE_192x144", + "CUBLASLT_MATMUL_TILE_192x136", + "CUBLASLT_MATMUL_TILE_192x128", + 
"CUBLASLT_MATMUL_TILE_192x120", + "CUBLASLT_MATMUL_TILE_192x112", + "CUBLASLT_MATMUL_TILE_192x104", + "CUBLASLT_MATMUL_TILE_184x64", + "CUBLASLT_MATMUL_TILE_184x256", + "CUBLASLT_MATMUL_TILE_184x192", + "CUBLASLT_MATMUL_TILE_184x128", + "CUBLASLT_MATMUL_TILE_176x64", + "CUBLASLT_MATMUL_TILE_176x256", + "CUBLASLT_MATMUL_TILE_176x192", + "CUBLASLT_MATMUL_TILE_176x128", + "CUBLASLT_MATMUL_TILE_16x8", + "CUBLASLT_MATMUL_TILE_16x768", + "CUBLASLT_MATMUL_TILE_16x704", + "CUBLASLT_MATMUL_TILE_16x640", + "CUBLASLT_MATMUL_TILE_16x64", + "CUBLASLT_MATMUL_TILE_16x576", + "CUBLASLT_MATMUL_TILE_16x512", + "CUBLASLT_MATMUL_TILE_16x448", + "CUBLASLT_MATMUL_TILE_16x384", + "CUBLASLT_MATMUL_TILE_16x320", + "CUBLASLT_MATMUL_TILE_16x32", + "CUBLASLT_MATMUL_TILE_16x256", + "CUBLASLT_MATMUL_TILE_16x192", + "CUBLASLT_MATMUL_TILE_16x16", + "CUBLASLT_MATMUL_TILE_16x128", + "CUBLASLT_MATMUL_TILE_168x64", + "CUBLASLT_MATMUL_TILE_168x256", + "CUBLASLT_MATMUL_TILE_168x192", + "CUBLASLT_MATMUL_TILE_168x128", + "CUBLASLT_MATMUL_TILE_160x64", + "CUBLASLT_MATMUL_TILE_160x256", + "CUBLASLT_MATMUL_TILE_160x192", + "CUBLASLT_MATMUL_TILE_160x128", + "CUBLASLT_MATMUL_TILE_152x64", + "CUBLASLT_MATMUL_TILE_152x320", + "CUBLASLT_MATMUL_TILE_152x256", + "CUBLASLT_MATMUL_TILE_152x192", + "CUBLASLT_MATMUL_TILE_152x128", + "CUBLASLT_MATMUL_TILE_144x64", + "CUBLASLT_MATMUL_TILE_144x320", + "CUBLASLT_MATMUL_TILE_144x256", + "CUBLASLT_MATMUL_TILE_144x192", + "CUBLASLT_MATMUL_TILE_144x128", + "CUBLASLT_MATMUL_TILE_136x64", + "CUBLASLT_MATMUL_TILE_136x320", + "CUBLASLT_MATMUL_TILE_136x256", + "CUBLASLT_MATMUL_TILE_136x192", + "CUBLASLT_MATMUL_TILE_136x128", + "CUBLASLT_MATMUL_TILE_128x96", + "CUBLASLT_MATMUL_TILE_128x88", + "CUBLASLT_MATMUL_TILE_128x80", + "CUBLASLT_MATMUL_TILE_128x8", + "CUBLASLT_MATMUL_TILE_128x72", + "CUBLASLT_MATMUL_TILE_128x64", + "CUBLASLT_MATMUL_TILE_128x56", + "CUBLASLT_MATMUL_TILE_128x512", + "CUBLASLT_MATMUL_TILE_128x504", + "CUBLASLT_MATMUL_TILE_128x496", + 
"CUBLASLT_MATMUL_TILE_128x488", + "CUBLASLT_MATMUL_TILE_128x480", + "CUBLASLT_MATMUL_TILE_128x48", + "CUBLASLT_MATMUL_TILE_128x472", + "CUBLASLT_MATMUL_TILE_128x464", + "CUBLASLT_MATMUL_TILE_128x456", + "CUBLASLT_MATMUL_TILE_128x448", + "CUBLASLT_MATMUL_TILE_128x440", + "CUBLASLT_MATMUL_TILE_128x432", + "CUBLASLT_MATMUL_TILE_128x424", + "CUBLASLT_MATMUL_TILE_128x416", + "CUBLASLT_MATMUL_TILE_128x408", + "CUBLASLT_MATMUL_TILE_128x400", + "CUBLASLT_MATMUL_TILE_128x40", + "CUBLASLT_MATMUL_TILE_128x392", + "CUBLASLT_MATMUL_TILE_128x384", + "CUBLASLT_MATMUL_TILE_128x376", + "CUBLASLT_MATMUL_TILE_128x368", + "CUBLASLT_MATMUL_TILE_128x360", + "CUBLASLT_MATMUL_TILE_128x352", + "CUBLASLT_MATMUL_TILE_128x344", + "CUBLASLT_MATMUL_TILE_128x336", + "CUBLASLT_MATMUL_TILE_128x328", + "CUBLASLT_MATMUL_TILE_128x320", + "CUBLASLT_MATMUL_TILE_128x32", + "CUBLASLT_MATMUL_TILE_128x312", + "CUBLASLT_MATMUL_TILE_128x304", + "CUBLASLT_MATMUL_TILE_128x296", + "CUBLASLT_MATMUL_TILE_128x288", + "CUBLASLT_MATMUL_TILE_128x280", + "CUBLASLT_MATMUL_TILE_128x272", + "CUBLASLT_MATMUL_TILE_128x264", + "CUBLASLT_MATMUL_TILE_128x256", + "CUBLASLT_MATMUL_TILE_128x248", + "CUBLASLT_MATMUL_TILE_128x240", + "CUBLASLT_MATMUL_TILE_128x24", + "CUBLASLT_MATMUL_TILE_128x232", + "CUBLASLT_MATMUL_TILE_128x224", + "CUBLASLT_MATMUL_TILE_128x216", + "CUBLASLT_MATMUL_TILE_128x208", + "CUBLASLT_MATMUL_TILE_128x200", + "CUBLASLT_MATMUL_TILE_128x192", + "CUBLASLT_MATMUL_TILE_128x184", + "CUBLASLT_MATMUL_TILE_128x176", + "CUBLASLT_MATMUL_TILE_128x168", + "CUBLASLT_MATMUL_TILE_128x160", + "CUBLASLT_MATMUL_TILE_128x16", + "CUBLASLT_MATMUL_TILE_128x152", + "CUBLASLT_MATMUL_TILE_128x144", + "CUBLASLT_MATMUL_TILE_128x136", + "CUBLASLT_MATMUL_TILE_128x128", + "CUBLASLT_MATMUL_TILE_128x120", + "CUBLASLT_MATMUL_TILE_128x112", + "CUBLASLT_MATMUL_TILE_128x104", + "CUBLASLT_MATMUL_TILE_120x64", + "CUBLASLT_MATMUL_TILE_120x384", + "CUBLASLT_MATMUL_TILE_120x320", + "CUBLASLT_MATMUL_TILE_120x256", + "CUBLASLT_MATMUL_TILE_120x192", + 
"CUBLASLT_MATMUL_TILE_120x128", + "CUBLASLT_MATMUL_TILE_112x64", + "CUBLASLT_MATMUL_TILE_112x384", + "CUBLASLT_MATMUL_TILE_112x320", + "CUBLASLT_MATMUL_TILE_112x256", + "CUBLASLT_MATMUL_TILE_112x192", + "CUBLASLT_MATMUL_TILE_112x128", + "CUBLASLT_MATMUL_TILE_104x64", + "CUBLASLT_MATMUL_TILE_104x448", + "CUBLASLT_MATMUL_TILE_104x384", + "CUBLASLT_MATMUL_TILE_104x320", + "CUBLASLT_MATMUL_TILE_104x256", + "CUBLASLT_MATMUL_TILE_104x192", + "CUBLASLT_MATMUL_TILE_104x128", + "CUBLASLT_MATMUL_STAGES_UNDEFINED", + "CUBLASLT_MATMUL_STAGES_END", + "CUBLASLT_MATMUL_STAGES_8xAUTO", + "CUBLASLT_MATMUL_STAGES_8x5", + "CUBLASLT_MATMUL_STAGES_8x4", + "CUBLASLT_MATMUL_STAGES_8x3", + "CUBLASLT_MATMUL_STAGES_64xAUTO", + "CUBLASLT_MATMUL_STAGES_64x6", + "CUBLASLT_MATMUL_STAGES_64x5", + "CUBLASLT_MATMUL_STAGES_64x4", + "CUBLASLT_MATMUL_STAGES_64x3", + "CUBLASLT_MATMUL_STAGES_64x2", + "CUBLASLT_MATMUL_STAGES_64x1", + "CUBLASLT_MATMUL_STAGES_32xAUTO", + "CUBLASLT_MATMUL_STAGES_32x6", + "CUBLASLT_MATMUL_STAGES_32x5", + "CUBLASLT_MATMUL_STAGES_32x4", + "CUBLASLT_MATMUL_STAGES_32x3", + "CUBLASLT_MATMUL_STAGES_32x2", + "CUBLASLT_MATMUL_STAGES_32x10", + "CUBLASLT_MATMUL_STAGES_32x1", + "CUBLASLT_MATMUL_STAGES_16xAUTO", + "CUBLASLT_MATMUL_STAGES_16x6", + "CUBLASLT_MATMUL_STAGES_16x5", + "CUBLASLT_MATMUL_STAGES_16x4", + "CUBLASLT_MATMUL_STAGES_16x3", + "CUBLASLT_MATMUL_STAGES_16x2", + "CUBLASLT_MATMUL_STAGES_16x10", + "CUBLASLT_MATMUL_STAGES_16x1", + "CUBLASLT_MATMUL_STAGES_128xAUTO", + "CUBLASLT_MATMUL_STAGES_128x6", + "CUBLASLT_MATMUL_STAGES_128x5", + "CUBLASLT_MATMUL_STAGES_128x4", + "CUBLASLT_MATMUL_STAGES_128x3", + "CUBLASLT_MATMUL_STAGES_128x2", + "CUBLASLT_MATMUL_STAGES_128x1", + "CUBLASLT_MATMUL_PREF_SEARCH_MODE", + "CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES", + "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES", + 
"CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", + "CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT", + "CUBLASLT_MATMUL_PREF_IMPL_MASK", + "CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA884", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA1688", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA1684", + "CUBLASLT_MATMUL_INNER_SHAPE_MMA16816", + "CUBLASLT_MATMUL_INNER_SHAPE_END", + "CUBLASLT_MATMUL_DESC_TRANSC", + "CUBLASLT_MATMUL_DESC_TRANSB", + "CUBLASLT_MATMUL_DESC_TRANSA", + "CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET", + "CUBLASLT_MATMUL_DESC_SCALE_TYPE", + "CUBLASLT_MATMUL_DESC_POINTER_MODE", + "CUBLASLT_MATMUL_DESC_FILL_MODE", + "CUBLASLT_MATMUL_DESC_FAST_ACCUM", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE", + "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER", + "CUBLASLT_MATMUL_DESC_EPILOGUE", + "CUBLASLT_MATMUL_DESC_D_SCALE_POINTER", + "CUBLASLT_MATMUL_DESC_C_SCALE_POINTER", + "CUBLASLT_MATMUL_DESC_COMPUTE_TYPE", + "CUBLASLT_MATMUL_DESC_B_SCALE_POINTER", + "CUBLASLT_MATMUL_DESC_BIAS_POINTER", + "CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE", + "CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE", + "CUBLASLT_MATMUL_DESC_A_SCALE_POINTER", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_OUT_COUNTERS_POINTER", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_ROWS", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_COLS", + "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_IN_COUNTERS_POINTER", + "CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE", + "CUBLASLT_EPILOGUE_RELU_BIAS", + "CUBLASLT_EPILOGUE_RELU_AUX_BIAS", + "CUBLASLT_EPILOGUE_RELU_AUX", + "CUBLASLT_EPILOGUE_RELU", + "CUBLASLT_EPILOGUE_GELU_BIAS", + "CUBLASLT_EPILOGUE_GELU_AUX_BIAS", + "CUBLASLT_EPILOGUE_GELU_AUX", + "CUBLASLT_EPILOGUE_GELU", + "CUBLASLT_EPILOGUE_DRELU_BGRAD", + "CUBLASLT_EPILOGUE_DRELU", + "CUBLASLT_EPILOGUE_DGELU_BGRAD", + "CUBLASLT_EPILOGUE_DGELU", + 
"CUBLASLT_EPILOGUE_DEFAULT", + "CUBLASLT_EPILOGUE_BIAS", + "CUBLASLT_EPILOGUE_BGRADB", + "CUBLASLT_EPILOGUE_BGRADA", + "CUBLASLT_CLUSTER_SHAPE_END", + "CUBLASLT_CLUSTER_SHAPE_AUTO", + "CUBLASLT_CLUSTER_SHAPE_9x1x1", + "CUBLASLT_CLUSTER_SHAPE_8x2x1", + "CUBLASLT_CLUSTER_SHAPE_8x1x1", + "CUBLASLT_CLUSTER_SHAPE_7x2x1", + "CUBLASLT_CLUSTER_SHAPE_7x1x1", + "CUBLASLT_CLUSTER_SHAPE_6x2x1", + "CUBLASLT_CLUSTER_SHAPE_6x1x1", + "CUBLASLT_CLUSTER_SHAPE_5x3x1", + "CUBLASLT_CLUSTER_SHAPE_5x2x1", + "CUBLASLT_CLUSTER_SHAPE_5x1x1", + "CUBLASLT_CLUSTER_SHAPE_4x4x1", + "CUBLASLT_CLUSTER_SHAPE_4x3x1", + "CUBLASLT_CLUSTER_SHAPE_4x2x1", + "CUBLASLT_CLUSTER_SHAPE_4x1x1", + "CUBLASLT_CLUSTER_SHAPE_3x5x1", + "CUBLASLT_CLUSTER_SHAPE_3x4x1", + "CUBLASLT_CLUSTER_SHAPE_3x3x1", + "CUBLASLT_CLUSTER_SHAPE_3x2x1", + "CUBLASLT_CLUSTER_SHAPE_3x1x1", + "CUBLASLT_CLUSTER_SHAPE_2x8x1", + "CUBLASLT_CLUSTER_SHAPE_2x7x1", + "CUBLASLT_CLUSTER_SHAPE_2x6x1", + "CUBLASLT_CLUSTER_SHAPE_2x5x1", + "CUBLASLT_CLUSTER_SHAPE_2x4x1", + "CUBLASLT_CLUSTER_SHAPE_2x3x1", + "CUBLASLT_CLUSTER_SHAPE_2x2x1", + "CUBLASLT_CLUSTER_SHAPE_2x1x1", + "CUBLASLT_CLUSTER_SHAPE_1x9x1", + "CUBLASLT_CLUSTER_SHAPE_1x8x1", + "CUBLASLT_CLUSTER_SHAPE_1x7x1", + "CUBLASLT_CLUSTER_SHAPE_1x6x1", + "CUBLASLT_CLUSTER_SHAPE_1x5x1", + "CUBLASLT_CLUSTER_SHAPE_1x4x1", + "CUBLASLT_CLUSTER_SHAPE_1x3x1", + "CUBLASLT_CLUSTER_SHAPE_1x2x1", + "CUBLASLT_CLUSTER_SHAPE_1x1x1", + "CUBLASLT_CLUSTER_SHAPE_1x16x1", + "CUBLASLT_CLUSTER_SHAPE_1x15x1", + "CUBLASLT_CLUSTER_SHAPE_1x14x1", + "CUBLASLT_CLUSTER_SHAPE_1x13x1", + "CUBLASLT_CLUSTER_SHAPE_1x12x1", + "CUBLASLT_CLUSTER_SHAPE_1x11x1", + "CUBLASLT_CLUSTER_SHAPE_1x10x1", + "CUBLASLT_CLUSTER_SHAPE_16x1x1", + "CUBLASLT_CLUSTER_SHAPE_15x1x1", + "CUBLASLT_CLUSTER_SHAPE_14x1x1", + "CUBLASLT_CLUSTER_SHAPE_13x1x1", + "CUBLASLT_CLUSTER_SHAPE_12x1x1", + "CUBLASLT_CLUSTER_SHAPE_11x1x1", + "CUBLASLT_CLUSTER_SHAPE_10x1x1", + "CUBLASLT_ALGO_CONFIG_TILE_ID", + "CUBLASLT_ALGO_CONFIG_STAGES_ID", + 
"CUBLASLT_ALGO_CONFIG_SPLITK_NUM", + "CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME", + "CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID", + "CUBLASLT_ALGO_CONFIG_ID", + "CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION", + "CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING", + "CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID", + "CUBLASLT_ALGO_CAP_UPLO_SUPPORT", + "CUBLASLT_ALGO_CAP_TILE_IDS", + "CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT", + "CUBLASLT_ALGO_CAP_STAGES_IDS", + "CUBLASLT_ALGO_CAP_SPLITK_SUPPORT", + "CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK", + "CUBLASLT_ALGO_CAP_POINTER_MODE_MASK", + "CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT", + "CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES", + "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES", + "CUBLASLT_ALGO_CAP_LD_NEGATIVE", + "CUBLASLT_ALGO_CAP_EPILOGUE_MASK", + "CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX", + "CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER", + "CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT", + "CUBLASLT_ALGO_CAP_ATOMIC_SYNC" +); + sub warnRocOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; - foreach $func ( - "cutensorWriteKernelCacheToFile", - "cutensorTensorDescriptor", - "cutensorReduce", - "cutensorReadKernelCacheFromFile", - "cutensorPlanPreferenceSetAttribute", - "cutensorPlanPreferenceAttribute_t", - "cutensorPlanGetAttribute", - "cutensorPlanAttribute_t", - "cutensorPlan", - "cutensorPermute", - "cutensorOperationDescriptorSetAttribute", - "cutensorOperationDescriptorGetAttribute", - "cutensorOperationDescriptorAttribute_t", - "cutensorMgTensorDescriptor_t", - "cutensorMgTensorDescriptor_s", - "cutensorMgHostDevice_t", - "cutensorMgHandle_t", - "cutensorMgHandle_s", - "cutensorMgDestroyTensorDescriptor", - "cutensorMgDestroyCopyPlan", - "cutensorMgDestroyCopyDescriptor", - "cutensorMgDestroyContractionPlan", - "cutensorMgDestroyContractionFind", - "cutensorMgDestroyContractionDescriptor", - "cutensorMgDestroy", - 
"cutensorMgCreateTensorDescriptor", - "cutensorMgCreateCopyPlan", - "cutensorMgCreateCopyDescriptor", - "cutensorMgCreateContractionPlan", - "cutensorMgCreateContractionFind", - "cutensorMgCreateContractionDescriptor", - "cutensorMgCreate", - "cutensorMgCopyPlan_t", - "cutensorMgCopyPlan_s", - "cutensorMgCopyGetWorkspace", - "cutensorMgCopyDescriptor_t", - "cutensorMgCopyDescriptor_s", - "cutensorMgCopy", - "cutensorMgContractionPlan_t", - "cutensorMgContractionPlan_s", - "cutensorMgContractionGetWorkspace", - "cutensorMgContractionFind_t", - "cutensorMgContractionFind_s", - "cutensorMgContractionFindSetAttribute", - "cutensorMgContractionFindAttribute_t", - "cutensorMgContractionDescriptor_t", - "cutensorMgContractionDescriptor_s", - "cutensorMgContraction", - "cutensorMgAlgo_t", - "cutensorJitMode_t", - "cutensorHandleWritePlanCacheToFile", - "cutensorHandleResizePlanCache", - "cutensorHandleReadPlanCacheFromFile", - "cutensorHandle", - "cutensorGetVersion", - "cutensorEstimateWorkspaceSize", - "cutensorElementwiseTrinaryExecute", - "cutensorElementwiseBinaryExecute", - "cutensorDestroyTensorDescriptor", - "cutensorDestroyPlanPreference", - "cutensorDestroyPlan", - "cutensorDestroyOperationDescriptor", - "cutensorCreateTensorDescriptor", - "cutensorCreateReduction", - "cutensorCreatePlanPreference", - "cutensorCreatePlan", - "cutensorCreatePermutation", - "cutensorCreateElementwiseTrinary", - "cutensorCreateElementwiseBinary", - "cutensorCreateContraction", - "cutensorCacheMode_t", - "cutensorAutotuneMode_t", - "cusparseZhybsv_solve", - "cusparseZhybsv_analysis", - "cusparseZhyb2dense", - "cusparseZhyb2csr", - "cusparseZhyb2csc", - "cusparseZgtsv_nopivot", - "cusparseZgtsvStridedBatch", - "cusparseZgtsv", - "cusparseZgemmi", - "cusparseZgebsr2gebsr_bufferSizeExt", - "cusparseZgebsr2gebsc_bufferSizeExt", - "cusparseZdense2hyb", - "cusparseZcsru2csr_bufferSizeExt", - "cusparseZcsru2csr", - "cusparseZcsrsv_solve", - "cusparseZcsrsv_analysis", - 
"cusparseZcsrsm_solve", - "cusparseZcsrsm_analysis", - "cusparseZcsrmv_mp", - "cusparseZcsrilu0", - "cusparseZcsric0", - "cusparseZcsrgemm", - "cusparseZcsrgeam2_bufferSizeExt", - "cusparseZcsr2gebsr_bufferSizeExt", - "cusparseZcsr2csru", - "cusparseZcsr2csc", - "cusparseZcsc2hyb", - "cusparseZbsrsm2_bufferSizeExt", - "cusparseZbsrilu02_bufferSizeExt", - "cusparseZbsric02_bufferSizeExt", - "cusparseXgebsr2csr", - "cusparseXcsrgemmNnz", - "cusparseSpSV_updateMatrix", - "cusparseSpSV_solve", - "cusparseSpSV_destroyDescr", - "cusparseSpSV_createDescr", - "cusparseSpSV_analysis", - "cusparseSpSVUpdate_t", - "cusparseSpSVDescr_t", - "cusparseSpSVDescr", - "cusparseSpSM_updateMatrix", - "cusparseSpSM_destroyDescr", - "cusparseSpSM_createDescr", - "cusparseSpSM_bufferSize", - "cusparseSpSMUpdate_t", - "cusparseSpSMDescr_t", - "cusparseSpSMDescr", - "cusparseSpMatSetNumBatches", - "cusparseSpMatGetNumBatches", - "cusparseSpMV_preprocess", - "cusparseSpMMOp_destroyPlan", - "cusparseSpMMOp_createPlan", - "cusparseSpMMOpPlan_t", - "cusparseSpMMOpPlan", - "cusparseSpMMOpAlg_t", - "cusparseSpMMOp", - "cusparseSpGEMMreuse_workEstimation", - "cusparseSpGEMMreuse_nnz", - "cusparseSpGEMMreuse_copy", - "cusparseSpGEMMreuse_compute", - "cusparseSpGEMM_workEstimation", - "cusparseSpGEMM_getNumProducts", - "cusparseSpGEMM_estimateMemory", - "cusparseSpGEMM_destroyDescr", - "cusparseSpGEMM_createDescr", - "cusparseSpGEMM_copy", - "cusparseSpGEMM_compute", - "cusparseSpGEMMDescr_t", - "cusparseSpGEMMDescr", - "cusparseSolveAnalysisInfo_t", - "cusparseSolveAnalysisInfo", - "cusparseSideMode_t", - "cusparseShybsv_solve", - "cusparseShybsv_analysis", - "cusparseShyb2dense", - "cusparseShyb2csr", - "cusparseShyb2csc", - "cusparseSgtsv_nopivot", - "cusparseSgtsvStridedBatch", - "cusparseSgtsv", - "cusparseSgemmi", - "cusparseSgebsr2gebsr_bufferSizeExt", - "cusparseSgebsr2gebsc_bufferSizeExt", - "cusparseSdense2hyb", - "cusparseScsru2csr_bufferSizeExt", - "cusparseScsru2csr", - 
"cusparseScsrsv_solve", - "cusparseScsrsv_analysis", - "cusparseScsrsm_solve", - "cusparseScsrsm_analysis", - "cusparseScsrmv_mp", - "cusparseScsrilu0", - "cusparseScsric0", - "cusparseScsrgemm", - "cusparseScsrgeam2_bufferSizeExt", - "cusparseScsr2gebsr_bufferSizeExt", - "cusparseScsr2csru", - "cusparseScsr2csc", - "cusparseScsc2hyb", - "cusparseSbsrsm2_bufferSizeExt", - "cusparseSbsrilu02_bufferSizeExt", - "cusparseSbsric02_bufferSizeExt", - "cusparseLoggerSetMask", - "cusparseLoggerSetLevel", - "cusparseLoggerSetFile", - "cusparseLoggerSetCallback", - "cusparseLoggerOpenFile", - "cusparseLoggerForceDisable", - "cusparseLoggerCallback_t", - "cusparseHpruneDense2csr_bufferSizeExt", - "cusparseHpruneDense2csrNnzByPercentage", - "cusparseHpruneDense2csrNnz", - "cusparseHpruneDense2csrByPercentage_bufferSizeExt", - "cusparseHpruneDense2csrByPercentage", - "cusparseHpruneDense2csr", - "cusparseHpruneCsr2csr_bufferSizeExt", - "cusparseHpruneCsr2csrNnzByPercentage", - "cusparseHpruneCsr2csrNnz", - "cusparseHpruneCsr2csrByPercentage_bufferSizeExt", - "cusparseHpruneCsr2csrByPercentage", - "cusparseHpruneCsr2csr", - "cusparseGetLevelInfo", - "cusparseDhybsv_solve", - "cusparseDhybsv_analysis", - "cusparseDhyb2dense", - "cusparseDhyb2csr", - "cusparseDhyb2csc", - "cusparseDgtsv_nopivot", - "cusparseDgtsvStridedBatch", - "cusparseDgtsv", - "cusparseDgemmi", - "cusparseDgebsr2gebsr_bufferSizeExt", - "cusparseDgebsr2gebsc_bufferSizeExt", - "cusparseDestroySolveAnalysisInfo", - "cusparseDestroyCsru2csrInfo", - "cusparseDenseToSparse_convert", - "cusparseDdense2hyb", - "cusparseDcsru2csr_bufferSizeExt", - "cusparseDcsru2csr", - "cusparseDcsrsv_solve", - "cusparseDcsrsv_analysis", - "cusparseDcsrsm_solve", - "cusparseDcsrsm_analysis", - "cusparseDcsrmv_mp", - "cusparseDcsrilu0", - "cusparseDcsric0", - "cusparseDcsrgemm", - "cusparseDcsrgeam2_bufferSizeExt", - "cusparseDcsr2gebsr_bufferSizeExt", - "cusparseDcsr2csru", - "cusparseDcsr2csc", - "cusparseDcsc2hyb", - 
"cusparseDbsrsm2_bufferSizeExt", - "cusparseDbsrilu02_bufferSizeExt", - "cusparseDbsric02_bufferSizeExt", - "cusparseCsrsv_solveEx", - "cusparseCsrsv_analysisEx", - "cusparseCsrmvEx_bufferSize", - "cusparseCsrmvEx", - "cusparseCsrilu0Ex", - "cusparseCsr2cscEx2", - "cusparseCsr2cscEx", - "cusparseCsr2CscAlg_t", - "cusparseCreateSolveAnalysisInfo", - "cusparseCreateSlicedEll", - "cusparseCreateCsru2csrInfo", - "cusparseCreateConstSlicedEll", - "cusparseCreateConstBsr", - "cusparseCreateBsr", - "cusparseConstrainedGeMM_bufferSize", - "cusparseConstrainedGeMM", - "cusparseColorAlg_t", - "cusparseChybsv_solve", - "cusparseChybsv_analysis", - "cusparseChyb2dense", - "cusparseChyb2csr", - "cusparseChyb2csc", - "cusparseCgtsv_nopivot", - "cusparseCgtsvStridedBatch", - "cusparseCgtsv", - "cusparseCgemmi", - "cusparseCgebsr2gebsr_bufferSizeExt", - "cusparseCgebsr2gebsc_bufferSizeExt", - "cusparseCdense2hyb", - "cusparseCcsru2csr_bufferSizeExt", - "cusparseCcsru2csr", - "cusparseCcsrsv_solve", - "cusparseCcsrsv_analysis", - "cusparseCcsrsm_solve", - "cusparseCcsrsm_analysis", - "cusparseCcsrmv_mp", - "cusparseCcsrilu0", - "cusparseCcsric0", - "cusparseCcsrgemm", - "cusparseCcsrgeam2_bufferSizeExt", - "cusparseCcsr2gebsr_bufferSizeExt", - "cusparseCcsr2csru", - "cusparseCcsr2csc", - "cusparseCcsc2hyb", - "cusparseCbsrsm2_bufferSizeExt", - "cusparseCbsrilu02_bufferSizeExt", - "cusparseCbsric02_bufferSizeExt", - "cusparseBsrSetStridedBatch", - "cusparseAlgMode_t", - "curand_mtgp32_specific", - "curand_mtgp32_single_specific", - "curand_mtgp32_single", - "curand_Philox4x32_10", - "curandState_t", - "curandStateXORWOW_t", - "curandStateXORWOW", - "curandState", - "curandMethod_t", - "curandMethod", - "curandHistogramM2_t", - "curandHistogramM2_st", - "curandHistogramM2V_t", - "curandHistogramM2V_st", - "curandHistogramM2K_t", - "curandHistogramM2K_st", - "curandGetProperty", - "curandDistribution_t", - "curandDistribution_st", - "curandDistributionShift_t", - 
"curandDistributionShift_st", - "curandDistributionM2Shift_t", - "curandDistributionM2Shift_st", - "curandDirectionVectors64_t", - "curandDirectionVectors32_t", - "cublasZtrttp", - "cublasZtpttr", - "cublasZmatinvBatched", - "cublasZgetrsBatched", - "cublasZgetriBatched", - "cublasZgetrfBatched", - "cublasZgeqrfBatched", - "cublasZgemm3m_64", - "cublasZgemm3m", - "cublasZgelsBatched", - "cublasXerbla", - "cublasUint8gemmBias", - "cublasSwapEx_64", - "cublasSwapEx", - "cublasStrttp", - "cublasStpttr", - "cublasSmatinvBatched", - "cublasShutdown", - "cublasSgetrsBatched", - "cublasSgetriBatched", - "cublasSgetrfBatched", - "cublasSgeqrfBatched", - "cublasSgemmGroupedBatched_64", - "cublasSgemmGroupedBatched", - "cublasSgemmEx_64", - "cublasSgemmEx", - "cublasSgelsBatched", - "cublasSetVector_64", - "cublasSetVectorAsync_64", - "cublasSetSmCountTarget", - "cublasSetMatrix_64", - "cublasSetMatrixAsync_64", - "cublasSetLoggerCallback", - "cublasSetKernelStream", - "cublasRotmgEx", - "cublasRotmEx_64", - "cublasRotmEx", - "cublasRotgEx", - "cublasMigrateComputeType", - "cublasLtReductionScheme_t", - "cublasLtPointerMode_t", - "cublasLtPointerModeMask_t", - "cublasLtOrder_t", - "cublasLtNumericalImplFlags_t", - "cublasLtMatrixTransformDescSetAttribute", - "cublasLtMatrixTransformDescInit", - "cublasLtMatrixTransformDescGetAttribute", - "cublasLtMatrixTransformDescDestroy", - "cublasLtMatrixTransformDescCreate", - "cublasLtMatrixTransformDescAttributes_t", - "cublasLtMatrixTransform", - "cublasLtMatrixLayoutSetAttribute", - "cublasLtMatrixLayoutInit", - "cublasLtMatrixLayoutGetAttribute", - "cublasLtMatrixLayoutDestroy", - "cublasLtMatrixLayoutCreate", - "cublasLtMatrixLayoutAttribute_t", - "cublasLtMatmulTile_t", - "cublasLtMatmulStages_t", - "cublasLtMatmulSearch_t", - "cublasLtMatmulPreferenceSetAttribute", - "cublasLtMatmulPreferenceInit", - "cublasLtMatmulPreferenceGetAttribute", - "cublasLtMatmulPreferenceDestroy", - "cublasLtMatmulPreferenceCreate", - 
"cublasLtMatmulPreferenceAttributes_t", - "cublasLtMatmulInnerShape_t", - "cublasLtMatmulHeuristicResult_t", - "cublasLtMatmulDescSetAttribute", - "cublasLtMatmulDescInit", - "cublasLtMatmulDescGetAttribute", - "cublasLtMatmulDescDestroy", - "cublasLtMatmulDescCreate", - "cublasLtMatmulDescAttributes_t", - "cublasLtMatmulAlgoInit", - "cublasLtMatmulAlgoGetIds", - "cublasLtMatmulAlgoGetHeuristic", - "cublasLtMatmulAlgoConfigSetAttribute", - "cublasLtMatmulAlgoConfigGetAttribute", - "cublasLtMatmulAlgoConfigAttributes_t", - "cublasLtMatmulAlgoCheck", - "cublasLtMatmulAlgoCapGetAttribute", - "cublasLtMatmulAlgoCapAttributes_t", - "cublasLtMatmul", - "cublasLtLoggerSetMask", - "cublasLtLoggerSetLevel", - "cublasLtLoggerSetFile", - "cublasLtLoggerSetCallback", - "cublasLtLoggerOpenFile", - "cublasLtLoggerForceDisable", - "cublasLtLoggerCallback_t", - "cublasLtHeuristicsCacheSetCapacity", - "cublasLtHeuristicsCacheGetCapacity", - "cublasLtGetVersion", - "cublasLtGetStatusString", - "cublasLtGetStatusName", - "cublasLtGetProperty", - "cublasLtGetCudartVersion", - "cublasLtEpilogue_t", - "cublasLtDisableCpuInstructionsSetMask", - "cublasLtContext", - "cublasLtClusterShape_t", - "cublasLoggerConfigure", - "cublasLogCallback", - "cublasIaminEx_64", - "cublasIaminEx", - "cublasIamaxEx_64", - "cublasIamaxEx", - "cublasGetVersion_v2", - "cublasGetVersion", - "cublasGetVector_64", - "cublasGetVectorAsync_64", - "cublasGetStatusName", - "cublasGetSmCountTarget", - "cublasGetProperty", - "cublasGetMatrix_64", - "cublasGetMatrixAsync_64", - "cublasGetLoggerCallback", - "cublasGetError", - "cublasGetCudartVersion", - "cublasGemmStridedBatchedEx_64", - "cublasGemmGroupedBatchedEx_64", - "cublasGemmGroupedBatchedEx", - "cublasGemmEx_64", - "cublasGemmBatchedEx_64", - "cublasFree", - "cublasDtrttp", - "cublasDtpttr", - "cublasDmatinvBatched", - "cublasDgetrsBatched", - "cublasDgetriBatched", - "cublasDgetrfBatched", - "cublasDgeqrfBatched", - "cublasDgemmGroupedBatched_64", - 
"cublasDgemmGroupedBatched", - "cublasDgelsBatched", - "cublasCtrttp", - "cublasCtpttr", - "cublasCsyrkEx_64", - "cublasCsyrkEx", - "cublasCsyrk3mEx_64", - "cublasCsyrk3mEx", - "cublasCopyEx_64", - "cublasCopyEx", - "cublasCmatinvBatched", - "cublasCherkEx_64", - "cublasCherkEx", - "cublasCherk3mEx_64", - "cublasCherk3mEx", - "cublasCgetrsBatched", - "cublasCgetriBatched", - "cublasCgetrfBatched", - "cublasCgeqrfBatched", - "cublasCgemmEx_64", - "cublasCgemmEx", - "cublasCgemm3m_64", - "cublasCgemm3mStridedBatched_64", - "cublasCgemm3mStridedBatched", - "cublasCgemm3mEx_64", - "cublasCgemm3mEx", - "cublasCgemm3mBatched_64", - "cublasCgemm3mBatched", - "cublasCgemm3m", - "cublasCgelsBatched", - "cublasAsumEx_64", - "cublasAsumEx", - "cublasAlloc", - "csru2csrInfo_t", - "csru2csrInfo", - "__curand_umul", - "CUTENSOR_WORKSPACE_DEFAULT", - "CUTENSOR_STATUS_MAPPING_ERROR", - "CUTENSOR_STATUS_LICENSE_ERROR", - "CUTENSOR_STATUS_CUDA_ERROR", - "CUTENSOR_STATUS_CUBLAS_ERROR", - "CUTENSOR_R_MIN_TF32", - "CUTENSOR_R_MIN_8U", - "CUTENSOR_R_MIN_8I", - "CUTENSOR_R_MIN_64F", - "CUTENSOR_R_MIN_32U", - "CUTENSOR_R_MIN_32I", - "CUTENSOR_R_MIN_32F", - "CUTENSOR_R_MIN_16F", - "CUTENSOR_R_MIN_16BF", - "CUTENSOR_R_64U", - "CUTENSOR_R_64I", - "CUTENSOR_R_4U", - "CUTENSOR_R_4I", - "CUTENSOR_R_16U", - "CUTENSOR_R_16I", - "CUTENSOR_PLAN_REQUIRED_WORKSPACE", - "CUTENSOR_PLAN_PREFERENCE_KERNEL_RANK", - "CUTENSOR_PLAN_PREFERENCE_JIT", - "CUTENSOR_PLAN_PREFERENCE_INCREMENTAL_COUNT", - "CUTENSOR_PLAN_PREFERENCE_CACHE_MODE", - "CUTENSOR_PLAN_PREFERENCE_AUTOTUNE_MODE", - "CUTENSOR_PLAN_PREFERENCE_ALGO", - "CUTENSOR_OP_TANH", - "CUTENSOR_OP_TAN", - "CUTENSOR_OP_SWISH", - "CUTENSOR_OP_SOFT_SIGN", - "CUTENSOR_OP_SOFT_PLUS", - "CUTENSOR_OP_SINH", - "CUTENSOR_OP_SIN", - "CUTENSOR_OP_SIGMOID", - "CUTENSOR_OP_RELU", - "CUTENSOR_OP_RCP", - "CUTENSOR_OP_NEG", - "CUTENSOR_OP_MISH", - "CUTENSOR_OP_LOG", - "CUTENSOR_OP_FLOOR", - "CUTENSOR_OP_EXP", - "CUTENSOR_OP_COSH", - "CUTENSOR_OP_COS", - 
"CUTENSOR_OP_CONJ", - "CUTENSOR_OP_CEIL", - "CUTENSOR_OP_ATANH", - "CUTENSOR_OP_ATAN", - "CUTENSOR_OP_ASINH", - "CUTENSOR_OP_ASIN", - "CUTENSOR_OP_ACOSH", - "CUTENSOR_OP_ACOS", - "CUTENSOR_OP_ABS", - "CUTENSOR_OPERATION_DESCRIPTOR_TAG", - "CUTENSOR_OPERATION_DESCRIPTOR_SCALAR_TYPE", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_VALUE", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_RIGHT", - "CUTENSOR_OPERATION_DESCRIPTOR_PADDING_LEFT", - "CUTENSOR_OPERATION_DESCRIPTOR_MOVED_BYTES", - "CUTENSOR_OPERATION_DESCRIPTOR_FLOPS", - "CUTENSOR_MG_DEVICE_HOST_PINNED", - "CUTENSOR_MG_DEVICE_HOST", - "CUTENSOR_JIT_MODE_NONE", - "CUTENSOR_JIT_MODE_DEFAULT", - "CUTENSOR_C_MIN_TF32", - "CUTENSOR_C_MIN_64F", - "CUTENSOR_C_MIN_32F", - "CUTENSOR_C_MIN_16F", - "CUTENSOR_C_8U", - "CUTENSOR_C_8I", - "CUTENSOR_C_64U", - "CUTENSOR_C_64I", - "CUTENSOR_C_4U", - "CUTENSOR_C_4I", - "CUTENSOR_C_32U", - "CUTENSOR_C_32I", - "CUTENSOR_C_16U", - "CUTENSOR_C_16I", - "CUTENSOR_C_16F", - "CUTENSOR_C_16BF", - "CUTENSOR_COMPUTE_TF32", - "CUTENSOR_COMPUTE_3XTF32", - "CUTENSOR_CACHE_MODE_PEDANTIC", - "CUTENSOR_CACHE_MODE_NONE", - "CUTENSOR_AUTOTUNE_MODE_NONE", - "CUTENSOR_AUTOTUNE_MODE_INCREMENTAL", - "CUTENSOR_ALGO_TTGT", - "CUTENSOR_ALGO_TGETT", - "CUTENSOR_ALGO_GETT", - "CUTENSORMG_CONTRACTION_FIND_ATTRIBUTE_MAX", - "CUTENSORMG_ALGO_DEFAULT", - "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", - "CUSPARSE_STATUS_MAPPING_ERROR", - "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES", - "CUSPARSE_STATUS_EXECUTION_FAILED", - "CUSPARSE_SPSV_UPDATE_GENERAL", - "CUSPARSE_SPSV_UPDATE_DIAGONAL", - "CUSPARSE_SPSM_UPDATE_GENERAL", - "CUSPARSE_SPSM_UPDATE_DIAGONAL", - "CUSPARSE_SPMM_OP_ALG_DEFAULT", - "CUSPARSE_SPMM_COO_ALG4", - "CUSPARSE_SPMM_BSR_ALG1", - "CUSPARSE_SPMMA_PREPROCESS", - "CUSPARSE_SPMMA_ALG4", - "CUSPARSE_SPMMA_ALG3", - "CUSPARSE_SPMMA_ALG2", - "CUSPARSE_SPMMA_ALG1", - "CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC", - "CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC", - "CUSPARSE_SPGEMM_ALG3", - "CUSPARSE_SPGEMM_ALG2", - 
"CUSPARSE_SPGEMM_ALG1", - "CUSPARSE_SIDE_RIGHT", - "CUSPARSE_SIDE_LEFT", - "CUSPARSE_MV_ALG_DEFAULT", - "CUSPARSE_MM_ALG_DEFAULT", - "CUSPARSE_CSRMV_ALG2", - "CUSPARSE_CSRMV_ALG1", - "CUSPARSE_CSRMM_ALG1", - "CUSPARSE_CSR2CSC_ALG_DEFAULT", - "CUSPARSE_CSR2CSC_ALG2", - "CUSPARSE_CSR2CSC_ALG1", - "CUSPARSE_COOMV_ALG", - "CUSPARSE_COOMM_ALG3", - "CUSPARSE_COOMM_ALG2", - "CUSPARSE_COOMM_ALG1", - "CUSPARSE_COLOR_ALG1", - "CUSPARSE_COLOR_ALG0", - "CUSPARSE_ALG_NAIVE", - "CUSPARSE_ALG_MERGE_PATH", - "CUSPARSE_ALG1", - "CUSPARSE_ALG0", - "CURAND_STATUS_PREEXISTING_FAILURE", - "CURAND_STATUS_INITIALIZATION_FAILED", - "CURAND_STATUS_ARCH_MISMATCH", - "CURAND_RNG_TEST", - "CURAND_REJECTION", - "CURAND_POISSON", - "CURAND_M2", - "CURAND_M1", - "CURAND_KNUTH", - "CURAND_ITR", - "CURAND_HITR", - "CURAND_FAST_REJECTION", - "CURAND_DISCRETE_GAUSS", - "CURAND_DEVICE_API", - "CURAND_DEFINITION", - "CURAND_CHOOSE_BEST", - "CURAND_BINARY_SEARCH", - "CURAND_3RD", - "CUDA_R_8F_E5M2", - "CUDA_R_8F_E4M3", - "CUDA_R_64U", - "CUDA_R_64I", - "CUDA_R_4U", - "CUDA_R_4I", - "CUDA_R_16U", - "CUDA_R_16I", - "CUDA_C_64U", - "CUDA_C_64I", - "CUDA_C_4U", - "CUDA_C_4I", - "CUDA_C_16U", - "CUDA_C_16I", - "CUBLAS_TF32_TENSOR_OP_MATH", - "CUBLAS_TENSOR_OP_MATH", - "CUBLAS_STATUS_LICENSE_ERROR", - "CUBLAS_PEDANTIC_MATH", - "CUBLAS_OP_CONJG", - "CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION", - "CUBLAS_GEMM_DFALT_TENSOR_OP", - "CUBLAS_GEMM_DEFAULT_TENSOR_OP", - "CUBLAS_GEMM_ALGO9_TENSOR_OP", - "CUBLAS_GEMM_ALGO9", - "CUBLAS_GEMM_ALGO8_TENSOR_OP", - "CUBLAS_GEMM_ALGO8", - "CUBLAS_GEMM_ALGO7_TENSOR_OP", - "CUBLAS_GEMM_ALGO7", - "CUBLAS_GEMM_ALGO6_TENSOR_OP", - "CUBLAS_GEMM_ALGO6", - "CUBLAS_GEMM_ALGO5_TENSOR_OP", - "CUBLAS_GEMM_ALGO5", - "CUBLAS_GEMM_ALGO4_TENSOR_OP", - "CUBLAS_GEMM_ALGO4", - "CUBLAS_GEMM_ALGO3_TENSOR_OP", - "CUBLAS_GEMM_ALGO3", - "CUBLAS_GEMM_ALGO2_TENSOR_OP", - "CUBLAS_GEMM_ALGO23", - "CUBLAS_GEMM_ALGO22", - "CUBLAS_GEMM_ALGO21", - "CUBLAS_GEMM_ALGO20", - "CUBLAS_GEMM_ALGO2", - 
"CUBLAS_GEMM_ALGO1_TENSOR_OP", - "CUBLAS_GEMM_ALGO19", - "CUBLAS_GEMM_ALGO18", - "CUBLAS_GEMM_ALGO17", - "CUBLAS_GEMM_ALGO16", - "CUBLAS_GEMM_ALGO15_TENSOR_OP", - "CUBLAS_GEMM_ALGO15", - "CUBLAS_GEMM_ALGO14_TENSOR_OP", - "CUBLAS_GEMM_ALGO14", - "CUBLAS_GEMM_ALGO13_TENSOR_OP", - "CUBLAS_GEMM_ALGO13", - "CUBLAS_GEMM_ALGO12_TENSOR_OP", - "CUBLAS_GEMM_ALGO12", - "CUBLAS_GEMM_ALGO11_TENSOR_OP", - "CUBLAS_GEMM_ALGO11", - "CUBLAS_GEMM_ALGO10_TENSOR_OP", - "CUBLAS_GEMM_ALGO10", - "CUBLAS_GEMM_ALGO1", - "CUBLAS_GEMM_ALGO0_TENSOR_OP", - "CUBLAS_GEMM_ALGO0", - "CUBLAS_COMPUTE_64F_PEDANTIC", - "CUBLAS_COMPUTE_64F", - "CUBLAS_COMPUTE_32I_PEDANTIC", - "CUBLAS_COMPUTE_32I", - "CUBLAS_COMPUTE_32F_PEDANTIC", - "CUBLAS_COMPUTE_32F_FAST_TF32", - "CUBLAS_COMPUTE_32F_FAST_16F", - "CUBLAS_COMPUTE_32F_FAST_16BF", - "CUBLAS_COMPUTE_16F_PEDANTIC", - "CUBLAS_COMPUTE_16F", - "CUBLASLT_SEARCH_RESERVED_09", - "CUBLASLT_SEARCH_RESERVED_08", - "CUBLASLT_SEARCH_RESERVED_07", - "CUBLASLT_SEARCH_RESERVED_06", - "CUBLASLT_SEARCH_RESERVED_05", - "CUBLASLT_SEARCH_RESERVED_04", - "CUBLASLT_SEARCH_RESERVED_03", - "CUBLASLT_SEARCH_RESERVED_02", - "CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID", - "CUBLASLT_SEARCH_BEST_FIT", - "CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE", - "CUBLASLT_REDUCTION_SCHEME_NONE", - "CUBLASLT_REDUCTION_SCHEME_MASK", - "CUBLASLT_REDUCTION_SCHEME_INPLACE", - "CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE", - "CUBLASLT_POINTER_MODE_MASK_HOST", - "CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR", - "CUBLASLT_POINTER_MODE_MASK_DEVICE", - "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO", - "CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST", - "CUBLASLT_POINTER_MODE_HOST", - "CUBLASLT_POINTER_MODE_DEVICE_VECTOR", - "CUBLASLT_POINTER_MODE_DEVICE", - "CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO", - "CUBLASLT_ORDER_ROW", - "CUBLASLT_ORDER_COL4_4R2_8C", - "CUBLASLT_ORDER_COL32_2R_4R4", - "CUBLASLT_ORDER_COL32", - "CUBLASLT_ORDER_COL", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_TENSOR_OP_MASK", - 
"CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_TYPE_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_OP_INPUT_TYPE_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_TF32", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8I", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E5M2", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_8F_E4M3", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_64F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_32F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_INPUT_16BF", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_IMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_GAUSSIAN", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_FMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_DMMA", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_TYPE_MASK", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_64F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32I", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_32F", - "CUBLASLT_NUMERICAL_IMPL_FLAGS_ACCUMULATOR_16F", - "CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB", - "CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA", - "CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE", - "CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE", - "CUBLASLT_MATRIX_LAYOUT_TYPE", - "CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET", - "CUBLASLT_MATRIX_LAYOUT_ROWS", - "CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET", - "CUBLASLT_MATRIX_LAYOUT_ORDER", - "CUBLASLT_MATRIX_LAYOUT_LD", - "CUBLASLT_MATRIX_LAYOUT_COLS", - "CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT", - "CUBLASLT_MATMUL_TILE_UNDEFINED", - "CUBLASLT_MATMUL_TILE_END", - "CUBLASLT_MATMUL_TILE_96x64", - "CUBLASLT_MATMUL_TILE_96x512", - "CUBLASLT_MATMUL_TILE_96x448", - "CUBLASLT_MATMUL_TILE_96x384", - "CUBLASLT_MATMUL_TILE_96x320", - "CUBLASLT_MATMUL_TILE_96x256", - "CUBLASLT_MATMUL_TILE_96x192", - "CUBLASLT_MATMUL_TILE_96x128", - "CUBLASLT_MATMUL_TILE_8x8", - "CUBLASLT_MATMUL_TILE_8x768", - "CUBLASLT_MATMUL_TILE_8x704", - "CUBLASLT_MATMUL_TILE_8x640", - "CUBLASLT_MATMUL_TILE_8x64", - "CUBLASLT_MATMUL_TILE_8x576", - "CUBLASLT_MATMUL_TILE_8x512", - "CUBLASLT_MATMUL_TILE_8x448", - 
"CUBLASLT_MATMUL_TILE_8x384", - "CUBLASLT_MATMUL_TILE_8x320", - "CUBLASLT_MATMUL_TILE_8x32", - "CUBLASLT_MATMUL_TILE_8x256", - "CUBLASLT_MATMUL_TILE_8x192", - "CUBLASLT_MATMUL_TILE_8x16", - "CUBLASLT_MATMUL_TILE_8x128", - "CUBLASLT_MATMUL_TILE_88x64", - "CUBLASLT_MATMUL_TILE_88x512", - "CUBLASLT_MATMUL_TILE_88x448", - "CUBLASLT_MATMUL_TILE_88x384", - "CUBLASLT_MATMUL_TILE_88x320", - "CUBLASLT_MATMUL_TILE_88x256", - "CUBLASLT_MATMUL_TILE_88x192", - "CUBLASLT_MATMUL_TILE_88x128", - "CUBLASLT_MATMUL_TILE_80x64", - "CUBLASLT_MATMUL_TILE_80x576", - "CUBLASLT_MATMUL_TILE_80x512", - "CUBLASLT_MATMUL_TILE_80x448", - "CUBLASLT_MATMUL_TILE_80x384", - "CUBLASLT_MATMUL_TILE_80x320", - "CUBLASLT_MATMUL_TILE_80x256", - "CUBLASLT_MATMUL_TILE_80x192", - "CUBLASLT_MATMUL_TILE_80x128", - "CUBLASLT_MATMUL_TILE_768x80", - "CUBLASLT_MATMUL_TILE_768x8", - "CUBLASLT_MATMUL_TILE_768x72", - "CUBLASLT_MATMUL_TILE_768x64", - "CUBLASLT_MATMUL_TILE_768x56", - "CUBLASLT_MATMUL_TILE_768x48", - "CUBLASLT_MATMUL_TILE_768x40", - "CUBLASLT_MATMUL_TILE_768x32", - "CUBLASLT_MATMUL_TILE_768x24", - "CUBLASLT_MATMUL_TILE_768x16", - "CUBLASLT_MATMUL_TILE_760x64", - "CUBLASLT_MATMUL_TILE_752x64", - "CUBLASLT_MATMUL_TILE_744x64", - "CUBLASLT_MATMUL_TILE_736x64", - "CUBLASLT_MATMUL_TILE_72x640", - "CUBLASLT_MATMUL_TILE_72x64", - "CUBLASLT_MATMUL_TILE_72x576", - "CUBLASLT_MATMUL_TILE_72x512", - "CUBLASLT_MATMUL_TILE_72x448", - "CUBLASLT_MATMUL_TILE_72x384", - "CUBLASLT_MATMUL_TILE_72x320", - "CUBLASLT_MATMUL_TILE_72x256", - "CUBLASLT_MATMUL_TILE_72x192", - "CUBLASLT_MATMUL_TILE_72x128", - "CUBLASLT_MATMUL_TILE_728x64", - "CUBLASLT_MATMUL_TILE_720x64", - "CUBLASLT_MATMUL_TILE_712x64", - "CUBLASLT_MATMUL_TILE_704x88", - "CUBLASLT_MATMUL_TILE_704x80", - "CUBLASLT_MATMUL_TILE_704x8", - "CUBLASLT_MATMUL_TILE_704x72", - "CUBLASLT_MATMUL_TILE_704x64", - "CUBLASLT_MATMUL_TILE_704x56", - "CUBLASLT_MATMUL_TILE_704x48", - "CUBLASLT_MATMUL_TILE_704x40", - "CUBLASLT_MATMUL_TILE_704x32", - "CUBLASLT_MATMUL_TILE_704x24", - 
"CUBLASLT_MATMUL_TILE_704x16", - "CUBLASLT_MATMUL_TILE_696x64", - "CUBLASLT_MATMUL_TILE_688x64", - "CUBLASLT_MATMUL_TILE_680x64", - "CUBLASLT_MATMUL_TILE_672x64", - "CUBLASLT_MATMUL_TILE_664x64", - "CUBLASLT_MATMUL_TILE_656x64", - "CUBLASLT_MATMUL_TILE_64x96", - "CUBLASLT_MATMUL_TILE_64x88", - "CUBLASLT_MATMUL_TILE_64x80", - "CUBLASLT_MATMUL_TILE_64x8", - "CUBLASLT_MATMUL_TILE_64x768", - "CUBLASLT_MATMUL_TILE_64x760", - "CUBLASLT_MATMUL_TILE_64x752", - "CUBLASLT_MATMUL_TILE_64x744", - "CUBLASLT_MATMUL_TILE_64x736", - "CUBLASLT_MATMUL_TILE_64x728", - "CUBLASLT_MATMUL_TILE_64x720", - "CUBLASLT_MATMUL_TILE_64x72", - "CUBLASLT_MATMUL_TILE_64x712", - "CUBLASLT_MATMUL_TILE_64x704", - "CUBLASLT_MATMUL_TILE_64x696", - "CUBLASLT_MATMUL_TILE_64x688", - "CUBLASLT_MATMUL_TILE_64x680", - "CUBLASLT_MATMUL_TILE_64x672", - "CUBLASLT_MATMUL_TILE_64x664", - "CUBLASLT_MATMUL_TILE_64x656", - "CUBLASLT_MATMUL_TILE_64x648", - "CUBLASLT_MATMUL_TILE_64x640", - "CUBLASLT_MATMUL_TILE_64x64", - "CUBLASLT_MATMUL_TILE_64x632", - "CUBLASLT_MATMUL_TILE_64x624", - "CUBLASLT_MATMUL_TILE_64x616", - "CUBLASLT_MATMUL_TILE_64x608", - "CUBLASLT_MATMUL_TILE_64x600", - "CUBLASLT_MATMUL_TILE_64x592", - "CUBLASLT_MATMUL_TILE_64x584", - "CUBLASLT_MATMUL_TILE_64x576", - "CUBLASLT_MATMUL_TILE_64x568", - "CUBLASLT_MATMUL_TILE_64x560", - "CUBLASLT_MATMUL_TILE_64x56", - "CUBLASLT_MATMUL_TILE_64x552", - "CUBLASLT_MATMUL_TILE_64x544", - "CUBLASLT_MATMUL_TILE_64x536", - "CUBLASLT_MATMUL_TILE_64x528", - "CUBLASLT_MATMUL_TILE_64x520", - "CUBLASLT_MATMUL_TILE_64x512", - "CUBLASLT_MATMUL_TILE_64x504", - "CUBLASLT_MATMUL_TILE_64x496", - "CUBLASLT_MATMUL_TILE_64x488", - "CUBLASLT_MATMUL_TILE_64x480", - "CUBLASLT_MATMUL_TILE_64x48", - "CUBLASLT_MATMUL_TILE_64x472", - "CUBLASLT_MATMUL_TILE_64x464", - "CUBLASLT_MATMUL_TILE_64x456", - "CUBLASLT_MATMUL_TILE_64x448", - "CUBLASLT_MATMUL_TILE_64x440", - "CUBLASLT_MATMUL_TILE_64x432", - "CUBLASLT_MATMUL_TILE_64x424", - "CUBLASLT_MATMUL_TILE_64x416", - 
"CUBLASLT_MATMUL_TILE_64x408", - "CUBLASLT_MATMUL_TILE_64x400", - "CUBLASLT_MATMUL_TILE_64x40", - "CUBLASLT_MATMUL_TILE_64x392", - "CUBLASLT_MATMUL_TILE_64x384", - "CUBLASLT_MATMUL_TILE_64x376", - "CUBLASLT_MATMUL_TILE_64x368", - "CUBLASLT_MATMUL_TILE_64x360", - "CUBLASLT_MATMUL_TILE_64x352", - "CUBLASLT_MATMUL_TILE_64x344", - "CUBLASLT_MATMUL_TILE_64x336", - "CUBLASLT_MATMUL_TILE_64x328", - "CUBLASLT_MATMUL_TILE_64x320", - "CUBLASLT_MATMUL_TILE_64x32", - "CUBLASLT_MATMUL_TILE_64x312", - "CUBLASLT_MATMUL_TILE_64x304", - "CUBLASLT_MATMUL_TILE_64x296", - "CUBLASLT_MATMUL_TILE_64x288", - "CUBLASLT_MATMUL_TILE_64x280", - "CUBLASLT_MATMUL_TILE_64x272", - "CUBLASLT_MATMUL_TILE_64x264", - "CUBLASLT_MATMUL_TILE_64x256", - "CUBLASLT_MATMUL_TILE_64x248", - "CUBLASLT_MATMUL_TILE_64x240", - "CUBLASLT_MATMUL_TILE_64x24", - "CUBLASLT_MATMUL_TILE_64x232", - "CUBLASLT_MATMUL_TILE_64x224", - "CUBLASLT_MATMUL_TILE_64x216", - "CUBLASLT_MATMUL_TILE_64x208", - "CUBLASLT_MATMUL_TILE_64x200", - "CUBLASLT_MATMUL_TILE_64x192", - "CUBLASLT_MATMUL_TILE_64x184", - "CUBLASLT_MATMUL_TILE_64x176", - "CUBLASLT_MATMUL_TILE_64x168", - "CUBLASLT_MATMUL_TILE_64x160", - "CUBLASLT_MATMUL_TILE_64x16", - "CUBLASLT_MATMUL_TILE_64x152", - "CUBLASLT_MATMUL_TILE_64x144", - "CUBLASLT_MATMUL_TILE_64x136", - "CUBLASLT_MATMUL_TILE_64x128", - "CUBLASLT_MATMUL_TILE_64x120", - "CUBLASLT_MATMUL_TILE_64x112", - "CUBLASLT_MATMUL_TILE_64x104", - "CUBLASLT_MATMUL_TILE_648x64", - "CUBLASLT_MATMUL_TILE_640x96", - "CUBLASLT_MATMUL_TILE_640x88", - "CUBLASLT_MATMUL_TILE_640x80", - "CUBLASLT_MATMUL_TILE_640x8", - "CUBLASLT_MATMUL_TILE_640x72", - "CUBLASLT_MATMUL_TILE_640x64", - "CUBLASLT_MATMUL_TILE_640x56", - "CUBLASLT_MATMUL_TILE_640x48", - "CUBLASLT_MATMUL_TILE_640x40", - "CUBLASLT_MATMUL_TILE_640x32", - "CUBLASLT_MATMUL_TILE_640x24", - "CUBLASLT_MATMUL_TILE_640x16", - "CUBLASLT_MATMUL_TILE_632x64", - "CUBLASLT_MATMUL_TILE_624x64", - "CUBLASLT_MATMUL_TILE_616x64", - "CUBLASLT_MATMUL_TILE_608x64", - 
"CUBLASLT_MATMUL_TILE_600x64", - "CUBLASLT_MATMUL_TILE_592x64", - "CUBLASLT_MATMUL_TILE_584x64", - "CUBLASLT_MATMUL_TILE_576x96", - "CUBLASLT_MATMUL_TILE_576x88", - "CUBLASLT_MATMUL_TILE_576x80", - "CUBLASLT_MATMUL_TILE_576x8", - "CUBLASLT_MATMUL_TILE_576x72", - "CUBLASLT_MATMUL_TILE_576x64", - "CUBLASLT_MATMUL_TILE_576x56", - "CUBLASLT_MATMUL_TILE_576x48", - "CUBLASLT_MATMUL_TILE_576x40", - "CUBLASLT_MATMUL_TILE_576x32", - "CUBLASLT_MATMUL_TILE_576x24", - "CUBLASLT_MATMUL_TILE_576x16", - "CUBLASLT_MATMUL_TILE_576x112", - "CUBLASLT_MATMUL_TILE_576x104", - "CUBLASLT_MATMUL_TILE_56x768", - "CUBLASLT_MATMUL_TILE_56x704", - "CUBLASLT_MATMUL_TILE_56x640", - "CUBLASLT_MATMUL_TILE_56x576", - "CUBLASLT_MATMUL_TILE_56x512", - "CUBLASLT_MATMUL_TILE_56x448", - "CUBLASLT_MATMUL_TILE_56x384", - "CUBLASLT_MATMUL_TILE_56x320", - "CUBLASLT_MATMUL_TILE_56x256", - "CUBLASLT_MATMUL_TILE_56x192", - "CUBLASLT_MATMUL_TILE_56x128", - "CUBLASLT_MATMUL_TILE_568x64", - "CUBLASLT_MATMUL_TILE_560x64", - "CUBLASLT_MATMUL_TILE_552x64", - "CUBLASLT_MATMUL_TILE_544x64", - "CUBLASLT_MATMUL_TILE_536x64", - "CUBLASLT_MATMUL_TILE_528x64", - "CUBLASLT_MATMUL_TILE_520x64", - "CUBLASLT_MATMUL_TILE_512x96", - "CUBLASLT_MATMUL_TILE_512x88", - "CUBLASLT_MATMUL_TILE_512x80", - "CUBLASLT_MATMUL_TILE_512x8", - "CUBLASLT_MATMUL_TILE_512x72", - "CUBLASLT_MATMUL_TILE_512x64", - "CUBLASLT_MATMUL_TILE_512x56", - "CUBLASLT_MATMUL_TILE_512x48", - "CUBLASLT_MATMUL_TILE_512x40", - "CUBLASLT_MATMUL_TILE_512x32", - "CUBLASLT_MATMUL_TILE_512x24", - "CUBLASLT_MATMUL_TILE_512x16", - "CUBLASLT_MATMUL_TILE_512x128", - "CUBLASLT_MATMUL_TILE_512x120", - "CUBLASLT_MATMUL_TILE_512x112", - "CUBLASLT_MATMUL_TILE_512x104", - "CUBLASLT_MATMUL_TILE_504x64", - "CUBLASLT_MATMUL_TILE_496x64", - "CUBLASLT_MATMUL_TILE_48x768", - "CUBLASLT_MATMUL_TILE_48x704", - "CUBLASLT_MATMUL_TILE_48x640", - "CUBLASLT_MATMUL_TILE_48x64", - "CUBLASLT_MATMUL_TILE_48x576", - "CUBLASLT_MATMUL_TILE_48x512", - "CUBLASLT_MATMUL_TILE_48x448", - 
"CUBLASLT_MATMUL_TILE_48x384", - "CUBLASLT_MATMUL_TILE_48x320", - "CUBLASLT_MATMUL_TILE_48x256", - "CUBLASLT_MATMUL_TILE_48x192", - "CUBLASLT_MATMUL_TILE_48x128", - "CUBLASLT_MATMUL_TILE_488x64", - "CUBLASLT_MATMUL_TILE_480x64", - "CUBLASLT_MATMUL_TILE_472x64", - "CUBLASLT_MATMUL_TILE_464x64", - "CUBLASLT_MATMUL_TILE_456x64", - "CUBLASLT_MATMUL_TILE_448x96", - "CUBLASLT_MATMUL_TILE_448x88", - "CUBLASLT_MATMUL_TILE_448x80", - "CUBLASLT_MATMUL_TILE_448x8", - "CUBLASLT_MATMUL_TILE_448x72", - "CUBLASLT_MATMUL_TILE_448x64", - "CUBLASLT_MATMUL_TILE_448x56", - "CUBLASLT_MATMUL_TILE_448x48", - "CUBLASLT_MATMUL_TILE_448x40", - "CUBLASLT_MATMUL_TILE_448x32", - "CUBLASLT_MATMUL_TILE_448x24", - "CUBLASLT_MATMUL_TILE_448x16", - "CUBLASLT_MATMUL_TILE_448x144", - "CUBLASLT_MATMUL_TILE_448x136", - "CUBLASLT_MATMUL_TILE_448x128", - "CUBLASLT_MATMUL_TILE_448x120", - "CUBLASLT_MATMUL_TILE_448x112", - "CUBLASLT_MATMUL_TILE_448x104", - "CUBLASLT_MATMUL_TILE_440x64", - "CUBLASLT_MATMUL_TILE_432x64", - "CUBLASLT_MATMUL_TILE_424x64", - "CUBLASLT_MATMUL_TILE_416x64", - "CUBLASLT_MATMUL_TILE_40x768", - "CUBLASLT_MATMUL_TILE_40x704", - "CUBLASLT_MATMUL_TILE_40x640", - "CUBLASLT_MATMUL_TILE_40x64", - "CUBLASLT_MATMUL_TILE_40x576", - "CUBLASLT_MATMUL_TILE_40x512", - "CUBLASLT_MATMUL_TILE_40x448", - "CUBLASLT_MATMUL_TILE_40x384", - "CUBLASLT_MATMUL_TILE_40x320", - "CUBLASLT_MATMUL_TILE_40x256", - "CUBLASLT_MATMUL_TILE_40x192", - "CUBLASLT_MATMUL_TILE_40x128", - "CUBLASLT_MATMUL_TILE_408x64", - "CUBLASLT_MATMUL_TILE_400x64", - "CUBLASLT_MATMUL_TILE_392x64", - "CUBLASLT_MATMUL_TILE_384x96", - "CUBLASLT_MATMUL_TILE_384x88", - "CUBLASLT_MATMUL_TILE_384x80", - "CUBLASLT_MATMUL_TILE_384x8", - "CUBLASLT_MATMUL_TILE_384x72", - "CUBLASLT_MATMUL_TILE_384x64", - "CUBLASLT_MATMUL_TILE_384x56", - "CUBLASLT_MATMUL_TILE_384x48", - "CUBLASLT_MATMUL_TILE_384x40", - "CUBLASLT_MATMUL_TILE_384x32", - "CUBLASLT_MATMUL_TILE_384x24", - "CUBLASLT_MATMUL_TILE_384x168", - "CUBLASLT_MATMUL_TILE_384x160", - 
"CUBLASLT_MATMUL_TILE_384x16", - "CUBLASLT_MATMUL_TILE_384x152", - "CUBLASLT_MATMUL_TILE_384x144", - "CUBLASLT_MATMUL_TILE_384x136", - "CUBLASLT_MATMUL_TILE_384x128", - "CUBLASLT_MATMUL_TILE_384x120", - "CUBLASLT_MATMUL_TILE_384x112", - "CUBLASLT_MATMUL_TILE_384x104", - "CUBLASLT_MATMUL_TILE_376x64", - "CUBLASLT_MATMUL_TILE_376x128", - "CUBLASLT_MATMUL_TILE_368x64", - "CUBLASLT_MATMUL_TILE_368x128", - "CUBLASLT_MATMUL_TILE_360x64", - "CUBLASLT_MATMUL_TILE_360x128", - "CUBLASLT_MATMUL_TILE_352x64", - "CUBLASLT_MATMUL_TILE_352x128", - "CUBLASLT_MATMUL_TILE_344x64", - "CUBLASLT_MATMUL_TILE_344x128", - "CUBLASLT_MATMUL_TILE_336x64", - "CUBLASLT_MATMUL_TILE_336x128", - "CUBLASLT_MATMUL_TILE_32x8", - "CUBLASLT_MATMUL_TILE_32x768", - "CUBLASLT_MATMUL_TILE_32x704", - "CUBLASLT_MATMUL_TILE_32x640", - "CUBLASLT_MATMUL_TILE_32x64", - "CUBLASLT_MATMUL_TILE_32x576", - "CUBLASLT_MATMUL_TILE_32x512", - "CUBLASLT_MATMUL_TILE_32x448", - "CUBLASLT_MATMUL_TILE_32x384", - "CUBLASLT_MATMUL_TILE_32x320", - "CUBLASLT_MATMUL_TILE_32x32", - "CUBLASLT_MATMUL_TILE_32x256", - "CUBLASLT_MATMUL_TILE_32x192", - "CUBLASLT_MATMUL_TILE_32x16", - "CUBLASLT_MATMUL_TILE_32x128", - "CUBLASLT_MATMUL_TILE_328x64", - "CUBLASLT_MATMUL_TILE_328x128", - "CUBLASLT_MATMUL_TILE_320x96", - "CUBLASLT_MATMUL_TILE_320x88", - "CUBLASLT_MATMUL_TILE_320x80", - "CUBLASLT_MATMUL_TILE_320x8", - "CUBLASLT_MATMUL_TILE_320x72", - "CUBLASLT_MATMUL_TILE_320x64", - "CUBLASLT_MATMUL_TILE_320x56", - "CUBLASLT_MATMUL_TILE_320x48", - "CUBLASLT_MATMUL_TILE_320x40", - "CUBLASLT_MATMUL_TILE_320x32", - "CUBLASLT_MATMUL_TILE_320x24", - "CUBLASLT_MATMUL_TILE_320x200", - "CUBLASLT_MATMUL_TILE_320x192", - "CUBLASLT_MATMUL_TILE_320x184", - "CUBLASLT_MATMUL_TILE_320x176", - "CUBLASLT_MATMUL_TILE_320x168", - "CUBLASLT_MATMUL_TILE_320x160", - "CUBLASLT_MATMUL_TILE_320x16", - "CUBLASLT_MATMUL_TILE_320x152", - "CUBLASLT_MATMUL_TILE_320x144", - "CUBLASLT_MATMUL_TILE_320x136", - "CUBLASLT_MATMUL_TILE_320x128", - "CUBLASLT_MATMUL_TILE_320x120", - 
"CUBLASLT_MATMUL_TILE_320x112", - "CUBLASLT_MATMUL_TILE_320x104", - "CUBLASLT_MATMUL_TILE_312x64", - "CUBLASLT_MATMUL_TILE_312x128", - "CUBLASLT_MATMUL_TILE_304x64", - "CUBLASLT_MATMUL_TILE_304x128", - "CUBLASLT_MATMUL_TILE_296x64", - "CUBLASLT_MATMUL_TILE_296x128", - "CUBLASLT_MATMUL_TILE_288x64", - "CUBLASLT_MATMUL_TILE_288x128", - "CUBLASLT_MATMUL_TILE_280x64", - "CUBLASLT_MATMUL_TILE_280x128", - "CUBLASLT_MATMUL_TILE_272x64", - "CUBLASLT_MATMUL_TILE_272x128", - "CUBLASLT_MATMUL_TILE_264x64", - "CUBLASLT_MATMUL_TILE_264x128", - "CUBLASLT_MATMUL_TILE_256x96", - "CUBLASLT_MATMUL_TILE_256x88", - "CUBLASLT_MATMUL_TILE_256x80", - "CUBLASLT_MATMUL_TILE_256x8", - "CUBLASLT_MATMUL_TILE_256x72", - "CUBLASLT_MATMUL_TILE_256x64", - "CUBLASLT_MATMUL_TILE_256x56", - "CUBLASLT_MATMUL_TILE_256x48", - "CUBLASLT_MATMUL_TILE_256x40", - "CUBLASLT_MATMUL_TILE_256x32", - "CUBLASLT_MATMUL_TILE_256x256", - "CUBLASLT_MATMUL_TILE_256x248", - "CUBLASLT_MATMUL_TILE_256x240", - "CUBLASLT_MATMUL_TILE_256x24", - "CUBLASLT_MATMUL_TILE_256x232", - "CUBLASLT_MATMUL_TILE_256x224", - "CUBLASLT_MATMUL_TILE_256x216", - "CUBLASLT_MATMUL_TILE_256x208", - "CUBLASLT_MATMUL_TILE_256x200", - "CUBLASLT_MATMUL_TILE_256x192", - "CUBLASLT_MATMUL_TILE_256x184", - "CUBLASLT_MATMUL_TILE_256x176", - "CUBLASLT_MATMUL_TILE_256x168", - "CUBLASLT_MATMUL_TILE_256x160", - "CUBLASLT_MATMUL_TILE_256x16", - "CUBLASLT_MATMUL_TILE_256x152", - "CUBLASLT_MATMUL_TILE_256x144", - "CUBLASLT_MATMUL_TILE_256x136", - "CUBLASLT_MATMUL_TILE_256x128", - "CUBLASLT_MATMUL_TILE_256x120", - "CUBLASLT_MATMUL_TILE_256x112", - "CUBLASLT_MATMUL_TILE_256x104", - "CUBLASLT_MATMUL_TILE_24x768", - "CUBLASLT_MATMUL_TILE_24x704", - "CUBLASLT_MATMUL_TILE_24x640", - "CUBLASLT_MATMUL_TILE_24x64", - "CUBLASLT_MATMUL_TILE_24x576", - "CUBLASLT_MATMUL_TILE_24x512", - "CUBLASLT_MATMUL_TILE_24x448", - "CUBLASLT_MATMUL_TILE_24x384", - "CUBLASLT_MATMUL_TILE_24x320", - "CUBLASLT_MATMUL_TILE_24x256", - "CUBLASLT_MATMUL_TILE_24x192", - 
"CUBLASLT_MATMUL_TILE_24x128", - "CUBLASLT_MATMUL_TILE_248x64", - "CUBLASLT_MATMUL_TILE_248x192", - "CUBLASLT_MATMUL_TILE_248x128", - "CUBLASLT_MATMUL_TILE_240x64", - "CUBLASLT_MATMUL_TILE_240x192", - "CUBLASLT_MATMUL_TILE_240x128", - "CUBLASLT_MATMUL_TILE_232x64", - "CUBLASLT_MATMUL_TILE_232x192", - "CUBLASLT_MATMUL_TILE_232x128", - "CUBLASLT_MATMUL_TILE_224x64", - "CUBLASLT_MATMUL_TILE_224x192", - "CUBLASLT_MATMUL_TILE_224x128", - "CUBLASLT_MATMUL_TILE_216x64", - "CUBLASLT_MATMUL_TILE_216x192", - "CUBLASLT_MATMUL_TILE_216x128", - "CUBLASLT_MATMUL_TILE_208x64", - "CUBLASLT_MATMUL_TILE_208x192", - "CUBLASLT_MATMUL_TILE_208x128", - "CUBLASLT_MATMUL_TILE_200x64", - "CUBLASLT_MATMUL_TILE_200x192", - "CUBLASLT_MATMUL_TILE_200x128", - "CUBLASLT_MATMUL_TILE_192x96", - "CUBLASLT_MATMUL_TILE_192x88", - "CUBLASLT_MATMUL_TILE_192x80", - "CUBLASLT_MATMUL_TILE_192x8", - "CUBLASLT_MATMUL_TILE_192x72", - "CUBLASLT_MATMUL_TILE_192x64", - "CUBLASLT_MATMUL_TILE_192x56", - "CUBLASLT_MATMUL_TILE_192x48", - "CUBLASLT_MATMUL_TILE_192x40", - "CUBLASLT_MATMUL_TILE_192x336", - "CUBLASLT_MATMUL_TILE_192x328", - "CUBLASLT_MATMUL_TILE_192x320", - "CUBLASLT_MATMUL_TILE_192x32", - "CUBLASLT_MATMUL_TILE_192x312", - "CUBLASLT_MATMUL_TILE_192x304", - "CUBLASLT_MATMUL_TILE_192x296", - "CUBLASLT_MATMUL_TILE_192x288", - "CUBLASLT_MATMUL_TILE_192x280", - "CUBLASLT_MATMUL_TILE_192x272", - "CUBLASLT_MATMUL_TILE_192x264", - "CUBLASLT_MATMUL_TILE_192x256", - "CUBLASLT_MATMUL_TILE_192x248", - "CUBLASLT_MATMUL_TILE_192x240", - "CUBLASLT_MATMUL_TILE_192x24", - "CUBLASLT_MATMUL_TILE_192x232", - "CUBLASLT_MATMUL_TILE_192x224", - "CUBLASLT_MATMUL_TILE_192x216", - "CUBLASLT_MATMUL_TILE_192x208", - "CUBLASLT_MATMUL_TILE_192x200", - "CUBLASLT_MATMUL_TILE_192x192", - "CUBLASLT_MATMUL_TILE_192x184", - "CUBLASLT_MATMUL_TILE_192x176", - "CUBLASLT_MATMUL_TILE_192x168", - "CUBLASLT_MATMUL_TILE_192x160", - "CUBLASLT_MATMUL_TILE_192x16", - "CUBLASLT_MATMUL_TILE_192x152", - "CUBLASLT_MATMUL_TILE_192x144", - 
"CUBLASLT_MATMUL_TILE_192x136", - "CUBLASLT_MATMUL_TILE_192x128", - "CUBLASLT_MATMUL_TILE_192x120", - "CUBLASLT_MATMUL_TILE_192x112", - "CUBLASLT_MATMUL_TILE_192x104", - "CUBLASLT_MATMUL_TILE_184x64", - "CUBLASLT_MATMUL_TILE_184x256", - "CUBLASLT_MATMUL_TILE_184x192", - "CUBLASLT_MATMUL_TILE_184x128", - "CUBLASLT_MATMUL_TILE_176x64", - "CUBLASLT_MATMUL_TILE_176x256", - "CUBLASLT_MATMUL_TILE_176x192", - "CUBLASLT_MATMUL_TILE_176x128", - "CUBLASLT_MATMUL_TILE_16x8", - "CUBLASLT_MATMUL_TILE_16x768", - "CUBLASLT_MATMUL_TILE_16x704", - "CUBLASLT_MATMUL_TILE_16x640", - "CUBLASLT_MATMUL_TILE_16x64", - "CUBLASLT_MATMUL_TILE_16x576", - "CUBLASLT_MATMUL_TILE_16x512", - "CUBLASLT_MATMUL_TILE_16x448", - "CUBLASLT_MATMUL_TILE_16x384", - "CUBLASLT_MATMUL_TILE_16x320", - "CUBLASLT_MATMUL_TILE_16x32", - "CUBLASLT_MATMUL_TILE_16x256", - "CUBLASLT_MATMUL_TILE_16x192", - "CUBLASLT_MATMUL_TILE_16x16", - "CUBLASLT_MATMUL_TILE_16x128", - "CUBLASLT_MATMUL_TILE_168x64", - "CUBLASLT_MATMUL_TILE_168x256", - "CUBLASLT_MATMUL_TILE_168x192", - "CUBLASLT_MATMUL_TILE_168x128", - "CUBLASLT_MATMUL_TILE_160x64", - "CUBLASLT_MATMUL_TILE_160x256", - "CUBLASLT_MATMUL_TILE_160x192", - "CUBLASLT_MATMUL_TILE_160x128", - "CUBLASLT_MATMUL_TILE_152x64", - "CUBLASLT_MATMUL_TILE_152x320", - "CUBLASLT_MATMUL_TILE_152x256", - "CUBLASLT_MATMUL_TILE_152x192", - "CUBLASLT_MATMUL_TILE_152x128", - "CUBLASLT_MATMUL_TILE_144x64", - "CUBLASLT_MATMUL_TILE_144x320", - "CUBLASLT_MATMUL_TILE_144x256", - "CUBLASLT_MATMUL_TILE_144x192", - "CUBLASLT_MATMUL_TILE_144x128", - "CUBLASLT_MATMUL_TILE_136x64", - "CUBLASLT_MATMUL_TILE_136x320", - "CUBLASLT_MATMUL_TILE_136x256", - "CUBLASLT_MATMUL_TILE_136x192", - "CUBLASLT_MATMUL_TILE_136x128", - "CUBLASLT_MATMUL_TILE_128x96", - "CUBLASLT_MATMUL_TILE_128x88", - "CUBLASLT_MATMUL_TILE_128x80", - "CUBLASLT_MATMUL_TILE_128x8", - "CUBLASLT_MATMUL_TILE_128x72", - "CUBLASLT_MATMUL_TILE_128x64", - "CUBLASLT_MATMUL_TILE_128x56", - "CUBLASLT_MATMUL_TILE_128x512", - 
"CUBLASLT_MATMUL_TILE_128x504", - "CUBLASLT_MATMUL_TILE_128x496", - "CUBLASLT_MATMUL_TILE_128x488", - "CUBLASLT_MATMUL_TILE_128x480", - "CUBLASLT_MATMUL_TILE_128x48", - "CUBLASLT_MATMUL_TILE_128x472", - "CUBLASLT_MATMUL_TILE_128x464", - "CUBLASLT_MATMUL_TILE_128x456", - "CUBLASLT_MATMUL_TILE_128x448", - "CUBLASLT_MATMUL_TILE_128x440", - "CUBLASLT_MATMUL_TILE_128x432", - "CUBLASLT_MATMUL_TILE_128x424", - "CUBLASLT_MATMUL_TILE_128x416", - "CUBLASLT_MATMUL_TILE_128x408", - "CUBLASLT_MATMUL_TILE_128x400", - "CUBLASLT_MATMUL_TILE_128x40", - "CUBLASLT_MATMUL_TILE_128x392", - "CUBLASLT_MATMUL_TILE_128x384", - "CUBLASLT_MATMUL_TILE_128x376", - "CUBLASLT_MATMUL_TILE_128x368", - "CUBLASLT_MATMUL_TILE_128x360", - "CUBLASLT_MATMUL_TILE_128x352", - "CUBLASLT_MATMUL_TILE_128x344", - "CUBLASLT_MATMUL_TILE_128x336", - "CUBLASLT_MATMUL_TILE_128x328", - "CUBLASLT_MATMUL_TILE_128x320", - "CUBLASLT_MATMUL_TILE_128x32", - "CUBLASLT_MATMUL_TILE_128x312", - "CUBLASLT_MATMUL_TILE_128x304", - "CUBLASLT_MATMUL_TILE_128x296", - "CUBLASLT_MATMUL_TILE_128x288", - "CUBLASLT_MATMUL_TILE_128x280", - "CUBLASLT_MATMUL_TILE_128x272", - "CUBLASLT_MATMUL_TILE_128x264", - "CUBLASLT_MATMUL_TILE_128x256", - "CUBLASLT_MATMUL_TILE_128x248", - "CUBLASLT_MATMUL_TILE_128x240", - "CUBLASLT_MATMUL_TILE_128x24", - "CUBLASLT_MATMUL_TILE_128x232", - "CUBLASLT_MATMUL_TILE_128x224", - "CUBLASLT_MATMUL_TILE_128x216", - "CUBLASLT_MATMUL_TILE_128x208", - "CUBLASLT_MATMUL_TILE_128x200", - "CUBLASLT_MATMUL_TILE_128x192", - "CUBLASLT_MATMUL_TILE_128x184", - "CUBLASLT_MATMUL_TILE_128x176", - "CUBLASLT_MATMUL_TILE_128x168", - "CUBLASLT_MATMUL_TILE_128x160", - "CUBLASLT_MATMUL_TILE_128x16", - "CUBLASLT_MATMUL_TILE_128x152", - "CUBLASLT_MATMUL_TILE_128x144", - "CUBLASLT_MATMUL_TILE_128x136", - "CUBLASLT_MATMUL_TILE_128x128", - "CUBLASLT_MATMUL_TILE_128x120", - "CUBLASLT_MATMUL_TILE_128x112", - "CUBLASLT_MATMUL_TILE_128x104", - "CUBLASLT_MATMUL_TILE_120x64", - "CUBLASLT_MATMUL_TILE_120x384", - "CUBLASLT_MATMUL_TILE_120x320", - 
"CUBLASLT_MATMUL_TILE_120x256", - "CUBLASLT_MATMUL_TILE_120x192", - "CUBLASLT_MATMUL_TILE_120x128", - "CUBLASLT_MATMUL_TILE_112x64", - "CUBLASLT_MATMUL_TILE_112x384", - "CUBLASLT_MATMUL_TILE_112x320", - "CUBLASLT_MATMUL_TILE_112x256", - "CUBLASLT_MATMUL_TILE_112x192", - "CUBLASLT_MATMUL_TILE_112x128", - "CUBLASLT_MATMUL_TILE_104x64", - "CUBLASLT_MATMUL_TILE_104x448", - "CUBLASLT_MATMUL_TILE_104x384", - "CUBLASLT_MATMUL_TILE_104x320", - "CUBLASLT_MATMUL_TILE_104x256", - "CUBLASLT_MATMUL_TILE_104x192", - "CUBLASLT_MATMUL_TILE_104x128", - "CUBLASLT_MATMUL_STAGES_UNDEFINED", - "CUBLASLT_MATMUL_STAGES_END", - "CUBLASLT_MATMUL_STAGES_8xAUTO", - "CUBLASLT_MATMUL_STAGES_8x5", - "CUBLASLT_MATMUL_STAGES_8x4", - "CUBLASLT_MATMUL_STAGES_8x3", - "CUBLASLT_MATMUL_STAGES_64xAUTO", - "CUBLASLT_MATMUL_STAGES_64x6", - "CUBLASLT_MATMUL_STAGES_64x5", - "CUBLASLT_MATMUL_STAGES_64x4", - "CUBLASLT_MATMUL_STAGES_64x3", - "CUBLASLT_MATMUL_STAGES_64x2", - "CUBLASLT_MATMUL_STAGES_64x1", - "CUBLASLT_MATMUL_STAGES_32xAUTO", - "CUBLASLT_MATMUL_STAGES_32x6", - "CUBLASLT_MATMUL_STAGES_32x5", - "CUBLASLT_MATMUL_STAGES_32x4", - "CUBLASLT_MATMUL_STAGES_32x3", - "CUBLASLT_MATMUL_STAGES_32x2", - "CUBLASLT_MATMUL_STAGES_32x10", - "CUBLASLT_MATMUL_STAGES_32x1", - "CUBLASLT_MATMUL_STAGES_16xAUTO", - "CUBLASLT_MATMUL_STAGES_16x6", - "CUBLASLT_MATMUL_STAGES_16x5", - "CUBLASLT_MATMUL_STAGES_16x4", - "CUBLASLT_MATMUL_STAGES_16x3", - "CUBLASLT_MATMUL_STAGES_16x2", - "CUBLASLT_MATMUL_STAGES_16x10", - "CUBLASLT_MATMUL_STAGES_16x1", - "CUBLASLT_MATMUL_STAGES_128xAUTO", - "CUBLASLT_MATMUL_STAGES_128x6", - "CUBLASLT_MATMUL_STAGES_128x5", - "CUBLASLT_MATMUL_STAGES_128x4", - "CUBLASLT_MATMUL_STAGES_128x3", - "CUBLASLT_MATMUL_STAGES_128x2", - "CUBLASLT_MATMUL_STAGES_128x1", - "CUBLASLT_MATMUL_PREF_SEARCH_MODE", - "CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES", - "CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES", - 
"CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES", - "CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES", - "CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT", - "CUBLASLT_MATMUL_PREF_IMPL_MASK", - "CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA884", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA1688", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA1684", - "CUBLASLT_MATMUL_INNER_SHAPE_MMA16816", - "CUBLASLT_MATMUL_INNER_SHAPE_END", - "CUBLASLT_MATMUL_DESC_TRANSC", - "CUBLASLT_MATMUL_DESC_TRANSB", - "CUBLASLT_MATMUL_DESC_TRANSA", - "CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET", - "CUBLASLT_MATMUL_DESC_SCALE_TYPE", - "CUBLASLT_MATMUL_DESC_POINTER_MODE", - "CUBLASLT_MATMUL_DESC_FILL_MODE", - "CUBLASLT_MATMUL_DESC_FAST_ACCUM", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE", - "CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER", - "CUBLASLT_MATMUL_DESC_EPILOGUE", - "CUBLASLT_MATMUL_DESC_D_SCALE_POINTER", - "CUBLASLT_MATMUL_DESC_C_SCALE_POINTER", - "CUBLASLT_MATMUL_DESC_COMPUTE_TYPE", - "CUBLASLT_MATMUL_DESC_B_SCALE_POINTER", - "CUBLASLT_MATMUL_DESC_BIAS_POINTER", - "CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE", - "CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE", - "CUBLASLT_MATMUL_DESC_A_SCALE_POINTER", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_OUT_COUNTERS_POINTER", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_ROWS", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_NUM_CHUNKS_D_COLS", - "CUBLASLT_MATMUL_DESC_ATOMIC_SYNC_IN_COUNTERS_POINTER", - "CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE", - "CUBLASLT_EPILOGUE_RELU_BIAS", - "CUBLASLT_EPILOGUE_RELU_AUX_BIAS", - "CUBLASLT_EPILOGUE_RELU_AUX", - "CUBLASLT_EPILOGUE_RELU", - "CUBLASLT_EPILOGUE_GELU_BIAS", - "CUBLASLT_EPILOGUE_GELU_AUX_BIAS", - "CUBLASLT_EPILOGUE_GELU_AUX", - "CUBLASLT_EPILOGUE_GELU", - "CUBLASLT_EPILOGUE_DRELU_BGRAD", - "CUBLASLT_EPILOGUE_DRELU", - 
"CUBLASLT_EPILOGUE_DGELU_BGRAD", - "CUBLASLT_EPILOGUE_DGELU", - "CUBLASLT_EPILOGUE_DEFAULT", - "CUBLASLT_EPILOGUE_BIAS", - "CUBLASLT_EPILOGUE_BGRADB", - "CUBLASLT_EPILOGUE_BGRADA", - "CUBLASLT_CLUSTER_SHAPE_END", - "CUBLASLT_CLUSTER_SHAPE_AUTO", - "CUBLASLT_CLUSTER_SHAPE_9x1x1", - "CUBLASLT_CLUSTER_SHAPE_8x2x1", - "CUBLASLT_CLUSTER_SHAPE_8x1x1", - "CUBLASLT_CLUSTER_SHAPE_7x2x1", - "CUBLASLT_CLUSTER_SHAPE_7x1x1", - "CUBLASLT_CLUSTER_SHAPE_6x2x1", - "CUBLASLT_CLUSTER_SHAPE_6x1x1", - "CUBLASLT_CLUSTER_SHAPE_5x3x1", - "CUBLASLT_CLUSTER_SHAPE_5x2x1", - "CUBLASLT_CLUSTER_SHAPE_5x1x1", - "CUBLASLT_CLUSTER_SHAPE_4x4x1", - "CUBLASLT_CLUSTER_SHAPE_4x3x1", - "CUBLASLT_CLUSTER_SHAPE_4x2x1", - "CUBLASLT_CLUSTER_SHAPE_4x1x1", - "CUBLASLT_CLUSTER_SHAPE_3x5x1", - "CUBLASLT_CLUSTER_SHAPE_3x4x1", - "CUBLASLT_CLUSTER_SHAPE_3x3x1", - "CUBLASLT_CLUSTER_SHAPE_3x2x1", - "CUBLASLT_CLUSTER_SHAPE_3x1x1", - "CUBLASLT_CLUSTER_SHAPE_2x8x1", - "CUBLASLT_CLUSTER_SHAPE_2x7x1", - "CUBLASLT_CLUSTER_SHAPE_2x6x1", - "CUBLASLT_CLUSTER_SHAPE_2x5x1", - "CUBLASLT_CLUSTER_SHAPE_2x4x1", - "CUBLASLT_CLUSTER_SHAPE_2x3x1", - "CUBLASLT_CLUSTER_SHAPE_2x2x1", - "CUBLASLT_CLUSTER_SHAPE_2x1x1", - "CUBLASLT_CLUSTER_SHAPE_1x9x1", - "CUBLASLT_CLUSTER_SHAPE_1x8x1", - "CUBLASLT_CLUSTER_SHAPE_1x7x1", - "CUBLASLT_CLUSTER_SHAPE_1x6x1", - "CUBLASLT_CLUSTER_SHAPE_1x5x1", - "CUBLASLT_CLUSTER_SHAPE_1x4x1", - "CUBLASLT_CLUSTER_SHAPE_1x3x1", - "CUBLASLT_CLUSTER_SHAPE_1x2x1", - "CUBLASLT_CLUSTER_SHAPE_1x1x1", - "CUBLASLT_CLUSTER_SHAPE_1x16x1", - "CUBLASLT_CLUSTER_SHAPE_1x15x1", - "CUBLASLT_CLUSTER_SHAPE_1x14x1", - "CUBLASLT_CLUSTER_SHAPE_1x13x1", - "CUBLASLT_CLUSTER_SHAPE_1x12x1", - "CUBLASLT_CLUSTER_SHAPE_1x11x1", - "CUBLASLT_CLUSTER_SHAPE_1x10x1", - "CUBLASLT_CLUSTER_SHAPE_16x1x1", - "CUBLASLT_CLUSTER_SHAPE_15x1x1", - "CUBLASLT_CLUSTER_SHAPE_14x1x1", - "CUBLASLT_CLUSTER_SHAPE_13x1x1", - "CUBLASLT_CLUSTER_SHAPE_12x1x1", - "CUBLASLT_CLUSTER_SHAPE_11x1x1", - "CUBLASLT_CLUSTER_SHAPE_10x1x1", - "CUBLASLT_ALGO_CONFIG_TILE_ID", - 
"CUBLASLT_ALGO_CONFIG_STAGES_ID", - "CUBLASLT_ALGO_CONFIG_SPLITK_NUM", - "CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME", - "CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID", - "CUBLASLT_ALGO_CONFIG_ID", - "CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION", - "CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING", - "CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID", - "CUBLASLT_ALGO_CAP_UPLO_SUPPORT", - "CUBLASLT_ALGO_CAP_TILE_IDS", - "CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT", - "CUBLASLT_ALGO_CAP_STAGES_IDS", - "CUBLASLT_ALGO_CAP_SPLITK_SUPPORT", - "CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK", - "CUBLASLT_ALGO_CAP_POINTER_MODE_MASK", - "CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT", - "CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES", - "CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES", - "CUBLASLT_ALGO_CAP_LD_NEGATIVE", - "CUBLASLT_ALGO_CAP_EPILOGUE_MASK", - "CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX", - "CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER", - "CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT", - "CUBLASLT_ALGO_CAP_ATOMIC_SYNC" - ) + while (my($func) = each %RocOnlyUnsupportedFunctions) { my $mt = m/($func)/g; if ($mt) { @@ -12907,617 +12915,619 @@ sub warnRocOnlyUnsupportedFunctions { return $k; } +@MIOpenOnlyUnsupportedFunctions = ( + "cudnnWgradMode_t", + "cudnnTransformTensorEx", + "cudnnTransformFilter", + "cudnnTensorTransformStruct", + "cudnnTensorTransformDescriptor_t", + "cudnnTensorStruct", + "cudnnSpatialTransformerStruct", + "cudnnSpatialTransformerDescriptor_t", + "cudnnSpatialTfSamplerForward", + "cudnnSpatialTfSamplerBackward", + "cudnnSpatialTfGridGeneratorForward", + "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", + "cudnnSeverity_t", + "cudnnSetTensorTransformDescriptor", + "cudnnSetTensorNdDescriptorEx", + "cudnnSetTensorNdDescriptor", + "cudnnSetTensor4dDescriptor", + "cudnnSetSpatialTransformerNdDescriptor", + "cudnnSetSeqDataDescriptor", + "cudnnSetRNNProjectionLayers", + 
"cudnnSetRNNPaddingMode", + "cudnnSetRNNMatrixMathType", + "cudnnSetRNNDescriptor_v8", + "cudnnSetRNNDescriptor_v5", + "cudnnSetRNNDescriptor", + "cudnnSetRNNDataDescriptor", + "cudnnSetRNNBiasMode", + "cudnnSetRNNAlgorithmDescriptor", + "cudnnSetPersistentRNNPlan", + "cudnnSetOpTensorDescriptor", + "cudnnSetFusedOpsVariantParamPackAttribute", + "cudnnSetFusedOpsConstParamPackAttribute", + "cudnnSetFilterNdDescriptor", + "cudnnSetFilter4dDescriptor", + "cudnnSetConvolutionReorderType", + "cudnnSetConvolutionNdDescriptor", + "cudnnSetConvolutionMathType", + "cudnnSetConvolution2dDescriptor", + "cudnnSetCallback", + "cudnnSetCTCLossDescriptor_v9", + "cudnnSetCTCLossDescriptor_v8", + "cudnnSetCTCLossDescriptorEx", + "cudnnSetAttnDescriptor", + "cudnnSetAlgorithmPerformance", + "cudnnSetAlgorithmDescriptor", + "cudnnSetActivationDescriptorSwishBeta", + "cudnnSetActivationDescriptor", + "cudnnSeqDataStruct", + "cudnnSeqDataDescriptor_t", + "cudnnSeqDataAxis_t", + "cudnnSaveAlgorithm", + "cudnnSamplerType_t", + "cudnnRuntimeTag_t", + "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", + "cudnnReorderType_t", + "cudnnReorderFilterAndBias", + "cudnnReduceTensorStruct", + "cudnnRNNStruct", + "cudnnRNNSetClip_v9", + "cudnnRNNSetClip_v8", + "cudnnRNNSetClip", + "cudnnRNNGetClip_v9", + "cudnnRNNGetClip_v8", + "cudnnRNNGetClip", + "cudnnRNNForwardTrainingEx", + "cudnnRNNForwardInferenceEx", + "cudnnRNNForward", + "cudnnRNNDataStruct", + "cudnnRNNDataLayout_t", + "cudnnRNNDataDescriptor_t", + "cudnnRNNClipMode_t", + "cudnnRNNBackwardWeights_v8", + "cudnnRNNBackwardWeightsEx", + "cudnnRNNBackwardData_v8", + "cudnnRNNBackwardDataEx", + "cudnnQueryRuntimeError", + "cudnnPoolingStruct", + "cudnnPoolingForward", + "cudnnPoolingBackward", + "cudnnPersistentRNNPlan_t", + "cudnnPersistentRNNPlan", + "cudnnOpsVersionCheck", + "cudnnOpsTrainVersionCheck", + "cudnnOpsInferVersionCheck", + "cudnnOpTensorStruct", + "cudnnOpTensorDescriptor_t", + "cudnnOpTensor", + 
"cudnnNormalizationForwardTraining", + "cudnnNormalizationForwardInference", + "cudnnNormalizationBackward", + "cudnnNormOps_t", + "cudnnNormMode_t", + "cudnnNormAlgo_t", + "cudnnMultiHeadAttnWeightKind_t", + "cudnnMultiHeadAttnForward", + "cudnnMultiHeadAttnBackwardWeights", + "cudnnMultiHeadAttnBackwardData", + "cudnnMathType_t", + "cudnnMakeFusedOpsPlan", + "cudnnLossNormalizationMode_t", + "cudnnLRNStruct", + "cudnnLRNCrossChannelForward", + "cudnnLRNCrossChannelBackward", + "cudnnInitTransformDest", + "cudnnIm2Col", + "cudnnGraphVersionCheck", + "cudnnGetVersion", + "cudnnGetTensorTransformDescriptor", + "cudnnGetTensorSizeInBytes", + "cudnnGetTensorNdDescriptor", + "cudnnGetSeqDataDescriptor", + "cudnnGetRNNWeightSpaceSize", + "cudnnGetRNNWeightParams", + "cudnnGetRNNTempSpaceSizes", + "cudnnGetRNNProjectionLayers", + "cudnnGetRNNPaddingMode", + "cudnnGetRNNMatrixMathType", + "cudnnGetRNNLinLayerMatrixParams", + "cudnnGetRNNLinLayerBiasParams", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount", + "cudnnGetRNNDescriptor_v8", + "cudnnGetRNNDataDescriptor", + "cudnnGetRNNBiasMode", + "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", + "cudnnGetRNNBackwardDataAlgorithmMaxCount", + "cudnnGetProperty", + "cudnnGetOpTensorDescriptor", + "cudnnGetNormalizationTrainingReserveSpaceSize", + "cudnnGetNormalizationForwardTrainingWorkspaceSize", + "cudnnGetNormalizationBackwardWorkspaceSize", + "cudnnGetMultiHeadAttnWeights", + "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", + "cudnnGetLastErrorString", + "cudnnGetFusedOpsVariantParamPackAttribute", + "cudnnGetFusedOpsConstParamPackAttribute", + "cudnnGetFoldedConvBackwardDataDescriptors", + "cudnnGetFilterSizeInBytes", + "cudnnGetFilterNdDescriptor", + "cudnnGetFilter4dDescriptor", + "cudnnGetCudartVersion", + "cudnnGetConvolutionReorderType", + "cudnnGetConvolutionNdForwardOutputDim", + "cudnnGetConvolutionNdDescriptor", + "cudnnGetConvolutionMathType", + 
"cudnnGetConvolutionGroupCount", + "cudnnGetConvolutionForwardAlgorithm_v7", + "cudnnGetConvolutionForwardAlgorithmMaxCount", + "cudnnGetConvolutionForwardAlgorithm", + "cudnnGetConvolutionBackwardFilterWorkspaceSize", + "cudnnGetConvolutionBackwardFilterAlgorithm_v7", + "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", + "cudnnGetConvolutionBackwardFilterAlgorithm", + "cudnnGetConvolutionBackwardDataAlgorithm_v7", + "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", + "cudnnGetConvolutionBackwardDataAlgorithm", + "cudnnGetConvolution2dDescriptor", + "cudnnGetCallback", + "cudnnGetCTCLossWorkspaceSize_v8", + "cudnnGetCTCLossDescriptor_v9", + "cudnnGetCTCLossDescriptor_v8", + "cudnnGetCTCLossDescriptorEx", + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", + "cudnnGetBatchNormalizationBackwardExWorkspaceSize", + "cudnnGetAttnDescriptor", + "cudnnGetAlgorithmSpaceSize", + "cudnnGetAlgorithmPerformance", + "cudnnGetAlgorithmDescriptor", + "cudnnGetActivationDescriptorSwishBeta", + "cudnnGetActivationDescriptor", + "cudnnGenStatsMode_t", + "cudnnFusedOps_t", + "cudnnFusedOpsVariantParamStruct", + "cudnnFusedOpsVariantParamPack_t", + "cudnnFusedOpsVariantParamLabel_t", + "cudnnFusedOpsPointerPlaceHolder_t", + "cudnnFusedOpsPlan_t", + "cudnnFusedOpsPlanStruct", + "cudnnFusedOpsExecute", + "cudnnFusedOpsConstParamStruct", + "cudnnFusedOpsConstParamPack_t", + "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", + "cudnnFoldingDirection_t", + "cudnnFindRNNForwardTrainingAlgorithmEx", + "cudnnFindRNNForwardInferenceAlgorithmEx", + "cudnnFindRNNBackwardWeightsAlgorithmEx", + "cudnnFindRNNBackwardDataAlgorithmEx", + "cudnnFindConvolutionForwardAlgorithm", + "cudnnFindConvolutionBackwardFilterAlgorithmEx", + "cudnnFindConvolutionBackwardFilterAlgorithm", + "cudnnFindConvolutionBackwardDataAlgorithmEx", + "cudnnFindConvolutionBackwardDataAlgorithm", + "cudnnFilterStruct", + 
"cudnnErrQueryMode_t", + "cudnnDropoutStruct", + "cudnnDivisiveNormalizationForward", + "cudnnDivisiveNormalizationBackward", + "cudnnDivNormMode_t", + "cudnnDeterminism_t", + "cudnnDestroyTensorTransformDescriptor", + "cudnnDestroySpatialTransformerDescriptor", + "cudnnDestroySeqDataDescriptor", + "cudnnDestroyRNNDataDescriptor", + "cudnnDestroyPersistentRNNPlan", + "cudnnDestroyOpTensorDescriptor", + "cudnnDestroyFusedOpsVariantParamPack", + "cudnnDestroyFusedOpsPlan", + "cudnnDestroyFusedOpsConstParamPack", + "cudnnDestroyFilterDescriptor", + "cudnnDestroyAttnDescriptor", + "cudnnDestroyAlgorithmPerformance", + "cudnnDestroyAlgorithmDescriptor", + "cudnnDeriveNormTensorDescriptor", + "cudnnDebug_t", + "cudnnDebugStruct", + "cudnnCreateTensorTransformDescriptor", + "cudnnCreateSpatialTransformerDescriptor", + "cudnnCreateSeqDataDescriptor", + "cudnnCreateRNNDataDescriptor", + "cudnnCreatePersistentRNNPlan", + "cudnnCreateOpTensorDescriptor", + "cudnnCreateFusedOpsVariantParamPack", + "cudnnCreateFusedOpsPlan", + "cudnnCreateFusedOpsConstParamPack", + "cudnnCreateFilterDescriptor", + "cudnnCreateAttnDescriptor", + "cudnnCreateAlgorithmPerformance", + "cudnnCreateAlgorithmDescriptor", + "cudnnCopyAlgorithmDescriptor", + "cudnnConvolutionStruct", + "cudnnConvolutionFwdPreference_t", + "cudnnConvolutionBwdFilterPreference_t", + "cudnnConvolutionBwdFilterAlgo_t", + "cudnnConvolutionBwdFilterAlgoPerf_t", + "cudnnConvolutionBwdFilterAlgoPerfStruct", + "cudnnConvolutionBwdDataPreference_t", + "cudnnConvolutionBackwardFilter", + "cudnnCnnTrainVersionCheck", + "cudnnCnnInferVersionCheck", + "cudnnCallback_t", + "cudnnCTCLoss_v8", + "cudnnCTCLossStruct", + "cudnnCTCGradMode_t", + "cudnnBuildRNNDynamic", + "cudnnBnFinalizeStatsMode_t", + "cudnnBatchNormalizationForwardTrainingEx", + "cudnnBatchNormalizationBackwardEx", + "cudnnBatchNormOps_t", + "cudnnBackendUpdateCudaGraph", + "cudnnBackendTensorReordering_t", + "cudnnBackendPopulateCudaGraph", + 
"cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", + "cudnnBackendLayoutType_t", + "cudnnBackendKnobType_t", + "cudnnBackendInitialize", + "cudnnBackendBehaviorNote_t", + "cudnnAttnStruct", + "cudnnAttnQueryMap_t", + "cudnnAttnDescriptor_t", + "cudnnAlgorithm_t", + "cudnnAlgorithmUnionStruct", + "cudnnAlgorithmStruct", + "cudnnAlgorithmPerformance_t", + "cudnnAlgorithmPerformanceStruct", + "cudnnAlgorithmDescriptor_t", + "cudnnAdvVersionCheck", + "cudnnAdvTrainVersionCheck", + "cudnnAdvInferVersionCheck", + "cudnnAddTensor", + "cudnnActivationStruct", + "CUDNN_WGRAD_MODE_SET", + "CUDNN_WGRAD_MODE_ADD", + "CUDNN_TRANSFORM_UNFOLD", + "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", + "CUDNN_TENSOR_REORDERING_F16x16", + "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", + "CUDNN_TENSOR_OP_MATH", + "CUDNN_TENSOR_NCHW_VECT_C", + "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", + "CUDNN_STATUS_SPECIFIC_ERROR", + "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", + "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_RUNTIME_IN_PROGRESS", + "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + "CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", + "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", + "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", + "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_NOT_SUPPORTED_PADDING", + "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", + "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", + "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", + "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", + "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", + "CUDNN_STATUS_MAPPING_ERROR", + "CUDNN_STATUS_LICENSE_ERROR", + "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", + 
"CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_FULL_ERROR_CODE", + "CUDNN_STATUS_EXECUTION_FAILED_CURAND", + "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", + "CUDNN_STATUS_EXECUTION_FAILED_CUDART", + "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", + "CUDNN_STATUS_EXECUTION_FAILED", + "CUDNN_STATUS_DEPRECATED", + "CUDNN_STATUS_CATEGORY", + "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", + "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", + "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", + "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", + "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", + "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", + "CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE", + "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", + "CUDNN_STATUS_ARCH_MISMATCH", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", + "CUDNN_SEV_WARNING_EN", + "CUDNN_SEV_WARNING", + "CUDNN_SEV_INFO_EN", + "CUDNN_SEV_INFO", + "CUDNN_SEV_FATAL", + "CUDNN_SEV_ERROR_EN", + "CUDNN_SEV_ERROR", + "CUDNN_SEQDATA_VECT_DIM", + "CUDNN_SEQDATA_TIME_DIM", + "CUDNN_SEQDATA_DIM_COUNT", + "CUDNN_SEQDATA_BEAM_DIM", + "CUDNN_SEQDATA_BATCH_DIM", + "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", + "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", + "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", + "CUDNN_SCALAR_DOUBLE_BN_EPSILON", + "CUDNN_SAMPLER_BILINEAR", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", + "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", + "CUDNN_RNN_CLIP_NONE", + "CUDNN_RNN_CLIP_MINMAX", + "CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H", + "CUDNN_RNN_ALGO_PERSIST_STATIC", + "CUDNN_RNN_ALGO_PERSIST_DYNAMIC", + "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RMS_NORM", + 
"CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL", + "CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", + "CUDNN_PTR_ZDATA", + "CUDNN_PTR_YSUM", + "CUDNN_PTR_YSQSUM", + "CUDNN_PTR_YDATA", + "CUDNN_PTR_XDATA", + "CUDNN_PTR_WORKSPACE", + "CUDNN_PTR_WDATA", + "CUDNN_PTR_NULL", + "CUDNN_PTR_ELEM_ALIGNED", + "CUDNN_PTR_DZDATA", + "CUDNN_PTR_DYDATA", + "CUDNN_PTR_DXDATA", + "CUDNN_PTR_DWDATA", + "CUDNN_PTR_BN_Z_EQSCALE", + "CUDNN_PTR_BN_Z_EQBIAS", + "CUDNN_PTR_BN_SCALE", + "CUDNN_PTR_BN_SAVED_MEAN", + "CUDNN_PTR_BN_SAVED_INVSTD", + "CUDNN_PTR_BN_RUNNING_VAR", + "CUDNN_PTR_BN_RUNNING_MEAN", + "CUDNN_PTR_BN_EQSCALE", + "CUDNN_PTR_BN_EQBIAS", + "CUDNN_PTR_BN_DSCALE", + "CUDNN_PTR_BN_DBIAS", + "CUDNN_PTR_BN_BIAS", + "CUDNN_PTR_ACTIVATION_BITMASK", + "CUDNN_PTR_16B_ALIGNED", + "CUDNN_POOLING_MAX_DETERMINISTIC", + "CUDNN_POINTWISE_ATAN2", + "CUDNN_PARAM_ZDESC", + "CUDNN_PARAM_ZDATA_PLACEHOLDER", + "CUDNN_PARAM_YSUM_PLACEHOLDER", + "CUDNN_PARAM_YSTATS_DESC", + "CUDNN_PARAM_YSQSUM_PLACEHOLDER", + "CUDNN_PARAM_YDESC", + "CUDNN_PARAM_YDATA_PLACEHOLDER", + "CUDNN_PARAM_XDESC", + "CUDNN_PARAM_XDATA_PLACEHOLDER", + "CUDNN_PARAM_WDESC", + "CUDNN_PARAM_WDATA_PLACEHOLDER", + "CUDNN_PARAM_DZDESC", + "CUDNN_PARAM_DZDATA_PLACEHOLDER", + "CUDNN_PARAM_DYDESC", + "CUDNN_PARAM_DYDATA_PLACEHOLDER", + "CUDNN_PARAM_DXDESC", + "CUDNN_PARAM_DXDATA_PLACEHOLDER", + "CUDNN_PARAM_DWDESC", + "CUDNN_PARAM_DWDATA_PLACEHOLDER", + "CUDNN_PARAM_CONV_DESC", + "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", + "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_MODE", + 
"CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_DESC", + "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", + "CUDNN_OP_TENSOR_SQRT", + "CUDNN_OP_TENSOR_NOT", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", + "CUDNN_NUMERICAL_NOTE_WINOGRAD", + "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", + "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", + "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", + "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", + "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", + "CUDNN_NUMERICAL_NOTE_FFT", + "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", + "CUDNN_NO_REORDER", + "CUDNN_NORM_PER_CHANNEL", + "CUDNN_NORM_PER_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ACTIVATION", + "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", + "CUDNN_NORM_ALGO_STANDARD", + "CUDNN_NORM_ALGO_PERSIST", + "CUDNN_NON_DETERMINISTIC", + "CUDNN_MH_ATTN_V_WEIGHTS", + "CUDNN_MH_ATTN_V_BIASES", + "CUDNN_MH_ATTN_Q_WEIGHTS", + "CUDNN_MH_ATTN_Q_BIASES", + "CUDNN_MH_ATTN_O_WEIGHTS", + "CUDNN_MH_ATTN_O_BIASES", + "CUDNN_MH_ATTN_K_WEIGHTS", + "CUDNN_MH_ATTN_K_BIASES", + "CUDNN_LRN_MIN_N", + "CUDNN_LRN_MIN_K", + "CUDNN_LRN_MIN_BETA", + "CUDNN_LRN_MAX_N", + "CUDNN_LOSS_NORMALIZATION_SOFTMAX", + "CUDNN_LOSS_NORMALIZATION_NONE", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", + "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", + "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", + "CUDNN_KNOB_TYPE_WINO_TILE", + "CUDNN_KNOB_TYPE_USE_TEX", + "CUDNN_KNOB_TYPE_TILE_SIZE", + "CUDNN_KNOB_TYPE_TILE_ROWS", + "CUDNN_KNOB_TYPE_TILE_COLS", + "CUDNN_KNOB_TYPE_TILE_CGA_N", 
+ "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", + "CUDNN_KNOB_TYPE_TILEK", + "CUDNN_KNOB_TYPE_SWIZZLE", + "CUDNN_KNOB_TYPE_STAGES", + "CUDNN_KNOB_TYPE_SPLIT_RS", + "CUDNN_KNOB_TYPE_SPLIT_K_SLC", + "CUDNN_KNOB_TYPE_SPLIT_K_BUF", + "CUDNN_KNOB_TYPE_SPLIT_K", + "CUDNN_KNOB_TYPE_SPLIT_H", + "CUDNN_KNOB_TYPE_SPLIT_COLS", + "CUDNN_KNOB_TYPE_SPECFILT", + "CUDNN_KNOB_TYPE_SLICED", + "CUDNN_KNOB_TYPE_SINGLEBUFFER", + "CUDNN_KNOB_TYPE_REDUCTION_MODE", + "CUDNN_KNOB_TYPE_OCCUPANCY", + "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", + "CUDNN_KNOB_TYPE_MULTIPLY", + "CUDNN_KNOB_TYPE_LOAD_SIZE", + "CUDNN_KNOB_TYPE_LDGC", + "CUDNN_KNOB_TYPE_LDGB", + "CUDNN_KNOB_TYPE_LDGA", + "CUDNN_KNOB_TYPE_KERNEL_CFG", + "CUDNN_KNOB_TYPE_KBLOCK", + "CUDNN_KNOB_TYPE_IDX_MODE", + "CUDNN_KNOB_TYPE_EDGE", + "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", + "CUDNN_KNOB_TYPE_COUNTS", + "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_KNOB_TYPE_BLOCK_SIZE", + "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", + "CUDNN_INSTANCE_NORM", + "CUDNN_GROUP_NORM", + "CUDNN_GENSTATS_SUM_SQSUM", + "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", + "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", + "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_FMA_MATH", + "CUDNN_ERRQUERY_RAWCODE", + "CUDNN_ERRQUERY_NONBLOCKING", + "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_DIVNORM_PRECOMPUTED_MEANS", + "CUDNN_DIM_MAX", + "CUDNN_DETERMINISTIC", + "CUDNN_DEFAULT_REORDER", + "CUDNN_DEFAULT_MATH", + "CUDNN_DATA_UINT8x4", + "CUDNN_DATA_UINT8", + "CUDNN_DATA_INT8x32", + "CUDNN_DATA_FAST_FLOAT_FOR_FP8", + "CUDNN_DATA_BOOLEAN", + "CUDNN_CTC_ZERO_OOB_GRADIENTS", + "CUDNN_CTC_SKIP_OOB_GRADIENTS", + "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", + "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", + "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", + 
"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", + "CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", + "CUDNN_CONVOLUTION_FWD_ALGO_COUNT", + "CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", + "CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", + "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", + "CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", + "CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", + "CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", + "CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", + "CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", + "CUDNN_BN_MIN_EPSILON", + "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", + "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", + "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + "CUDNN_BATCH_NORM", + "CUDNN_BATCHNORM_SPATIAL_PERSISTENT", + "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN", + "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", + "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", + "CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY", + 
"CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", + "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", + "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", + "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", + "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", + "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", + "CUDNN_ATTN_WKIND_COUNT", + "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", + "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", + "CUDNN_ATTN_ENABLE_PROJ_BIASES", + "CUDNN_ATTN_DISABLE_PROJ_BIASES", + "CUDNN_ACTIVATION_SWISH" +); + sub warnMIOpenOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; - foreach $func ( - "cudnnWgradMode_t", - "cudnnTransformTensorEx", - "cudnnTransformFilter", - "cudnnTensorTransformStruct", - "cudnnTensorTransformDescriptor_t", - "cudnnTensorStruct", - "cudnnSpatialTransformerStruct", - "cudnnSpatialTransformerDescriptor_t", - "cudnnSpatialTfSamplerForward", - "cudnnSpatialTfSamplerBackward", - "cudnnSpatialTfGridGeneratorForward", - "cudnnSpatialTfGridGeneratorBackward", - "cudnnSignalMode_t", - "cudnnSeverity_t", - "cudnnSetTensorTransformDescriptor", - "cudnnSetTensorNdDescriptorEx", - "cudnnSetTensorNdDescriptor", - "cudnnSetTensor4dDescriptor", - "cudnnSetSpatialTransformerNdDescriptor", - "cudnnSetSeqDataDescriptor", - "cudnnSetRNNProjectionLayers", - "cudnnSetRNNPaddingMode", - "cudnnSetRNNMatrixMathType", - "cudnnSetRNNDescriptor_v8", - "cudnnSetRNNDescriptor_v5", - "cudnnSetRNNDescriptor", - "cudnnSetRNNDataDescriptor", - "cudnnSetRNNBiasMode", - "cudnnSetRNNAlgorithmDescriptor", - "cudnnSetPersistentRNNPlan", - "cudnnSetOpTensorDescriptor", - "cudnnSetFusedOpsVariantParamPackAttribute", - "cudnnSetFusedOpsConstParamPackAttribute", - "cudnnSetFilterNdDescriptor", - "cudnnSetFilter4dDescriptor", - "cudnnSetConvolutionReorderType", - "cudnnSetConvolutionNdDescriptor", - "cudnnSetConvolutionMathType", - "cudnnSetConvolution2dDescriptor", - "cudnnSetCallback", - "cudnnSetCTCLossDescriptor_v9", - "cudnnSetCTCLossDescriptor_v8", - "cudnnSetCTCLossDescriptorEx", - 
"cudnnSetAttnDescriptor", - "cudnnSetAlgorithmPerformance", - "cudnnSetAlgorithmDescriptor", - "cudnnSetActivationDescriptorSwishBeta", - "cudnnSetActivationDescriptor", - "cudnnSeqDataStruct", - "cudnnSeqDataDescriptor_t", - "cudnnSeqDataAxis_t", - "cudnnSaveAlgorithm", - "cudnnSamplerType_t", - "cudnnRuntimeTag_t", - "cudnnRestoreAlgorithm", - "cudnnResampleMode_t", - "cudnnReorderType_t", - "cudnnReorderFilterAndBias", - "cudnnReduceTensorStruct", - "cudnnRNNStruct", - "cudnnRNNSetClip_v9", - "cudnnRNNSetClip_v8", - "cudnnRNNSetClip", - "cudnnRNNGetClip_v9", - "cudnnRNNGetClip_v8", - "cudnnRNNGetClip", - "cudnnRNNForwardTrainingEx", - "cudnnRNNForwardInferenceEx", - "cudnnRNNForward", - "cudnnRNNDataStruct", - "cudnnRNNDataLayout_t", - "cudnnRNNDataDescriptor_t", - "cudnnRNNClipMode_t", - "cudnnRNNBackwardWeights_v8", - "cudnnRNNBackwardWeightsEx", - "cudnnRNNBackwardData_v8", - "cudnnRNNBackwardDataEx", - "cudnnQueryRuntimeError", - "cudnnPoolingStruct", - "cudnnPoolingForward", - "cudnnPoolingBackward", - "cudnnPersistentRNNPlan_t", - "cudnnPersistentRNNPlan", - "cudnnOpsVersionCheck", - "cudnnOpsTrainVersionCheck", - "cudnnOpsInferVersionCheck", - "cudnnOpTensorStruct", - "cudnnOpTensorDescriptor_t", - "cudnnOpTensor", - "cudnnNormalizationForwardTraining", - "cudnnNormalizationForwardInference", - "cudnnNormalizationBackward", - "cudnnNormOps_t", - "cudnnNormMode_t", - "cudnnNormAlgo_t", - "cudnnMultiHeadAttnWeightKind_t", - "cudnnMultiHeadAttnForward", - "cudnnMultiHeadAttnBackwardWeights", - "cudnnMultiHeadAttnBackwardData", - "cudnnMathType_t", - "cudnnMakeFusedOpsPlan", - "cudnnLossNormalizationMode_t", - "cudnnLRNStruct", - "cudnnLRNCrossChannelForward", - "cudnnLRNCrossChannelBackward", - "cudnnInitTransformDest", - "cudnnIm2Col", - "cudnnGraphVersionCheck", - "cudnnGetVersion", - "cudnnGetTensorTransformDescriptor", - "cudnnGetTensorSizeInBytes", - "cudnnGetTensorNdDescriptor", - "cudnnGetSeqDataDescriptor", - "cudnnGetRNNWeightSpaceSize", - 
"cudnnGetRNNWeightParams", - "cudnnGetRNNTempSpaceSizes", - "cudnnGetRNNProjectionLayers", - "cudnnGetRNNPaddingMode", - "cudnnGetRNNMatrixMathType", - "cudnnGetRNNLinLayerMatrixParams", - "cudnnGetRNNLinLayerBiasParams", - "cudnnGetRNNForwardTrainingAlgorithmMaxCount", - "cudnnGetRNNForwardInferenceAlgorithmMaxCount", - "cudnnGetRNNDescriptor_v8", - "cudnnGetRNNDataDescriptor", - "cudnnGetRNNBiasMode", - "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", - "cudnnGetRNNBackwardDataAlgorithmMaxCount", - "cudnnGetProperty", - "cudnnGetOpTensorDescriptor", - "cudnnGetNormalizationTrainingReserveSpaceSize", - "cudnnGetNormalizationForwardTrainingWorkspaceSize", - "cudnnGetNormalizationBackwardWorkspaceSize", - "cudnnGetMultiHeadAttnWeights", - "cudnnGetMultiHeadAttnBuffers", - "cudnnGetMaxDeviceVersion", - "cudnnGetLastErrorString", - "cudnnGetFusedOpsVariantParamPackAttribute", - "cudnnGetFusedOpsConstParamPackAttribute", - "cudnnGetFoldedConvBackwardDataDescriptors", - "cudnnGetFilterSizeInBytes", - "cudnnGetFilterNdDescriptor", - "cudnnGetFilter4dDescriptor", - "cudnnGetCudartVersion", - "cudnnGetConvolutionReorderType", - "cudnnGetConvolutionNdForwardOutputDim", - "cudnnGetConvolutionNdDescriptor", - "cudnnGetConvolutionMathType", - "cudnnGetConvolutionGroupCount", - "cudnnGetConvolutionForwardAlgorithm_v7", - "cudnnGetConvolutionForwardAlgorithmMaxCount", - "cudnnGetConvolutionForwardAlgorithm", - "cudnnGetConvolutionBackwardFilterWorkspaceSize", - "cudnnGetConvolutionBackwardFilterAlgorithm_v7", - "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", - "cudnnGetConvolutionBackwardFilterAlgorithm", - "cudnnGetConvolutionBackwardDataAlgorithm_v7", - "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", - "cudnnGetConvolutionBackwardDataAlgorithm", - "cudnnGetConvolution2dDescriptor", - "cudnnGetCallback", - "cudnnGetCTCLossWorkspaceSize_v8", - "cudnnGetCTCLossDescriptor_v9", - "cudnnGetCTCLossDescriptor_v8", - "cudnnGetCTCLossDescriptorEx", - 
"cudnnGetBatchNormalizationTrainingExReserveSpaceSize", - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", - "cudnnGetBatchNormalizationBackwardExWorkspaceSize", - "cudnnGetAttnDescriptor", - "cudnnGetAlgorithmSpaceSize", - "cudnnGetAlgorithmPerformance", - "cudnnGetAlgorithmDescriptor", - "cudnnGetActivationDescriptorSwishBeta", - "cudnnGetActivationDescriptor", - "cudnnGenStatsMode_t", - "cudnnFusedOps_t", - "cudnnFusedOpsVariantParamStruct", - "cudnnFusedOpsVariantParamPack_t", - "cudnnFusedOpsVariantParamLabel_t", - "cudnnFusedOpsPointerPlaceHolder_t", - "cudnnFusedOpsPlan_t", - "cudnnFusedOpsPlanStruct", - "cudnnFusedOpsExecute", - "cudnnFusedOpsConstParamStruct", - "cudnnFusedOpsConstParamPack_t", - "cudnnFusedOpsConstParamLabel_t", - "cudnnFraction_t", - "cudnnFractionStruct", - "cudnnFoldingDirection_t", - "cudnnFindRNNForwardTrainingAlgorithmEx", - "cudnnFindRNNForwardInferenceAlgorithmEx", - "cudnnFindRNNBackwardWeightsAlgorithmEx", - "cudnnFindRNNBackwardDataAlgorithmEx", - "cudnnFindConvolutionForwardAlgorithm", - "cudnnFindConvolutionBackwardFilterAlgorithmEx", - "cudnnFindConvolutionBackwardFilterAlgorithm", - "cudnnFindConvolutionBackwardDataAlgorithmEx", - "cudnnFindConvolutionBackwardDataAlgorithm", - "cudnnFilterStruct", - "cudnnErrQueryMode_t", - "cudnnDropoutStruct", - "cudnnDivisiveNormalizationForward", - "cudnnDivisiveNormalizationBackward", - "cudnnDivNormMode_t", - "cudnnDeterminism_t", - "cudnnDestroyTensorTransformDescriptor", - "cudnnDestroySpatialTransformerDescriptor", - "cudnnDestroySeqDataDescriptor", - "cudnnDestroyRNNDataDescriptor", - "cudnnDestroyPersistentRNNPlan", - "cudnnDestroyOpTensorDescriptor", - "cudnnDestroyFusedOpsVariantParamPack", - "cudnnDestroyFusedOpsPlan", - "cudnnDestroyFusedOpsConstParamPack", - "cudnnDestroyFilterDescriptor", - "cudnnDestroyAttnDescriptor", - "cudnnDestroyAlgorithmPerformance", - "cudnnDestroyAlgorithmDescriptor", - "cudnnDeriveNormTensorDescriptor", - "cudnnDebug_t", - 
"cudnnDebugStruct", - "cudnnCreateTensorTransformDescriptor", - "cudnnCreateSpatialTransformerDescriptor", - "cudnnCreateSeqDataDescriptor", - "cudnnCreateRNNDataDescriptor", - "cudnnCreatePersistentRNNPlan", - "cudnnCreateOpTensorDescriptor", - "cudnnCreateFusedOpsVariantParamPack", - "cudnnCreateFusedOpsPlan", - "cudnnCreateFusedOpsConstParamPack", - "cudnnCreateFilterDescriptor", - "cudnnCreateAttnDescriptor", - "cudnnCreateAlgorithmPerformance", - "cudnnCreateAlgorithmDescriptor", - "cudnnCopyAlgorithmDescriptor", - "cudnnConvolutionStruct", - "cudnnConvolutionFwdPreference_t", - "cudnnConvolutionBwdFilterPreference_t", - "cudnnConvolutionBwdFilterAlgo_t", - "cudnnConvolutionBwdFilterAlgoPerf_t", - "cudnnConvolutionBwdFilterAlgoPerfStruct", - "cudnnConvolutionBwdDataPreference_t", - "cudnnConvolutionBackwardFilter", - "cudnnCnnTrainVersionCheck", - "cudnnCnnInferVersionCheck", - "cudnnCallback_t", - "cudnnCTCLoss_v8", - "cudnnCTCLossStruct", - "cudnnCTCGradMode_t", - "cudnnBuildRNNDynamic", - "cudnnBnFinalizeStatsMode_t", - "cudnnBatchNormalizationForwardTrainingEx", - "cudnnBatchNormalizationBackwardEx", - "cudnnBatchNormOps_t", - "cudnnBackendUpdateCudaGraph", - "cudnnBackendTensorReordering_t", - "cudnnBackendPopulateCudaGraph", - "cudnnBackendNumericalNote_t", - "cudnnBackendNormMode_t", - "cudnnBackendNormFwdPhase_t", - "cudnnBackendLayoutType_t", - "cudnnBackendKnobType_t", - "cudnnBackendInitialize", - "cudnnBackendBehaviorNote_t", - "cudnnAttnStruct", - "cudnnAttnQueryMap_t", - "cudnnAttnDescriptor_t", - "cudnnAlgorithm_t", - "cudnnAlgorithmUnionStruct", - "cudnnAlgorithmStruct", - "cudnnAlgorithmPerformance_t", - "cudnnAlgorithmPerformanceStruct", - "cudnnAlgorithmDescriptor_t", - "cudnnAdvVersionCheck", - "cudnnAdvTrainVersionCheck", - "cudnnAdvInferVersionCheck", - "cudnnAddTensor", - "cudnnActivationStruct", - "CUDNN_WGRAD_MODE_SET", - "CUDNN_WGRAD_MODE_ADD", - "CUDNN_TRANSFORM_UNFOLD", - "CUDNN_TRANSFORM_FOLD", - "CUDNN_TENSOR_REORDERING_NONE", - 
"CUDNN_TENSOR_REORDERING_INT8x32", - "CUDNN_TENSOR_REORDERING_F16x16", - "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", - "CUDNN_TENSOR_OP_MATH", - "CUDNN_TENSOR_NCHW_VECT_C", - "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", - "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", - "CUDNN_STATUS_SPECIFIC_ERROR", - "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", - "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", - "CUDNN_STATUS_RUNTIME_IN_PROGRESS", - "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", - "CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", - "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", - "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", - "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", - "CUDNN_STATUS_NOT_SUPPORTED_PADDING", - "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", - "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", - "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", - "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", - "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", - "CUDNN_STATUS_MAPPING_ERROR", - "CUDNN_STATUS_LICENSE_ERROR", - "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", - "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_FULL_ERROR_CODE", - "CUDNN_STATUS_EXECUTION_FAILED_CURAND", - "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", - "CUDNN_STATUS_EXECUTION_FAILED_CUDART", - "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", - "CUDNN_STATUS_EXECUTION_FAILED", - "CUDNN_STATUS_DEPRECATED", - "CUDNN_STATUS_CATEGORY", - "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", - "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", - 
"CUDNN_STATUS_BAD_PARAM_NULL_POINTER", - "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", - "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", - "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", - "CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE", - "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", - "CUDNN_STATUS_ARCH_MISMATCH", - "CUDNN_SIGNAL_WAIT", - "CUDNN_SIGNAL_SET", - "CUDNN_SEV_WARNING_EN", - "CUDNN_SEV_WARNING", - "CUDNN_SEV_INFO_EN", - "CUDNN_SEV_INFO", - "CUDNN_SEV_FATAL", - "CUDNN_SEV_ERROR_EN", - "CUDNN_SEV_ERROR", - "CUDNN_SEQDATA_VECT_DIM", - "CUDNN_SEQDATA_TIME_DIM", - "CUDNN_SEQDATA_DIM_COUNT", - "CUDNN_SEQDATA_BEAM_DIM", - "CUDNN_SEQDATA_BATCH_DIM", - "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", - "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", - "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", - "CUDNN_SCALAR_DOUBLE_BN_EPSILON", - "CUDNN_SAMPLER_BILINEAR", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", - "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", - "CUDNN_RNN_CLIP_NONE", - "CUDNN_RNN_CLIP_MINMAX", - "CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H", - "CUDNN_RNN_ALGO_PERSIST_STATIC", - "CUDNN_RNN_ALGO_PERSIST_DYNAMIC", - "CUDNN_RNN_ALGO_COUNT", - "CUDNN_RMS_NORM", - "CUDNN_RESAMPLE_NEAREST", - "CUDNN_RESAMPLE_MAXPOOL", - "CUDNN_RESAMPLE_BILINEAR", - "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL", - "CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", - "CUDNN_PTR_ZDATA", - "CUDNN_PTR_YSUM", - "CUDNN_PTR_YSQSUM", - "CUDNN_PTR_YDATA", - "CUDNN_PTR_XDATA", - "CUDNN_PTR_WORKSPACE", - "CUDNN_PTR_WDATA", - "CUDNN_PTR_NULL", - "CUDNN_PTR_ELEM_ALIGNED", - "CUDNN_PTR_DZDATA", - "CUDNN_PTR_DYDATA", - "CUDNN_PTR_DXDATA", - "CUDNN_PTR_DWDATA", - "CUDNN_PTR_BN_Z_EQSCALE", - "CUDNN_PTR_BN_Z_EQBIAS", - "CUDNN_PTR_BN_SCALE", - "CUDNN_PTR_BN_SAVED_MEAN", - "CUDNN_PTR_BN_SAVED_INVSTD", - "CUDNN_PTR_BN_RUNNING_VAR", - "CUDNN_PTR_BN_RUNNING_MEAN", - "CUDNN_PTR_BN_EQSCALE", - 
"CUDNN_PTR_BN_EQBIAS", - "CUDNN_PTR_BN_DSCALE", - "CUDNN_PTR_BN_DBIAS", - "CUDNN_PTR_BN_BIAS", - "CUDNN_PTR_ACTIVATION_BITMASK", - "CUDNN_PTR_16B_ALIGNED", - "CUDNN_POOLING_MAX_DETERMINISTIC", - "CUDNN_POINTWISE_ATAN2", - "CUDNN_PARAM_ZDESC", - "CUDNN_PARAM_ZDATA_PLACEHOLDER", - "CUDNN_PARAM_YSUM_PLACEHOLDER", - "CUDNN_PARAM_YSTATS_DESC", - "CUDNN_PARAM_YSQSUM_PLACEHOLDER", - "CUDNN_PARAM_YDESC", - "CUDNN_PARAM_YDATA_PLACEHOLDER", - "CUDNN_PARAM_XDESC", - "CUDNN_PARAM_XDATA_PLACEHOLDER", - "CUDNN_PARAM_WDESC", - "CUDNN_PARAM_WDATA_PLACEHOLDER", - "CUDNN_PARAM_DZDESC", - "CUDNN_PARAM_DZDATA_PLACEHOLDER", - "CUDNN_PARAM_DYDESC", - "CUDNN_PARAM_DYDATA_PLACEHOLDER", - "CUDNN_PARAM_DXDESC", - "CUDNN_PARAM_DXDATA_PLACEHOLDER", - "CUDNN_PARAM_DWDESC", - "CUDNN_PARAM_DWDATA_PLACEHOLDER", - "CUDNN_PARAM_CONV_DESC", - "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", - "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", - "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", - "CUDNN_PARAM_BN_MODE", - "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_DESC", - "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", - "CUDNN_OP_TENSOR_SQRT", - "CUDNN_OP_TENSOR_NOT", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", - "CUDNN_NUMERICAL_NOTE_WINOGRAD", - "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", - "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", - "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", - "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", - 
"CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", - "CUDNN_NUMERICAL_NOTE_FFT", - "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", - "CUDNN_NO_REORDER", - "CUDNN_NORM_PER_CHANNEL", - "CUDNN_NORM_PER_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ACTIVATION", - "CUDNN_NORM_OPS_NORM", - "CUDNN_NORM_FWD_TRAINING", - "CUDNN_NORM_FWD_INFERENCE", - "CUDNN_NORM_ALGO_STANDARD", - "CUDNN_NORM_ALGO_PERSIST", - "CUDNN_NON_DETERMINISTIC", - "CUDNN_MH_ATTN_V_WEIGHTS", - "CUDNN_MH_ATTN_V_BIASES", - "CUDNN_MH_ATTN_Q_WEIGHTS", - "CUDNN_MH_ATTN_Q_BIASES", - "CUDNN_MH_ATTN_O_WEIGHTS", - "CUDNN_MH_ATTN_O_BIASES", - "CUDNN_MH_ATTN_K_WEIGHTS", - "CUDNN_MH_ATTN_K_BIASES", - "CUDNN_LRN_MIN_N", - "CUDNN_LRN_MIN_K", - "CUDNN_LRN_MIN_BETA", - "CUDNN_LRN_MAX_N", - "CUDNN_LOSS_NORMALIZATION_SOFTMAX", - "CUDNN_LOSS_NORMALIZATION_NONE", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", - "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", - "CUDNN_LAYOUT_TYPE_COUNT", - "CUDNN_LAYER_NORM", - "CUDNN_KNOB_TYPE_WORKSPACE", - "CUDNN_KNOB_TYPE_WINO_TILE", - "CUDNN_KNOB_TYPE_USE_TEX", - "CUDNN_KNOB_TYPE_TILE_SIZE", - "CUDNN_KNOB_TYPE_TILE_ROWS", - "CUDNN_KNOB_TYPE_TILE_COLS", - "CUDNN_KNOB_TYPE_TILE_CGA_N", - "CUDNN_KNOB_TYPE_TILE_CGA_M", - "CUDNN_KNOB_TYPE_TILE_CGA", - "CUDNN_KNOB_TYPE_TILEK", - "CUDNN_KNOB_TYPE_SWIZZLE", - "CUDNN_KNOB_TYPE_STAGES", - "CUDNN_KNOB_TYPE_SPLIT_RS", - "CUDNN_KNOB_TYPE_SPLIT_K_SLC", - "CUDNN_KNOB_TYPE_SPLIT_K_BUF", - "CUDNN_KNOB_TYPE_SPLIT_K", - "CUDNN_KNOB_TYPE_SPLIT_H", - "CUDNN_KNOB_TYPE_SPLIT_COLS", - "CUDNN_KNOB_TYPE_SPECFILT", - "CUDNN_KNOB_TYPE_SLICED", - "CUDNN_KNOB_TYPE_SINGLEBUFFER", - "CUDNN_KNOB_TYPE_REDUCTION_MODE", - "CUDNN_KNOB_TYPE_OCCUPANCY", - "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", - "CUDNN_KNOB_TYPE_MULTIPLY", - "CUDNN_KNOB_TYPE_LOAD_SIZE", - "CUDNN_KNOB_TYPE_LDGC", - "CUDNN_KNOB_TYPE_LDGB", - "CUDNN_KNOB_TYPE_LDGA", - "CUDNN_KNOB_TYPE_KERNEL_CFG", - "CUDNN_KNOB_TYPE_KBLOCK", - 
"CUDNN_KNOB_TYPE_IDX_MODE", - "CUDNN_KNOB_TYPE_EDGE", - "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", - "CUDNN_KNOB_TYPE_COUNTS", - "CUDNN_KNOB_TYPE_CHUNK_K", - "CUDNN_KNOB_TYPE_BLOCK_SIZE", - "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", - "CUDNN_INSTANCE_NORM", - "CUDNN_GROUP_NORM", - "CUDNN_GENSTATS_SUM_SQSUM", - "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", - "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", - "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_FMA_MATH", - "CUDNN_ERRQUERY_RAWCODE", - "CUDNN_ERRQUERY_NONBLOCKING", - "CUDNN_ERRQUERY_BLOCKING", - "CUDNN_DIVNORM_PRECOMPUTED_MEANS", - "CUDNN_DIM_MAX", - "CUDNN_DETERMINISTIC", - "CUDNN_DEFAULT_REORDER", - "CUDNN_DEFAULT_MATH", - "CUDNN_DATA_UINT8x4", - "CUDNN_DATA_UINT8", - "CUDNN_DATA_INT8x32", - "CUDNN_DATA_FAST_FLOAT_FOR_FP8", - "CUDNN_DATA_BOOLEAN", - "CUDNN_CTC_ZERO_OOB_GRADIENTS", - "CUDNN_CTC_SKIP_OOB_GRADIENTS", - "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", - "CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", - "CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", - "CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", - "CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", - "CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", - "CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", - "CUDNN_CONVOLUTION_FWD_ALGO_COUNT", - "CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT", - "CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", - "CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", - "CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", - 
"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", - "CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", - "CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", - "CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", - "CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", - "CUDNN_BN_MIN_EPSILON", - "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", - "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", - "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", - "CUDNN_BATCH_NORM", - "CUDNN_BATCHNORM_SPATIAL_PERSISTENT", - "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN", - "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", - "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", - "CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY", - "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", - "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", - "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", - "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", - "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", - "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", - "CUDNN_ATTN_WKIND_COUNT", - "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", - "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", - "CUDNN_ATTN_ENABLE_PROJ_BIASES", - "CUDNN_ATTN_DISABLE_PROJ_BIASES", - "CUDNN_ACTIVATION_SWISH" - ) + while (my($func) = each %MIOpenOnlyUnsupportedFunctions) { my $mt = m/($func)/g; if ($mt) { @@ -13528,906 +13538,908 @@ sub warnMIOpenOnlyUnsupportedFunctions { return $k; } +@HipDNNOnlyUnsupportedFunctions = ( + "cudnnWgradMode_t", + 
"cudnnTransformTensorEx", + "cudnnTransformTensor", + "cudnnTransformFilter", + "cudnnTensorTransformStruct", + "cudnnTensorTransformDescriptor_t", + "cudnnTensorStruct", + "cudnnSpatialTransformerStruct", + "cudnnSpatialTransformerDescriptor_t", + "cudnnSpatialTfSamplerForward", + "cudnnSpatialTfSamplerBackward", + "cudnnSpatialTfGridGeneratorForward", + "cudnnSpatialTfGridGeneratorBackward", + "cudnnSignalMode_t", + "cudnnSeverity_t", + "cudnnSetTensorTransformDescriptor", + "cudnnSetTensorNdDescriptorEx", + "cudnnSetSpatialTransformerNdDescriptor", + "cudnnSetSeqDataDescriptor", + "cudnnSetRNNProjectionLayers", + "cudnnSetRNNPaddingMode", + "cudnnSetRNNMatrixMathType", + "cudnnSetRNNDescriptor_v8", + "cudnnSetRNNDataDescriptor", + "cudnnSetRNNBiasMode", + "cudnnSetRNNAlgorithmDescriptor", + "cudnnSetFusedOpsVariantParamPackAttribute", + "cudnnSetFusedOpsConstParamPackAttribute", + "cudnnSetConvolutionReorderType", + "cudnnSetCallback", + "cudnnSetCTCLossDescriptor_v9", + "cudnnSetCTCLossDescriptor_v8", + "cudnnSetCTCLossDescriptorEx", + "cudnnSetCTCLossDescriptor", + "cudnnSetAttnDescriptor", + "cudnnSetAlgorithmPerformance", + "cudnnSetAlgorithmDescriptor", + "cudnnSetActivationDescriptorSwishBeta", + "cudnnSeqDataStruct", + "cudnnSeqDataDescriptor_t", + "cudnnSeqDataAxis_t", + "cudnnSaveAlgorithm", + "cudnnSamplerType_t", + "cudnnRuntimeTag_t", + "cudnnRngDistribution_t", + "cudnnRestoreDropoutDescriptor", + "cudnnRestoreAlgorithm", + "cudnnResampleMode_t", + "cudnnReorderType_t", + "cudnnReorderFilterAndBias", + "cudnnReduceTensorStruct", + "cudnnRNNStruct", + "cudnnRNNSetClip_v9", + "cudnnRNNSetClip_v8", + "cudnnRNNSetClip", + "cudnnRNNPaddingMode_t", + "cudnnRNNGetClip_v9", + "cudnnRNNGetClip_v8", + "cudnnRNNGetClip", + "cudnnRNNForwardTrainingEx", + "cudnnRNNForwardInferenceEx", + "cudnnRNNForward", + "cudnnRNNDataStruct", + "cudnnRNNDataLayout_t", + "cudnnRNNDataDescriptor_t", + "cudnnRNNClipMode_t", + "cudnnRNNBackwardWeights_v8", + 
"cudnnRNNBackwardWeightsEx", + "cudnnRNNBackwardData_v8", + "cudnnRNNBackwardDataEx", + "cudnnQueryRuntimeError", + "cudnnPoolingStruct", + "cudnnPointwiseMode_t", + "cudnnPersistentRNNPlan", + "cudnnPaddingMode_t", + "cudnnOpsVersionCheck", + "cudnnOpsTrainVersionCheck", + "cudnnOpsInferVersionCheck", + "cudnnOpTensorStruct", + "cudnnNormalizationForwardTraining", + "cudnnNormalizationForwardInference", + "cudnnNormalizationBackward", + "cudnnNormOps_t", + "cudnnNormMode_t", + "cudnnNormAlgo_t", + "cudnnMultiHeadAttnWeightKind_t", + "cudnnMultiHeadAttnForward", + "cudnnMultiHeadAttnBackwardWeights", + "cudnnMultiHeadAttnBackwardData", + "cudnnMakeFusedOpsPlan", + "cudnnLossNormalizationMode_t", + "cudnnLRNStruct", + "cudnnInitTransformDest", + "cudnnIm2Col", + "cudnnGraphVersionCheck", + "cudnnGetTensorTransformDescriptor", + "cudnnGetTensorSizeInBytes", + "cudnnGetSeqDataDescriptor", + "cudnnGetReductionIndicesSize", + "cudnnGetRNNWeightSpaceSize", + "cudnnGetRNNWeightParams", + "cudnnGetRNNTempSpaceSizes", + "cudnnGetRNNProjectionLayers", + "cudnnGetRNNPaddingMode", + "cudnnGetRNNMatrixMathType", + "cudnnGetRNNForwardTrainingAlgorithmMaxCount", + "cudnnGetRNNForwardInferenceAlgorithmMaxCount", + "cudnnGetRNNDescriptor_v8", + "cudnnGetRNNDescriptor_v6", + "cudnnGetRNNDataDescriptor", + "cudnnGetRNNBiasMode", + "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", + "cudnnGetRNNBackwardDataAlgorithmMaxCount", + "cudnnGetProperty", + "cudnnGetPoolingNdForwardOutputDim", + "cudnnGetPoolingNdDescriptor", + "cudnnGetNormalizationTrainingReserveSpaceSize", + "cudnnGetNormalizationForwardTrainingWorkspaceSize", + "cudnnGetNormalizationBackwardWorkspaceSize", + "cudnnGetMultiHeadAttnWeights", + "cudnnGetMultiHeadAttnBuffers", + "cudnnGetMaxDeviceVersion", + "cudnnGetLastErrorString", + "cudnnGetFusedOpsVariantParamPackAttribute", + "cudnnGetFusedOpsConstParamPackAttribute", + "cudnnGetFoldedConvBackwardDataDescriptors", + "cudnnGetFilterSizeInBytes", + 
"cudnnGetDropoutDescriptor", + "cudnnGetCudartVersion", + "cudnnGetConvolutionReorderType", + "cudnnGetConvolutionNdForwardOutputDim", + "cudnnGetConvolutionNdDescriptor", + "cudnnGetConvolutionMathType", + "cudnnGetConvolutionGroupCount", + "cudnnGetConvolutionForwardAlgorithm_v7", + "cudnnGetConvolutionForwardAlgorithmMaxCount", + "cudnnGetConvolutionBackwardFilterAlgorithm_v7", + "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", + "cudnnGetConvolutionBackwardDataAlgorithm_v7", + "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", + "cudnnGetCallback", + "cudnnGetCTCLossWorkspaceSize_v8", + "cudnnGetCTCLossWorkspaceSize", + "cudnnGetCTCLossDescriptor_v9", + "cudnnGetCTCLossDescriptor_v8", + "cudnnGetCTCLossDescriptorEx", + "cudnnGetCTCLossDescriptor", + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", + "cudnnGetBatchNormalizationBackwardExWorkspaceSize", + "cudnnGetAttnDescriptor", + "cudnnGetAlgorithmSpaceSize", + "cudnnGetAlgorithmPerformance", + "cudnnGetAlgorithmDescriptor", + "cudnnGetActivationDescriptorSwishBeta", + "cudnnGenStatsMode_t", + "cudnnFusedOps_t", + "cudnnFusedOpsVariantParamStruct", + "cudnnFusedOpsVariantParamPack_t", + "cudnnFusedOpsVariantParamLabel_t", + "cudnnFusedOpsPointerPlaceHolder_t", + "cudnnFusedOpsPlan_t", + "cudnnFusedOpsPlanStruct", + "cudnnFusedOpsExecute", + "cudnnFusedOpsConstParamStruct", + "cudnnFusedOpsConstParamPack_t", + "cudnnFusedOpsConstParamLabel_t", + "cudnnFraction_t", + "cudnnFractionStruct", + "cudnnForwardMode_t", + "cudnnFoldingDirection_t", + "cudnnFindRNNForwardTrainingAlgorithmEx", + "cudnnFindRNNForwardInferenceAlgorithmEx", + "cudnnFindRNNBackwardWeightsAlgorithmEx", + "cudnnFindRNNBackwardDataAlgorithmEx", + "cudnnFilterStruct", + "cudnnErrQueryMode_t", + "cudnnDropoutStruct", + "cudnnDropoutGetReserveSpaceSize", + "cudnnDropoutForward", + "cudnnDropoutBackward", + "cudnnDivisiveNormalizationForward", + 
"cudnnDivisiveNormalizationBackward", + "cudnnDivNormMode_t", + "cudnnDeterminism_t", + "cudnnDestroyTensorTransformDescriptor", + "cudnnDestroySpatialTransformerDescriptor", + "cudnnDestroySeqDataDescriptor", + "cudnnDestroyRNNDataDescriptor", + "cudnnDestroyFusedOpsVariantParamPack", + "cudnnDestroyFusedOpsPlan", + "cudnnDestroyFusedOpsConstParamPack", + "cudnnDestroyCTCLossDescriptor", + "cudnnDestroyAttnDescriptor", + "cudnnDestroyAlgorithmPerformance", + "cudnnDestroyAlgorithmDescriptor", + "cudnnDeriveNormTensorDescriptor", + "cudnnDebug_t", + "cudnnDebugStruct", + "cudnnCreateTensorTransformDescriptor", + "cudnnCreateSpatialTransformerDescriptor", + "cudnnCreateSeqDataDescriptor", + "cudnnCreateRNNDataDescriptor", + "cudnnCreateFusedOpsVariantParamPack", + "cudnnCreateFusedOpsPlan", + "cudnnCreateFusedOpsConstParamPack", + "cudnnCreateCTCLossDescriptor", + "cudnnCreateAttnDescriptor", + "cudnnCreateAlgorithmPerformance", + "cudnnCreateAlgorithmDescriptor", + "cudnnCopyAlgorithmDescriptor", + "cudnnConvolutionStruct", + "cudnnConvolutionBiasActivationForward", + "cudnnContext", + "cudnnCnnTrainVersionCheck", + "cudnnCnnInferVersionCheck", + "cudnnCallback_t", + "cudnnCTCLoss_v8", + "cudnnCTCLossStruct", + "cudnnCTCLossDescriptor_t", + "cudnnCTCLossAlgo_t", + "cudnnCTCLoss", + "cudnnCTCGradMode_t", + "cudnnBuildRNNDynamic", + "cudnnBnFinalizeStatsMode_t", + "cudnnBatchNormalizationForwardTrainingEx", + "cudnnBatchNormalizationBackwardEx", + "cudnnBatchNormOps_t", + "cudnnBackendUpdateCudaGraph", + "cudnnBackendTensorReordering_t", + "cudnnBackendSetAttribute", + "cudnnBackendPopulateCudaGraph", + "cudnnBackendNumericalNote_t", + "cudnnBackendNormMode_t", + "cudnnBackendNormFwdPhase_t", + "cudnnBackendLayoutType_t", + "cudnnBackendKnobType_t", + "cudnnBackendInitialize", + "cudnnBackendHeurMode_t", + "cudnnBackendGetAttribute", + "cudnnBackendFinalize", + "cudnnBackendExecute", + "cudnnBackendDestroyDescriptor", + "cudnnBackendDescriptor_t", + 
"cudnnBackendDescriptorType_t", + "cudnnBackendCreateDescriptor", + "cudnnBackendBehaviorNote_t", + "cudnnBackendAttributeType_t", + "cudnnBackendAttributeName_t", + "cudnnAttnStruct", + "cudnnAttnQueryMap_t", + "cudnnAttnDescriptor_t", + "cudnnAlgorithm_t", + "cudnnAlgorithmUnionStruct", + "cudnnAlgorithmStruct", + "cudnnAlgorithmPerformance_t", + "cudnnAlgorithmPerformanceStruct", + "cudnnAlgorithmDescriptor_t", + "cudnnAdvVersionCheck", + "cudnnAdvTrainVersionCheck", + "cudnnAdvInferVersionCheck", + "cudnnActivationStruct", + "CUDNN_ZERO_PAD", + "CUDNN_WGRAD_MODE_SET", + "CUDNN_WGRAD_MODE_ADD", + "CUDNN_TYPE_VOID_PTR", + "CUDNN_TYPE_TENSOR_REORDERING_MODE", + "CUDNN_TYPE_SIGNAL_MODE", + "CUDNN_TYPE_RNG_DISTRIBUTION", + "CUDNN_TYPE_RESAMPLE_MODE", + "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", + "CUDNN_TYPE_POINTWISE_MODE", + "CUDNN_TYPE_PADDING_MODE", + "CUDNN_TYPE_NUMERICAL_NOTE", + "CUDNN_TYPE_NORM_MODE", + "CUDNN_TYPE_NORM_FWD_PHASE", + "CUDNN_TYPE_NAN_PROPOGATION", + "CUDNN_TYPE_LAYOUT_TYPE", + "CUDNN_TYPE_KNOB_TYPE", + "CUDNN_TYPE_INT64", + "CUDNN_TYPE_INT32", + "CUDNN_TYPE_HEUR_MODE", + "CUDNN_TYPE_HANDLE", + "CUDNN_TYPE_GENSTATS_MODE", + "CUDNN_TYPE_FRACTION", + "CUDNN_TYPE_FLOAT", + "CUDNN_TYPE_DOUBLE", + "CUDNN_TYPE_DATA_TYPE", + "CUDNN_TYPE_CONVOLUTION_MODE", + "CUDNN_TYPE_CHAR", + "CUDNN_TYPE_BOOLEAN", + "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", + "CUDNN_TYPE_BEHAVIOR_NOTE", + "CUDNN_TYPE_BACKEND_DESCRIPTOR", + "CUDNN_TYPE_ATTRIB_NAME", + "CUDNN_TRANSFORM_UNFOLD", + "CUDNN_TRANSFORM_FOLD", + "CUDNN_TENSOR_REORDERING_NONE", + "CUDNN_TENSOR_REORDERING_INT8x32", + "CUDNN_TENSOR_REORDERING_F16x16", + "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", + "CUDNN_STATUS_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", + "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", + "CUDNN_STATUS_SPECIFIC_ERROR", + "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", + "CUDNN_STATUS_RUNTIME_IN_PROGRESS", + "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", + 
"CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", + "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", + "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", + "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", + "CUDNN_STATUS_NOT_SUPPORTED_PADDING", + "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", + "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", + "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", + "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", + "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", + "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", + "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", + "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", + "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", + "CUDNN_STATUS_FULL_ERROR_CODE", + "CUDNN_STATUS_EXECUTION_FAILED_CURAND", + "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", + "CUDNN_STATUS_EXECUTION_FAILED_CUDART", + "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", + "CUDNN_STATUS_DEPRECATED", + "CUDNN_STATUS_CATEGORY", + "CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", + "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", + "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", + "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", + "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", + "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", + "CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE", + "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", + "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", + "CUDNN_SIGNAL_WAIT", + "CUDNN_SIGNAL_SET", + "CUDNN_SEV_WARNING_EN", + "CUDNN_SEV_WARNING", + "CUDNN_SEV_INFO_EN", + "CUDNN_SEV_INFO", + "CUDNN_SEV_FATAL", + "CUDNN_SEV_ERROR_EN", + "CUDNN_SEV_ERROR", + "CUDNN_SEQDATA_VECT_DIM", + "CUDNN_SEQDATA_TIME_DIM", + "CUDNN_SEQDATA_DIM_COUNT", + 
"CUDNN_SEQDATA_BEAM_DIM", + "CUDNN_SEQDATA_BATCH_DIM", + "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", + "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", + "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", + "CUDNN_SCALAR_DOUBLE_BN_EPSILON", + "CUDNN_SAMPLER_BILINEAR", + "CUDNN_RNN_PADDED_IO_ENABLED", + "CUDNN_RNN_PADDED_IO_DISABLED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", + "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", + "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", + "CUDNN_RNN_CLIP_NONE", + "CUDNN_RNN_CLIP_MINMAX", + "CUDNN_RNN_ALGO_COUNT", + "CUDNN_RNG_DISTRIBUTION_UNIFORM", + "CUDNN_RNG_DISTRIBUTION_NORMAL", + "CUDNN_RNG_DISTRIBUTION_BERNOULLI", + "CUDNN_RMS_NORM", + "CUDNN_RESAMPLE_NEAREST", + "CUDNN_RESAMPLE_MAXPOOL", + "CUDNN_RESAMPLE_BILINEAR", + "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", + "CUDNN_RESAMPLE_AVGPOOL", + "CUDNN_PTR_ZDATA", + "CUDNN_PTR_YSUM", + "CUDNN_PTR_YSQSUM", + "CUDNN_PTR_YDATA", + "CUDNN_PTR_XDATA", + "CUDNN_PTR_WORKSPACE", + "CUDNN_PTR_WDATA", + "CUDNN_PTR_NULL", + "CUDNN_PTR_ELEM_ALIGNED", + "CUDNN_PTR_DZDATA", + "CUDNN_PTR_DYDATA", + "CUDNN_PTR_DXDATA", + "CUDNN_PTR_DWDATA", + "CUDNN_PTR_BN_Z_EQSCALE", + "CUDNN_PTR_BN_Z_EQBIAS", + "CUDNN_PTR_BN_SCALE", + "CUDNN_PTR_BN_SAVED_MEAN", + "CUDNN_PTR_BN_SAVED_INVSTD", + "CUDNN_PTR_BN_RUNNING_VAR", + "CUDNN_PTR_BN_RUNNING_MEAN", + "CUDNN_PTR_BN_EQSCALE", + "CUDNN_PTR_BN_EQBIAS", + "CUDNN_PTR_BN_DSCALE", + "CUDNN_PTR_BN_DBIAS", + "CUDNN_PTR_BN_BIAS", + "CUDNN_PTR_ACTIVATION_BITMASK", + "CUDNN_PTR_16B_ALIGNED", + "CUDNN_POINTWISE_TANH_FWD", + "CUDNN_POINTWISE_TANH_BWD", + "CUDNN_POINTWISE_TAN", + "CUDNN_POINTWISE_SWISH_FWD", + "CUDNN_POINTWISE_SWISH_BWD", + "CUDNN_POINTWISE_SUB", + "CUDNN_POINTWISE_SQRT", + "CUDNN_POINTWISE_SOFTPLUS_FWD", + "CUDNN_POINTWISE_SOFTPLUS_BWD", + "CUDNN_POINTWISE_SIN", + "CUDNN_POINTWISE_SIGMOID_FWD", + "CUDNN_POINTWISE_SIGMOID_BWD", + "CUDNN_POINTWISE_RSQRT", + "CUDNN_POINTWISE_RELU_FWD", + "CUDNN_POINTWISE_RELU_BWD", + 
"CUDNN_POINTWISE_RECIPROCAL", + "CUDNN_POINTWISE_POW", + "CUDNN_POINTWISE_NEG", + "CUDNN_POINTWISE_MUL", + "CUDNN_POINTWISE_MOD", + "CUDNN_POINTWISE_MIN", + "CUDNN_POINTWISE_MAX", + "CUDNN_POINTWISE_LOGICAL_OR", + "CUDNN_POINTWISE_LOGICAL_NOT", + "CUDNN_POINTWISE_LOGICAL_AND", + "CUDNN_POINTWISE_LOG", + "CUDNN_POINTWISE_IDENTITY", + "CUDNN_POINTWISE_GEN_INDEX", + "CUDNN_POINTWISE_GELU_FWD", + "CUDNN_POINTWISE_GELU_BWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", + "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", + "CUDNN_POINTWISE_FLOOR", + "CUDNN_POINTWISE_EXP", + "CUDNN_POINTWISE_ERF", + "CUDNN_POINTWISE_ELU_FWD", + "CUDNN_POINTWISE_ELU_BWD", + "CUDNN_POINTWISE_DIV", + "CUDNN_POINTWISE_COS", + "CUDNN_POINTWISE_CMP_NEQ", + "CUDNN_POINTWISE_CMP_LT", + "CUDNN_POINTWISE_CMP_LE", + "CUDNN_POINTWISE_CMP_GT", + "CUDNN_POINTWISE_CMP_GE", + "CUDNN_POINTWISE_CMP_EQ", + "CUDNN_POINTWISE_CEIL", + "CUDNN_POINTWISE_BINARY_SELECT", + "CUDNN_POINTWISE_ATAN2", + "CUDNN_POINTWISE_ADD_SQUARE", + "CUDNN_POINTWISE_ADD", + "CUDNN_POINTWISE_ABS", + "CUDNN_PARAM_ZDESC", + "CUDNN_PARAM_ZDATA_PLACEHOLDER", + "CUDNN_PARAM_YSUM_PLACEHOLDER", + "CUDNN_PARAM_YSTATS_DESC", + "CUDNN_PARAM_YSQSUM_PLACEHOLDER", + "CUDNN_PARAM_YDESC", + "CUDNN_PARAM_YDATA_PLACEHOLDER", + "CUDNN_PARAM_XDESC", + "CUDNN_PARAM_XDATA_PLACEHOLDER", + "CUDNN_PARAM_WDESC", + "CUDNN_PARAM_WDATA_PLACEHOLDER", + "CUDNN_PARAM_DZDESC", + "CUDNN_PARAM_DZDATA_PLACEHOLDER", + "CUDNN_PARAM_DYDESC", + "CUDNN_PARAM_DYDATA_PLACEHOLDER", + "CUDNN_PARAM_DXDESC", + "CUDNN_PARAM_DXDATA_PLACEHOLDER", + "CUDNN_PARAM_DWDESC", + "CUDNN_PARAM_DWDATA_PLACEHOLDER", + "CUDNN_PARAM_CONV_DESC", + "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", + "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", + "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", + 
"CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", + "CUDNN_PARAM_BN_MODE", + "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", + "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", + "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", + "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_DESC", + "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", + "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", + "CUDNN_OP_TENSOR_NOT", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", + "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", + "CUDNN_NUMERICAL_NOTE_WINOGRAD", + "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", + "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", + "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", + "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", + "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", + "CUDNN_NUMERICAL_NOTE_FFT", + "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", + "CUDNN_NO_REORDER", + "CUDNN_NORM_PER_CHANNEL", + "CUDNN_NORM_PER_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", + "CUDNN_NORM_OPS_NORM_ACTIVATION", + "CUDNN_NORM_OPS_NORM", + "CUDNN_NORM_FWD_TRAINING", + "CUDNN_NORM_FWD_INFERENCE", + "CUDNN_NORM_ALGO_STANDARD", + "CUDNN_NORM_ALGO_PERSIST", + "CUDNN_NON_DETERMINISTIC", + "CUDNN_NEG_INF_PAD", + "CUDNN_MH_ATTN_V_WEIGHTS", + "CUDNN_MH_ATTN_V_BIASES", + "CUDNN_MH_ATTN_Q_WEIGHTS", + "CUDNN_MH_ATTN_Q_BIASES", + "CUDNN_MH_ATTN_O_WEIGHTS", + "CUDNN_MH_ATTN_O_BIASES", + "CUDNN_MH_ATTN_K_WEIGHTS", + "CUDNN_MH_ATTN_K_BIASES", + "CUDNN_LRN_MIN_N", + "CUDNN_LRN_MIN_K", + "CUDNN_LRN_MIN_BETA", + "CUDNN_LRN_MAX_N", + "CUDNN_LOSS_NORMALIZATION_SOFTMAX", + "CUDNN_LOSS_NORMALIZATION_NONE", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", + "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", + "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", + "CUDNN_LAYOUT_TYPE_COUNT", + "CUDNN_LAYER_NORM", + "CUDNN_KNOB_TYPE_WORKSPACE", + "CUDNN_KNOB_TYPE_WINO_TILE", + "CUDNN_KNOB_TYPE_USE_TEX", + "CUDNN_KNOB_TYPE_TILE_SIZE", + 
"CUDNN_KNOB_TYPE_TILE_ROWS", + "CUDNN_KNOB_TYPE_TILE_COLS", + "CUDNN_KNOB_TYPE_TILE_CGA_N", + "CUDNN_KNOB_TYPE_TILE_CGA_M", + "CUDNN_KNOB_TYPE_TILE_CGA", + "CUDNN_KNOB_TYPE_TILEK", + "CUDNN_KNOB_TYPE_SWIZZLE", + "CUDNN_KNOB_TYPE_STAGES", + "CUDNN_KNOB_TYPE_SPLIT_RS", + "CUDNN_KNOB_TYPE_SPLIT_K_SLC", + "CUDNN_KNOB_TYPE_SPLIT_K_BUF", + "CUDNN_KNOB_TYPE_SPLIT_K", + "CUDNN_KNOB_TYPE_SPLIT_H", + "CUDNN_KNOB_TYPE_SPLIT_COLS", + "CUDNN_KNOB_TYPE_SPECFILT", + "CUDNN_KNOB_TYPE_SLICED", + "CUDNN_KNOB_TYPE_SINGLEBUFFER", + "CUDNN_KNOB_TYPE_REDUCTION_MODE", + "CUDNN_KNOB_TYPE_OCCUPANCY", + "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", + "CUDNN_KNOB_TYPE_MULTIPLY", + "CUDNN_KNOB_TYPE_LOAD_SIZE", + "CUDNN_KNOB_TYPE_LDGC", + "CUDNN_KNOB_TYPE_LDGB", + "CUDNN_KNOB_TYPE_LDGA", + "CUDNN_KNOB_TYPE_KERNEL_CFG", + "CUDNN_KNOB_TYPE_KBLOCK", + "CUDNN_KNOB_TYPE_IDX_MODE", + "CUDNN_KNOB_TYPE_EDGE", + "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", + "CUDNN_KNOB_TYPE_COUNTS", + "CUDNN_KNOB_TYPE_CHUNK_K", + "CUDNN_KNOB_TYPE_BLOCK_SIZE", + "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", + "CUDNN_INSTANCE_NORM", + "CUDNN_HEUR_MODE_INSTANT", + "CUDNN_HEUR_MODE_FALLBACK", + "CUDNN_HEUR_MODE_B", + "CUDNN_HEUR_MODE_A", + "CUDNN_HEUR_MODES_COUNT", + "CUDNN_GROUP_NORM", + "CUDNN_GENSTATS_SUM_SQSUM", + "CUDNN_FWD_MODE_TRAINING", + "CUDNN_FWD_MODE_INFERENCE", + "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", + "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", + "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", + "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_FMA_MATH", + "CUDNN_ERRQUERY_RAWCODE", + "CUDNN_ERRQUERY_NONBLOCKING", + "CUDNN_ERRQUERY_BLOCKING", + "CUDNN_EDGE_VAL_PAD", + "CUDNN_DIVNORM_PRECOMPUTED_MEANS", + "CUDNN_DIM_MAX", + "CUDNN_DETERMINISTIC", + "CUDNN_DEFAULT_REORDER", + "CUDNN_DATA_UINT8x4", + "CUDNN_DATA_UINT8", + "CUDNN_DATA_INT8x32", + "CUDNN_DATA_INT64", + 
"CUDNN_DATA_FP8_E5M2", + "CUDNN_DATA_FP8_E4M3", + "CUDNN_DATA_FAST_FLOAT_FOR_FP8", + "CUDNN_DATA_BOOLEAN", + "CUDNN_DATA_BFLOAT16", + "CUDNN_CTC_ZERO_OOB_GRADIENTS", + "CUDNN_CTC_SKIP_OOB_GRADIENTS", + "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", + "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", + "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", + "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", + "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", + "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", + "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", + "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", + "CUDNN_BATCH_NORM", + "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", + "CUDNN_BATCHNORM_OPS_BN", + "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", + "CUDNN_BACKEND_TENSOR_DESCRIPTOR", + "CUDNN_BACKEND_RNG_DESCRIPTOR", + "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", + "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", + "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", + "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", + 
"CUDNN_BACKEND_MATMUL_DESCRIPTOR", + "CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR", + "CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR", + "CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR", + "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", + "CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR", + "CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR", + "CUDNN_BACKEND_ENGINE_DESCRIPTOR", + "CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR", + "CUDNN_BACKEND_ENGINECFG_DESCRIPTOR", + "CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", + "CUDNN_ATTR_VARIANT_PACK_WORKSPACE", + "CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", + "CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", + "CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", + "CUDNN_ATTR_TENSOR_VECTOR_COUNT", + "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", + "CUDNN_ATTR_TENSOR_UNIQUE_ID", + "CUDNN_ATTR_TENSOR_STRIDES", + "CUDNN_ATTR_TENSOR_REORDERING_MODE", + "CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC", + "CUDNN_ATTR_TENSOR_IS_VIRTUAL", + "CUDNN_ATTR_TENSOR_IS_BY_VALUE", + "CUDNN_ATTR_TENSOR_DIMENSIONS", + "CUDNN_ATTR_TENSOR_DATA_TYPE", + "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", + "CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM", + "CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM", + "CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION", + "CUDNN_ATTR_RNG_NORMAL_DIST_MEAN", + "CUDNN_ATTR_RNG_DISTRIBUTION", + "CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY", + "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", + "CUDNN_ATTR_RESAMPLE_STRIDES", + "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", + "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", + "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", + "CUDNN_ATTR_RESAMPLE_PADDING_MODE", + "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", + "CUDNN_ATTR_RESAMPLE_MODE", + "CUDNN_ATTR_RESAMPLE_COMP_TYPE", + "CUDNN_ATTR_REDUCTION_OPERATOR", + "CUDNN_ATTR_REDUCTION_COMP_TYPE", + "CUDNN_ATTR_POINTWISE_SWISH_BETA", + "CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", + "CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP", + "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE", + "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP", + "CUDNN_ATTR_POINTWISE_NAN_PROPAGATION", + "CUDNN_ATTR_POINTWISE_MODE", + "CUDNN_ATTR_POINTWISE_MATH_PREC", + 
"CUDNN_ATTR_POINTWISE_ELU_ALPHA", + "CUDNN_ATTR_POINTWISE_AXIS", + "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", + "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", + "CUDNN_ATTR_OPERATION_SIGNAL_MODE", + "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", + "CUDNN_ATTR_OPERATION_RNG_YDESC", + "CUDNN_ATTR_OPERATION_RNG_SEED", + "CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC", + "CUDNN_ATTR_OPERATION_RNG_DESC", + "CUDNN_ATTR_OPERATION_RESHAPE_YDESC", + "CUDNN_ATTR_OPERATION_RESHAPE_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", + "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", + "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", + "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", + "CUDNN_ATTR_OPERATION_REDUCTION_DESC", + "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", + "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", + "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", + "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", + "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", + 
"CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", + "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", + "CUDNN_ATTR_OPERATION_NORM_BWD_MODE", + "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", + "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", + "CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_DESC", + "CUDNN_ATTR_OPERATION_MATMUL_CDESC", + "CUDNN_ATTR_OPERATION_MATMUL_BDESC", + "CUDNN_ATTR_OPERATION_MATMUL_ADESC", + "CUDNN_ATTR_OPERATION_GENSTATS_XDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC", + "CUDNN_ATTR_OPERATION_GENSTATS_MODE", + "CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA", + 
"CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", + "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", + "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", + "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", + "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", + "CUDNN_ATTR_OPERATION_CONCAT_AXIS", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC", + 
"CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC", + "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC", + "CUDNN_ATTR_OPERATIONGRAPH_OPS", + "CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY", + "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", + "CUDNN_ATTR_OPERATIONGRAPH_HANDLE", + "CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", + "CUDNN_ATTR_MATMUL_PADDING_VALUE", + "CUDNN_ATTR_MATMUL_COMP_TYPE", + "CUDNN_ATTR_LAYOUT_INFO_TYPES", + "CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID", + "CUDNN_ATTR_KNOB_INFO_TYPE", + "CUDNN_ATTR_KNOB_INFO_STRIDE", + "CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE", + "CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE", + "CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE", + "CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE", + "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", + "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", + "CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", + "CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", + "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", + "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", + "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", + "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", + "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", + "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", + "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", + "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", + "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", + "CUDNN_ATTR_ENGINE_SM_COUNT_TARGET", + "CUDNN_ATTR_ENGINE_OPERATION_GRAPH", + "CUDNN_ATTR_ENGINE_NUMERICAL_NOTE", + "CUDNN_ATTR_ENGINE_LAYOUT_INFO", + "CUDNN_ATTR_ENGINE_KNOB_INFO", + "CUDNN_ATTR_ENGINE_GLOBAL_INDEX", + "CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE", + "CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET", + "CUDNN_ATTR_ENGINEHEUR_RESULTS", + "CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH", + 
"CUDNN_ATTR_ENGINEHEUR_MODE", + "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", + "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", + "CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", + "CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", + "CUDNN_ATTR_ENGINECFG_ENGINE", + "CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS", + "CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS", + "CUDNN_ATTR_CONVOLUTION_POST_PADDINGS", + "CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES", + "CUDNN_ATTR_CONVOLUTION_DILATIONS", + "CUDNN_ATTR_CONVOLUTION_CONV_MODE", + "CUDNN_ATTR_CONVOLUTION_COMP_TYPE", + "CUDNN_ATTN_WKIND_COUNT", + "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", + "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", + "CUDNN_ATTN_ENABLE_PROJ_BIASES", + "CUDNN_ATTN_DISABLE_PROJ_BIASES" +); + sub warnHipDNNOnlyUnsupportedFunctions { my $line_num = shift; my $k = 0; - foreach $func ( - "cudnnWgradMode_t", - "cudnnTransformTensorEx", - "cudnnTransformTensor", - "cudnnTransformFilter", - "cudnnTensorTransformStruct", - "cudnnTensorTransformDescriptor_t", - "cudnnTensorStruct", - "cudnnSpatialTransformerStruct", - "cudnnSpatialTransformerDescriptor_t", - "cudnnSpatialTfSamplerForward", - "cudnnSpatialTfSamplerBackward", - "cudnnSpatialTfGridGeneratorForward", - "cudnnSpatialTfGridGeneratorBackward", - "cudnnSignalMode_t", - "cudnnSeverity_t", - "cudnnSetTensorTransformDescriptor", - "cudnnSetTensorNdDescriptorEx", - "cudnnSetSpatialTransformerNdDescriptor", - "cudnnSetSeqDataDescriptor", - "cudnnSetRNNProjectionLayers", - "cudnnSetRNNPaddingMode", - "cudnnSetRNNMatrixMathType", - "cudnnSetRNNDescriptor_v8", - "cudnnSetRNNDataDescriptor", - "cudnnSetRNNBiasMode", - "cudnnSetRNNAlgorithmDescriptor", - "cudnnSetFusedOpsVariantParamPackAttribute", - "cudnnSetFusedOpsConstParamPackAttribute", - "cudnnSetConvolutionReorderType", - "cudnnSetCallback", - "cudnnSetCTCLossDescriptor_v9", - "cudnnSetCTCLossDescriptor_v8", - "cudnnSetCTCLossDescriptorEx", - "cudnnSetCTCLossDescriptor", - "cudnnSetAttnDescriptor", - "cudnnSetAlgorithmPerformance", - "cudnnSetAlgorithmDescriptor", - 
"cudnnSetActivationDescriptorSwishBeta", - "cudnnSeqDataStruct", - "cudnnSeqDataDescriptor_t", - "cudnnSeqDataAxis_t", - "cudnnSaveAlgorithm", - "cudnnSamplerType_t", - "cudnnRuntimeTag_t", - "cudnnRngDistribution_t", - "cudnnRestoreDropoutDescriptor", - "cudnnRestoreAlgorithm", - "cudnnResampleMode_t", - "cudnnReorderType_t", - "cudnnReorderFilterAndBias", - "cudnnReduceTensorStruct", - "cudnnRNNStruct", - "cudnnRNNSetClip_v9", - "cudnnRNNSetClip_v8", - "cudnnRNNSetClip", - "cudnnRNNPaddingMode_t", - "cudnnRNNGetClip_v9", - "cudnnRNNGetClip_v8", - "cudnnRNNGetClip", - "cudnnRNNForwardTrainingEx", - "cudnnRNNForwardInferenceEx", - "cudnnRNNForward", - "cudnnRNNDataStruct", - "cudnnRNNDataLayout_t", - "cudnnRNNDataDescriptor_t", - "cudnnRNNClipMode_t", - "cudnnRNNBackwardWeights_v8", - "cudnnRNNBackwardWeightsEx", - "cudnnRNNBackwardData_v8", - "cudnnRNNBackwardDataEx", - "cudnnQueryRuntimeError", - "cudnnPoolingStruct", - "cudnnPointwiseMode_t", - "cudnnPersistentRNNPlan", - "cudnnPaddingMode_t", - "cudnnOpsVersionCheck", - "cudnnOpsTrainVersionCheck", - "cudnnOpsInferVersionCheck", - "cudnnOpTensorStruct", - "cudnnNormalizationForwardTraining", - "cudnnNormalizationForwardInference", - "cudnnNormalizationBackward", - "cudnnNormOps_t", - "cudnnNormMode_t", - "cudnnNormAlgo_t", - "cudnnMultiHeadAttnWeightKind_t", - "cudnnMultiHeadAttnForward", - "cudnnMultiHeadAttnBackwardWeights", - "cudnnMultiHeadAttnBackwardData", - "cudnnMakeFusedOpsPlan", - "cudnnLossNormalizationMode_t", - "cudnnLRNStruct", - "cudnnInitTransformDest", - "cudnnIm2Col", - "cudnnGraphVersionCheck", - "cudnnGetTensorTransformDescriptor", - "cudnnGetTensorSizeInBytes", - "cudnnGetSeqDataDescriptor", - "cudnnGetReductionIndicesSize", - "cudnnGetRNNWeightSpaceSize", - "cudnnGetRNNWeightParams", - "cudnnGetRNNTempSpaceSizes", - "cudnnGetRNNProjectionLayers", - "cudnnGetRNNPaddingMode", - "cudnnGetRNNMatrixMathType", - "cudnnGetRNNForwardTrainingAlgorithmMaxCount", - 
"cudnnGetRNNForwardInferenceAlgorithmMaxCount", - "cudnnGetRNNDescriptor_v8", - "cudnnGetRNNDescriptor_v6", - "cudnnGetRNNDataDescriptor", - "cudnnGetRNNBiasMode", - "cudnnGetRNNBackwardWeightsAlgorithmMaxCount", - "cudnnGetRNNBackwardDataAlgorithmMaxCount", - "cudnnGetProperty", - "cudnnGetPoolingNdForwardOutputDim", - "cudnnGetPoolingNdDescriptor", - "cudnnGetNormalizationTrainingReserveSpaceSize", - "cudnnGetNormalizationForwardTrainingWorkspaceSize", - "cudnnGetNormalizationBackwardWorkspaceSize", - "cudnnGetMultiHeadAttnWeights", - "cudnnGetMultiHeadAttnBuffers", - "cudnnGetMaxDeviceVersion", - "cudnnGetLastErrorString", - "cudnnGetFusedOpsVariantParamPackAttribute", - "cudnnGetFusedOpsConstParamPackAttribute", - "cudnnGetFoldedConvBackwardDataDescriptors", - "cudnnGetFilterSizeInBytes", - "cudnnGetDropoutDescriptor", - "cudnnGetCudartVersion", - "cudnnGetConvolutionReorderType", - "cudnnGetConvolutionNdForwardOutputDim", - "cudnnGetConvolutionNdDescriptor", - "cudnnGetConvolutionMathType", - "cudnnGetConvolutionGroupCount", - "cudnnGetConvolutionForwardAlgorithm_v7", - "cudnnGetConvolutionForwardAlgorithmMaxCount", - "cudnnGetConvolutionBackwardFilterAlgorithm_v7", - "cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", - "cudnnGetConvolutionBackwardDataAlgorithm_v7", - "cudnnGetConvolutionBackwardDataAlgorithmMaxCount", - "cudnnGetCallback", - "cudnnGetCTCLossWorkspaceSize_v8", - "cudnnGetCTCLossWorkspaceSize", - "cudnnGetCTCLossDescriptor_v9", - "cudnnGetCTCLossDescriptor_v8", - "cudnnGetCTCLossDescriptorEx", - "cudnnGetCTCLossDescriptor", - "cudnnGetBatchNormalizationTrainingExReserveSpaceSize", - "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", - "cudnnGetBatchNormalizationBackwardExWorkspaceSize", - "cudnnGetAttnDescriptor", - "cudnnGetAlgorithmSpaceSize", - "cudnnGetAlgorithmPerformance", - "cudnnGetAlgorithmDescriptor", - "cudnnGetActivationDescriptorSwishBeta", - "cudnnGenStatsMode_t", - "cudnnFusedOps_t", - 
"cudnnFusedOpsVariantParamStruct", - "cudnnFusedOpsVariantParamPack_t", - "cudnnFusedOpsVariantParamLabel_t", - "cudnnFusedOpsPointerPlaceHolder_t", - "cudnnFusedOpsPlan_t", - "cudnnFusedOpsPlanStruct", - "cudnnFusedOpsExecute", - "cudnnFusedOpsConstParamStruct", - "cudnnFusedOpsConstParamPack_t", - "cudnnFusedOpsConstParamLabel_t", - "cudnnFraction_t", - "cudnnFractionStruct", - "cudnnForwardMode_t", - "cudnnFoldingDirection_t", - "cudnnFindRNNForwardTrainingAlgorithmEx", - "cudnnFindRNNForwardInferenceAlgorithmEx", - "cudnnFindRNNBackwardWeightsAlgorithmEx", - "cudnnFindRNNBackwardDataAlgorithmEx", - "cudnnFilterStruct", - "cudnnErrQueryMode_t", - "cudnnDropoutStruct", - "cudnnDropoutGetReserveSpaceSize", - "cudnnDropoutForward", - "cudnnDropoutBackward", - "cudnnDivisiveNormalizationForward", - "cudnnDivisiveNormalizationBackward", - "cudnnDivNormMode_t", - "cudnnDeterminism_t", - "cudnnDestroyTensorTransformDescriptor", - "cudnnDestroySpatialTransformerDescriptor", - "cudnnDestroySeqDataDescriptor", - "cudnnDestroyRNNDataDescriptor", - "cudnnDestroyFusedOpsVariantParamPack", - "cudnnDestroyFusedOpsPlan", - "cudnnDestroyFusedOpsConstParamPack", - "cudnnDestroyCTCLossDescriptor", - "cudnnDestroyAttnDescriptor", - "cudnnDestroyAlgorithmPerformance", - "cudnnDestroyAlgorithmDescriptor", - "cudnnDeriveNormTensorDescriptor", - "cudnnDebug_t", - "cudnnDebugStruct", - "cudnnCreateTensorTransformDescriptor", - "cudnnCreateSpatialTransformerDescriptor", - "cudnnCreateSeqDataDescriptor", - "cudnnCreateRNNDataDescriptor", - "cudnnCreateFusedOpsVariantParamPack", - "cudnnCreateFusedOpsPlan", - "cudnnCreateFusedOpsConstParamPack", - "cudnnCreateCTCLossDescriptor", - "cudnnCreateAttnDescriptor", - "cudnnCreateAlgorithmPerformance", - "cudnnCreateAlgorithmDescriptor", - "cudnnCopyAlgorithmDescriptor", - "cudnnConvolutionStruct", - "cudnnConvolutionBiasActivationForward", - "cudnnContext", - "cudnnCnnTrainVersionCheck", - "cudnnCnnInferVersionCheck", - "cudnnCallback_t", - 
"cudnnCTCLoss_v8", - "cudnnCTCLossStruct", - "cudnnCTCLossDescriptor_t", - "cudnnCTCLossAlgo_t", - "cudnnCTCLoss", - "cudnnCTCGradMode_t", - "cudnnBuildRNNDynamic", - "cudnnBnFinalizeStatsMode_t", - "cudnnBatchNormalizationForwardTrainingEx", - "cudnnBatchNormalizationBackwardEx", - "cudnnBatchNormOps_t", - "cudnnBackendUpdateCudaGraph", - "cudnnBackendTensorReordering_t", - "cudnnBackendSetAttribute", - "cudnnBackendPopulateCudaGraph", - "cudnnBackendNumericalNote_t", - "cudnnBackendNormMode_t", - "cudnnBackendNormFwdPhase_t", - "cudnnBackendLayoutType_t", - "cudnnBackendKnobType_t", - "cudnnBackendInitialize", - "cudnnBackendHeurMode_t", - "cudnnBackendGetAttribute", - "cudnnBackendFinalize", - "cudnnBackendExecute", - "cudnnBackendDestroyDescriptor", - "cudnnBackendDescriptor_t", - "cudnnBackendDescriptorType_t", - "cudnnBackendCreateDescriptor", - "cudnnBackendBehaviorNote_t", - "cudnnBackendAttributeType_t", - "cudnnBackendAttributeName_t", - "cudnnAttnStruct", - "cudnnAttnQueryMap_t", - "cudnnAttnDescriptor_t", - "cudnnAlgorithm_t", - "cudnnAlgorithmUnionStruct", - "cudnnAlgorithmStruct", - "cudnnAlgorithmPerformance_t", - "cudnnAlgorithmPerformanceStruct", - "cudnnAlgorithmDescriptor_t", - "cudnnAdvVersionCheck", - "cudnnAdvTrainVersionCheck", - "cudnnAdvInferVersionCheck", - "cudnnActivationStruct", - "CUDNN_ZERO_PAD", - "CUDNN_WGRAD_MODE_SET", - "CUDNN_WGRAD_MODE_ADD", - "CUDNN_TYPE_VOID_PTR", - "CUDNN_TYPE_TENSOR_REORDERING_MODE", - "CUDNN_TYPE_SIGNAL_MODE", - "CUDNN_TYPE_RNG_DISTRIBUTION", - "CUDNN_TYPE_RESAMPLE_MODE", - "CUDNN_TYPE_REDUCTION_OPERATOR_TYPE", - "CUDNN_TYPE_POINTWISE_MODE", - "CUDNN_TYPE_PADDING_MODE", - "CUDNN_TYPE_NUMERICAL_NOTE", - "CUDNN_TYPE_NORM_MODE", - "CUDNN_TYPE_NORM_FWD_PHASE", - "CUDNN_TYPE_NAN_PROPOGATION", - "CUDNN_TYPE_LAYOUT_TYPE", - "CUDNN_TYPE_KNOB_TYPE", - "CUDNN_TYPE_INT64", - "CUDNN_TYPE_INT32", - "CUDNN_TYPE_HEUR_MODE", - "CUDNN_TYPE_HANDLE", - "CUDNN_TYPE_GENSTATS_MODE", - "CUDNN_TYPE_FRACTION", - "CUDNN_TYPE_FLOAT", 
- "CUDNN_TYPE_DOUBLE", - "CUDNN_TYPE_DATA_TYPE", - "CUDNN_TYPE_CONVOLUTION_MODE", - "CUDNN_TYPE_CHAR", - "CUDNN_TYPE_BOOLEAN", - "CUDNN_TYPE_BN_FINALIZE_STATS_MODE", - "CUDNN_TYPE_BEHAVIOR_NOTE", - "CUDNN_TYPE_BACKEND_DESCRIPTOR", - "CUDNN_TYPE_ATTRIB_NAME", - "CUDNN_TRANSFORM_UNFOLD", - "CUDNN_TRANSFORM_FOLD", - "CUDNN_TENSOR_REORDERING_NONE", - "CUDNN_TENSOR_REORDERING_INT8x32", - "CUDNN_TENSOR_REORDERING_F16x16", - "CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", - "CUDNN_STATUS_VERSION_MISMATCH", - "CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH", - "CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED", - "CUDNN_STATUS_SPECIFIC_ERROR", - "CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH", - "CUDNN_STATUS_RUNTIME_IN_PROGRESS", - "CUDNN_STATUS_RUNTIME_FP_OVERFLOW", - "CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE", - "CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT", - "CUDNN_STATUS_NOT_SUPPORTED_SHAPE", - "CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING", - "CUDNN_STATUS_NOT_SUPPORTED_PADDING", - "CUDNN_STATUS_NOT_SUPPORTED_LAYOUT", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER", - "CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART", - "CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN", - "CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE", - "CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API", - "CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH", - "CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE", - "CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED", - "CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM", - "CUDNN_STATUS_FULL_ERROR_CODE", - "CUDNN_STATUS_EXECUTION_FAILED_CURAND", - "CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER", - "CUDNN_STATUS_EXECUTION_FAILED_CUDART", - "CUDNN_STATUS_EXECUTION_FAILED_CUBLAS", - "CUDNN_STATUS_DEPRECATED", - "CUDNN_STATUS_CATEGORY", - 
"CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT", - "CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND", - "CUDNN_STATUS_BAD_PARAM_NULL_POINTER", - "CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED", - "CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER", - "CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES", - "CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE", - "CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH", - "CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE", - "CUDNN_SIGNAL_WAIT", - "CUDNN_SIGNAL_SET", - "CUDNN_SEV_WARNING_EN", - "CUDNN_SEV_WARNING", - "CUDNN_SEV_INFO_EN", - "CUDNN_SEV_INFO", - "CUDNN_SEV_FATAL", - "CUDNN_SEV_ERROR_EN", - "CUDNN_SEV_ERROR", - "CUDNN_SEQDATA_VECT_DIM", - "CUDNN_SEQDATA_TIME_DIM", - "CUDNN_SEQDATA_DIM_COUNT", - "CUDNN_SEQDATA_BEAM_DIM", - "CUDNN_SEQDATA_BATCH_DIM", - "CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", - "CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", - "CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", - "CUDNN_SCALAR_DOUBLE_BN_EPSILON", - "CUDNN_SAMPLER_BILINEAR", - "CUDNN_RNN_PADDED_IO_ENABLED", - "CUDNN_RNN_PADDED_IO_DISABLED", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", - "CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", - "CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", - "CUDNN_RNN_CLIP_NONE", - "CUDNN_RNN_CLIP_MINMAX", - "CUDNN_RNN_ALGO_COUNT", - "CUDNN_RNG_DISTRIBUTION_UNIFORM", - "CUDNN_RNG_DISTRIBUTION_NORMAL", - "CUDNN_RNG_DISTRIBUTION_BERNOULLI", - "CUDNN_RMS_NORM", - "CUDNN_RESAMPLE_NEAREST", - "CUDNN_RESAMPLE_MAXPOOL", - "CUDNN_RESAMPLE_BILINEAR", - "CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING", - "CUDNN_RESAMPLE_AVGPOOL", - "CUDNN_PTR_ZDATA", - "CUDNN_PTR_YSUM", - "CUDNN_PTR_YSQSUM", - "CUDNN_PTR_YDATA", - "CUDNN_PTR_XDATA", - "CUDNN_PTR_WORKSPACE", - "CUDNN_PTR_WDATA", - "CUDNN_PTR_NULL", - "CUDNN_PTR_ELEM_ALIGNED", - "CUDNN_PTR_DZDATA", - "CUDNN_PTR_DYDATA", - "CUDNN_PTR_DXDATA", - "CUDNN_PTR_DWDATA", - "CUDNN_PTR_BN_Z_EQSCALE", - "CUDNN_PTR_BN_Z_EQBIAS", - 
"CUDNN_PTR_BN_SCALE", - "CUDNN_PTR_BN_SAVED_MEAN", - "CUDNN_PTR_BN_SAVED_INVSTD", - "CUDNN_PTR_BN_RUNNING_VAR", - "CUDNN_PTR_BN_RUNNING_MEAN", - "CUDNN_PTR_BN_EQSCALE", - "CUDNN_PTR_BN_EQBIAS", - "CUDNN_PTR_BN_DSCALE", - "CUDNN_PTR_BN_DBIAS", - "CUDNN_PTR_BN_BIAS", - "CUDNN_PTR_ACTIVATION_BITMASK", - "CUDNN_PTR_16B_ALIGNED", - "CUDNN_POINTWISE_TANH_FWD", - "CUDNN_POINTWISE_TANH_BWD", - "CUDNN_POINTWISE_TAN", - "CUDNN_POINTWISE_SWISH_FWD", - "CUDNN_POINTWISE_SWISH_BWD", - "CUDNN_POINTWISE_SUB", - "CUDNN_POINTWISE_SQRT", - "CUDNN_POINTWISE_SOFTPLUS_FWD", - "CUDNN_POINTWISE_SOFTPLUS_BWD", - "CUDNN_POINTWISE_SIN", - "CUDNN_POINTWISE_SIGMOID_FWD", - "CUDNN_POINTWISE_SIGMOID_BWD", - "CUDNN_POINTWISE_RSQRT", - "CUDNN_POINTWISE_RELU_FWD", - "CUDNN_POINTWISE_RELU_BWD", - "CUDNN_POINTWISE_RECIPROCAL", - "CUDNN_POINTWISE_POW", - "CUDNN_POINTWISE_NEG", - "CUDNN_POINTWISE_MUL", - "CUDNN_POINTWISE_MOD", - "CUDNN_POINTWISE_MIN", - "CUDNN_POINTWISE_MAX", - "CUDNN_POINTWISE_LOGICAL_OR", - "CUDNN_POINTWISE_LOGICAL_NOT", - "CUDNN_POINTWISE_LOGICAL_AND", - "CUDNN_POINTWISE_LOG", - "CUDNN_POINTWISE_IDENTITY", - "CUDNN_POINTWISE_GEN_INDEX", - "CUDNN_POINTWISE_GELU_FWD", - "CUDNN_POINTWISE_GELU_BWD", - "CUDNN_POINTWISE_GELU_APPROX_TANH_FWD", - "CUDNN_POINTWISE_GELU_APPROX_TANH_BWD", - "CUDNN_POINTWISE_FLOOR", - "CUDNN_POINTWISE_EXP", - "CUDNN_POINTWISE_ERF", - "CUDNN_POINTWISE_ELU_FWD", - "CUDNN_POINTWISE_ELU_BWD", - "CUDNN_POINTWISE_DIV", - "CUDNN_POINTWISE_COS", - "CUDNN_POINTWISE_CMP_NEQ", - "CUDNN_POINTWISE_CMP_LT", - "CUDNN_POINTWISE_CMP_LE", - "CUDNN_POINTWISE_CMP_GT", - "CUDNN_POINTWISE_CMP_GE", - "CUDNN_POINTWISE_CMP_EQ", - "CUDNN_POINTWISE_CEIL", - "CUDNN_POINTWISE_BINARY_SELECT", - "CUDNN_POINTWISE_ATAN2", - "CUDNN_POINTWISE_ADD_SQUARE", - "CUDNN_POINTWISE_ADD", - "CUDNN_POINTWISE_ABS", - "CUDNN_PARAM_ZDESC", - "CUDNN_PARAM_ZDATA_PLACEHOLDER", - "CUDNN_PARAM_YSUM_PLACEHOLDER", - "CUDNN_PARAM_YSTATS_DESC", - "CUDNN_PARAM_YSQSUM_PLACEHOLDER", - "CUDNN_PARAM_YDESC", - 
"CUDNN_PARAM_YDATA_PLACEHOLDER", - "CUDNN_PARAM_XDESC", - "CUDNN_PARAM_XDATA_PLACEHOLDER", - "CUDNN_PARAM_WDESC", - "CUDNN_PARAM_WDATA_PLACEHOLDER", - "CUDNN_PARAM_DZDESC", - "CUDNN_PARAM_DZDATA_PLACEHOLDER", - "CUDNN_PARAM_DYDESC", - "CUDNN_PARAM_DYDATA_PLACEHOLDER", - "CUDNN_PARAM_DXDESC", - "CUDNN_PARAM_DXDATA_PLACEHOLDER", - "CUDNN_PARAM_DWDESC", - "CUDNN_PARAM_DWDATA_PLACEHOLDER", - "CUDNN_PARAM_CONV_DESC", - "CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", - "CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", - "CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", - "CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", - "CUDNN_PARAM_BN_MODE", - "CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_EQSCALEBIAS_DESC", - "CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", - "CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", - "CUDNN_PARAM_BN_BIAS_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_DESC", - "CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", - "CUDNN_PARAM_ACTIVATION_BITMASK_DESC", - "CUDNN_OP_TENSOR_NOT", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4", - "CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13", - "CUDNN_NUMERICAL_NOTE_WINOGRAD", - "CUDNN_NUMERICAL_NOTE_TYPE_COUNT", - "CUDNN_NUMERICAL_NOTE_TENSOR_CORE", - "CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP", - "CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION", - "CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC", - "CUDNN_NUMERICAL_NOTE_FFT", - "CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS", - "CUDNN_NO_REORDER", - "CUDNN_NORM_PER_CHANNEL", - "CUDNN_NORM_PER_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ADD_ACTIVATION", - "CUDNN_NORM_OPS_NORM_ACTIVATION", - "CUDNN_NORM_OPS_NORM", - "CUDNN_NORM_FWD_TRAINING", - "CUDNN_NORM_FWD_INFERENCE", - "CUDNN_NORM_ALGO_STANDARD", - "CUDNN_NORM_ALGO_PERSIST", - "CUDNN_NON_DETERMINISTIC", - 
"CUDNN_NEG_INF_PAD", - "CUDNN_MH_ATTN_V_WEIGHTS", - "CUDNN_MH_ATTN_V_BIASES", - "CUDNN_MH_ATTN_Q_WEIGHTS", - "CUDNN_MH_ATTN_Q_BIASES", - "CUDNN_MH_ATTN_O_WEIGHTS", - "CUDNN_MH_ATTN_O_BIASES", - "CUDNN_MH_ATTN_K_WEIGHTS", - "CUDNN_MH_ATTN_K_BIASES", - "CUDNN_LRN_MIN_N", - "CUDNN_LRN_MIN_K", - "CUDNN_LRN_MIN_BETA", - "CUDNN_LRN_MAX_N", - "CUDNN_LOSS_NORMALIZATION_SOFTMAX", - "CUDNN_LOSS_NORMALIZATION_NONE", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK", - "CUDNN_LAYOUT_TYPE_PREFERRED_NHWC", - "CUDNN_LAYOUT_TYPE_PREFERRED_NCHW", - "CUDNN_LAYOUT_TYPE_COUNT", - "CUDNN_LAYER_NORM", - "CUDNN_KNOB_TYPE_WORKSPACE", - "CUDNN_KNOB_TYPE_WINO_TILE", - "CUDNN_KNOB_TYPE_USE_TEX", - "CUDNN_KNOB_TYPE_TILE_SIZE", - "CUDNN_KNOB_TYPE_TILE_ROWS", - "CUDNN_KNOB_TYPE_TILE_COLS", - "CUDNN_KNOB_TYPE_TILE_CGA_N", - "CUDNN_KNOB_TYPE_TILE_CGA_M", - "CUDNN_KNOB_TYPE_TILE_CGA", - "CUDNN_KNOB_TYPE_TILEK", - "CUDNN_KNOB_TYPE_SWIZZLE", - "CUDNN_KNOB_TYPE_STAGES", - "CUDNN_KNOB_TYPE_SPLIT_RS", - "CUDNN_KNOB_TYPE_SPLIT_K_SLC", - "CUDNN_KNOB_TYPE_SPLIT_K_BUF", - "CUDNN_KNOB_TYPE_SPLIT_K", - "CUDNN_KNOB_TYPE_SPLIT_H", - "CUDNN_KNOB_TYPE_SPLIT_COLS", - "CUDNN_KNOB_TYPE_SPECFILT", - "CUDNN_KNOB_TYPE_SLICED", - "CUDNN_KNOB_TYPE_SINGLEBUFFER", - "CUDNN_KNOB_TYPE_REDUCTION_MODE", - "CUDNN_KNOB_TYPE_OCCUPANCY", - "CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK", - "CUDNN_KNOB_TYPE_MULTIPLY", - "CUDNN_KNOB_TYPE_LOAD_SIZE", - "CUDNN_KNOB_TYPE_LDGC", - "CUDNN_KNOB_TYPE_LDGB", - "CUDNN_KNOB_TYPE_LDGA", - "CUDNN_KNOB_TYPE_KERNEL_CFG", - "CUDNN_KNOB_TYPE_KBLOCK", - "CUDNN_KNOB_TYPE_IDX_MODE", - "CUDNN_KNOB_TYPE_EDGE", - "CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE", - "CUDNN_KNOB_TYPE_COUNTS", - "CUDNN_KNOB_TYPE_CHUNK_K", - "CUDNN_KNOB_TYPE_BLOCK_SIZE", - "CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD", - "CUDNN_INSTANCE_NORM", - "CUDNN_HEUR_MODE_INSTANT", - "CUDNN_HEUR_MODE_FALLBACK", - "CUDNN_HEUR_MODE_B", - "CUDNN_HEUR_MODE_A", - "CUDNN_HEUR_MODES_COUNT", - "CUDNN_GROUP_NORM", - "CUDNN_GENSTATS_SUM_SQSUM", - 
"CUDNN_FWD_MODE_TRAINING", - "CUDNN_FWD_MODE_INFERENCE", - "CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", - "CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", - "CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", - "CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_FMA_MATH", - "CUDNN_ERRQUERY_RAWCODE", - "CUDNN_ERRQUERY_NONBLOCKING", - "CUDNN_ERRQUERY_BLOCKING", - "CUDNN_EDGE_VAL_PAD", - "CUDNN_DIVNORM_PRECOMPUTED_MEANS", - "CUDNN_DIM_MAX", - "CUDNN_DETERMINISTIC", - "CUDNN_DEFAULT_REORDER", - "CUDNN_DATA_UINT8x4", - "CUDNN_DATA_UINT8", - "CUDNN_DATA_INT8x32", - "CUDNN_DATA_INT64", - "CUDNN_DATA_FP8_E5M2", - "CUDNN_DATA_FP8_E4M3", - "CUDNN_DATA_FAST_FLOAT_FOR_FP8", - "CUDNN_DATA_BOOLEAN", - "CUDNN_DATA_BFLOAT16", - "CUDNN_CTC_ZERO_OOB_GRADIENTS", - "CUDNN_CTC_SKIP_OOB_GRADIENTS", - "CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", - "CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", - "CUDNN_BN_FINALIZE_STATISTICS_TRAINING", - "CUDNN_BN_FINALIZE_STATISTICS_INFERENCE", - "CUDNN_BEHAVIOR_NOTE_TYPE_COUNT", - "CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API", - "CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER", - "CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER", - "CUDNN_BATCH_NORM", - "CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN_ACTIVATION", - "CUDNN_BATCHNORM_OPS_BN", - "CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR", - "CUDNN_BACKEND_TENSOR_DESCRIPTOR", - "CUDNN_BACKEND_RNG_DESCRIPTOR", - "CUDNN_BACKEND_RESAMPLE_DESCRIPTOR", - "CUDNN_BACKEND_REDUCTION_DESCRIPTOR", - "CUDNN_BACKEND_POINTWISE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR", - 
"CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR", - "CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR", - "CUDNN_BACKEND_MATMUL_DESCRIPTOR", - "CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR", - "CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR", - "CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR", - "CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR", - "CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR", - "CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR", - "CUDNN_BACKEND_ENGINE_DESCRIPTOR", - "CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR", - "CUDNN_BACKEND_ENGINECFG_DESCRIPTOR", - "CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR", - "CUDNN_ATTR_VARIANT_PACK_WORKSPACE", - "CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS", - "CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES", - "CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS", - "CUDNN_ATTR_TENSOR_VECTOR_COUNT", - "CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION", - "CUDNN_ATTR_TENSOR_UNIQUE_ID", - "CUDNN_ATTR_TENSOR_STRIDES", - "CUDNN_ATTR_TENSOR_REORDERING_MODE", - "CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC", - "CUDNN_ATTR_TENSOR_IS_VIRTUAL", - "CUDNN_ATTR_TENSOR_IS_BY_VALUE", - "CUDNN_ATTR_TENSOR_DIMENSIONS", - "CUDNN_ATTR_TENSOR_DATA_TYPE", - "CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT", - "CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM", - "CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM", - "CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION", - "CUDNN_ATTR_RNG_NORMAL_DIST_MEAN", - "CUDNN_ATTR_RNG_DISTRIBUTION", - 
"CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY", - "CUDNN_ATTR_RESAMPLE_WINDOW_DIMS", - "CUDNN_ATTR_RESAMPLE_STRIDES", - "CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS", - "CUDNN_ATTR_RESAMPLE_PRE_PADDINGS", - "CUDNN_ATTR_RESAMPLE_POST_PADDINGS", - "CUDNN_ATTR_RESAMPLE_PADDING_MODE", - "CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION", - "CUDNN_ATTR_RESAMPLE_MODE", - "CUDNN_ATTR_RESAMPLE_COMP_TYPE", - "CUDNN_ATTR_REDUCTION_OPERATOR", - "CUDNN_ATTR_REDUCTION_COMP_TYPE", - "CUDNN_ATTR_POINTWISE_SWISH_BETA", - "CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA", - "CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP", - "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE", - "CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP", - "CUDNN_ATTR_POINTWISE_NAN_PROPAGATION", - "CUDNN_ATTR_POINTWISE_MODE", - "CUDNN_ATTR_POINTWISE_MATH_PREC", - "CUDNN_ATTR_POINTWISE_ELU_ALPHA", - "CUDNN_ATTR_POINTWISE_AXIS", - "CUDNN_ATTR_OPERATION_SIGNAL_YDESC", - "CUDNN_ATTR_OPERATION_SIGNAL_XDESC", - "CUDNN_ATTR_OPERATION_SIGNAL_VALUE", - "CUDNN_ATTR_OPERATION_SIGNAL_MODE", - "CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC", - "CUDNN_ATTR_OPERATION_RNG_YDESC", - "CUDNN_ATTR_OPERATION_RNG_SEED", - "CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC", - "CUDNN_ATTR_OPERATION_RNG_DESC", - "CUDNN_ATTR_OPERATION_RESHAPE_YDESC", - "CUDNN_ATTR_OPERATION_RESHAPE_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA", - "CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA", - "CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA", - "CUDNN_ATTR_OPERATION_REDUCTION_YDESC", - "CUDNN_ATTR_OPERATION_REDUCTION_XDESC", - 
"CUDNN_ATTR_OPERATION_REDUCTION_DESC", - "CUDNN_ATTR_OPERATION_POINTWISE_YDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_XDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_TDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR", - "CUDNN_ATTR_OPERATION_POINTWISE_DYDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_DXDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_BDESC", - "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2", - "CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC", - "CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_YDESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_XDESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_PHASE", - "CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS", - "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_MODE", - "CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_XDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS", - "CUDNN_ATTR_OPERATION_NORM_BWD_MODE", - "CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC", - "CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT", - 
"CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_DESC", - "CUDNN_ATTR_OPERATION_MATMUL_CDESC", - "CUDNN_ATTR_OPERATION_MATMUL_BDESC", - "CUDNN_ATTR_OPERATION_MATMUL_ADESC", - "CUDNN_ATTR_OPERATION_GENSTATS_XDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC", - "CUDNN_ATTR_OPERATION_GENSTATS_MODE", - "CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA", - "CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA", - "CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC", - "CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS", - "CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX", - "CUDNN_ATTR_OPERATION_CONCAT_AXIS", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC", - 
"CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC", - "CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC", - "CUDNN_ATTR_OPERATIONGRAPH_OPS", - "CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY", - "CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED", - "CUDNN_ATTR_OPERATIONGRAPH_HANDLE", - "CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT", - "CUDNN_ATTR_MATMUL_PADDING_VALUE", - "CUDNN_ATTR_MATMUL_COMP_TYPE", - "CUDNN_ATTR_LAYOUT_INFO_TYPES", - "CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID", - "CUDNN_ATTR_KNOB_INFO_TYPE", - "CUDNN_ATTR_KNOB_INFO_STRIDE", - "CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE", - "CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE", - "CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE", - "CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE", - "CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH", - "CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED", - "CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID", - "CUDNN_ATTR_INTERMEDIATE_INFO_SIZE", - 
"CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS", - "CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES", - "CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE", - "CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS", - "CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE", - "CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION", - "CUDNN_ATTR_EXECUTION_PLAN_HANDLE", - "CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG", - "CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS", - "CUDNN_ATTR_ENGINE_SM_COUNT_TARGET", - "CUDNN_ATTR_ENGINE_OPERATION_GRAPH", - "CUDNN_ATTR_ENGINE_NUMERICAL_NOTE", - "CUDNN_ATTR_ENGINE_LAYOUT_INFO", - "CUDNN_ATTR_ENGINE_KNOB_INFO", - "CUDNN_ATTR_ENGINE_GLOBAL_INDEX", - "CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE", - "CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET", - "CUDNN_ATTR_ENGINEHEUR_RESULTS", - "CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH", - "CUDNN_ATTR_ENGINEHEUR_MODE", - "CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE", - "CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED", - "CUDNN_ATTR_ENGINECFG_KNOB_CHOICES", - "CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO", - "CUDNN_ATTR_ENGINECFG_ENGINE", - "CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS", - "CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS", - "CUDNN_ATTR_CONVOLUTION_POST_PADDINGS", - "CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES", - "CUDNN_ATTR_CONVOLUTION_DILATIONS", - "CUDNN_ATTR_CONVOLUTION_CONV_MODE", - "CUDNN_ATTR_CONVOLUTION_COMP_TYPE", - "CUDNN_ATTN_WKIND_COUNT", - "CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", - "CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", - "CUDNN_ATTN_ENABLE_PROJ_BIASES", - "CUDNN_ATTN_DISABLE_PROJ_BIASES" - ) + while (my($func) = each %HipDNNOnlyUnsupportedFunctions) { my $mt = m/($func)/g; if ($mt) { diff --git a/src/CUDA2HIP_Perl.cpp b/src/CUDA2HIP_Perl.cpp index a143f09c..7f398fa3 100644 --- a/src/CUDA2HIP_Perl.cpp +++ b/src/CUDA2HIP_Perl.cpp @@ -87,8 +87,10 @@ namespace perl { const string unless_ = "unless "; const string foreach = "foreach "; const string foreach_func = foreach + "$func (\n"; + const string while_func = while_ + "(my($func) = each %"; const string print = "print 
STDERR "; const string printf = "printf STDERR "; + const string warn = "warn"; const string no_warns = "no warnings qw/uninitialized/;"; const string hipify_perl = "hipify-perl"; const string warning = "$fileName:$line_num: warning: "; @@ -96,11 +98,16 @@ namespace perl { const string sWarnExperimentalFunctions = "warnExperimentalFunctions"; const string sWarnDeprecatedFunctions = "warnDeprecatedFunctions"; const string sWarnRemovedFunctions = "warnRemovedFunctions"; - const string sWarnRocOnlyUnsupportedFunctions = "warnRocOnlyUnsupportedFunctions"; - const string sWarnMIOpenOnlyUnsupportedFunctions = "warnMIOpenOnlyUnsupportedFunctions"; - const string sWarnHipOnlyUnsupportedFunctions = "warnHipOnlyUnsupportedFunctions"; - const string sWarnHipDNNOnlyUnsupportedFunctions = "warnHipDNNOnlyUnsupportedFunctions"; - const string sWarnUnsupportedDeviceFunctions = "warnUnsupportedDeviceFunctions"; + const string sRocOnlyUnsupportedFunctions = "RocOnlyUnsupportedFunctions"; + const string sWarnRocOnlyUnsupportedFunctions = warn + sRocOnlyUnsupportedFunctions; + const string sMIOpenOnlyUnsupportedFunctions = "MIOpenOnlyUnsupportedFunctions"; + const string sWarnMIOpenOnlyUnsupportedFunctions = warn + sMIOpenOnlyUnsupportedFunctions; + const string sHipOnlyUnsupportedFunctions = "HipOnlyUnsupportedFunctions"; + const string sWarnHipOnlyUnsupportedFunctions = warn + sHipOnlyUnsupportedFunctions; + const string sHipDNNOnlyUnsupportedFunctions = "HipDNNOnlyUnsupportedFunctions"; + const string sWarnHipDNNOnlyUnsupportedFunctions = warn + sHipDNNOnlyUnsupportedFunctions; + const string sUnsupportedDeviceFunctions = "UnsupportedDeviceFunctions"; + const string sWarnUnsupportedDeviceFunctions = warn + sUnsupportedDeviceFunctions; const string sSimpleSubstitutions = "simpleSubstitutions"; const string sRocSubstitutions = "rocSubstitutions"; const string sMIOpenSubstitutions = "MIOpenSubstitutions"; @@ -108,7 +115,9 @@ namespace perl { const string sExperimentalSubstitutions = 
"experimentalSubstitutions"; const string sTransformKernelLaunch = "transformKernelLaunch"; const string sTransformCubNamespace = "transformCubNamespace"; - const string sCountSupportedDeviceFunctions = "countSupportedDeviceFunctions"; + const string count = "count"; + const string sSupportedDeviceFunctions = "SupportedDeviceFunctions"; + const string sCountSupportedDeviceFunctions = count + sSupportedDeviceFunctions; const string sCudaDevice = "cudaDevice"; const string sCudaDeviceId = "cudaDeviceId"; @@ -595,17 +604,17 @@ namespace perl { } void generateDeprecatedAndUnsupportedFunctions(unique_ptr &streamPtr) { - stringstream sDeprecated, sRemoved, sRocUnsupported, sHipUnsupported, sMIOpenUnsupported, sHipDNNUnsupported, sExperimental, sCommon, sCommon1; + stringstream sDeprecated, sRemoved, sRocUnsupported, roc_unsupported, sHipUnsupported, hip_unsupported, sMIOpenUnsupported, miopen_unsupported, sHipDNNUnsupported, hipdnn_unsupported, sExperimental, sCommon, sCommon1; sCommon << tab << my << "$line_num = shift;" << endl; sCommon << tab << my_k << endl; string sWhile = "while (my($func, $val) = each "; sExperimental << endl << sub << sWarnExperimentalFunctions << " {" << endl << sCommon.str() << tab << sWhile << "%experimental_funcs)" << endl; sDeprecated << endl << sub << sWarnDeprecatedFunctions << " {" << endl << sCommon.str() << tab << sWhile << "%deprecated_funcs)" << endl; sRemoved << endl << sub << sWarnRemovedFunctions << " {" << endl << sCommon.str() << tab << sWhile << "%removed_funcs)" << endl; - sRocUnsupported << endl << sub << sWarnRocOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << foreach_func; - sMIOpenUnsupported << endl << sub << sWarnMIOpenOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << foreach_func; - sHipUnsupported << endl << sub << sWarnHipOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << foreach_func; - sHipDNNUnsupported << endl << sub << sWarnHipDNNOnlyUnsupportedFunctions << " 
{" << endl << sCommon.str() << tab << foreach_func; + sRocUnsupported << endl << sub << sWarnRocOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << while_func << sRocOnlyUnsupportedFunctions << ")\n"; + sMIOpenUnsupported << endl << sub << sWarnMIOpenOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << while_func << sMIOpenOnlyUnsupportedFunctions << ")\n"; + sHipUnsupported << endl << sub << sWarnHipOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << while_func << sHipOnlyUnsupportedFunctions << ")\n"; + sHipDNNUnsupported << endl << sub << sWarnHipDNNOnlyUnsupportedFunctions << " {" << endl << sCommon.str() << tab << while_func << sHipDNNOnlyUnsupportedFunctions << ")\n"; unsigned countRocOnlyUnsupported = 0, countHipOnlyUnsupported = 0, countMIOpenOnlyUnsupported = 0, countHipDNNOnlyUnsupported = 0; bool bTranslateToRoc = TranslateToRoc; bool bTranslateToMIOpen = TranslateToMIOpen; @@ -613,14 +622,14 @@ namespace perl { TranslateToRoc = false; if (Statistics::isUnsupported(ma->second)) { if (ma->second.apiType == API_BLAS || ma->second.apiType == API_SPARSE || ma->second.apiType == API_RAND || ma->second.apiType == API_TENSOR) { - sHipUnsupported << (countHipOnlyUnsupported ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; + hip_unsupported << (countHipOnlyUnsupported ? ",\n" : "") << tab << "\"" << ma->first.str() << "\""; countHipOnlyUnsupported++; } } TranslateToRoc = true; if (Statistics::isUnsupported(ma->second)) { if (ma->second.apiType == API_BLAS || ma->second.apiType == API_SPARSE || ma->second.apiType == API_RAND || ma->second.apiType == API_TENSOR) { - sRocUnsupported << (countRocOnlyUnsupported ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; + roc_unsupported << (countRocOnlyUnsupported ? 
",\n" : "") << tab << "\"" << ma->first.str() << "\""; countRocOnlyUnsupported++; } } @@ -628,14 +637,14 @@ namespace perl { TranslateToMIOpen = true; if (Statistics::isUnsupported(ma->second)) { if (ma->second.apiType == API_DNN) { - sMIOpenUnsupported << (countMIOpenOnlyUnsupported ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; + miopen_unsupported << (countMIOpenOnlyUnsupported ? ",\n" : "") << tab << "\"" << ma->first.str() << "\""; countMIOpenOnlyUnsupported++; } } TranslateToMIOpen = false; if (Statistics::isUnsupported(ma->second)) { if (ma->second.apiType == API_DNN) { - sHipDNNUnsupported << (countHipDNNOnlyUnsupported ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; + hipdnn_unsupported << (countHipDNNOnlyUnsupported ? ",\n" : "") << tab << "\"" << ma->first.str() << "\""; countHipDNNOnlyUnsupported++; } } @@ -643,10 +652,10 @@ namespace perl { TranslateToRoc = bTranslateToRoc; TranslateToMIOpen = bTranslateToMIOpen; sCommon.str(std::string()); - sHipUnsupported << endl_tab << ")" << endl; - sRocUnsupported << endl_tab << ")" << endl; - sMIOpenUnsupported << endl_tab << ")" << endl; - sHipDNNUnsupported << endl_tab << ")" << endl; + hip_unsupported << endl << ");" << endl; + roc_unsupported << endl << ");" << endl; + miopen_unsupported << endl << ");" << endl; + hipdnn_unsupported << endl << ");" << endl; sCommon << tab << "{" << endl; sCommon << tab_2 << my << "$mt = m/($func)/g;" << endl; sCommon << tab_2 << "if ($mt) {" << endl; @@ -675,9 +684,13 @@ namespace perl { *streamPtr.get() << sExperimental.str(); *streamPtr.get() << sDeprecated.str(); *streamPtr.get() << sRemoved.str(); + *streamPtr.get() << "\n@" << sHipOnlyUnsupportedFunctions << " = (\n" << hip_unsupported.str(); *streamPtr.get() << sHipUnsupported.str(); + *streamPtr.get() << "\n@" << sRocOnlyUnsupportedFunctions << " = (\n" << roc_unsupported.str(); *streamPtr.get() << sRocUnsupported.str(); + *streamPtr.get() << "\n@" << sMIOpenOnlyUnsupportedFunctions << " = (\n" 
<< miopen_unsupported.str(); *streamPtr.get() << sMIOpenUnsupported.str(); + *streamPtr.get() << "\n@" << sHipDNNOnlyUnsupportedFunctions << " = (\n" << hipdnn_unsupported.str(); *streamPtr.get() << sHipDNNUnsupported.str(); } @@ -688,18 +701,20 @@ namespace perl { stringstream sUnsupported; for (auto ma = CUDA_DEVICE_FUNCTION_MAP.rbegin(); ma != CUDA_DEVICE_FUNCTION_MAP.rend(); ++ma) { bool isUnsupported = Statistics::isUnsupported(ma->second); - (isUnsupported ? sUnsupported : sSupported) << ((isUnsupported && countUnsupported) || (!isUnsupported && countSupported) ? ",\n" : "") << tab_2 << "\"" << ma->first.str() << "\""; + (isUnsupported ? sUnsupported : sSupported) << ((isUnsupported && countUnsupported) || (!isUnsupported && countSupported) ? ",\n" : "") << tab << "\"" << ma->first.str() << "\""; if (isUnsupported) countUnsupported++; else countSupported++; } + stringstream supported; + stringstream unsupported; stringstream subCountSupported; stringstream subWarnUnsupported; stringstream subCommon; - string sCommon = tab + my_k + "\n" + tab + foreach_func; - subCountSupported << endl << sub << sCountSupportedDeviceFunctions << " {" << endl << (countSupported ? sCommon : tab + return_0); - subWarnUnsupported << endl << sub << sWarnUnsupportedDeviceFunctions << " {" << endl << (countUnsupported ? tab + my + "$line_num = shift;\n" + sCommon : tab + return_0); - if (countSupported) subCountSupported << sSupported.str() << endl_tab << ")" << endl; - if (countUnsupported) subWarnUnsupported << sUnsupported.str() << endl_tab << ")" << endl; + string sCommon = tab + my_k + "\n" + tab + while_func; + subCountSupported << endl << sub << sCountSupportedDeviceFunctions << " {" << endl << (countSupported ? sCommon + sSupportedDeviceFunctions + ")\n" : tab + return_0); + subWarnUnsupported << endl << sub << sWarnUnsupportedDeviceFunctions << " {" << endl << (countUnsupported ? 
tab + my + "$line_num = shift;\n" + sCommon + sUnsupportedDeviceFunctions + ")\n" : tab + return_0); + if (countSupported) supported << sSupported.str() << endl_tab << ");" << endl; + if (countUnsupported) unsupported << sUnsupported.str() << endl_tab << ");" << endl; if (countSupported || countUnsupported) { subCommon << tab << "{" << endl; subCommon << tab_2 << "# match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...));" << endl; @@ -719,7 +734,9 @@ namespace perl { if (countUnsupported) subWarnUnsupported << sCommon; subCountSupported << "}" << endl; subWarnUnsupported << "}" << endl; + *streamPtr.get() << "\n@" << sSupportedDeviceFunctions << " = (\n" << supported.str(); *streamPtr.get() << subCountSupported.str(); + *streamPtr.get() << "\n@" << sUnsupportedDeviceFunctions << " = (\n" << unsupported.str(); *streamPtr.get() << subWarnUnsupported.str(); }