Skip to content

Commit

Permalink
Relax FP8 gated activations requirements
Browse files Browse the repository at this point in the history
Expanded MXFP8 and FP8 tests coverage

Signed-off-by: Przemek Tredak <[email protected]>
  • Loading branch information
ptrendx committed Feb 1, 2025
1 parent f5f2872 commit 0e1ea26
Show file tree
Hide file tree
Showing 12 changed files with 440 additions and 324 deletions.
8 changes: 6 additions & 2 deletions tests/cpp/operator/test_act.cu
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,12 @@ void performTestGLU(const size_t N, const size_t H) {
ASSERT_EQ(err, cudaSuccess) << cudaGetErrorString(err);

if (otype == DType::kFloat8E4M3 || otype == DType::kFloat8E5M2) {
auto [atol_amax, rtol_amax] = getTolerances(DType::kFloat32);
compareResults("amax", output.amax(), ref_amax, atol_amax, rtol_amax);
auto [atol, rtol] = getTolerances(DType::kFloat32);
compareResults("amax", output.amax(), ref_amax, atol, rtol);
if (output.scaling_mode() == NVTE_DELAYED_TENSOR_SCALING) {
const float ref_scale = 1.f; // assuming input scale is 1.f
compareResults("scale_inv", *output.rowwise_cpu_scale_inv_ptr<float>(), ref_scale, atol, rtol);
}
}
auto [atol, rtol] = getTolerances(otype);
compareResults("output_gelu", output, ref_output.get(), atol, rtol);
Expand Down
40 changes: 22 additions & 18 deletions tests/cpp/operator/test_cast.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,31 @@ namespace {

template <typename InputType, typename OutputType>
// Reference cast: writes scale * data[i] cast to OutputType into output_c,
// and records the maximum absolute input value (amax) seen over the whole
// flattened tensor of `size` elements.
void compute_ref(const InputType *data, OutputType *output_c,
                 const size_t size,
                 float *amax, float scale) {
  using compute_t = float;
  // Negative sentinel below any |value|; amax is a max of fabsf() >= 0, so
  // any finite negative start works. (-1e100 would be an out-of-range
  // double->float conversion, which is undefined behavior.)
  compute_t current_max = -1e30f;
  for (size_t i = 0; i < size; ++i) {
    const compute_t current = static_cast<compute_t>(data[i]);
    current_max = fmaxf(current_max, fabsf(current));
    output_c[i] = OutputType(scale * current);
  }
  // NOTE(review): for size == 0 this leaves *amax at the negative sentinel —
  // callers are assumed to never pass an empty tensor.
  *amax = current_max;
}

template <typename InputType, typename OutputType>
void performTest(const size_t N, const size_t H) {
void performTest(const std::vector<size_t>& shape) {
using namespace test;

const size_t full_size = product(shape);

DType itype = TypeInfo<InputType>::dtype;
DType otype = TypeInfo<OutputType>::dtype;

Tensor input({ N, H }, itype);
Tensor output_c({ N, H }, otype);
Tensor input(shape, itype);
Tensor output_c(shape, otype);

std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(N * H);
std::unique_ptr<OutputType[]> ref_output_c = std::make_unique<OutputType[]>(full_size);

fillUniform(&input);
setRandomScale(&output_c);
Expand All @@ -56,7 +56,7 @@ void performTest(const size_t N, const size_t H) {

float ref_amax;
compute_ref<InputType, OutputType>(input.rowwise_cpu_dptr<InputType>(), ref_output_c.get(),
N, H, &ref_amax, output_c.scale());
full_size, &ref_amax, output_c.scale());

cudaDeviceSynchronize();
auto err = cudaGetLastError();
Expand All @@ -71,27 +71,29 @@ void performTest(const size_t N, const size_t H) {
compareResults("output_c", output_c, ref_output_c.get(), true, atol, rtol);
}

// Tensor shapes under test: 1-D, typical 2-D matrices, degenerate rows,
// non-multiple-of-16 trailing dimensions, and one N-D case.
std::vector<std::vector<size_t>> test_cases = {
  {16},
  {16000},
  {128, 128},
  {256, 256},
  {768, 1024},
  {256, 65536},
  {2048, 12288},
  {65536, 128},
  {65536, 160},
  {16384, 1616},
  {1, 128},
  {1, 1296},
  {1, 16},
  {5, 160},
  {5, 4, 3, 160},
  {217, 256},
};
} // namespace

class CastTestSuite : public ::testing::TestWithParam<std::tuple<transformer_engine::DType,
transformer_engine::DType,
std::pair<size_t, size_t>>> {};
std::vector<size_t>>> {};

TEST_P(CastTestSuite, TestCast) {
using namespace transformer_engine;
Expand All @@ -103,7 +105,7 @@ TEST_P(CastTestSuite, TestCast) {

TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(input_type, InputType,
TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(output_type, OutputType,
performTest<InputType, OutputType>(size.first, size.second);
performTest<InputType, OutputType>(size);
);
);
}
Expand All @@ -119,8 +121,10 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(test_cases)),
[](const testing::TestParamInfo<CastTestSuite::ParamType>& info) {
std::string name = test::typeName(std::get<0>(info.param)) + "X" +
test::typeName(std::get<1>(info.param)) + "X" +
std::to_string(std::get<2>(info.param).first) + "X" +
std::to_string(std::get<2>(info.param).second);
test::typeName(std::get<1>(info.param));
const auto& shape = std::get<2>(info.param);
for ( const auto& s: shape) {
name += "X" + std::to_string(s);
}
return name;
});
25 changes: 15 additions & 10 deletions tests/cpp/operator/test_cast_dbias.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,19 @@ void compute_ref_cast_dbias(const IT *input_h,
}

template <typename IType, typename OType>
void performTest(const size_t N, const size_t H) {
void performTest(const std::vector<size_t>& shape) {
using namespace test;
using CType = fp32;

DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype;

Tensor input({N, H}, itype);
const size_t N = first_dimension(shape);
const size_t H = last_dimension(shape);

Tensor output_c({N, H}, otype);
Tensor input(shape, itype);

Tensor output_c(shape, otype);
// dbias has the same data type with "output grad"
Tensor dbias({H}, itype);

Expand Down Expand Up @@ -117,20 +120,20 @@ void performTest(const size_t N, const size_t H) {
compareResults("output_dbias", dbias, ref_output_dbias.get(), true, atol_dbias, rtol_dbias);
}

// Tensor shapes under test: typical 2-D matrices, degenerate rows,
// non-multiple-of-16 trailing dimensions, and one N-D case (dbias reduces
// over all leading dimensions).
std::vector<std::vector<size_t>> test_cases = {
  {128, 128},
  {256, 256},
  {768, 1024},
  {256, 65536},
  {2048, 12288},
  {65536, 128},
  {65536, 160},
  {16384, 1616},
  {1, 128},
  {1, 1296},
  {1, 16},
  {5, 160},
  {5, 4, 3, 160},
  {217, 256},
};

Expand All @@ -139,7 +142,7 @@ std::vector<std::pair<size_t, size_t>> test_cases = {

class CastDBiasTestSuite : public ::testing::TestWithParam<std::tuple<transformer_engine::DType,
transformer_engine::DType,
std::pair<size_t, size_t>>> {};
std::vector<size_t>>> {};

TEST_P(CastDBiasTestSuite, TestCastDBias) {
using namespace transformer_engine;
Expand All @@ -155,7 +158,7 @@ TEST_P(CastDBiasTestSuite, TestCastDBias) {

TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(input_type, InputType,
TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(output_type, OutputType,
performTest<InputType, OutputType>(size.first, size.second);
performTest<InputType, OutputType>(size);
);
);
}
Expand All @@ -169,8 +172,10 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(test_cases)),
[](const testing::TestParamInfo<CastDBiasTestSuite::ParamType>& info) {
std::string name = test::typeName(std::get<0>(info.param)) + "X" +
test::typeName(std::get<1>(info.param)) + "X" +
std::to_string(std::get<2>(info.param).first) + "X" +
std::to_string(std::get<2>(info.param).second);
test::typeName(std::get<1>(info.param));
const auto& shape = std::get<2>(info.param);
for ( const auto& s: shape) {
name += "X" + std::to_string(s);
}
return name;
});
27 changes: 16 additions & 11 deletions tests/cpp/operator/test_cast_dbias_dgelu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,20 @@ void compute_ref_cast_dbias_dgelu(const IT *input,
}

template <typename IType, typename OType>
void performTest(const size_t N, const size_t H) {
void performTest(const std::vector<size_t>& shape) {
using namespace test;
using CType = fp32;

DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype;

Tensor input({N, H}, itype);
Tensor gelu_input({N, H}, itype);
const size_t N = first_dimension(shape);
const size_t H = last_dimension(shape);

Tensor output_c({N, H}, otype);
Tensor input(shape, itype);
Tensor gelu_input(shape, itype);

Tensor output_c(shape, otype);
// dbias has the same data type with "output grad"
Tensor dbias({H}, itype);

Expand Down Expand Up @@ -132,20 +135,20 @@ void performTest(const size_t N, const size_t H) {
compareResults("output_dbias", dbias, ref_output_dbias.get(), true, atol_dbias, rtol_dbias);
}

// Tensor shapes under test: typical 2-D matrices, degenerate rows,
// non-multiple-of-16 trailing dimensions, and one N-D case (dbias reduces
// over all leading dimensions).
std::vector<std::vector<size_t>> test_cases = {
  {128, 128},
  {256, 256},
  {768, 1024},
  {256, 65536},
  {2048, 12288},
  {65536, 128},
  {65536, 160},
  {16384, 1616},
  {1, 128},
  {1, 1296},
  {1, 16},
  {5, 160},
  {5, 4, 3, 160},
  {217, 256},
};

Expand All @@ -154,7 +157,7 @@ std::vector<std::pair<size_t, size_t>> test_cases = {

class CastDBiasDGeluTestSuite : public ::testing::TestWithParam<std::tuple<transformer_engine::DType,
transformer_engine::DType,
std::pair<size_t, size_t>>> {};
std::vector<size_t>>> {};

TEST_P(CastDBiasDGeluTestSuite, TestCastDBiasDgelu) {
using namespace transformer_engine;
Expand All @@ -170,7 +173,7 @@ TEST_P(CastDBiasDGeluTestSuite, TestCastDBiasDgelu) {

TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(input_type, InputType,
TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(output_type, OutputType,
performTest<InputType, OutputType>(size.first, size.second);
performTest<InputType, OutputType>(size);
);
);
}
Expand All @@ -184,8 +187,10 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(test_cases)),
[](const testing::TestParamInfo<CastDBiasDGeluTestSuite::ParamType>& info) {
std::string name = test::typeName(std::get<0>(info.param)) + "X" +
test::typeName(std::get<1>(info.param)) + "X" +
std::to_string(std::get<2>(info.param).first) + "X" +
std::to_string(std::get<2>(info.param).second);
test::typeName(std::get<1>(info.param));
const auto& shape = std::get<2>(info.param);
for ( const auto& s: shape) {
name += "X" + std::to_string(s);
}
return name;
});
47 changes: 32 additions & 15 deletions tests/cpp/operator/test_cast_gated_swiglu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,29 @@ void compute_ref_cast_dgated_swiglu(const IType * const grad,
}

template <typename IType, typename OType>
void performTest(const size_t rows, const size_t cols) {
void performTest(const std::vector<size_t>& shape) {
using namespace test;

DType itype = TypeInfo<IType>::dtype;
DType otype = TypeInfo<OType>::dtype;

Tensor grad({rows, cols}, itype);
Tensor input({rows, cols * 2}, itype);
Tensor output_c({rows, cols * 2}, otype);
std::vector<size_t> input_shape = shape;
input_shape[input_shape.size() - 1] *= 2;

const size_t input_size = product(input_shape);

const size_t rows = first_dimension(shape);
const size_t cols = last_dimension(shape);

Tensor grad(shape, itype);
Tensor input(input_shape, itype);
Tensor output_c(input_shape, otype);

fillUniform(&grad);
fillUniform(&input);
setRandomScale(&output_c);

std::unique_ptr<OType[]> ref_output_c = std::make_unique<OType[]>(rows * cols * 2);
std::unique_ptr<OType[]> ref_output_c = std::make_unique<OType[]>(input_size);

nvte_dswiglu(grad.data(), input.data(), output_c.data(), 0);
cudaDeviceSynchronize();
Expand Down Expand Up @@ -100,21 +108,28 @@ void performTest(const size_t rows, const size_t cols) {
compareResults("output_c", output_c, ref_output_c.get(), true, atol, rtol);
}

// Tensor shapes under test (the last dimension is doubled internally for the
// gated input): typical 2-D matrices, degenerate rows, non-multiple-of-16
// trailing dimensions, and one N-D case.
std::vector<std::vector<size_t>> test_cases = {
  {128, 128},
  {256, 256},
  {768, 1024},
  {256, 65536},
  {2048, 12288},
  {65536, 128},
  {65536, 160},
  {16384, 1616},
  {1, 128},
  {1, 1296},
  {1, 16},
  {5, 160},
  {5, 4, 3, 160},
  {217, 256},
};

} // namespace

class CastSwiGLUTestSuite
: public ::testing::TestWithParam<std::tuple<
transformer_engine::DType, transformer_engine::DType, std::pair<size_t, size_t>>> {};
transformer_engine::DType, transformer_engine::DType, std::vector<size_t>>> {};

TEST_P(CastSwiGLUTestSuite, TestCastSwiGLU) {
using namespace transformer_engine;
Expand All @@ -131,7 +146,7 @@ TEST_P(CastSwiGLUTestSuite, TestCastSwiGLU) {
TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(
input_type, InputType,
TRANSFORMER_ENGINE_TYPE_SWITCH_ALL(
output_type, OutputType, performTest<InputType, OutputType>(size.first, size.second);););
output_type, OutputType, performTest<InputType, OutputType>(size);););
}

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -142,8 +157,10 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(test_cases)),
[](const testing::TestParamInfo<CastSwiGLUTestSuite::ParamType> &info) {
std::string name = test::typeName(std::get<0>(info.param)) + "X" +
test::typeName(std::get<1>(info.param)) + "X" +
std::to_string(std::get<2>(info.param).first) + "X" +
std::to_string(std::get<2>(info.param).second);
test::typeName(std::get<1>(info.param));
const auto& shape = std::get<2>(info.param);
for ( const auto& s: shape) {
name += "X" + std::to_string(s);
}
return name;
});
Loading

0 comments on commit 0e1ea26

Please sign in to comment.