From 35c97787816951cf43bbcd2b6f74f2a49a34449c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 11 Feb 2025 22:54:15 +0000 Subject: [PATCH] ICU-22781 Support Arbitrary Constant Unit Formatting See #3381 --- icu4c/source/i18n/measunit_extra.cpp | 20 +++++++- icu4c/source/i18n/number_longnames.cpp | 50 +++++++++++++++++- icu4c/source/test/intltest/numbertest.h | 1 + icu4c/source/test/intltest/numbertest_api.cpp | 51 +++++++++++++++++++ .../java/com/ibm/icu/util/MeasureUnit.java | 14 ++++- 5 files changed, 131 insertions(+), 5 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index dcba02129a94..8eb9fe551670 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -1322,7 +1322,7 @@ void MeasureUnitImpl::serialize(UErrorCode &status) { return; } - if (this->singleUnits.length() == 0) { + if (this->singleUnits.length() == 0 && this->constantDenominator == 0) { // Dimensionless, constructed by the default constructor. return; } @@ -1497,9 +1497,25 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { impl.appendSingleUnit(*otherImpl.singleUnits[i], status); } - if (impl.singleUnits.length() > 1) { + + uint64_t currentConstatDenominator = this->getConstantDenominator(status); + uint64_t otherConstantDenominator = other.getConstantDenominator(status); + + // TODO: we can also multiply the constant denominators instead of returning an error. + if (currentConstatDenominator != 0 && otherConstantDenominator != 0) { + // There is only `one` constant denominator in a compound unit. + // Therefore, we Cannot multiply units that both of them have a constant denominator + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + + // Because either one of the constant denominators is zero, we can use the maximum of them. + impl.constantDenominator = uprv_max(currentConstatDenominator, otherConstantDenominator); + + if (impl.singleUnits.length() > 1 || impl.constantDenominator > 0) { impl.complexity = UMEASURE_UNIT_COMPOUND; } + return std::move(impl).build(status); } diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index de6aad7c3e8a..bf2617e773d0 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -48,8 +48,12 @@ constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; * Gender of the word, in languages with grammatical gender. */ constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; +/** + * Denominator constant of the unit. + */ +constexpr int32_t CONSTANT_DENOMINATOR_INDEX = StandardPlural::Form::COUNT + 3; // Number of keys in the array populated by PluralTableSink. -constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 4; // TODO(icu-units#28): load this list from resources, after creating a "&set" // function for use in ldml2icu rules. @@ -1010,6 +1014,11 @@ void LongNameHandler::forArbitraryUnit(const Locale &loc, // denominator (the part after the "-per-). If both are empty, fail MeasureUnitImpl unit; MeasureUnitImpl perUnit; + + if (unitRef.getConstantDenominator(status) != 0) { + perUnit.constantDenominator = unitRef.getConstantDenominator(status); + } + { MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); if (U_FAILURE(status)) { @@ -1196,6 +1205,12 @@ void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, DerivedComponents derivedTimesCases(loc, "case", "times"); DerivedComponents derivedPowerCases(loc, "case", "power"); + if (productUnit.constantDenominator != 0) { + CharString constantString; + constantString.appendNumber(productUnit.constantDenominator, status); + outArray[CONSTANT_DENOMINATOR_INDEX] = UnicodeString::fromUTF8(constantString.toStringPiece()); + } + // 4. For each single_unit in product_unit for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); singleUnitIndex++) { @@ -1454,6 +1469,39 @@ void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, } } } + + // 5. Handling constant denominator if it exists. + if (productUnit.constantDenominator != 0) { + int32_t pluralIndex = -1; + for (int32_t index = 0; index < StandardPlural::Form::COUNT; index++) { + if (!outArray[index].isBogus()) { + pluralIndex = index; + break; + } + } + + U_ASSERT(pluralIndex >= 0); // "No plural form found for constant denominator" + + // TODO(ICU-23039): + // Improve the handling of constant_denominator representation. + // For instance, a constant_denominator of 1000000 should be adaptable to + // formats like + // 1,000,000, 1e6, or 1 million. + // Furthermore, ensure consistent pluralization rules for units. For example, + // "meter per 100 seconds" should be evaluated for correct singular/plural + // usage: "second" or "seconds"? + // Similarly, "kilogram per 1000 meters" should be checked for "meter" or + // "meters"? + if (outArray[pluralIndex].length() == 0) { + outArray[pluralIndex] = outArray[CONSTANT_DENOMINATOR_INDEX]; + } else { + UnicodeString tmp; + timesPatternFormatter.format(outArray[CONSTANT_DENOMINATOR_INDEX], outArray[pluralIndex], + tmp, status); + outArray[pluralIndex] = tmp; + } + } + for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { UnicodeString tmp; diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index fe0d63393006..f386eb2fda52 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -103,6 +103,7 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition { void toDecimalNumber(); void microPropsInternals(); void formatUnitsAliases(); + void formatArbitraryConstant(); void TestPortionFormat(); void testIssue22378(); diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index b44997a98168..c1c2b8082a6b 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -133,6 +133,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha TESTCASE_AUTO(toDecimalNumber); TESTCASE_AUTO(microPropsInternals); TESTCASE_AUTO(formatUnitsAliases); + TESTCASE_AUTO(formatArbitraryConstant); TESTCASE_AUTO(TestPortionFormat); TESTCASE_AUTO(testIssue22378); TESTCASE_AUTO_END; @@ -6086,6 +6087,56 @@ void NumberFormatterApiTest::formatUnitsAliases() { } } +void NumberFormatterApiTest::formatArbitraryConstant() { + IcuTestErrorCode status(*this, "formatArbitraryConstant"); + + struct TestCase { + const char *unitIdentifier; + int32_t inputValue; + UNumberUnitWidth width; + Locale locale; + const UnicodeString expectedOutput; + } testCases[]{ + {"meter-per-kelvin-second", 2, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + "2 meters per second-kelvin"}, + {"meter-per-100-kelvin-second", 3, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"3 meters per 100-second-kelvin"}, + {"meter-per-1000", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), u"1 meter per 1000"}, + {"meter-per-1000-second", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"1 meter per 1000-second"}, + {"meter-per-1000-second-kelvin", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"1 meter per 1000-second-kelvin"}, + {"meter-per-1-second-kelvin-per-kilogram", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"1 meter per 1-kilogram-second-kelvin"}, + {"meter-second-per-kilogram-kelvin", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"1 meter-second per kilogram-kelvin"}, + {"meter-second-per-1000-kilogram-kelvin", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getEnglish(), + u"1 meter-second per 1000-kilogram-kelvin"}, + {"meter-second-per-1000-kilogram-kelvin", 1, UNUM_UNIT_WIDTH_SHORT, Locale::getEnglish(), + u"1 m⋅sec/1000⋅kg⋅K"}, + {"meter-second-per-1000-kilogram-kelvin", 1, UNUM_UNIT_WIDTH_FULL_NAME, Locale::getGerman(), + u"1 Meter⋅Sekunde pro 1000⋅Kilogramm⋅Kelvin"}, + {"meter-second-per-1000-kilogram-kelvin", 1, UNUM_UNIT_WIDTH_SHORT, Locale::getGerman(), + u"1 m⋅Sek./1000⋅kg⋅K"}, + }; + + for (auto testCase : testCases) { + auto unit = MeasureUnit::forIdentifier(testCase.unitIdentifier, status); + UnicodeString actualFormat = NumberFormatter::withLocale(testCase.locale) + .unit(unit) + .unitWidth(testCase.width) + .formatDouble(testCase.inputValue, status) + .toString(status); + + if (status.errIfFailureAndReset()) { + continue; + } + + assertEquals(UnicodeString("test arbitrary constant \"") + testCase.unitIdentifier + "\"", + testCase.expectedOutput, actualFormat); + } +} + void NumberFormatterApiTest::TestPortionFormat() { IcuTestErrorCode status(*this, "TestPortionFormat"); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java b/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java index 2c72e1116b3e..4ad82786f297 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/util/MeasureUnit.java @@ -716,12 +716,22 @@ public MeasureUnit product(MeasureUnit other) { implCopy.appendSingleUnit(singleUnit); } - if (this.getConstantDenominator() != 0 && other.getConstantDenominator() != 0) { + long thisConstantDenominator = this.getConstantDenominator(); + long otherConstantDenominator = other.getConstantDenominator(); + + // TODO: we can also multiply the constant denominators instead of throwing an + // exception. + if (thisConstantDenominator != 0 && otherConstantDenominator != 0) { + // There is only `one` constant denominator in a compound unit. + // Therefore, we cannot multiply units that both of them have a constant + // denominator. throw new UnsupportedOperationException( "Cannot multiply units that both of them have a constant denominator"); } - implCopy.setConstantDenominator(this.getConstantDenominator() + other.getConstantDenominator()); + // Because either one of the constant denominators is zero, we can use the + // maximum of them. + implCopy.setConstantDenominator(Math.max(thisConstantDenominator, otherConstantDenominator)); return implCopy.build(); }