From 00f8e00492acf94902773de1d574499ed4ee9f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Thu, 15 Aug 2024 01:38:41 +0000 Subject: [PATCH] Split default icu data into lazily deserialized parts (#3948) * Split default icu data into lazily deserialized parts * Fix no_std compilation * Lazily load more ICU tools * Fix regressions and use more stable constructors --- Cargo.lock | 11 +- core/engine/src/builtins/intl/collator/mod.rs | 33 +++-- .../src/builtins/intl/list_format/mod.rs | 44 ++++--- core/engine/src/builtins/intl/locale/mod.rs | 16 ++- core/engine/src/builtins/intl/locale/tests.rs | 11 +- core/engine/src/builtins/intl/locale/utils.rs | 23 ++-- .../src/builtins/intl/number_format/mod.rs | 30 +++-- .../src/builtins/intl/plural_rules/mod.rs | 28 ++-- .../engine/src/builtins/intl/segmenter/mod.rs | 41 ++++-- core/engine/src/builtins/string/mod.rs | 19 ++- core/engine/src/context/icu.rs | 123 ++++++++++++------ core/engine/src/context/mod.rs | 5 +- core/icu_provider/Cargo.toml | 10 ++ core/icu_provider/data/icu_casemap.postcard | Bin 0 -> 23402 bytes core/icu_provider/data/icu_collator.postcard | Bin 0 -> 1122330 bytes ...icudata.postcard => icu_datetime.postcard} | Bin 16164195 -> 10624417 bytes core/icu_provider/data/icu_decimal.postcard | Bin 0 -> 4346 bytes core/icu_provider/data/icu_list.postcard | Bin 0 -> 17322 bytes .../data/icu_locid_transform.postcard | Bin 0 -> 96289 bytes .../icu_provider/data/icu_normalizer.postcard | Bin 0 -> 105473 bytes core/icu_provider/data/icu_plurals.postcard | Bin 0 -> 6290 bytes core/icu_provider/data/icu_segmenter.postcard | Bin 0 -> 4152706 bytes core/icu_provider/src/lib.rs | 97 ++++++++++++-- tools/gen-icu4x-data/Cargo.toml | 1 - tools/gen-icu4x-data/src/main.rs | 100 +++++++------- 25 files changed, 385 insertions(+), 207 deletions(-) create mode 100644 core/icu_provider/data/icu_casemap.postcard create mode 100644 core/icu_provider/data/icu_collator.postcard rename 
core/icu_provider/data/{icudata.postcard => icu_datetime.postcard} (61%) create mode 100644 core/icu_provider/data/icu_decimal.postcard create mode 100644 core/icu_provider/data/icu_list.postcard create mode 100644 core/icu_provider/data/icu_locid_transform.postcard create mode 100644 core/icu_provider/data/icu_normalizer.postcard create mode 100644 core/icu_provider/data/icu_plurals.postcard create mode 100644 core/icu_provider/data/icu_segmenter.postcard diff --git a/Cargo.lock b/Cargo.lock index 8e454872306..7f208113935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -443,10 +443,20 @@ dependencies = [ name = "boa_icu_provider" version = "0.19.0" dependencies = [ + "icu_casemap", + "icu_collator", + "icu_datetime", + "icu_decimal", + "icu_list", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", "icu_provider", "icu_provider_adapters", "icu_provider_blob", + "icu_segmenter", "once_cell", + "paste", ] [[package]] @@ -1364,7 +1374,6 @@ dependencies = [ "icu_locid_transform", "icu_normalizer", "icu_plurals", - "icu_provider", "icu_segmenter", "log", "simple_logger", diff --git a/core/engine/src/builtins/intl/collator/mod.rs b/core/engine/src/builtins/intl/collator/mod.rs index 2181de61820..ea700d78b95 100644 --- a/core/engine/src/builtins/intl/collator/mod.rs +++ b/core/engine/src/builtins/intl/collator/mod.rs @@ -17,7 +17,7 @@ use crate::{ OrdinaryObject, }, context::{ - icu::IntlProvider, + icu::{ErasedProvider, IntlProvider}, intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, }, js_string, @@ -279,7 +279,7 @@ impl BuiltInConstructor for Collator { requested_locales, &mut intl_options, context.intl_provider(), - ); + )?; let collator_locale = { // `collator_locale` needs to be different from the resolved locale because ECMA402 doesn't @@ -335,18 +335,23 @@ impl BuiltInConstructor for Collator { .then_some((AlternateHandling::Shifted, MaxVariable::Punctuation)) .unzip(); - let collator = - 
icu_collator::Collator::try_new_unstable(context.intl_provider(), &collator_locale, { - let mut options = icu_collator::CollatorOptions::new(); - options.strength = strength; - options.case_level = case_level; - options.case_first = case_first; - options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off }); - options.alternate_handling = alternate_handling; - options.max_variable = max_variable; - options - }) - .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?; + let mut options = icu_collator::CollatorOptions::new(); + options.strength = strength; + options.case_level = case_level; + options.case_first = case_first; + options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off }); + options.alternate_handling = alternate_handling; + options.max_variable = max_variable; + + let collator = match context.intl_provider().erased_provider() { + ErasedProvider::Any(a) => { + icu_collator::Collator::try_new_with_any_provider(a, &collator_locale, options) + } + ErasedProvider::Buffer(b) => { + icu_collator::Collator::try_new_with_buffer_provider(b, &collator_locale, options) + } + } + .map_err(|e| JsNativeError::typ().with_message(e.to_string()))?; let prototype = get_prototype_from_constructor(new_target, StandardConstructors::collator, context)?; diff --git a/core/engine/src/builtins/intl/list_format/mod.rs b/core/engine/src/builtins/intl/list_format/mod.rs index 58695ab1e5c..94c838326d2 100644 --- a/core/engine/src/builtins/intl/list_format/mod.rs +++ b/core/engine/src/builtins/intl/list_format/mod.rs @@ -12,7 +12,10 @@ use crate::{ options::{get_option, get_options_object}, Array, BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject, OrdinaryObject, }, - context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, + context::{ + icu::ErasedProvider, + intrinsics::{Intrinsics, StandardConstructor, StandardConstructors}, + }, js_string, object::{internal_methods::get_prototype_from_constructor, 
JsObject}, property::Attribute, @@ -128,7 +131,7 @@ impl BuiltInConstructor for ListFormat { ..Default::default() }, context.intl_provider(), - ); + )?; // 11. Let type be ? GetOption(options, "type", string, « "conjunction", "disjunction", "unit" », "conjunction"). // 12. Set listFormat.[[Type]] to type. @@ -142,23 +145,26 @@ impl BuiltInConstructor for ListFormat { // 16. Let dataLocaleData be localeData.[[]]. // 17. Let dataLocaleTypes be dataLocaleData.[[]]. // 18. Set listFormat.[[Templates]] to dataLocaleTypes.[[