From f5e3a4c7067f4882c5faa9c5224035ae3a188aba Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 19 Jan 2024 20:53:34 -0800 Subject: [PATCH 1/9] Change interior data model in select_locales_for_key --- provider/datagen/src/driver.rs | 84 +++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index 3c0b5323d8e..5ffe8a3c038 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -482,28 +482,43 @@ fn select_locales_for_key( impl FnOnce() -> Result, >, ) -> Result, DataError> { - let mut result = provider - .supported_locales_for_key(key) - .map_err(|e| e.with_key(key))? - .into_iter() - .collect::>(); + // A map from langid to data locales. Keys that have aux keys or extension keywords + // may have multiple data locales per langid. + let mut supported_map: HashMap> = Default::default(); + for locale in provider.supported_locales_for_key(key).map_err(|e| e.with_key(key))? 
{ + use std::collections::hash_map::Entry; + match supported_map.entry(locale.get_langid()) { + Entry::Occupied(mut entry) => { + entry.get_mut().insert(locale) + } + Entry::Vacant(entry) => { + entry.insert(Default::default()).insert(locale) + } + }; + } if key == icu_segmenter::provider::DictionaryForWordOnlyAutoV1Marker::KEY || key == icu_segmenter::provider::DictionaryForWordLineExtendedV1Marker::KEY { - result.retain(|locale| { - let model = crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); - segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let model = crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); + segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + }); + !locales.is_empty() }); // Don't perform additional locale filtering - return Ok(result); + return Ok(supported_map.into_values().flatten().collect()); } else if key == icu_segmenter::provider::LstmForWordLineAutoV1Marker::KEY { - result.retain(|locale| { - let model = crate::transform::segmenter::lstm::data_locale_to_model_name(locale); - segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let model = crate::transform::segmenter::lstm::data_locale_to_model_name(locale); + segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + }); + !locales.is_empty() }); // Don't perform additional locale filtering - return Ok(result); + return Ok(supported_map.into_values().flatten().collect()); } else if key == icu_collator::provider::CollationDataV1Marker::KEY || key == icu_collator::provider::CollationDiacriticsV1Marker::KEY || key == icu_collator::provider::CollationJamoV1Marker::KEY @@ -511,31 +526,35 @@ fn select_locales_for_key( || key == icu_collator::provider::CollationReorderingV1Marker::KEY || key == icu_collator::provider::CollationSpecialPrimariesV1Marker::KEY { - result.retain(|locale| { 
- let Some(collation) = locale - .get_unicode_ext(&key!("co")) - .and_then(|co| co.as_single_subtag().copied()) - else { - return true; - }; - additional_collations.contains(collation.as_str()) - || if collation.starts_with("search") { - additional_collations.contains("search*") - } else { - !["big5han", "gb2312"].contains(&collation.as_str()) - } + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let Some(collation) = locale + .get_unicode_ext(&key!("co")) + .and_then(|co| co.as_single_subtag().copied()) + else { + return true; + }; + additional_collations.contains(collation.as_str()) + || if collation.starts_with("search") { + additional_collations.contains("search*") + } else { + !["big5han", "gb2312"].contains(&collation.as_str()) + } + }); + !locales.is_empty() }); } - result = match (locales, fallback) { + let result = match (locales, fallback) { // Case 1: `None` simply exports all supported locales for this key. - (None, _) => result, + (None, _) => supported_map.into_values().flatten().collect(), // Case 2: `FallbackMode::Preresolved` exports all supported locales whose langid matches // one of the explicit locales. This ensures extensions are included. In addition, any // explicit locales are added to the list, even if they themselves don't contain data; // fallback should be performed upon exporting. - (Some(explicit), FallbackMode::Preresolved) => result - .into_iter() + (Some(explicit), FallbackMode::Preresolved) => supported_map + .into_values() + .flatten() .chain(explicit.iter().map(|langid| langid.into())) .filter(|locale| explicit.contains(&locale.get_langid())) .collect(), @@ -557,8 +576,9 @@ fn select_locales_for_key( } } - result - .into_iter() + supported_map + .into_values() + .flatten() .chain(explicit.iter().cloned()) .filter(|locale_orig| { let mut locale = locale_orig.clone(); From 8eac07ad6c8d2a37b681343130323cbd65513ac6 Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Fri, 19 Jan 2024 21:12:47 -0800 Subject: [PATCH 2/9] Propagate extensions to child locales; add tests --- provider/datagen/src/driver.rs | 27 ++++++++++++++++++-------- provider/datagen/tests/test-options.rs | 18 +++++++++++++---- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index 5ffe8a3c038..f606a673ea0 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -552,26 +552,37 @@ fn select_locales_for_key( // one of the explicit locales. This ensures extensions are included. In addition, any // explicit locales are added to the list, even if they themselves don't contain data; // fallback should be performed upon exporting. - (Some(explicit), FallbackMode::Preresolved) => supported_map + (Some(explicit_langids), FallbackMode::Preresolved) => supported_map .into_values() .flatten() - .chain(explicit.iter().map(|langid| langid.into())) - .filter(|locale| explicit.contains(&locale.get_langid())) + .chain(explicit_langids.iter().map(|langid| langid.into())) + .filter(|locale| explicit_langids.contains(&locale.get_langid())) .collect(), // Case 3: All other modes resolve to the "ancestors and descendants" strategy. 
- (Some(explicit), _) => { - let include_und = explicit.contains(&LanguageIdentifier::UND); - let explicit: HashSet = explicit.iter().map(DataLocale::from).collect(); + (Some(explicit_langids), _) => { + let include_und = explicit_langids.contains(&LanguageIdentifier::UND); let mut implicit = HashSet::new(); // TODO: Make including the default locale configurable implicit.insert(DataLocale::default()); let fallbacker = fallbacker.as_ref().map_err(|e| *e)?; let fallbacker_with_config = fallbacker.for_config(key.fallback_config()); - for locale in explicit.iter() { - let mut iter = fallbacker_with_config.fallback_for(locale.clone()); + let mut explicit: HashSet = Default::default(); + for explicit_langid in explicit_langids.iter() { + explicit.insert(explicit_langid.into()); + let mut iter = fallbacker_with_config.fallback_for(explicit_langid.into()); while !iter.get().is_und() { implicit.insert(iter.get().clone()); + // Inherit aux keys and extension keywords from parent locales + let iter_langid = iter.get().get_langid(); + if let Some(locales) = supported_map.get(&iter_langid) { + implicit.extend(locales.iter().cloned()); // adds ar-u-nu-latn + for locale in locales { + let mut morphed_locale = locale.clone(); + morphed_locale.set_langid(explicit_langid.clone()); + explicit.insert(morphed_locale); // adds ar-SA-u-nu-latn + } + } iter.step(); } } diff --git a/provider/datagen/tests/test-options.rs b/provider/datagen/tests/test-options.rs index 976fe0a4671..444a65f3d65 100644 --- a/provider/datagen/tests/test-options.rs +++ b/provider/datagen/tests/test-options.rs @@ -290,6 +290,7 @@ fn explicit_hybrid() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -323,12 +324,14 @@ fn explicit_hybrid() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", 
"ru-Cyrl-RU" let locales = [ "ar", // ancestor of ar-EG "ar-EG", // explicit locale "ar-EG-u-nu-latn", // descendant of ar-EG - // "ar-u-nu-latn", // ??? should this be included? + "ar-SA", // explicit locale, inheriting from ar + "ar-SA-u-nu-latn", // extensions should be included (#4533) + "ar-u-nu-latn", // extensions should be included (#4533) "arc", // Aramaic, inheriting from und "en", // ancestor of en-GB "en-001", // ancestor of en-GB @@ -356,6 +359,7 @@ fn explicit_runtime() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -389,12 +393,15 @@ fn explicit_runtime() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" let locales = [ "ar", // "ar-Arab-EG", (same as 'ar') // "ar-EG", (same as 'ar') "ar-EG-u-nu-latn", + // "ar-SA", (same as 'ar') + // "ar-SA-u-nu-latn", (same as 'ar-u-nu-latn') + "ar-u-nu-latn", // "arc", (same as 'und') // "en", (same as 'und') // "en-001", (same as 'und') @@ -420,6 +427,7 @@ fn explicit_preresolved() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -453,10 +461,12 @@ fn explicit_preresolved() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" let locales = [ "ar-EG", "ar-EG-u-nu-latn", // extensions included even in preresolved mode + "ar-SA", + // "ar-SA-u-nu-latn", // FIXME "arc", "en-GB", "es", From fa5984992263bf9d063c072845142961d8c700fc Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Fri, 19 Jan 2024 21:34:45 -0800 Subject: [PATCH 3/9] Refactor and make work in Preresolved mode --- provider/datagen/src/driver.rs | 108 ++++++++++++++++--------- provider/datagen/tests/test-options.rs | 10 +-- 2 files changed, 73 insertions(+), 45 deletions(-) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index f606a673ea0..dff45d137a6 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -468,13 +468,61 @@ impl DatagenDriver { } } +struct ExplicitImplicitLocaleSets { + explicit: HashSet, + implicit: HashSet, +} + +fn make_explicit_implicit_sets( + key: DataKey, + explicit_langids: &HashSet, + supported_map: &HashMap>, + fallbacker: &Lazy< + Result, + impl FnOnce() -> Result, + >, +) -> Result { + let mut implicit = HashSet::new(); + // TODO: Make including the default locale configurable + implicit.insert(DataLocale::default()); + + let mut explicit: HashSet = Default::default(); + for explicit_langid in explicit_langids.iter() { + explicit.insert(explicit_langid.into()); + if let Some(locales) = supported_map.get(&explicit_langid) { + explicit.extend(locales.iter().cloned()); // adds ar-EG-u-nu-latn + } + if explicit_langid == &LanguageIdentifier::UND { + continue; + } + let fallbacker = fallbacker.as_ref().map_err(|e| *e)?; + let fallbacker_with_config = fallbacker.for_config(key.fallback_config()); + let mut iter = fallbacker_with_config.fallback_for(explicit_langid.into()); + while !iter.get().is_und() { + implicit.insert(iter.get().clone()); + // Inherit aux keys and extension keywords from parent locales + let iter_langid = iter.get().get_langid(); + if let Some(locales) = supported_map.get(&iter_langid) { + implicit.extend(locales.iter().cloned()); // adds ar-u-nu-latn + for locale in locales { + let mut morphed_locale = locale.clone(); + morphed_locale.set_langid(explicit_langid.clone()); + explicit.insert(morphed_locale); // adds ar-SA-u-nu-latn + } + } + iter.step(); + } + } + 
Ok(ExplicitImplicitLocaleSets { explicit, implicit }) +} + /// Selects the maximal set of locales to export based on a [`DataKey`] and this datagen /// provider's options bag. The locales may be later optionally deduplicated for fallback. fn select_locales_for_key( provider: &dyn ExportableProvider, key: DataKey, fallback: FallbackMode, - locales: Option<&HashSet>, + explicit_langids: Option<&HashSet>, additional_collations: &HashSet, segmenter_models: &[String], fallbacker: &Lazy< @@ -485,15 +533,14 @@ fn select_locales_for_key( // A map from langid to data locales. Keys that have aux keys or extension keywords // may have multiple data locales per langid. let mut supported_map: HashMap> = Default::default(); - for locale in provider.supported_locales_for_key(key).map_err(|e| e.with_key(key))? { + for locale in provider + .supported_locales_for_key(key) + .map_err(|e| e.with_key(key))? + { use std::collections::hash_map::Entry; match supported_map.entry(locale.get_langid()) { - Entry::Occupied(mut entry) => { - entry.get_mut().insert(locale) - } - Entry::Vacant(entry) => { - entry.insert(Default::default()).insert(locale) - } + Entry::Occupied(mut entry) => entry.get_mut().insert(locale), + Entry::Vacant(entry) => entry.insert(Default::default()).insert(locale), }; } @@ -502,7 +549,8 @@ fn select_locales_for_key( { supported_map.retain(|_, locales| { locales.retain(|locale| { - let model = crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); + let model = + crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); segmenter_models.iter().any(|m| Some(m.as_ref()) == model) }); !locales.is_empty() @@ -545,48 +593,28 @@ fn select_locales_for_key( }); } - let result = match (locales, fallback) { + let result = match (explicit_langids, fallback) { // Case 1: `None` simply exports all supported locales for this key. 
(None, _) => supported_map.into_values().flatten().collect(), // Case 2: `FallbackMode::Preresolved` exports all supported locales whose langid matches // one of the explicit locales. This ensures extensions are included. In addition, any // explicit locales are added to the list, even if they themselves don't contain data; // fallback should be performed upon exporting. - (Some(explicit_langids), FallbackMode::Preresolved) => supported_map - .into_values() - .flatten() - .chain(explicit_langids.iter().map(|langid| langid.into())) - .filter(|locale| explicit_langids.contains(&locale.get_langid())) - .collect(), + (Some(explicit_langids), FallbackMode::Preresolved) => { + let ExplicitImplicitLocaleSets { explicit, .. } = + make_explicit_implicit_sets(key, explicit_langids, &supported_map, fallbacker)?; + explicit + } // Case 3: All other modes resolve to the "ancestors and descendants" strategy. (Some(explicit_langids), _) => { let include_und = explicit_langids.contains(&LanguageIdentifier::UND); - let mut implicit = HashSet::new(); - // TODO: Make including the default locale configurable - implicit.insert(DataLocale::default()); + + let ExplicitImplicitLocaleSets { explicit, implicit } = + make_explicit_implicit_sets(key, explicit_langids, &supported_map, fallbacker)?; + let fallbacker = fallbacker.as_ref().map_err(|e| *e)?; let fallbacker_with_config = fallbacker.for_config(key.fallback_config()); - let mut explicit: HashSet = Default::default(); - for explicit_langid in explicit_langids.iter() { - explicit.insert(explicit_langid.into()); - let mut iter = fallbacker_with_config.fallback_for(explicit_langid.into()); - while !iter.get().is_und() { - implicit.insert(iter.get().clone()); - // Inherit aux keys and extension keywords from parent locales - let iter_langid = iter.get().get_langid(); - if let Some(locales) = supported_map.get(&iter_langid) { - implicit.extend(locales.iter().cloned()); // adds ar-u-nu-latn - for locale in locales { - let mut 
morphed_locale = locale.clone(); - morphed_locale.set_langid(explicit_langid.clone()); - explicit.insert(morphed_locale); // adds ar-SA-u-nu-latn - } - } - iter.step(); - } - } - supported_map .into_values() .flatten() @@ -736,7 +764,7 @@ fn test_collation_filtering() { Some(&HashSet::from_iter([cas.language.clone()])), &HashSet::from_iter(cas.include_collations.iter().copied().map(String::from)), &[], - &once_cell::sync::Lazy::new(|| unreachable!()), + &once_cell::sync::Lazy::new(|| Ok(LocaleFallbacker::new_without_data())), ) .unwrap() .into_iter() diff --git a/provider/datagen/tests/test-options.rs b/provider/datagen/tests/test-options.rs index 444a65f3d65..e4790df18af 100644 --- a/provider/datagen/tests/test-options.rs +++ b/provider/datagen/tests/test-options.rs @@ -332,10 +332,10 @@ fn explicit_hybrid() { "ar-SA", // explicit locale, inheriting from ar "ar-SA-u-nu-latn", // extensions should be included (#4533) "ar-u-nu-latn", // extensions should be included (#4533) - "arc", // Aramaic, inheriting from und - "en", // ancestor of en-GB - "en-001", // ancestor of en-GB - "en-GB", // explicit locale not in supported locales + "arc", // Aramaic, inheriting from und + "en", // ancestor of en-GB + "en-001", // ancestor of en-GB + "en-GB", // explicit locale not in supported locales // "en-ZA", // not reachable "es", // explicit and supported "es-AR", // descendant of es @@ -466,7 +466,7 @@ fn explicit_preresolved() { "ar-EG", "ar-EG-u-nu-latn", // extensions included even in preresolved mode "ar-SA", - // "ar-SA-u-nu-latn", // FIXME + "ar-SA-u-nu-latn", // extensions included even in preresolved mode "arc", "en-GB", "es", From 68ec0480cb6a7e20733d1225acb8641021fcfbde Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Fri, 19 Jan 2024 21:41:48 -0800 Subject: [PATCH 4/9] Docs --- provider/datagen/src/driver.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index dff45d137a6..c5da4303448 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -473,6 +473,10 @@ struct ExplicitImplicitLocaleSets { implicit: HashSet, } +/// Resolves the set of explicit langids and the supported locales into two sets of locales: +/// +/// - `explicit` contains the explicit langids but with aux keys and extension keywords included +/// - `implcit` contains any locale reachable by fallback from an `explicit` locale fn make_explicit_implicit_sets( key: DataKey, explicit_langids: &HashSet, From 7ef53b2755f10e04d54d936b5366886be49d7918 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 19 Jan 2024 21:42:11 -0800 Subject: [PATCH 5/9] cargo make testdata --- .../collator/data@1/ar-EG-u-co-compat.json | 653 ++++++++++++++++++ .../json/collator/data@1/es-AR-u-co-trad.json | 421 +++++++++++ .../collator/meta@1/ar-EG-u-co-compat.json | 3 + .../json/collator/meta@1/es-AR-u-co-trad.json | 3 + .../collator/reord@1/ar-EG-u-co-compat.json | 262 +++++++ .../datetime/patterns/time@1/en-ZA-x-f24.json | 3 + .../datetime/patterns/time@1/en-ZA-x-l24.json | 3 + .../datetime/patterns/time@1/en-ZA-x-m24.json | 3 + .../datetime/patterns/time@1/en-ZA-x-s24.json | 3 + .../datetime/patterns/time@1/es-AR-x-f12.json | 3 + .../datetime/patterns/time@1/es-AR-x-l12.json | 3 + .../datetime/patterns/time@1/es-AR-x-m12.json | 3 + .../datetime/patterns/time@1/es-AR-x-s12.json | 3 + .../tests/data/postcard/fingerprints.csv | 13 + 14 files changed, 1379 insertions(+) create mode 100644 provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json create mode 100644 provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json create mode 100644 
provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json create mode 100644 provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json create mode 100644 provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json create mode 100644 provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json diff --git a/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..96cac81e831 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json @@ -0,0 +1,653 @@ +{ + "trie": { + "header": { + "high_start": 65536, + "shifted12_high_start": 16, + "index3_null_offset": 26, + "data_null_offset": 0, + "null_value": 192, + "trie_type": "Small" + }, + "index": [ + 0, + 64, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 87, + 142, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 259, + 267, + 277, + 299, + 0, + 16, + 32, + 48, + 64, + 80, + 96, + 112, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 
16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 87, + 103, + 119, + 135, + 142, + 158, + 174, + 190, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 218, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 226, + 0, + 0, + 0, + 0, + 0, + 236, + 0, + 0, + 0, + 252, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 265, + 0, + 0, + 0, + 0, + 270, + 286, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 68, + 76, + 76, + 108, + 76, + 76, + 76, + 76, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 146, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 173, + 204, + 227, + 65518 + ], + "data": [ + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, 
+ 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1714046469, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022597, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022598, + 1727022599, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022603, + 192, + 192, + 1727022602, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1714046471, + 1714046470, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022601, + 1727022600, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192 + ] + }, + "ces": [], + "ce32s": [], + "contexts": [] +} diff --git a/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json b/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json new file mode 100644 index 00000000000..bb0010ef083 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json @@ -0,0 +1,421 
@@ +{ + "trie": { + "header": { + "high_start": 55296, + "shifted12_high_start": 14, + "index3_null_offset": 2, + "data_null_offset": 0, + "null_value": 192, + "trie_type": "Small" + }, + "index": [ + 0, + 64, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 146, + 154, + 164, + 186, + 0, + 16, + 32, + 48, + 64, + 80, + 96, + 112, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 140, + 0, + 0, + 0, + 0, + 0, + 68, + 76, + 76, + 76, + 76, + 76, + 76, + 76, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 114 + ], + "data": [ + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 2249, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 75977, + 192, + 165577, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 
192, + 192, + 192, + 192, + 192, + 192, + 192, + 223433, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 272585, + 192, + 329417, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192 + ] + }, + "ces": [], + "ce32s": [], + "contexts": [ + 11776, + 1436, + 1, + 72, + 61186, + 1415, + 104, + 61186, + 1350, + 16384, + 1436, + 1, + 76, + 65535, + 16642, + 1415, + 108, + 65535, + 16642, + 1350, + 17408, + 1436, + 48, + 771, + 65535, + 17666, + 1414, + 11776, + 1285, + 48, + 104, + 61186, + 1285, + 16384, + 1285, + 48, + 108, + 65535, + 16642, + 1285, + 17408, + 1285, + 48, + 771, + 65535, + 17666, + 1285 + ] +} diff --git a/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..5929352390a --- /dev/null +++ b/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json @@ -0,0 +1,3 @@ +{ + "bits": 41 +} diff --git a/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json b/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json new file mode 100644 index 00000000000..ebe110aba66 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json @@ -0,0 +1,3 @@ +{ + "bits": 9 +} diff --git a/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..7ae79ec8d6c --- /dev/null +++ b/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json @@ -0,0 +1,262 @@ +{ + "min_high_no_reorder": 1728053248, + "reorder_table": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, 
+ 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 39, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255 + ], + "reorder_ranges": [] +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json new file mode 100644 index 00000000000..f937308e275 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json @@ -0,0 +1,3 @@ +{ + "pattern": 
"HH:mm:ss v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json new file mode 100644 index 00000000000..f937308e275 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm:ss v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json new file mode 100644 index 00000000000..9788145c306 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm:ss" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json new file mode 100644 index 00000000000..f360b85fc30 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json new file mode 100644 index 00000000000..bb7442a1859 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json new file mode 100644 index 00000000000..bb7442a1859 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json new file mode 100644 
index 00000000000..a874c4da57e --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json new file mode 100644 index 00000000000..7331777bec2 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm a" +} diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index 3fc601c0e9b..d7396a17f94 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -4,6 +4,7 @@ calendar/japanese@1, und, 111B, b31e52deaf52706f calendar/japanext@1, und, 5216B, 6c20e216c8cd6e41 collator/data@1, ar, 8267B, fce742b37324adbe collator/data@1, ar-EG, 8267B, fce742b37324adbe +collator/data@1, ar-EG-u-co-compat, 1888B, e7b7e3fda37b8565 collator/data@1, ar-u-co-compat, 1888B, e7b7e3fda37b8565 collator/data@1, bn, 1304B, 4b0a44d6a365bcd1 collator/data@1, bn-u-co-trad, 11083B, 1d78781818d5ec49 @@ -13,6 +14,7 @@ collator/data@1, en-001, 126799B, a0ea0e0eecc34e27 collator/data@1, en-ZA, 126799B, a0ea0e0eecc34e27 collator/data@1, es, 1064B, 53d5d15868ead10a collator/data@1, es-AR, 1064B, 53d5d15868ead10a +collator/data@1, es-AR-u-co-trad, 1130B, 7f0603bad1cbb60d collator/data@1, es-u-co-trad, 1130B, 7f0603bad1cbb60d collator/data@1, fil, 1088B, f4b69509fc410230 collator/data@1, fr, 126799B, a0ea0e0eecc34e27 @@ -47,6 +49,7 @@ collator/dia@1, und, 160B, 8ace760351a33687 collator/jamo@1, und, 1026B, 8554e65df2b9cfbb collator/meta@1, ar, 1B, 9208c26164ee7a99 collator/meta@1, ar-EG, 1B, 9208c26164ee7a99 +collator/meta@1, ar-EG-u-co-compat, 1B, 9208c26164ee7a99 collator/meta@1, ar-u-co-compat, 1B, 9208c26164ee7a99 collator/meta@1, bn, 1B, 
9208c26164ee7a99 collator/meta@1, bn-u-co-trad, 1B, 9208c26164ee7a99 @@ -56,6 +59,7 @@ collator/meta@1, en-001, 1B, 667dd5401e6fd800 collator/meta@1, en-ZA, 1B, 667dd5401e6fd800 collator/meta@1, es, 1B, 9aab82d56f3b362e collator/meta@1, es-AR, 1B, 9aab82d56f3b362e +collator/meta@1, es-AR-u-co-trad, 1B, 9aab82d56f3b362e collator/meta@1, es-u-co-trad, 1B, 9aab82d56f3b362e collator/meta@1, fil, 1B, 9aab82d56f3b362e collator/meta@1, fr, 1B, 667dd5401e6fd800 @@ -72,6 +76,7 @@ collator/meta@1, und-u-co-eor, 1B, 667dd5401e6fd800 collator/prim@1, und, 10B, 792009c72825eaba collator/reord@1, ar, 264B, 556a25539c4da116 collator/reord@1, ar-EG, 264B, 556a25539c4da116 +collator/reord@1, ar-EG-u-co-compat, 264B, 556a25539c4da116 collator/reord@1, ar-u-co-compat, 264B, 556a25539c4da116 collator/reord@1, bn, 268B, 99752b8b1cb4c37b collator/reord@1, bn-u-co-trad, 268B, 99752b8b1cb4c37b @@ -1591,12 +1596,16 @@ datetime/patterns/time@1, en-001-x-s, 17B, dd691ff921592b5 datetime/patterns/time@1, en-001-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-ZA-x-f, 23B, 6a41da8c43bf6f45 datetime/patterns/time@1, en-ZA-x-f12, 29B, 7cab8ab71d571c6 +datetime/patterns/time@1, en-ZA-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-ZA-x-l, 23B, 2244959498606494 datetime/patterns/time@1, en-ZA-x-l12, 29B, 7cab8ab71d571c6 +datetime/patterns/time@1, en-ZA-x-l24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-ZA-x-m, 17B, d037d51a86bbe2 datetime/patterns/time@1, en-ZA-x-m12, 23B, 72dd914cf7818843 +datetime/patterns/time@1, en-ZA-x-m24, 17B, d037d51a86bbe2 datetime/patterns/time@1, en-ZA-x-s, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-ZA-x-s12, 17B, dd691ff921592b5 +datetime/patterns/time@1, en-ZA-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-x-f, 29B, eab82b7bd4db3d8d datetime/patterns/time@1, en-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-x-l, 29B, 513d9b72aeff08ed @@ -1606,12 +1615,16 @@ datetime/patterns/time@1, en-x-m24, 17B, d037d51a86bbe2 
datetime/patterns/time@1, en-x-s, 17B, dd691ff921592b5 datetime/patterns/time@1, en-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, es-AR-x-f, 29B, eab82b7bd4db3d8d +datetime/patterns/time@1, es-AR-x-f12, 29B, 7cab8ab71d571c6 datetime/patterns/time@1, es-AR-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, es-AR-x-l, 29B, 513d9b72aeff08ed +datetime/patterns/time@1, es-AR-x-l12, 29B, 7cab8ab71d571c6 datetime/patterns/time@1, es-AR-x-l24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, es-AR-x-m, 23B, 72dd914cf7818843 +datetime/patterns/time@1, es-AR-x-m12, 23B, 72dd914cf7818843 datetime/patterns/time@1, es-AR-x-m24, 17B, d037d51a86bbe2 datetime/patterns/time@1, es-AR-x-s, 17B, dd691ff921592b5 +datetime/patterns/time@1, es-AR-x-s12, 17B, dd691ff921592b5 datetime/patterns/time@1, es-AR-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, es-x-f, 29B, c1bdced58fc9f02f datetime/patterns/time@1, es-x-f12, 29B, 7cab8ab71d571c6 From 878fc12919752ef68b2fe6511b2542d6fb25b7ca Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Fri, 19 Jan 2024 21:52:36 -0800 Subject: [PATCH 6/9] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6875ad8976..21e9844fe00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Datagen shows elapsed time for keys that are slow to generate (https://github.com/unicode-org/icu4x/pull/4469) - Datagen performance improvement by caching supported locales (https://github.com/unicode-org/icu4x/pull/4470) - Never use fallback for baked segmentation data (https://github.com/unicode-org/icu4x/pull/4510) + - Propagate extension keywords and auxiliary keys to explicit locales in Hybrid and Preresolved modes (https://github.com/unicode-org/icu4x/pull/4533) - `icu_provider` - (Small breakage) `DataPayload::new_owned()` is no longer `const`, this was a mistake (https://github.com/unicode-org/icu4x/pull/4456) - `icu_provider_blob` From bb07edf57e5c8f796c95b7faf9df7f1213f62505 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 19 Jan 2024 21:53:31 -0800 Subject: [PATCH 7/9] clippy --- provider/datagen/src/driver.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index c5da4303448..246bbbcbc19 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -493,7 +493,7 @@ fn make_explicit_implicit_sets( let mut explicit: HashSet = Default::default(); for explicit_langid in explicit_langids.iter() { explicit.insert(explicit_langid.into()); - if let Some(locales) = supported_map.get(&explicit_langid) { + if let Some(locales) = supported_map.get(explicit_langid) { explicit.extend(locales.iter().cloned()); // adds ar-EG-u-nu-latn } if explicit_langid == &LanguageIdentifier::UND { From 636fb63f4d150e240958eca01281dfb2e941b291 Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Fri, 19 Jan 2024 21:56:01 -0800 Subject: [PATCH 8/9] Fix changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21e9844fe00..bc7fe82c6ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ - Datagen shows elapsed time for keys that are slow to generate (https://github.com/unicode-org/icu4x/pull/4469) - Datagen performance improvement by caching supported locales (https://github.com/unicode-org/icu4x/pull/4470) - Never use fallback for baked segmentation data (https://github.com/unicode-org/icu4x/pull/4510) - - Propagate extension keywords and auxiliary keys to explicit locales in Hybrid and Preresolved modes (https://github.com/unicode-org/icu4x/pull/4533) + - Propagate extension keywords and auxiliary keys to explicit locales (https://github.com/unicode-org/icu4x/pull/4533) - `icu_provider` - (Small breakage) `DataPayload::new_owned()` is no longer `const`, this was a mistake (https://github.com/unicode-org/icu4x/pull/4456) - `icu_provider_blob` From 661326c876496ffbe6b49fc59bebe99482bb5027 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 23 Jan 2024 13:12:50 -0600 Subject: [PATCH 9/9] Add more docs --- provider/datagen/src/driver.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index 246bbbcbc19..568597d1098 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -475,8 +475,11 @@ struct ExplicitImplicitLocaleSets { /// Resolves the set of explicit langids and the supported locales into two sets of locales: /// -/// - `explicit` contains the explicit langids but with aux keys and extension keywords included -/// - `implcit` contains any locale reachable by fallback from an `explicit` locale +/// - `explicit` contains the explicit langids but with aux keys and extension keywords included. 
+/// For example, if `ar-SA` is requested (explicit langid), and `ar` and `ar-u-nu-latn` are supported, +/// then `ar-SA` and `ar-SA-u-nu-latn` will be returned as `explicit`. +/// - `implicit` contains all supported locales reachable by fallback from an `explicit` locale. +/// These locales can be included without increasing data payload size. fn make_explicit_implicit_sets( key: DataKey, explicit_langids: &HashSet,