From 0cb15c31d61519948a7b581567765856114699f6 Mon Sep 17 00:00:00 2001 From: Niels Saurer Date: Tue, 22 Aug 2023 14:04:32 +0000 Subject: [PATCH] addressing first part of review --- experimental/transliteration/data/Cargo.toml | 2 - experimental/transliteration/src/provider.rs | 8 +--- provider/datagen/src/source.rs | 9 +++- provider/datagen/src/transform/cldr/source.rs | 16 ++----- .../src/transform/cldr/transforms/mod.rs | 42 ++++++++++--------- 5 files changed, 35 insertions(+), 42 deletions(-) diff --git a/experimental/transliteration/data/Cargo.toml b/experimental/transliteration/data/Cargo.toml index bf4d286d85e..91eda14799c 100644 --- a/experimental/transliteration/data/Cargo.toml +++ b/experimental/transliteration/data/Cargo.toml @@ -24,5 +24,3 @@ include = [ "LICENSE", "README.md" ] - -[dependencies] diff --git a/experimental/transliteration/src/provider.rs b/experimental/transliteration/src/provider.rs index b9aaf1b9425..90eb8712810 100644 --- a/experimental/transliteration/src/provider.rs +++ b/experimental/transliteration/src/provider.rs @@ -126,13 +126,7 @@ pub struct Rule<'a> { } /// The special matchers and replacers used by this transliterator. -#[derive( - Debug, - Clone, - zerofrom::ZeroFrom, // QUESTION: does ZeroFrom need to be behind a feature? - PartialEq, - Eq, -)] +#[derive(Debug, Clone, zerofrom::ZeroFrom, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_transliteration::provider))] pub struct VarTable<'a> { diff --git a/provider/datagen/src/source.rs b/provider/datagen/src/source.rs index 35cb0a86f24..dc4145b130a 100644 --- a/provider/datagen/src/source.rs +++ b/provider/datagen/src/source.rs @@ -177,7 +177,7 @@ impl AbstractFs { Ok(()) } - pub(crate) fn read_to_buf(&self, path: &str) -> Result, DataError> { + fn read_to_buf(&self, path: &str) -> Result, DataError> { self.init()?; match self { Self::Fs(root) => { @@ -211,6 +211,13 @@ impl AbstractFs { } } + pub(crate) fn read_to_string(&self, path: &str) -> Result { + let vec = self.read_to_buf(path)?; + let s = String::from_utf8(vec) + .map_err(|e| DataError::custom("Invalid UTF-8").with_display_context(&e))?; + Ok(s) + } + fn list(&self, path: &str) -> Result, DataError> { self.init()?; Ok(match self { diff --git a/provider/datagen/src/transform/cldr/source.rs b/provider/datagen/src/transform/cldr/source.rs index 06be6e333c0..2c43b2204a7 100644 --- a/provider/datagen/src/transform/cldr/source.rs +++ b/provider/datagen/src/transform/cldr/source.rs @@ -238,24 +238,16 @@ impl<'a> CldrDirTransform<'a> { transform: &str, ) -> Result<&'a crate::transform::cldr::cldr_serde::transforms::Resource, DataError> { let dir_suffix = self.0.dir_suffix()?; - let path = format!("{}-{dir_suffix}/main/{transform}/metadata.json", self.1); - if self.0.serde_cache.file_exists(&path)? { - self.0.serde_cache.read_and_parse_json(&path) - } else { - Err(DataErrorKind::Io(std::io::ErrorKind::NotFound) - .into_error() - .with_display_context(&path)) - } + // using the -NoLang version because `transform` is not a valid LanguageIdentifier + let cldr_dir = CldrDirNoLang(self.0, format!("{}-{dir_suffix}/main/{transform}", self.1)); + cldr_dir.read_and_parse("metadata.json") } pub fn read_source(&self, transform: &str) -> Result { let dir_suffix = self.0.dir_suffix()?; let path = format!("{}-{dir_suffix}/main/{transform}/source.txt", self.1); if self.0.serde_cache.file_exists(&path)? { - let buf = self.0.serde_cache.root.read_to_buf(&path)?; - let s = String::from_utf8(buf) - .map_err(|e| DataError::custom("Source UTF-8 checking").with_display_context(&e))?; - Ok(s) + self.0.serde_cache.root.read_to_string(&path) } else { Err(DataErrorKind::Io(std::io::ErrorKind::NotFound) .into_error() diff --git a/provider/datagen/src/transform/cldr/transforms/mod.rs b/provider/datagen/src/transform/cldr/transforms/mod.rs index 0e779872dab..fda7a0496d9 100644 --- a/provider/datagen/src/transform/cldr/transforms/mod.rs +++ b/provider/datagen/src/transform/cldr/transforms/mod.rs @@ -21,23 +21,23 @@ impl<'a> TransliteratorCollection<'a> { } /// Given an internal ID for an existing transliterator, returns the directory name for a - /// source that maps to the given internal ID. Additionally returns `true` if this is a - /// forwards transliterator, or `false` if it is a backwards transliterator. + /// source that maps to the given internal ID. Additionally returns the direction (relative to the metadata) + /// of the passed internal ID. fn lookup_dir_from_internal_id( &self, internal_id: &str, - ) -> Result, DataError> { + ) -> Result, DataError> { for transform in self.cldr_transforms.list_transforms()? { let metadata = self.cldr_transforms.read_and_parse_metadata(&transform)?; let (forwards, backwards) = internal_ids_from_metadata(metadata); if let Some(forwards) = forwards { if forwards == internal_id { - return Ok(Some((transform, true))); + return Ok(Some((transform, icu_transliterator_parser::Direction::Forward))); } } if let Some(backwards) = backwards { if backwards == internal_id { - return Ok(Some((transform, false))); + return Ok(Some((transform, icu_transliterator_parser::Direction::Reverse))); } } } @@ -99,7 +99,7 @@ impl DataProvider for crate::DatagenProvider { // our `supported_locales` use the same mapping mechanism as in lookup_dir_from_internal_id #[allow(clippy::unwrap_used)] - let (transform, is_forwards) = tc.lookup_dir_from_internal_id(&internal_id)?.unwrap(); + let (transform, want_direction) = tc.lookup_dir_from_internal_id(&internal_id)?.unwrap(); let metadata = self .cldr()? @@ -119,24 +119,26 @@ impl DataProvider for crate::DatagenProvider { let source = self.cldr()?.transforms().read_source(&transform)?; - let dir = if is_forwards { - icu_transliterator_parser::Direction::Forward - } else { - icu_transliterator_parser::Direction::Reverse - }; let (forwards, backwards) = - icu_transliterator_parser::parse_unstable(&source, dir, metadata, mapping, self) + icu_transliterator_parser::parse_unstable(&source, want_direction, metadata, mapping, self) .map_err(|e| { DataError::custom("transliterator parsing failed").with_debug_context(&e) })?; - let transliterator = if is_forwards { - // the parser guarantees we receive this - #[allow(clippy::unwrap_used)] - forwards.unwrap() - } else { - // the parser guarantees we receive this - #[allow(clippy::unwrap_used)] - backwards.unwrap() + let transliterator = match want_direction { + icu_transliterator_parser::Direction::Forward => { + // the parser guarantees we receive this + #[allow(clippy::unwrap_used)] + forwards.unwrap() + }, + icu_transliterator_parser::Direction::Reverse => { + // the parser guarantees we receive this + #[allow(clippy::unwrap_used)] + backwards.unwrap() + } + _ => { + // unreachable because `lookup_dir_from_internal_id` only ever returns one direction. + unreachable!("unexpected want_direction") + } }; Ok(DataResponse {