From c97b87f723c479ade04783471e4dc97b37a70da2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 24 Oct 2023 15:53:30 +0200 Subject: [PATCH] Remove traces --- components/segmenter/src/lib.rs | 2 +- components/segmenter/src/rule_segmenter.rs | 9 --------- components/segmenter/src/sentence.rs | 6 ------ components/segmenter/tests/spec_test.rs | 3 +-- provider/datagen/src/transform/segmenter/mod.rs | 3 --- 5 files changed, 2 insertions(+), 21 deletions(-) diff --git a/components/segmenter/src/lib.rs b/components/segmenter/src/lib.rs index 74ef588e749..b286c4e312d 100644 --- a/components/segmenter/src/lib.rs +++ b/components/segmenter/src/lib.rs @@ -100,7 +100,7 @@ //! See [`SentenceSegmenter`] for more examples. // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations -//#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr(not(any(test, feature = "std")), no_std)] #![cfg_attr( not(test), deny( diff --git a/components/segmenter/src/rule_segmenter.rs b/components/segmenter/src/rule_segmenter.rs index dbf1a740e44..c70986fa1b0 100644 --- a/components/segmenter/src/rule_segmenter.rs +++ b/components/segmenter/src/rule_segmenter.rs @@ -105,7 +105,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<' let left_prop = self.get_break_property(left_codepoint); self.advance_iter(); - let right_codepoint = self.get_current_codepoint().map_or("????".to_string(), |c| format!("U+{:02X}", c.into())); let Some(right_prop) = self.get_current_break_property() else { self.boundary_property = left_prop; return Some(self.len); @@ -127,9 +126,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<' // If break_state is equals or grater than 0, it is alias of property. let mut break_state = self.get_break_state_from_table(left_prop, right_prop); - let STATE_NAMES = ["Unknown", "CR", "LF", "Extend", "Sep", "Format", "Sp", "Lower", "Upper", "OLetter", "Numeric", "ATerm", "SContinue", "STerm", "Close", "ATerm_Close", "ATerm_Close_Sp", "STerm_Close", "STerm_Close_Sp", "Upper_ATerm", "Lower_ATerm", "ATerm_Close_Sp_SB8", "ATerm_Close_Sp_ParaSep", "ATerm_Close_Sp_CR", "STerm_Close_Sp_ParaSep", "STerm_Close_Sp_CR", "sot", "eot"]; - println!("left={:02X} right={:02X} {} state={:02X}", left_prop, right_prop, right_codepoint, break_state); - println!("left={} right={} {}", STATE_NAMES[left_prop as usize], STATE_NAMES[right_prop as usize], right_codepoint); if break_state >= 0 { // This isn't simple rule set. We need marker to restore iterator to previous position. @@ -137,9 +133,7 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<' let mut previous_pos_data = self.current_pos_data; let mut previous_left_prop = left_prop; - println!("Inner loop"); if (break_state & INTERMEDIATE_MATCH_RULE) != 0 { - println!("going through intermediate match rule"); break_state -= INTERMEDIATE_MATCH_RULE; } loop { @@ -163,8 +157,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<' let previous_break_state = break_state; break_state = self.get_break_state_from_table(break_state as u8, prop); - println!("> left={:02X} right={:02X} state={:02X}", previous_break_state, prop, break_state); - println!("> left={} right={}", STATE_NAMES[(previous_break_state & !INTERMEDIATE_MATCH_RULE) as usize], STATE_NAMES[prop as usize]); if break_state < 0 { break; } @@ -177,7 +169,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<' previous_left_prop = break_state as u8; } if (break_state & INTERMEDIATE_MATCH_RULE) != 0 { - println!("going through intermediate match rule"); break_state -= INTERMEDIATE_MATCH_RULE; previous_iter = self.iter.clone(); previous_pos_data = self.current_pos_data; diff --git a/components/segmenter/src/sentence.rs b/components/segmenter/src/sentence.rs index 805297a7ba5..05173f9eb5f 100644 --- a/components/segmenter/src/sentence.rs +++ b/components/segmenter/src/sentence.rs @@ -28,12 +28,6 @@ pub struct SentenceBreakIterator<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized>( RuleBreakIterator<'l, 's, Y>, ); -impl<'l, 's, Y: RuleBreakType<'l, 's>> SentenceBreakIterator<'l, 's, Y> { - pub fn state(&self) -> u8 { - self.0.boundary_property - } -} - derive_usize_iterator_with_type!(SentenceBreakIterator); /// Sentence break iterator for an `str` (a UTF-8 string). diff --git a/components/segmenter/tests/spec_test.rs b/components/segmenter/tests/spec_test.rs index 76e47839655..c8f8fba41fe 100644 --- a/components/segmenter/tests/spec_test.rs +++ b/components/segmenter/tests/spec_test.rs @@ -225,7 +225,6 @@ fn sentence_break_test(filename: &str) { let mut iter = segmenter.segment_str(&s); // TODO(egg): It would be really nice to have Name here. println!(" | A | E | Code pt. | Sentence_Break | State | Literal"); - let STATE_NAMES = ["Unknown", "CR", "LF", "Extend", "Sep", "Format", "Sp", "Lower", "Upper", "OLetter", "Numeric", "ATerm", "SContinue", "STerm", "Close", "ATerm_Close", "ATerm_Close_Sp", "STerm_Close", "STerm_Close_Sp", "Upper_ATerm", "Lower_ATerm", "ATerm_Close_Sp_SB8", "ATerm_Close_Sp_ParaSep", "ATerm_Close_Sp_CR", "STerm_Close_Sp_ParaSep", "STerm_Close_Sp_CR", "sot", "eot"]; for (i, c) in s.char_indices() { let expected_break = test.break_result_utf8.contains(&i); let actual_break = result.contains(&i); @@ -243,7 +242,7 @@ fn sentence_break_test(filename: &str) { sb_name .get(sb.get(c)) .unwrap_or(&format!("{:?}", sb.get(c))), - if actual_break { format!("{:02X} {}", iter.state(), STATE_NAMES[iter.state() as usize]) } else {"??".to_string()}, + "??".to_string(), c ) } diff --git a/provider/datagen/src/transform/segmenter/mod.rs b/provider/datagen/src/transform/segmenter/mod.rs index e1b9a121d1b..0d01faa5ca4 100644 --- a/provider/datagen/src/transform/segmenter/mod.rs +++ b/provider/datagen/src/transform/segmenter/mod.rs @@ -17,7 +17,6 @@ use icu_provider::datagen::IterableDataProvider; use icu_provider::prelude::*; use icu_segmenter::provider::*; use icu_segmenter::symbols::*; -use itertools::Itertools; use std::fmt::Debug; use zerovec::ZeroVec; @@ -470,8 +469,6 @@ impl crate::DatagenProvider { // sot and eot properties_names.push("sot".to_string()); properties_names.push("eot".to_string()); - println!("{:?}", properties_names); - println!("{}", properties_names.iter().enumerate().map(|(i, name)| format!("{:02X}={}", i, name)).join("\n")); let rule_size = properties_names.len() * properties_names.len(); let mut break_state_table = vec![UNKNOWN_RULE; rule_size];