Skip to content

Commit

Permalink
Remove traces
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Oct 24, 2023
1 parent e795a2a commit c97b87f
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 21 deletions.
2 changes: 1 addition & 1 deletion components/segmenter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
//! See [`SentenceSegmenter`] for more examples.
// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
//#![cfg_attr(not(any(test, feature = "std")), no_std)]
#![cfg_attr(not(any(test, feature = "std")), no_std)]
#![cfg_attr(
not(test),
deny(
Expand Down
9 changes: 0 additions & 9 deletions components/segmenter/src/rule_segmenter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<'
let left_prop = self.get_break_property(left_codepoint);
self.advance_iter();

let right_codepoint = self.get_current_codepoint().map_or("????".to_string(), |c| format!("U+{:02X}", c.into()));
let Some(right_prop) = self.get_current_break_property() else {
self.boundary_property = left_prop;
return Some(self.len);
Expand All @@ -127,19 +126,14 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<'

// If break_state is equal to or greater than 0, it is an alias of a property.
let mut break_state = self.get_break_state_from_table(left_prop, right_prop);
let STATE_NAMES = ["Unknown", "CR", "LF", "Extend", "Sep", "Format", "Sp", "Lower", "Upper", "OLetter", "Numeric", "ATerm", "SContinue", "STerm", "Close", "ATerm_Close", "ATerm_Close_Sp", "STerm_Close", "STerm_Close_Sp", "Upper_ATerm", "Lower_ATerm", "ATerm_Close_Sp_SB8", "ATerm_Close_Sp_ParaSep", "ATerm_Close_Sp_CR", "STerm_Close_Sp_ParaSep", "STerm_Close_Sp_CR", "sot", "eot"];
println!("left={:02X} right={:02X} {} state={:02X}", left_prop, right_prop, right_codepoint, break_state);
println!("left={} right={} {}", STATE_NAMES[left_prop as usize], STATE_NAMES[right_prop as usize], right_codepoint);

if break_state >= 0 {
// This isn't a simple rule set. We need a marker to restore the iterator to its previous position.
let mut previous_iter = self.iter.clone();
let mut previous_pos_data = self.current_pos_data;
let mut previous_left_prop = left_prop;

println!("Inner loop");
if (break_state & INTERMEDIATE_MATCH_RULE) != 0 {
println!("going through intermediate match rule");
break_state -= INTERMEDIATE_MATCH_RULE;
}
loop {
Expand All @@ -163,8 +157,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<'

let previous_break_state = break_state;
break_state = self.get_break_state_from_table(break_state as u8, prop);
println!("> left={:02X} right={:02X} state={:02X}", previous_break_state, prop, break_state);
println!("> left={} right={}", STATE_NAMES[(previous_break_state & !INTERMEDIATE_MATCH_RULE) as usize], STATE_NAMES[prop as usize]);
if break_state < 0 {
break;
}
Expand All @@ -177,7 +169,6 @@ impl<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized> Iterator for RuleBreakIterator<'
previous_left_prop = break_state as u8;
}
if (break_state & INTERMEDIATE_MATCH_RULE) != 0 {
println!("going through intermediate match rule");
break_state -= INTERMEDIATE_MATCH_RULE;
previous_iter = self.iter.clone();
previous_pos_data = self.current_pos_data;
Expand Down
6 changes: 0 additions & 6 deletions components/segmenter/src/sentence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ pub struct SentenceBreakIterator<'l, 's, Y: RuleBreakType<'l, 's> + ?Sized>(
RuleBreakIterator<'l, 's, Y>,
);

impl<'l, 's, Y: RuleBreakType<'l, 's>> SentenceBreakIterator<'l, 's, Y> {
pub fn state(&self) -> u8 {
self.0.boundary_property
}
}

derive_usize_iterator_with_type!(SentenceBreakIterator);

/// Sentence break iterator for an `str` (a UTF-8 string).
Expand Down
3 changes: 1 addition & 2 deletions components/segmenter/tests/spec_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,6 @@ fn sentence_break_test(filename: &str) {
let mut iter = segmenter.segment_str(&s);
// TODO(egg): It would be really nice to have Name here.
println!(" | A | E | Code pt. | Sentence_Break | State | Literal");
let STATE_NAMES = ["Unknown", "CR", "LF", "Extend", "Sep", "Format", "Sp", "Lower", "Upper", "OLetter", "Numeric", "ATerm", "SContinue", "STerm", "Close", "ATerm_Close", "ATerm_Close_Sp", "STerm_Close", "STerm_Close_Sp", "Upper_ATerm", "Lower_ATerm", "ATerm_Close_Sp_SB8", "ATerm_Close_Sp_ParaSep", "ATerm_Close_Sp_CR", "STerm_Close_Sp_ParaSep", "STerm_Close_Sp_CR", "sot", "eot"];
for (i, c) in s.char_indices() {
let expected_break = test.break_result_utf8.contains(&i);
let actual_break = result.contains(&i);
Expand All @@ -243,7 +242,7 @@ fn sentence_break_test(filename: &str) {
sb_name
.get(sb.get(c))
.unwrap_or(&format!("{:?}", sb.get(c))),
if actual_break { format!("{:02X} {}", iter.state(), STATE_NAMES[iter.state() as usize]) } else {"??".to_string()},
"??".to_string(),
c
)
}
Expand Down
3 changes: 0 additions & 3 deletions provider/datagen/src/transform/segmenter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;
use icu_segmenter::provider::*;
use icu_segmenter::symbols::*;
use itertools::Itertools;
use std::fmt::Debug;
use zerovec::ZeroVec;

Expand Down Expand Up @@ -470,8 +469,6 @@ impl crate::DatagenProvider {
// sot and eot
properties_names.push("sot".to_string());
properties_names.push("eot".to_string());
println!("{:?}", properties_names);
println!("{}", properties_names.iter().enumerate().map(|(i, name)| format!("{:02X}={}", i, name)).join("\n"));

let rule_size = properties_names.len() * properties_names.len();
let mut break_state_table = vec![UNKNOWN_RULE; rule_size];
Expand Down

0 comments on commit c97b87f

Please sign in to comment.