Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch locid Value to use Subtag #4941

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/calendar/src/any_calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ impl AnyCalendarKind {
/// Returns `None` if the calendar is unknown. If you prefer an error, use
/// [`CalendarError::unknown_any_calendar_kind`].
pub fn get_for_bcp47_value(x: &Value) -> Option<Self> {
match *x.as_tinystr_slice() {
match x.as_subtags_slice() {
[first] if first == "buddhist" => Some(AnyCalendarKind::Buddhist),
[first] if first == "chinese" => Some(AnyCalendarKind::Chinese),
[first] if first == "coptic" => Some(AnyCalendarKind::Coptic),
Expand Down
18 changes: 10 additions & 8 deletions components/datetime/src/calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ use icu_calendar::{
islamic::IslamicUmmAlQura, japanese::Japanese, japanese::JapaneseExtended, persian::Persian,
Gregorian,
};
use icu_locale_core::extensions::unicode::{value, Value};
use icu_locale_core::{
extensions::unicode::{value, Value},
subtags::{subtag, Subtag},
};
use icu_provider::prelude::*;
use tinystr::{tinystr, TinyAsciiStr};

#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::provider::neo::*;
Expand Down Expand Up @@ -84,9 +86,9 @@ pub trait CldrCalendar: InternalCldrCalendar {
}

/// Check if the provided value is of the form `islamic-{subcal}`
fn is_islamic_subcal(value: &Value, subcal: TinyAsciiStr<8>) -> bool {
if let &[first, second] = value.as_tinystr_slice() {
first == "islamic" && second == subcal
fn is_islamic_subcal(value: &Value, subcal: Subtag) -> bool {
if let &[first, second] = value.as_subtags_slice() {
first == *"islamic" && second == subcal
} else {
false
}
Expand Down Expand Up @@ -243,7 +245,7 @@ impl CldrCalendar for IslamicCivil {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
*value == value!("islamicc") || is_islamic_subcal(value, tinystr!(8, "civil"))
*value == value!("islamicc") || is_islamic_subcal(value, subtag!("civil"))
}
}

Expand Down Expand Up @@ -277,7 +279,7 @@ impl CldrCalendar for IslamicTabular {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "tbla"))
is_islamic_subcal(value, subtag!("tbla"))
}
}

Expand All @@ -297,7 +299,7 @@ impl CldrCalendar for IslamicUmmAlQura {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "umalqura"))
is_islamic_subcal(value, subtag!("umalqura"))
}
}

Expand Down
15 changes: 8 additions & 7 deletions components/datetime/src/options/preferences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ use crate::fields;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use icu_locale_core::extensions::unicode::key;
use icu_locale_core::{
extensions::unicode::key,
subtags::{subtag, Subtag},
};
use icu_provider::DataLocale;
use tinystr::tinystr;
use tinystr::TinyAsciiStr;

/// Stores user preferences which may affect the result of date and time formatting.
///
Expand Down Expand Up @@ -81,10 +82,10 @@ impl Bag {

/// Construct a [`Bag`] from a given [`DataLocale`]
pub(crate) fn from_data_locale(data_locale: &DataLocale) -> Self {
const H11: TinyAsciiStr<8> = tinystr!(8, "h11");
const H12: TinyAsciiStr<8> = tinystr!(8, "h12");
const H23: TinyAsciiStr<8> = tinystr!(8, "h23");
const H24: TinyAsciiStr<8> = tinystr!(8, "h24");
const H11: Subtag = subtag!("h11");
const H12: Subtag = subtag!("h12");
const H23: Subtag = subtag!("h23");
const H24: Subtag = subtag!("h24");
let hour_cycle = match data_locale
.get_unicode_ext(&key!("hc"))
.and_then(|v| v.as_single_subtag().copied())
Expand Down
8 changes: 5 additions & 3 deletions components/datetime/src/provider/date_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,10 @@ where
fn skeleton_data_payload(
&self,
) -> Result<DataPayload<DateSkeletonPatternsV1Marker>, DataError> {
use icu_locale_core::extensions::unicode::{key, value};
use tinystr::tinystr;
use icu_locale_core::{
extensions::unicode::{key, value},
subtags::subtag,
};
let mut locale = self.locale.clone();
#[allow(clippy::expect_used)] // experimental
let cal_val = self.cal_val.expect("should be present for components bag");
Expand All @@ -356,7 +358,7 @@ where
locale.set_unicode_ext(key!("ca"), value!("ethiopic"));
} else if cal_val == &value!("islamic")
|| cal_val == &value!("islamicc")
|| cal_val.as_tinystr_slice().first() == Some(&tinystr!(8, "islamic"))
|| cal_val.as_subtags_slice().first() == Some(&subtag!("islamic"))
{
// All islamic calendars store skeleton data under islamic, not their individual extension keys
locale.set_unicode_ext(key!("ca"), value!("islamic"));
Expand Down
4 changes: 2 additions & 2 deletions components/locale/src/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,12 @@ impl LocaleCanonicalizer {
if !extensions.unicode.keywords.is_empty() {
for key in [key!("rg"), key!("sd")] {
if let Some(value) = extensions.unicode.keywords.get_mut(&key) {
if let &[only_value] = value.as_tinystr_slice() {
if let Some(only_value) = value.as_single_subtag() {
if let Some(modified_value) = self
.aliases
.get()
.subdivision
.get(&only_value.resize().to_unvalidated())
.get(&only_value.into_tinystr().resize().to_unvalidated())
{
if let Ok(modified_value) = modified_value.parse() {
*value = modified_value;
Expand Down
39 changes: 24 additions & 15 deletions components/locale_core/src/extensions/transform/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

use crate::parser::{ParserError, SubtagIterator};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::{subtag, Subtag};
use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

/// A value used in a list of [`Fields`](super::Fields).
///
Expand All @@ -27,10 +27,10 @@ use tinystr::TinyAsciiStr;
/// "no".parse::<Value>().expect_err("Invalid Value.");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);
pub struct Value(ShortBoxSlice<Subtag>);

const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
const TRUE_TVALUE: Subtag = subtag!("true");

impl Value {
/// A constructor which takes a utf8 slice, parses it and
Expand All @@ -52,8 +52,7 @@ impl Value {
return Err(ParserError::InvalidExtension);
}
has_value = true;
let val =
TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
let val = Subtag::try_from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
if val != TRUE_TVALUE {
v.push(val);
}
Expand All @@ -65,23 +64,19 @@ impl Value {
Ok(Self(v))
}

pub(crate) fn from_short_slice_unchecked(
input: ShortBoxSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>,
) -> Self {
pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<Subtag>) -> Self {
Self(input)
}

/// Returns `true` when `t` could be a type subtag of a transform-extension value:
/// its length lies within `TYPE_LENGTH` (3 to 8 bytes inclusive) and every byte
/// is an ASCII letter or digit.
///
/// This is a pure syntactic check on the raw bytes; it does not build a subtag.
pub(crate) fn is_type_subtag(t: &[u8]) -> bool {
TYPE_LENGTH.contains(&t.len()) && t.iter().all(u8::is_ascii_alphanumeric)
}

pub(crate) fn parse_subtag(
t: &[u8],
) -> Result<Option<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>, ParserError> {
let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;
if !TYPE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() {
pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParserError> {
if !TYPE_LENGTH.contains(&t.len()) {
return Err(ParserError::InvalidExtension);
}
let s = Subtag::try_from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;

let s = s.to_ascii_lowercase();

Expand All @@ -97,9 +92,9 @@ impl Value {
F: FnMut(&str) -> Result<(), E>,
{
if self.0.is_empty() {
f("true")?;
f(TRUE_TVALUE.as_str())?;
} else {
self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?;
self.0.iter().map(Subtag::as_str).try_for_each(f)?;
}
Ok(())
}
Expand Down Expand Up @@ -132,3 +127,17 @@ fn test_writeable() {
"hybrid-foobar"
);
}

// Exercises `Value::from_str` with subtags at and below the bounds of `TYPE_LENGTH` (3..=8).
#[test]
fn test_short_tvalue() {
// "foo" (3 chars) and "longstag" (8 chars) sit at the two ends of the allowed range.
let value = Value::from_str("foo-longstag");
assert!(value.is_ok());
let value = value.unwrap();
// Both subtags are kept, in input order.
assert_eq!(value.0.len(), 2);
for (s, reference) in value.0.iter().zip(&[subtag!("foo"), subtag!("longstag")]) {
assert_eq!(s, reference);
}

// A value containing the two-character subtag "ba" must fail to parse.
// NOTE(review): presumably rejected by the 3-character minimum in `TYPE_LENGTH`;
// the rejecting check is not visible in this hunk — confirm against the parser.
let value = Value::from_str("foo-ba");
assert!(value.is_err());
}
91 changes: 28 additions & 63 deletions components/locale_core/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@

use crate::parser::{ParserError, SubtagIterator};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::{subtag, Subtag};
use alloc::vec::Vec;
use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

/// A value used in a list of [`Keywords`](super::Keywords).
///
Expand All @@ -33,10 +32,9 @@ use tinystr::TinyAsciiStr;
/// assert_eq!(value!("true").to_string(), "");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
pub struct Value(ShortBoxSlice<Subtag>);

const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
const TRUE_VALUE: Subtag = subtag!("true");

impl Value {
/// A constructor which takes a utf8 slice, parses it and
Expand All @@ -53,53 +51,31 @@ impl Value {
let mut v = ShortBoxSlice::new();

if !input.is_empty() {
for subtag in SubtagIterator::new(input) {
let val = Self::subtag_from_bytes(subtag)?;
if let Some(val) = val {
v.push(val);
for chunk in SubtagIterator::new(input) {
let subtag = Subtag::try_from_bytes(chunk)?;
if subtag != TRUE_VALUE {
v.push(subtag);
}
}
}
Ok(Self(v))
}

/// Const constructor for when the value contains only a single subtag.
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::Value;
///
/// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
/// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
/// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
/// ```
pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
match Self::subtag_from_bytes(subtag) {
Err(_) => Err(ParserError::InvalidExtension),
Ok(option) => Ok(Self::from_tinystr(option)),
}
}

#[doc(hidden)]
pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
&self.0
pub const fn as_single_subtag(&self) -> Option<&Subtag> {
self.0.single()
}

#[doc(hidden)]
pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
self.0.single()
pub fn as_subtags_slice(&self) -> &[Subtag] {
&self.0
}

#[doc(hidden)]
pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
pub const fn from_subtag(subtag: Option<Subtag>) -> Self {
match subtag {
None => Self(ShortBoxSlice::new()),
Some(val) => {
debug_assert!(val.is_ascii_alphanumeric());
debug_assert!(!matches!(val, TRUE_VALUE));
Self(ShortBoxSlice::new_single(val))
}
None | Some(TRUE_VALUE) => Self(ShortBoxSlice::new()),
Some(val) => Self(ShortBoxSlice::new_single(val)),
}
}

Expand All @@ -110,11 +86,11 @@ impl Value {
///
/// ```
/// use icu::locale::extensions::unicode::Value;
/// use tinystr::{TinyAsciiStr, tinystr};
/// use icu::locale::subtags::subtag;
///
/// let tinystr1: TinyAsciiStr<8> = tinystr!(8, "foobar");
/// let tinystr2: TinyAsciiStr<8> = tinystr!(8, "testing");
/// let mut v = vec![tinystr1, tinystr2];
/// let subtag1 = subtag!("foobar");
/// let subtag2 = subtag!("testing");
/// let mut v = vec![subtag1, subtag2];
/// v.sort();
/// v.dedup();
///
Expand All @@ -124,37 +100,26 @@ impl Value {
/// Notice: For performance- and memory-constrained environments, it is recommended
/// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
/// and [`dedup`](Vec::dedup()).
pub fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self {
pub fn from_vec_unchecked(input: Vec<Subtag>) -> Self {
Self(input.into())
}

pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<TinyAsciiStr<8>>) -> Self {
pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<Subtag>) -> Self {
Self(input)
}

#[doc(hidden)]
pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParserError> {
Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
}

pub(crate) const fn parse_subtag_from_bytes_manual_slice(
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
let slice_len = end - start;
if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
return Err(ParserError::InvalidExtension);
}

match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
) -> Result<Option<Subtag>, ParserError> {
match Subtag::try_from_bytes_manual_slice(bytes, start, end) {
Ok(TRUE_VALUE) => Ok(None),
Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
Ok(_) => Err(ParserError::InvalidExtension),
Ok(s) => Ok(Some(s)),
Err(_) => Err(ParserError::InvalidSubtag),
}
}
Expand All @@ -163,7 +128,7 @@ impl Value {
where
F: FnMut(&str) -> Result<(), E>,
{
self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)
self.0.iter().map(Subtag::as_str).try_for_each(f)
}
}

Expand Down Expand Up @@ -209,9 +174,9 @@ macro_rules! extensions_unicode_value {
// };
// Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
const R: $crate::extensions::unicode::Value =
$crate::extensions::unicode::Value::from_tinystr(
match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
Ok(r) => r,
$crate::extensions::unicode::Value::from_subtag(
match $crate::subtags::Subtag::try_from_bytes($value.as_bytes()) {
Ok(r) => Some(r),
_ => panic!(concat!("Invalid Unicode extension value: ", $value)),
},
);
Expand Down
Loading
Loading