Skip to content

Commit

Permalink
provider
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Jun 13, 2024
1 parent 275f3f7 commit 29ee634
Show file tree
Hide file tree
Showing 821 changed files with 2,352 additions and 2,176 deletions.
34 changes: 27 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ members = [
# Provider
"provider/adapters",
"provider/baked",
"provider/bikeshed",
"provider/blob",
"provider/core",
"provider/core/macros",
Expand Down Expand Up @@ -150,6 +151,7 @@ icu_harfbuzz = { version = "~0.2.0", path = "ffi/harfbuzz", default-features = f

# Provider
icu_datagen = { version = "~1.5.0", path = "provider/datagen", default-features = false }
icu_datagen_bikeshed = { version = "~1.5.0", path = "provider/bikeshed", default-features = false }
icu_provider = { version = "~1.5.0", path = "provider/core", default-features = false }
icu_provider_macros = { version = "~1.5.0", path = "provider/core/macros", default-features = false }
icu_provider_adapters = { version = "~1.5.0", path = "provider/adapters", default-features = false }
Expand Down
2 changes: 2 additions & 0 deletions provider/bikeshed/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
data/** linguist-generated=true
tests/data/** linguist-generated=true
100 changes: 100 additions & 0 deletions provider/bikeshed/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "icu_datagen_bikeshed"
description = "A data provider based on CLDR and ICU data."
license = "Unicode-3.0"
include = [
"data/**/*",
"src/**/*",
"examples/**/*",
"benches/**/*",
"tests/**/*",
"!tests/data/json/**/*",
"!tests/data/postcard/**/*",
"Cargo.toml",
"LICENSE",
"README.md",
]

authors.workspace = true
categories.workspace = true
edition.workspace = true
homepage.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[package.metadata.docs.rs]
all-features = true

[dependencies]

# ICU components
icu = { workspace = true, features = ["datagen"] }

# ICU infrastructure
calendrical_calculations = { workspace = true }
icu_codepointtrie_builder = { workspace = true }
icu_collections = { workspace = true, features = ["serde"] }
icu_pattern = { workspace = true, features = ["alloc"] }
icu_provider = { workspace = true, features = ["std", "logging", "datagen"]}
icu_provider_adapters = { workspace = true }
icu_registry = { workspace = true }
litemap = { workspace = true, features = ["serde"] }
tinystr = { workspace = true, features = ["alloc", "serde", "zerovec"] }
writeable = { workspace = true }
zerotrie = { workspace = true, features = ["alloc"] }
zerovec = { workspace = true, features = ["serde", "yoke"] }

# External dependencies
displaydoc = { workspace = true }
either = { workspace = true }
elsa = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
ndarray = { workspace = true }
serde = { workspace = true, features = ["derive", "alloc"] }
serde_json = { workspace = true }
serde-aux = { workspace = true }
toml = { workspace = true }
twox-hash = { workspace = true }
zip = { workspace = true, features = ["deflate"] }

# `networking` feature
ureq = { workspace = true, optional = true}

# `experimental` feature
icu_experimental = { workspace = true, features = ["datagen"], optional = true }
num-bigint = { workspace = true, optional = true }
num-rational = { workspace = true, optional = true }
num-traits = { workspace = true, optional = true }

[dev-dependencies]
postcard = { workspace = true, features = ["alloc"] }
icu_datagen = { workspace = true, features = ["experimental"] }
icu_provider = { workspace = true, features = ["deserialize_postcard_1"] }
icu_segmenter = { path = "../../components/segmenter", features = ["lstm"] }

[features]
default = ["use_wasm", "networking"]
# Use wasm for building codepointtries
use_wasm = ["icu_codepointtrie_builder/wasm"]
# Use local ICU4C libraries for building codepointtries
# (will do nothing if used with `use_wasm`)
# If neither `use_wasm` nor `use_icu4c` is enabled,
# rule-based segmenter data will not be generated.
use_icu4c = ["icu_codepointtrie_builder/icu4c"]
networking = ["dep:ureq"]
experimental = [
"icu/experimental",
"dep:num-bigint",
"dep:num-rational",
"dep:num-traits",
]

[package.metadata.cargo-all-features]
# We don't need working CPT builders for check
skip_feature_sets = [["use_icu4c"], ["use_wasm"]]
46 changes: 46 additions & 0 deletions provider/bikeshed/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2020-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0


Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
24 changes: 24 additions & 0 deletions provider/bikeshed/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::collections::HashSet;

use crate::provider::DatagenProvider;
use crate::DatagenProvider;
use calendrical_calculations::chinese_based::{Chinese, ChineseBased, Dangi};
use icu::calendar::provider::chinese_based::*;
use icu_provider::datagen::IterableDataProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::collections::HashSet;

use crate::provider::DatagenProvider;
use crate::DatagenProvider;
use calendrical_calculations::islamic::{
IslamicBasedMarker, ObservationalIslamicMarker, SaudiIslamicMarker,
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::provider::transform::cldr::cldr_serde;
use crate::provider::DatagenProvider;
use crate::cldr_serde;
use crate::DatagenProvider;
use icu::calendar::provider::*;
use icu::locale::langid;
use icu_provider::datagen::IterableDataProvider;
Expand All @@ -16,8 +16,7 @@ use std::sync::OnceLock;
use tinystr::tinystr;
use tinystr::TinyStr16;

const JAPANEXT_FILE: &str =
include_str!("../../../../data/japanese-golden/calendar/japanext@1/und.json");
const JAPANEXT_FILE: &str = include_str!("../../data/japanese-golden/calendar/japanext@1/und.json");

impl DatagenProvider {
fn load_japanese_eras(
Expand Down Expand Up @@ -102,7 +101,7 @@ impl DatagenProvider {
`ICU4X_SKIP_JAPANESE_INTEGRITY_CHECK=1 cargo run -p icu4x-datagen -- --markers calendar/japanext@1 --format dir --syntax json \
--out provider/datagen/data/japanese-golden --pretty --overwrite` in the icu4x repo and inspect the diff to \
check which situation it is. If a new era has been introduced, commit the diff, if not, it's likely that japanese.rs \
in icu_datagen will need to be updated to handle the data changes."
in icu_datagen_bikeshed will need to be updated to handle the data changes."
));
}
}
Expand Down Expand Up @@ -193,7 +192,7 @@ impl IterableDataProvider<JapaneseExtendedErasV1Marker> for DatagenProvider {
}

/// Computes the japanese era code map or loads from static cache
pub(in crate::provider) fn get_era_code_map() -> &'static BTreeMap<String, TinyStr16> {
pub(crate) fn get_era_code_map() -> &'static BTreeMap<String, TinyStr16> {
static MAP: OnceLock<BTreeMap<String, TinyStr16>> = OnceLock::new();

MAP.get_or_init(|| {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

/// Data for calendar arithmetic
pub(in crate::provider) mod japanese;
pub(crate) mod japanese;

/// Cached data for chinese-based calendars
pub(in crate::provider) mod chinese_based;
pub(crate) mod chinese_based;

/// Cached data for islamic calendars
pub(in crate::provider) mod islamic;
pub(crate) mod islamic;
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
use std::collections::HashSet;
use std::marker::PhantomData;

use crate::provider::transform::cldr::cldr_serde;
use crate::provider::DatagenProvider;
use crate::provider::IterableDataProviderCached;
use crate::cldr_serde;
use crate::DatagenProvider;
use crate::IterableDataProviderCached;
use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu::properties::provider::*;
use icu_provider::prelude::*;
Expand Down
Loading

0 comments on commit 29ee634

Please sign in to comment.