Skip to content

Commit

Permalink
binary crate
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Jun 5, 2024
1 parent 0f3dfc8 commit b5fc9d8
Show file tree
Hide file tree
Showing 20 changed files with 179 additions and 122 deletions.
17 changes: 15 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ members = [
"provider/datagen",
"provider/fs",
"provider/registry",
"provider/icu4x-datagen",

# Baked data
"provider/data/calendar",
Expand Down
2 changes: 1 addition & 1 deletion components/datetime/tests/data/gen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/sh
cargo run -p icu_datagen -- \
cargo run -p icu4x-datagen -- \
--markers "datetime/gregory/datelengths@1" "datetime/gregory/datesymbols@1" "datetime/timelengths@1" "datetime/timesymbols@1" "decimal/symbols@1" "time_zone/formats@1" "time_zone/specific_short@1" \
--locales en \
--format blob2 \
Expand Down
2 changes: 1 addition & 1 deletion components/experimental/tests/transliterate/data/gen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/sh
cargo run -p icu_datagen --features experimental_components -- \
cargo run -p icu4x-datagen --features experimental_components -- \
--markers "transliterator/rules@1" "normalizer/comp@1" "normalizer/decomp@1" "normalizer/nfd@1" "normalizer/nfdex@1" "normalizer/nfkd@1" "normalizer/nfkdex@1" "normalizer/uts46d@1" \
--locales full \
--runtime-fallback-location external \
Expand Down
31 changes: 1 addition & 30 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ include = [
"LICENSE",
"README.md",
]
default-run = "icu4x-datagen"

authors.workspace = true
categories.workspace = true
Expand Down Expand Up @@ -93,11 +92,6 @@ twox-hash = { workspace = true, optional = true }
ureq = { workspace = true, optional = true }
zip = { workspace = true, features = ["deflate"], optional = true }

# "bin" feature
clap = { workspace = true, optional = true, features = ["derive"] }
eyre = { workspace = true, optional = true }
simple_logger = { workspace = true, optional = true }

[dev-dependencies]
crlify = { path = "../../utils/crlify" }
elsa = { workspace = true }
Expand All @@ -106,7 +100,7 @@ postcard = { workspace = true }
simple_logger = { workspace = true }

[features]
default = ["bin", "use_wasm", "networking", "rayon", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
default = ["use_wasm", "networking", "rayon", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
provider = [
"icu_calendar/datagen",
"icu_casemap/datagen",
Expand Down Expand Up @@ -145,24 +139,7 @@ provider = [
]
baked_exporter = ["dep:icu_provider_baked"]
blob_exporter = ["dep:icu_provider_blob"]
blob_input = [
"dep:icu_provider_blob",
"icu_calendar/datagen",
"icu_casemap/datagen",
"icu_collator/datagen",
"icu_datetime/datagen",
"icu_decimal/datagen",
"icu_list/datagen",
"icu_locale/datagen",
"icu_normalizer/datagen",
"icu_plurals/datagen",
"icu_properties/datagen",
"icu_segmenter/datagen",
"icu_timezone/datagen",
"icu_experimental?/datagen",
]
fs_exporter = ["dep:icu_provider_fs"]
bin = ["dep:clap", "dep:eyre", "dep:simple_logger"]
rayon = ["dep:rayon"]
# Use wasm for building codepointtries
use_wasm = ["icu_codepointtrie_builder?/wasm"]
Expand All @@ -184,12 +161,6 @@ experimental_components = [
"dep:num-traits",
"icu/experimental",
]

[[bin]]
name = "icu4x-datagen"
path = "src/bin/icu4x-datagen.rs"
required-features = ["bin"]

[package.metadata.cargo-all-features]
# We don't need working CPT builders for check
skip_feature_sets = [["use_icu4c"], ["use_wasm"]]
Expand Down
28 changes: 2 additions & 26 deletions provider/datagen/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 2 additions & 46 deletions provider/datagen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,12 @@
#![allow(clippy::needless_doctest_main)]
//! `icu_datagen` is a library to generate data files that can be used in ICU4X data providers.
//!
//! Data files can be generated either programmatically (i.e. in `build.rs`), or through a
//! command-line utility.
//!
//! For command-line usage, see the [`icu4x-datagen` binary](https://crates.io/crates/icu4x-datagen).
//!
//! Also see our [datagen tutorial](https://github.com/unicode-org/icu4x/blob/main/tutorials/data_management.md).
//!
//! # Examples
//!
//! ## Rust API
//!
//! ```no_run
//! use icu_datagen::blob_exporter::*;
//! use icu_datagen::prelude::*;
Expand All @@ -32,22 +28,6 @@
//! .unwrap();
//! ```
//!
//! ## Command line
//!
//! The command line interface can be installed through Cargo.
//!
//! ```bash
//! $ cargo install icu_datagen
//! ```
//!
//! Once the tool is installed, you can invoke it like this:
//!
//! ```bash
//! $ icu4x-datagen --markers all --locales de en-AU --format blob --out data.postcard
//! ```
//!
//! More details can be found by running `--help`.
//!
//! # Cargo features
//!
//! This crate has a lot of dependencies, some of which are not required for all operating modes. These default Cargo features
Expand All @@ -68,13 +48,9 @@
//! * enables parallelism during export
//! * `use_wasm` / `use_icu4c`
//! * see the documentation on [`icu_codepointtrie_builder`](icu_codepointtrie_builder#build-configuration)
//! * `bin`
//! * required by the CLI and enabled by default to make `cargo install` work
//! * `icu_experimental`
//! * `experimental_components`
//! * enables data generation for markers defined in the unstable `icu_experimental` crate
//! * note that this feature affects the behaviour of `all_markers`
//!
//! The meta-feature `experimental_components` is available to activate all experimental components.
#![cfg_attr(
not(test),
Expand Down Expand Up @@ -212,26 +188,6 @@ macro_rules! cb {
Some(Ok(marker)) => Some(marker)
}
}

#[macro_export]
#[doc(hidden)] // macro
macro_rules! make_exportable_provider {
($ty:ty) => {
icu_provider::make_exportable_provider!(
$ty,
[
icu_provider::hello_world::HelloWorldV1Marker,
$(
$marker,
)+
$(
#[cfg(feature = "experimental_components")]
$emarker,
)+
]
);
}
}
}
}
icu_registry::registry!(cb);
Expand Down
2 changes: 1 addition & 1 deletion provider/datagen/src/transform/cldr/calendar/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl DatagenProvider {
return Err(DataError::custom(
"Era data has changed! This can be for two reasons: Either the CLDR locale data for Japanese eras has \
changed in an incompatible way, or there is a new Japanese era. Run \
`ICU4X_SKIP_JAPANESE_INTEGRITY_CHECK=1 cargo run -p icu_datagen -- --markers calendar/japanext@1 --format dir --syntax json \
`ICU4X_SKIP_JAPANESE_INTEGRITY_CHECK=1 cargo run -p icu4x-datagen -- --markers calendar/japanext@1 --format dir --syntax json \
--out provider/datagen/data/japanese-golden --pretty --overwrite` in the icu4x repo and inspect the diff to \
check which situation it is. If a new era has been introduced, commit the diff, if not, it's likely that japanese.rs \
in icu_datagen will need to be updated to handle the data changes."
Expand Down
46 changes: 46 additions & 0 deletions provider/icu4x-datagen/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

# Manifest for the `icu4x-datagen` crate.
[package]
name = "icu4x-datagen"
description = "Generate data for ICU4X DataProvider"
license = "Unicode-3.0"

# The remaining package metadata is inherited from the workspace manifest.
authors.workspace = true
categories.workspace = true
edition.workspace = true
homepage.workspace = true
include.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[dependencies]
icu_provider = { workspace = true, features = ["datagen"] }
icu = { workspace = true }
# `rayon` is requested unconditionally here; this crate does not expose it
# as a feature of its own (see `[features]` below).
icu_datagen = { workspace = true, features = ["rayon"] }
# Optional: these two are only activated by the `blob_input` feature.
icu_provider_blob = { workspace = true, optional = true }
icu_registry = { workspace = true, optional = true }

# NOTE(review): presumably the argument-parsing, error-reporting and logging
# dependencies of the command-line binary; confirm against the crate sources.
clap = { workspace = true, features = ["derive"] }
eyre = { workspace = true }
log = { workspace = true }
simple_logger = { workspace = true }

[features]
default = ["use_wasm", "networking", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
# Unless noted otherwise, each feature forwards to the `icu_datagen` feature
# of the same name.
provider = ["icu_datagen/provider"]
baked_exporter = ["icu_datagen/baked_exporter"]
blob_exporter = ["icu_datagen/blob_exporter"]
# Does not forward to `icu_datagen`; it only enables the optional
# `icu_provider_blob` and `icu_registry` dependencies. Not part of `default`.
blob_input = ["dep:icu_provider_blob", "dep:icu_registry"]
fs_exporter = ["icu_datagen/fs_exporter"]
# Use wasm for building codepointtries
use_wasm = ["icu_datagen/use_wasm"]
# Use local ICU4C libraries for building codepointtries
# (will do nothing if used with `use_wasm`)
# If neither `use_wasm` nor `use_icu4c` is enabled,
# rule based segmenter data will not be generated.
use_icu4c = ["icu_datagen/use_icu4c"]
networking = ["icu_datagen/networking"]
# Forwards to `icu_datagen` and additionally enables `icu/experimental`.
experimental_components = ["icu_datagen/experimental_components", "icu/experimental"]
46 changes: 46 additions & 0 deletions provider/icu4x-datagen/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2020-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0


Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
18 changes: 18 additions & 0 deletions provider/icu4x-datagen/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b5fc9d8

Please sign in to comment.