From c84b330a30c0b6b8b71ec5d2bffab05928a715d0 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 11:09:51 -0400
Subject: [PATCH 1/4] Add 'nrounds' as an alias for 'num_iterations'
---
R-package/R/aliases.R | 1 +
R-package/tests/testthat/test_basic.R | 54 +++++++++++++++++++++++++++
docs/Parameters.rst | 2 +-
include/LightGBM/config.h | 2 +-
python-package/lightgbm/basic.py | 1 +
src/io/config_auto.cpp | 1 +
6 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R
index 5fcba4f46fe7..7cd3245727af 100644
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -113,6 +113,7 @@
, "num_trees"
, "num_round"
, "num_rounds"
+ , "nrounds"
, "num_boost_round"
, "n_estimators"
, "max_iter"
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 5aff8e2c2d2c..cc717063d253 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -188,6 +188,60 @@ test_that("lightgbm() rejects negative or 0 value passed to nrounds", {
}
})
+test_that("lightgbm() accepts nrounds as either a top-level argument or parameter", {
+ nrounds <- 15L
+
+ set.seed(708L)
+ top_level_bst <- lightgbm(
+ data = train$data
+ , label = train$label
+ , nrounds = nrounds
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ )
+ , save_name = tempfile(fileext = ".model")
+ )
+
+ set.seed(708L)
+ param_bst <- lightgbm(
+ data = train$data
+ , label = train$label
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ , nrounds = nrounds
+ )
+ , save_name = tempfile(fileext = ".model")
+ )
+
+ set.seed(708L)
+ both_customized <- lightgbm(
+ data = train$data
+ , label = train$label
+ , nrounds = 20L
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ , nrounds = nrounds
+ )
+ , save_name = tempfile(fileext = ".model")
+ )
+
+ expect_equal(param_bst$current_iter(), top_level_bst$current_iter())
+ expect_equal(param_bst$best_score
+ , top_level_bst$best_score
+ , tolerance = TOLERANCE)
+
+ expect_equal(param_bst$current_iter(), both_customized$current_iter())
+ expect_equal(param_bst$best_score
+ , both_customized$best_score
+ , tolerance = TOLERANCE)
+})
+
test_that("lightgbm() performs evaluation on validation sets if they are provided", {
set.seed(708L)
dvalid1 <- lgb.Dataset(
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 5faa9af9fd31..8a37bbf90dc7 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -153,7 +153,7 @@ Core Parameters
- **Note**: can be used only in CLI version
-- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``nrounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
- number of boosting iterations
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 45fffa432819..e167f73284a6 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -161,7 +161,7 @@ struct Config {
// desc = **Note**: can be used only in CLI version
std::vector<std::string> valid;
- // alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators, max_iter
+ // alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, nrounds, num_boost_round, n_estimators, max_iter
// check = >=0
// desc = number of boosting iterations
// desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 83a4b5c071da..eb7d4a16b813 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -386,6 +386,7 @@ class _ConfigAliases:
"num_trees",
"num_round",
"num_rounds",
+ "nrounds",
"num_boost_round",
"n_estimators",
"max_iter"},
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 4e3f000a88f5..cd24790b820c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -33,6 +33,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
{"num_trees", "num_iterations"},
{"num_round", "num_iterations"},
{"num_rounds", "num_iterations"},
+ {"nrounds", "num_iterations"},
{"num_boost_round", "num_iterations"},
{"n_estimators", "num_iterations"},
{"max_iter", "num_iterations"},
From d47296c4a7c80dc2723a9816b4787ab9af130b96 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 17:06:02 -0400
Subject: [PATCH 2/4] Improve tests
---
R-package/tests/testthat/test_basic.R | 94 ++++++++++++++++++++++++---
1 file changed, 85 insertions(+), 9 deletions(-)
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index cc717063d253..f2ae5c549462 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -230,16 +230,23 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
)
, save_name = tempfile(fileext = ".model")
)
- expect_equal(param_bst$current_iter(), top_level_bst$current_iter())
- expect_equal(param_bst$best_score
- , top_level_bst$best_score
- , tolerance = TOLERANCE)
-
- expect_equal(param_bst$current_iter(), both_customized$current_iter())
- expect_equal(param_bst$best_score
- , both_customized$best_score
- , tolerance = TOLERANCE)
+
+ top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
+ param_l2 <- param_bst$eval_train()[[1L]][["value"]]
+ both_l2 <- both_customized$eval_train()[[1L]][["value"]]
+
+ # check type just to be sure the subsetting didn't return a NULL
+ expect_true(is.numeric(top_level_l2))
+ expect_true(is.numeric(param_l2))
+ expect_true(is.numeric(both_l2))
+
+ # check that model produces identical performance
+ expect_identical(top_level_l2, param_l2)
+ expect_identical(both_l2, param_l2)
+
+ expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
+ expect_identical(param_bst$current_iter(), both_customized$current_iter())
})
test_that("lightgbm() performs evaluation on validation sets if they are provided", {
@@ -521,6 +528,75 @@ test_that("lgb.train() rejects negative or 0 value passed to nrounds", {
}
})
+
+test_that("lgb.train() accepts nrounds as either a top-level argument or parameter", {
+ nrounds <- 15L
+
+ set.seed(708L)
+ top_level_bst <- lgb.train(
+ data = lgb.Dataset(
+ train$data
+ , label = train$label
+ )
+ , nrounds = nrounds
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ , save_name = tempfile(fileext = ".model")
+ )
+ )
+
+ set.seed(708L)
+ param_bst <- lgb.train(
+ data = lgb.Dataset(
+ train$data
+ , label = train$label
+ )
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ , nrounds = nrounds
+ , save_name = tempfile(fileext = ".model")
+ )
+ )
+
+ set.seed(708L)
+ both_customized <- lgb.train(
+ data = lgb.Dataset(
+ train$data
+ , label = train$label
+ )
+ , nrounds = 20L
+ , params = list(
+ objective = "regression"
+ , metric = "l2"
+ , num_leaves = 5L
+ , nrounds = nrounds
+ , save_name = tempfile(fileext = ".model")
+ )
+ )
+
+ top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
+ params_l2 <- param_bst$eval_train()[[1L]][["value"]]
+ both_l2 <- both_customized$eval_train()[[1L]][["value"]]
+
+ # check type just to be sure the subsetting didn't return a NULL
+ expect_true(is.numeric(top_level_l2))
+ expect_true(is.numeric(params_l2))
+ expect_true(is.numeric(both_l2))
+
+ # check that model produces identical performance
+ expect_identical(top_level_l2, params_l2)
+ expect_identical(both_l2, params_l2)
+
+ expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
+ expect_identical(param_bst$current_iter(), both_customized$current_iter())
+
+})
+
+
test_that("lgb.train() throws an informative error if 'data' is not an lgb.Dataset", {
bad_values <- list(
4L
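[Note: the eval_train() subsetting in these tests assumes the shape of the Booster's evaluation results in the R package. Roughly, as a sketch (field names as this patch series understands the R API):

    results <- bst$eval_train()
    # one element per metric; each element is a named list, e.g.
    # results[[1L]][["name"]]   -> "l2"
    # results[[1L]][["value"]]  -> numeric training loss
    l2 <- results[[1L]][["value"]]

Because all three boosters see the same data, seed, and resolved parameters, training is deterministic, so the L2 values can be compared with expect_identical() instead of a tolerance-based expect_equal().]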
From 302871686da4d25f91144db75f397c0d482003f2 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 17:11:51 -0400
Subject: [PATCH 3/4] Compare against nrounds directly
---
R-package/tests/testthat/test_basic.R | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index f2ae5c549462..62acb8eadaf6 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -232,20 +232,21 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
)
top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
- param_l2 <- param_bst$eval_train()[[1L]][["value"]]
+ params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
# check type just to be sure the subsetting didn't return a NULL
expect_true(is.numeric(top_level_l2))
- expect_true(is.numeric(param_l2))
+ expect_true(is.numeric(params_l2))
expect_true(is.numeric(both_l2))
# check that model produces identical performance
- expect_identical(top_level_l2, param_l2)
- expect_identical(both_l2, param_l2)
+ expect_identical(top_level_l2, params_l2)
+ expect_identical(both_l2, params_l2)
expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
expect_identical(param_bst$current_iter(), both_customized$current_iter())
+ expect_identical(param_bst$current_iter(), nrounds)
})
@@ -593,6 +594,7 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
expect_identical(param_bst$current_iter(), both_customized$current_iter())
+ expect_identical(param_bst$current_iter(), nrounds)
})
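[Note: comparing against 'nrounds' directly with expect_identical() only works because the tests define nrounds as the integer literal 15L and current_iter() returns an integer. expect_identical() is type-strict, unlike expect_equal():

    identical(15L, 15L)         # TRUE
    identical(15L, 15)          # FALSE: integer vs. double
    isTRUE(all.equal(15L, 15))  # TRUE: the expect_equal()-style comparison
]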
From 00704025b47988372cee11b10bef2fa0c1be650d Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Wed, 10 Nov 2021 08:26:29 -0500
Subject: [PATCH 4/4] Fix whitespace lints
---
R-package/tests/testthat/test_basic.R | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 62acb8eadaf6..92b11650c3e2 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -230,7 +230,7 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
)
, save_name = tempfile(fileext = ".model")
)
-
+
top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
@@ -239,7 +239,7 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
expect_true(is.numeric(top_level_l2))
expect_true(is.numeric(params_l2))
expect_true(is.numeric(both_l2))
-
+
# check that model produces identical performance
expect_identical(top_level_l2, params_l2)
expect_identical(both_l2, params_l2)
@@ -532,7 +532,7 @@ test_that("lgb.train() rejects negative or 0 value passed to nrounds", {
test_that("lgb.train() accepts nrounds as either a top-level argument or parameter", {
nrounds <- 15L
-
+
set.seed(708L)
top_level_bst <- lgb.train(
data = lgb.Dataset(
@@ -547,7 +547,7 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
, save_name = tempfile(fileext = ".model")
)
)
-
+
set.seed(708L)
param_bst <- lgb.train(
data = lgb.Dataset(
@@ -562,7 +562,7 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
, save_name = tempfile(fileext = ".model")
)
)
-
+
set.seed(708L)
both_customized <- lgb.train(
data = lgb.Dataset(
@@ -578,24 +578,24 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
, save_name = tempfile(fileext = ".model")
)
)
-
+
top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
params_l2 <- param_bst$eval_train()[[1L]][["value"]]
both_l2 <- both_customized$eval_train()[[1L]][["value"]]
-
+
# check type just to be sure the subsetting didn't return a NULL
expect_true(is.numeric(top_level_l2))
expect_true(is.numeric(params_l2))
expect_true(is.numeric(both_l2))
-
+
# check that model produces identical performance
expect_identical(top_level_l2, params_l2)
expect_identical(both_l2, params_l2)
-
+
expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
expect_identical(param_bst$current_iter(), both_customized$current_iter())
expect_identical(param_bst$current_iter(), nrounds)
-
+
})
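[Note: the lint fixed in this patch is trailing whitespace on otherwise-blank lines; the removed and added lines look identical above because trailing spaces do not survive this rendering. One way to locate such lines, as a sketch (not necessarily the project's actual lintr configuration):

    lines <- readLines("R-package/tests/testthat/test_basic.R")
    which(grepl("[ \t]+$", lines))  # indices of lines with trailing whitespace
]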