[python][sklearn] Remove early_stopping_rounds argument of fit() method (microsoft#4846)
StrikerRUS authored Dec 10, 2021
1 parent 1114ec8 commit f71328d
Showing 4 changed files with 178 additions and 109 deletions.
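In user code, this change amounts to replacing the removed `early_stopping_rounds` keyword of `fit()` with the `lgb.early_stopping()` callback. A minimal migration sketch (synthetic data; the estimator and callback usage are taken from the example diff below):

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

gbm = lgb.LGBMRegressor(n_estimators=100)

# Removed by this commit:
#   gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)],
#           eval_metric='l1', early_stopping_rounds=5)

# Replacement: configure early stopping via a callback.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(5)])
```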
6 changes: 3 additions & 3 deletions examples/python-guide/sklearn_example.py
@@ -27,7 +27,7 @@
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric='l1',
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])

 print('Starting predicting...')
 # predict
@@ -52,7 +52,7 @@ def rmsle(y_true, y_pred):
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric=rmsle,
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])


 # another self-defined eval metric
@@ -67,7 +67,7 @@ def rae(y_true, y_pred):
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric=[rmsle, rae],
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])

 print('Starting predicting...')
 # predict
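The docstring removed further down in sklearn.py noted that with several eval metrics, early stopping checks all of them unless `first_metric_only=True` is set. With the callback API, that switch can be set on the callback itself; a hedged sketch, assuming the `first_metric_only` parameter of `lgb.early_stopping()`:

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

gbm = lgb.LGBMRegressor(n_estimators=200)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric=['l1', 'l2'],  # multiple eval metrics
        # Stop on the first metric only (assumed callback parameter, mirroring
        # the first_metric_only constructor kwarg from the removed docstring):
        callbacks=[lgb.early_stopping(stopping_rounds=5, first_metric_only=True)])
```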
28 changes: 3 additions & 25 deletions python-package/lightgbm/dask.py
@@ -1038,15 +1038,11 @@ def _lgb_dask_fit(
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
         eval_at: Optional[Iterable[int]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "_DaskLGBMModel":
         if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
             raise LightGBMError('dask, pandas and scikit-learn are required for lightgbm.dask')

-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         params = self.get_params(True)
         params.pop("client", None)
@@ -1171,13 +1167,9 @@ def fit(
         eval_class_weight: Optional[List[Union[dict, str]]] = None,
         eval_init_score: Optional[List[_DaskCollection]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMClassifier":
         """Docstring is inherited from the lightgbm.LGBMClassifier.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMClassifier,
             X=X,
@@ -1204,16 +1196,13 @@
         eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
     )

-    # DaskLGBMClassifier does not support group, eval_group, early_stopping_rounds.
+    # DaskLGBMClassifier does not support group, eval_group.
     _base_doc = (_base_doc[:_base_doc.find('group :')]
                  + _base_doc[_base_doc.find('eval_set :'):])

     _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
                  + _base_doc[_base_doc.find('eval_metric :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('feature_name :'):])
-
     # DaskLGBMClassifier support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
         Other parameters passed through to ``LGBMClassifier.fit()``.
@@ -1352,13 +1341,9 @@ def fit(
         eval_sample_weight: Optional[List[_DaskVectorLike]] = None,
         eval_init_score: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMRegressor":
         """Docstring is inherited from the lightgbm.LGBMRegressor.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMRegressor,
             X=X,
@@ -1384,7 +1369,7 @@
         eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
     )

-    # DaskLGBMRegressor does not support group, eval_class_weight, eval_group, early_stopping_rounds.
+    # DaskLGBMRegressor does not support group, eval_class_weight, eval_group.
     _base_doc = (_base_doc[:_base_doc.find('group :')]
                  + _base_doc[_base_doc.find('eval_set :'):])
@@ -1394,9 +1379,6 @@
     _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
                  + _base_doc[_base_doc.find('eval_metric :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('feature_name :'):])
-
     # DaskLGBMRegressor support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
         Other parameters passed through to ``LGBMRegressor.fit()``.
@@ -1519,13 +1501,9 @@ def fit(
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
         eval_at: Iterable[int] = (1, 2, 3, 4, 5),
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMRanker":
         """Docstring is inherited from the lightgbm.LGBMRanker.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMRanker,
             X=X,
@@ -1558,7 +1536,7 @@
     _base_doc = (_base_doc[:_base_doc.find('eval_class_weight :')]
                  + _base_doc[_base_doc.find('eval_init_score :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+    _base_doc = (_base_doc[:_base_doc.find('feature_name :')]
                  + "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n"
                  + f"{' ':8}The evaluation positions of the specified metric.\n"
                  + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
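The Dask estimators never actually supported early stopping: as the removed guards above show, passing `early_stopping_rounds` raised a `RuntimeError`. After this commit the keyword is gone from their signatures altogether, which a quick introspection check can confirm (a sketch; requires dask, pandas and scikit-learn to be installed, per the guard in `_lgb_dask_fit`):

```python
import inspect

from lightgbm.dask import DaskLGBMClassifier, DaskLGBMRanker, DaskLGBMRegressor

# After this commit, none of the Dask fit() signatures expose the keyword;
# early stopping on Dask remains unsupported rather than deprecated.
for est in (DaskLGBMClassifier, DaskLGBMRegressor, DaskLGBMRanker):
    params = inspect.signature(est.fit).parameters
    assert 'early_stopping_rounds' not in params, est.__name__
```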
181 changes: 122 additions & 59 deletions python-package/lightgbm/sklearn.py
@@ -250,14 +250,6 @@ def __call__(self, preds, dataset):
         If list, it can be a list of built-in metrics, a list of custom evaluation metrics, or a mix of both.
         In either case, the ``metric`` from the model parameters will be evaluated and used as well.
         Default: 'l2' for LGBMRegressor, 'logloss' for LGBMClassifier, 'ndcg' for LGBMRanker.
-    early_stopping_rounds : int or None, optional (default=None)
-        Activates early stopping. The model will train until the validation score stops improving.
-        Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
-        to continue training.
-        Requires at least one validation data and one metric.
-        If there's more than one, will check all of them. But the training data is ignored anyway.
-        To check only the first metric, set the ``first_metric_only`` parameter to ``True``
-        in additional parameters ``**kwargs`` of the model constructor.
     feature_name : list of str, or 'auto', optional (default='auto')
         Feature names.
         If 'auto' and data is pandas DataFrame, data columns names are used.
@@ -661,13 +653,25 @@ def _process_params(self, stage: str) -> Dict[str, Any]:

         return params

-    def fit(self, X, y,
-            sample_weight=None, init_score=None, group=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_class_weight=None, eval_init_score=None, eval_group=None,
-            eval_metric=None, early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        group=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_class_weight=None,
+        eval_init_score=None,
+        eval_group=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is set after definition, using a template."""
         params = self._process_params(stage="fit")

@@ -754,11 +758,6 @@ def _get_meta_data(collection, name, i):
         if isinstance(init_model, LGBMModel):
             init_model = init_model.booster_

-        if early_stopping_rounds is not None and early_stopping_rounds > 0:
-            _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
-                         "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
-            params['early_stopping_rounds'] = early_stopping_rounds
-
         if callbacks is None:
             callbacks = []
         else:
@@ -940,18 +939,38 @@ def feature_name_(self):
 class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
     """LightGBM regressor."""

-    def fit(self, X, y,
-            sample_weight=None, init_score=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_init_score=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
-        super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
-                    eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_init_score=eval_init_score, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
-                    categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            eval_set=eval_set,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_init_score=eval_init_score,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRegressor")  # type: ignore
@@ -966,13 +985,23 @@ def fit(self, X, y,
 class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
     """LightGBM classifier."""

-    def fit(self, X, y,
-            sample_weight=None, init_score=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_class_weight=None, eval_init_score=None, eval_metric=None,
-            early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_class_weight=None,
+        eval_init_score=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
         _LGBMAssertAllFinite(y)
         _LGBMCheckClassificationTargets(y)
@@ -1013,12 +1042,22 @@ def fit(self, X, y,
                 else:
                     valid_sets[i] = (valid_x, self._le.transform(valid_y))

-        super().fit(X, _y, sample_weight=sample_weight, init_score=init_score, eval_set=valid_sets,
-                    eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
-                    eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
-                    feature_name=feature_name, categorical_feature=categorical_feature,
-                    callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            _y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            eval_set=valid_sets,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_class_weight=eval_class_weight,
+            eval_init_score=eval_init_score,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMClassifier")  # type: ignore
@@ -1088,13 +1127,25 @@ class LGBMRanker(LGBMModel):
     Please use this class mainly for training and applying ranking models in common sklearnish way.
     """

-    def fit(self, X, y,
-            sample_weight=None, init_score=None, group=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_init_score=None, eval_group=None, eval_metric=None,
-            eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        group=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_init_score=None,
+        eval_group=None,
+        eval_metric=None,
+        eval_at=(1, 2, 3, 4, 5),
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
         # check group data
         if group is None:
@@ -1113,18 +1164,30 @@ def fit(self, X, y,
                                  "if you use dict, the index should start from 0")

         self._eval_at = eval_at
-        super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
-                    eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
-                    categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            group=group,
+            eval_set=eval_set,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_init_score=eval_init_score,
+            eval_group=eval_group,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRanker")  # type: ignore
     fit.__doc__ = (_base_doc[:_base_doc.find('eval_class_weight :')]  # type: ignore
                    + _base_doc[_base_doc.find('eval_init_score :'):])  # type: ignore
     _base_doc = fit.__doc__
-    _before_early_stop, _early_stop, _after_early_stop = _base_doc.partition('early_stopping_rounds :')
-    fit.__doc__ = f"""{_before_early_stop}eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))
+    _before_feature_name, _feature_name, _after_feature_name = _base_doc.partition('feature_name :')
+    fit.__doc__ = f"""{_before_feature_name}eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))
         The evaluation positions of the specified metric.
-    {_early_stop}{_after_early_stop}"""
+    {_feature_name}{_after_feature_name}"""
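For LGBMRanker, the `eval_at` keyword stays in `fit()` while early stopping moves to the callback, as the doc-splicing change above reflects. A minimal sketch with synthetic ranking data (shapes and group sizes are made up for illustration):

```python
import numpy as np
import lightgbm as lgb

# 100 queries of 10 documents each; relevance labels in {0, 1, 2, 3}.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 20))
y = rng.integers(0, 4, size=1000)

X_train, y_train, group_train = X[:800], y[:800], np.full(80, 10)
X_valid, y_valid, group_valid = X[800:], y[800:], np.full(20, 10)

ranker = lgb.LGBMRanker(n_estimators=100)
ranker.fit(X_train, y_train, group=group_train,
           eval_set=[(X_valid, y_valid)],
           eval_group=[group_valid],
           eval_at=(1, 3, 5),                   # still a fit() parameter
           callbacks=[lgb.early_stopping(10)])  # replaces early_stopping_rounds
```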