[python][sklearn] Remove early_stopping_rounds argument of fit() method (microsoft#4846)
StrikerRUS authored Dec 10, 2021
1 parent 1114ec8 commit f71328d
Showing 4 changed files with 178 additions and 109 deletions.
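In user code, this change amounts to replacing the removed `early_stopping_rounds` keyword of `fit()` with the `lgb.early_stopping()` callback. A minimal migration sketch (synthetic data; the estimator and callback usage are taken from the example diff below):

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

gbm = lgb.LGBMRegressor(n_estimators=100)

# Removed by this commit:
#   gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)],
#           eval_metric='l1', early_stopping_rounds=5)

# Replacement: configure early stopping via a callback.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(5)])
```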
6 changes: 3 additions & 3 deletions examples/python-guide/sklearn_example.py
@@ -27,7 +27,7 @@
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric='l1',
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])

 print('Starting predicting...')
 # predict
@@ -52,7 +52,7 @@ def rmsle(y_true, y_pred):
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric=rmsle,
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])


 # another self-defined eval metric
@@ -67,7 +67,7 @@ def rae(y_true, y_pred):
 gbm.fit(X_train, y_train,
         eval_set=[(X_test, y_test)],
         eval_metric=[rmsle, rae],
-        early_stopping_rounds=5)
+        callbacks=[lgb.early_stopping(5)])

 print('Starting predicting...')
 # predict
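The docstring removed further down in sklearn.py noted that with several eval metrics, early stopping checks all of them unless `first_metric_only=True` is set. With the callback API, that switch can be set on the callback itself; a hedged sketch, assuming the `first_metric_only` parameter of `lgb.early_stopping()`:

```python
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

gbm = lgb.LGBMRegressor(n_estimators=200)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric=['l1', 'l2'],  # multiple eval metrics
        # Stop on the first metric only (assumed callback parameter, mirroring
        # the first_metric_only constructor kwarg from the removed docstring):
        callbacks=[lgb.early_stopping(stopping_rounds=5, first_metric_only=True)])
```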
28 changes: 3 additions & 25 deletions python-package/lightgbm/dask.py
@@ -1038,15 +1038,11 @@ def _lgb_dask_fit(
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
         eval_at: Optional[Iterable[int]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "_DaskLGBMModel":
         if not all((DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED)):
             raise LightGBMError('dask, pandas and scikit-learn are required for lightgbm.dask')

-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         params = self.get_params(True)
         params.pop("client", None)
@@ -1171,13 +1167,9 @@ def fit(
         eval_class_weight: Optional[List[Union[dict, str]]] = None,
         eval_init_score: Optional[List[_DaskCollection]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMClassifier":
         """Docstring is inherited from the lightgbm.LGBMClassifier.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMClassifier,
             X=X,
@@ -1204,16 +1196,13 @@
         eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
     )

-    # DaskLGBMClassifier does not support group, eval_group, early_stopping_rounds.
+    # DaskLGBMClassifier does not support group, eval_group.
     _base_doc = (_base_doc[:_base_doc.find('group :')]
                  + _base_doc[_base_doc.find('eval_set :'):])

     _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
                  + _base_doc[_base_doc.find('eval_metric :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('feature_name :'):])
-
     # DaskLGBMClassifier support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
         Other parameters passed through to ``LGBMClassifier.fit()``.
@@ -1352,13 +1341,9 @@ def fit(
         eval_sample_weight: Optional[List[_DaskVectorLike]] = None,
         eval_init_score: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMRegressor":
         """Docstring is inherited from the lightgbm.LGBMRegressor.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMRegressor,
             X=X,
@@ -1384,7 +1369,7 @@
         eval_group_shape="list of Dask Array or Dask Series, or None, optional (default=None)"
     )

-    # DaskLGBMRegressor does not support group, eval_class_weight, eval_group, early_stopping_rounds.
+    # DaskLGBMRegressor does not support group, eval_class_weight, eval_group.
     _base_doc = (_base_doc[:_base_doc.find('group :')]
                  + _base_doc[_base_doc.find('eval_set :'):])
@@ -1394,9 +1379,6 @@
     _base_doc = (_base_doc[:_base_doc.find('eval_group :')]
                  + _base_doc[_base_doc.find('eval_metric :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
-                 + _base_doc[_base_doc.find('feature_name :'):])
-
     # DaskLGBMRegressor support for callbacks and init_model is not tested
     fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
         Other parameters passed through to ``LGBMRegressor.fit()``.
@@ -1519,13 +1501,9 @@ def fit(
         eval_group: Optional[List[_DaskVectorLike]] = None,
         eval_metric: Optional[Union[_LGBM_ScikitCustomEvalFunction, str, List[Union[_LGBM_ScikitCustomEvalFunction, str]]]] = None,
         eval_at: Iterable[int] = (1, 2, 3, 4, 5),
-        early_stopping_rounds: Optional[int] = None,
         **kwargs: Any
     ) -> "DaskLGBMRanker":
         """Docstring is inherited from the lightgbm.LGBMRanker.fit."""
-        if early_stopping_rounds is not None:
-            raise RuntimeError('early_stopping_rounds is not currently supported in lightgbm.dask')
-
         return self._lgb_dask_fit(
             model_factory=LGBMRanker,
             X=X,
@@ -1558,7 +1536,7 @@
     _base_doc = (_base_doc[:_base_doc.find('eval_class_weight :')]
                  + _base_doc[_base_doc.find('eval_init_score :'):])

-    _base_doc = (_base_doc[:_base_doc.find('early_stopping_rounds :')]
+    _base_doc = (_base_doc[:_base_doc.find('feature_name :')]
                  + "eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))\n"
                  + f"{' ':8}The evaluation positions of the specified metric.\n"
                  + f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}")
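The Dask estimators never actually supported early stopping: as the removed guards above show, passing `early_stopping_rounds` raised a `RuntimeError`. After this commit the keyword is gone from their signatures altogether, which a quick introspection check can confirm (a sketch; requires dask, pandas and scikit-learn to be installed, per the guard in `_lgb_dask_fit`):

```python
import inspect

from lightgbm.dask import DaskLGBMClassifier, DaskLGBMRanker, DaskLGBMRegressor

# After this commit, none of the Dask fit() signatures expose the keyword;
# early stopping on Dask remains unsupported rather than deprecated.
for est in (DaskLGBMClassifier, DaskLGBMRegressor, DaskLGBMRanker):
    params = inspect.signature(est.fit).parameters
    assert 'early_stopping_rounds' not in params, est.__name__
```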
181 changes: 122 additions & 59 deletions python-package/lightgbm/sklearn.py
@@ -250,14 +250,6 @@ def __call__(self, preds, dataset):
         If list, it can be a list of built-in metrics, a list of custom evaluation metrics, or a mix of both.
         In either case, the ``metric`` from the model parameters will be evaluated and used as well.
         Default: 'l2' for LGBMRegressor, 'logloss' for LGBMClassifier, 'ndcg' for LGBMRanker.
-    early_stopping_rounds : int or None, optional (default=None)
-        Activates early stopping. The model will train until the validation score stops improving.
-        Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
-        to continue training.
-        Requires at least one validation data and one metric.
-        If there's more than one, will check all of them. But the training data is ignored anyway.
-        To check only the first metric, set the ``first_metric_only`` parameter to ``True``
-        in additional parameters ``**kwargs`` of the model constructor.
     feature_name : list of str, or 'auto', optional (default='auto')
         Feature names.
         If 'auto' and data is pandas DataFrame, data columns names are used.
@@ -661,13 +653,25 @@ def _process_params(self, stage: str) -> Dict[str, Any]:

         return params

-    def fit(self, X, y,
-            sample_weight=None, init_score=None, group=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_class_weight=None, eval_init_score=None, eval_group=None,
-            eval_metric=None, early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        group=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_class_weight=None,
+        eval_init_score=None,
+        eval_group=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is set after definition, using a template."""
         params = self._process_params(stage="fit")

@@ -754,11 +758,6 @@ def _get_meta_data(collection, name, i):
         if isinstance(init_model, LGBMModel):
             init_model = init_model.booster_

-        if early_stopping_rounds is not None and early_stopping_rounds > 0:
-            _log_warning("'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. "
-                         "Pass 'early_stopping()' callback via 'callbacks' argument instead.")
-            params['early_stopping_rounds'] = early_stopping_rounds
-
         if callbacks is None:
             callbacks = []
         else:
@@ -940,18 +939,38 @@ def feature_name_(self):
 class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
     """LightGBM regressor."""

-    def fit(self, X, y,
-            sample_weight=None, init_score=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_init_score=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
-        super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
-                    eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_init_score=eval_init_score, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
-                    categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            eval_set=eval_set,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_init_score=eval_init_score,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRegressor")  # type: ignore
@@ -966,13 +985,23 @@ def fit(self, X, y,
 class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
     """LightGBM classifier."""

-    def fit(self, X, y,
-            sample_weight=None, init_score=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_class_weight=None, eval_init_score=None, eval_metric=None,
-            early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_class_weight=None,
+        eval_init_score=None,
+        eval_metric=None,
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
         _LGBMAssertAllFinite(y)
         _LGBMCheckClassificationTargets(y)
@@ -1013,12 +1042,22 @@ def fit(self, X, y,
                 else:
                     valid_sets[i] = (valid_x, self._le.transform(valid_y))

-        super().fit(X, _y, sample_weight=sample_weight, init_score=init_score, eval_set=valid_sets,
-                    eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
-                    eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
-                    feature_name=feature_name, categorical_feature=categorical_feature,
-                    callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            _y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            eval_set=valid_sets,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_class_weight=eval_class_weight,
+            eval_init_score=eval_init_score,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMClassifier")  # type: ignore
@@ -1088,13 +1127,25 @@ class LGBMRanker(LGBMModel):
     Please use this class mainly for training and applying ranking models in common sklearnish way.
     """

-    def fit(self, X, y,
-            sample_weight=None, init_score=None, group=None,
-            eval_set=None, eval_names=None, eval_sample_weight=None,
-            eval_init_score=None, eval_group=None, eval_metric=None,
-            eval_at=(1, 2, 3, 4, 5), early_stopping_rounds=None,
-            feature_name='auto', categorical_feature='auto',
-            callbacks=None, init_model=None):
+    def fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        init_score=None,
+        group=None,
+        eval_set=None,
+        eval_names=None,
+        eval_sample_weight=None,
+        eval_init_score=None,
+        eval_group=None,
+        eval_metric=None,
+        eval_at=(1, 2, 3, 4, 5),
+        feature_name='auto',
+        categorical_feature='auto',
+        callbacks=None,
+        init_model=None
+    ):
         """Docstring is inherited from the LGBMModel."""
         # check group data
         if group is None:
@@ -1113,18 +1164,30 @@ def fit(self, X, y,
                                  "if you use dict, the index should start from 0")

         self._eval_at = eval_at
-        super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
-                    eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
-                    eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
-                    early_stopping_rounds=early_stopping_rounds, feature_name=feature_name,
-                    categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
+        super().fit(
+            X,
+            y,
+            sample_weight=sample_weight,
+            init_score=init_score,
+            group=group,
+            eval_set=eval_set,
+            eval_names=eval_names,
+            eval_sample_weight=eval_sample_weight,
+            eval_init_score=eval_init_score,
+            eval_group=eval_group,
+            eval_metric=eval_metric,
+            feature_name=feature_name,
+            categorical_feature=categorical_feature,
+            callbacks=callbacks,
+            init_model=init_model
+        )
         return self

     _base_doc = LGBMModel.fit.__doc__.replace("self : LGBMModel", "self : LGBMRanker")  # type: ignore
     fit.__doc__ = (_base_doc[:_base_doc.find('eval_class_weight :')]  # type: ignore
                    + _base_doc[_base_doc.find('eval_init_score :'):])  # type: ignore
     _base_doc = fit.__doc__
-    _before_early_stop, _early_stop, _after_early_stop = _base_doc.partition('early_stopping_rounds :')
-    fit.__doc__ = f"""{_before_early_stop}eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))
+    _before_feature_name, _feature_name, _after_feature_name = _base_doc.partition('feature_name :')
+    fit.__doc__ = f"""{_before_feature_name}eval_at : iterable of int, optional (default=(1, 2, 3, 4, 5))
         The evaluation positions of the specified metric.
-    {_early_stop}{_after_early_stop}"""
+    {_feature_name}{_after_feature_name}"""
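For LGBMRanker, the `eval_at` keyword stays in `fit()` while early stopping moves to the callback, as the doc-splicing change above reflects. A minimal sketch with synthetic ranking data (shapes and group sizes are made up for illustration):

```python
import numpy as np
import lightgbm as lgb

# 100 queries of 10 documents each; relevance labels in {0, 1, 2, 3}.
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 20))
y = rng.integers(0, 4, size=1000)

X_train, y_train, group_train = X[:800], y[:800], np.full(80, 10)
X_valid, y_valid, group_valid = X[800:], y[800:], np.full(20, 10)

ranker = lgb.LGBMRanker(n_estimators=100)
ranker.fit(X_train, y_train, group=group_train,
           eval_set=[(X_valid, y_valid)],
           eval_group=[group_valid],
           eval_at=(1, 3, 5),                   # still a fit() parameter
           callbacks=[lgb.early_stopping(10)])  # replaces early_stopping_rounds
```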