"""XGBDistribution model
"""
import importlib
import json
import os
from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, no_type_check
import numpy as np
from sklearn.base import RegressorMixin
from sklearn.utils.validation import check_is_fitted
from xgboost._typing import ArrayLike
from xgboost.callback import TrainingCallback
from xgboost.config import config_context
from xgboost.core import Booster, DMatrix, _deprecate_positional_args
from xgboost.sklearn import XGBModel, _wrap_evaluation_matrices, xgboost_model_doc
from xgboost.training import train
from xgboost_distribution.distributions import get_distribution, get_distribution_doc
[docs]@xgboost_model_doc(
"Implementation of XGBoost to estimate distributions (in scikit-learn API).",
["estimators", "model"],
extra_parameters=get_distribution_doc()
+ """
natural_gradient : bool, default=True
Whether or not natural gradients should be used.""",
)
class XGBDistribution(XGBModel, RegressorMixin):
[docs] @_deprecate_positional_args
def __init__(
self,
*,
distribution: str = None,
natural_gradient: bool = True,
objective: str = None,
**kwargs: Any,
) -> None:
self.distribution = distribution or "normal"
self.natural_gradient = natural_gradient
if objective is not None:
raise ValueError(
"Please do not set objective directly! Use the `distribution` kwarg"
)
super().__init__(objective=None, **kwargs)
[docs] @_deprecate_positional_args
def fit(
self,
X: ArrayLike,
y: ArrayLike,
*,
sample_weight: Optional[ArrayLike] = None,
eval_set: Optional[Sequence[Tuple[ArrayLike, ArrayLike]]] = None,
early_stopping_rounds: Optional[int] = None,
verbose: Optional[Union[bool, int]] = True,
xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
sample_weight_eval_set: Optional[Sequence[ArrayLike]] = None,
feature_weights: Optional[ArrayLike] = None,
callbacks: Optional[Sequence[TrainingCallback]] = None,
) -> "XGBDistribution":
"""Fit gradient boosting distribution model.
Note that calling ``fit()`` multiple times will cause the model object to be
re-fit from scratch. To resume training from a previous checkpoint, explicitly
pass ``xgb_model`` argument.
Parameters
----------
X :
Feature matrix. See :ref:`py-data` for a list of supported types.
When the ``tree_method`` is set to ``hist``, internally, the
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
for conserving memory. However, this has performance implications when the
device of input data is not matched with algorithm. For instance, if the
input is a numpy array on CPU but ``cuda`` is used for training, then the
data is first processed on CPU then transferred to GPU.
y :
Labels
sample_weight :
instance weights
eval_set :
A list of (X, y) tuple pairs to use as validation sets, for which
metrics will be computed.
Validation metrics will help us track the performance of the model.
early_stopping_rounds : int
.. deprecated:: 1.6.0
Use `early_stopping_rounds` in :py:meth:`__init__` or :py:meth:`set_params`
instead.
verbose :
If `verbose` is True and an evaluation set is used, the evaluation metric
measured on the validation set is printed to stdout at each boosting stage.
If `verbose` is an integer, the evaluation metric is printed at each
`verbose` boosting stage. The last boosting stage / the boosting stage found
by using `early_stopping_rounds` is also printed.
xgb_model :
file name of stored XGBoost model or 'Booster' instance XGBoost model to be
loaded before training (allows training continuation).
sample_weight_eval_set :
A list of the form [L_1, L_2, ..., L_n], where each L_i is an array like
object storing instance weights for the i-th validation set.
feature_weights :
Weight for each feature, defines the probability of each feature being
selected when colsample is being used. All values must be greater than 0,
otherwise a `ValueError` is thrown.
callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.
"""
with config_context(verbosity=self.verbosity):
evals_result: TrainingCallback.EvalsLog = {}
self._distribution = get_distribution(self.distribution)
self._distribution.check_target(y)
params = self.get_xgb_params()
# we remove unexpected (i.e. not xgb native) params before fitting
for param in ["distribution", "natural_gradient"]:
params.pop(param)
params["objective"] = None
params["disable_default_eval_metric"] = True
params["num_class"] = len(self._distribution.params)
# we set `base_score` to zero and instead use base_margin in dmatrices
# -> this allows different starting values for each distribution parameter
params["base_score"] = 0.0
self._starting_params = self._distribution.starting_params(y)
base_margin = self._get_base_margin(len(y))
if eval_set is not None:
base_margin_eval_set: Optional[List[np.ndarray]] = [
self._get_base_margin(len(evals[1])) for evals in eval_set
]
else:
base_margin_eval_set = None
model, _, params, early_stopping_rounds, callbacks = self._configure_fit(
booster=xgb_model,
eval_metric=None,
params=params,
early_stopping_rounds=early_stopping_rounds,
callbacks=callbacks,
)
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
X=X,
y=y,
group=None,
qid=None,
sample_weight=sample_weight,
base_margin=base_margin,
feature_weights=feature_weights,
eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set,
base_margin_eval_set=base_margin_eval_set,
eval_group=None,
eval_qid=None,
create_dmatrix=self._create_dmatrix,
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
self._Booster = train(
params,
train_dmatrix,
num_boost_round=self.get_num_boosting_rounds(),
evals=evals,
early_stopping_rounds=early_stopping_rounds,
evals_result=evals_result,
obj=self._objective_func(),
custom_metric=self._evaluation_func(),
verbose_eval=verbose,
xgb_model=model,
callbacks=callbacks,
)
self._set_evaluation_result(evals_result)
self.objective = f"distribution:{self.distribution}"
# we set additional params needed for XGBDistribution on the Booster object,
# in order to make use of the Booster's serialisation methods
self._Booster.set_attr(distribution=self.distribution)
self._Booster.set_attr(starting_params=json.dumps(self._starting_params))
return self
[docs] @no_type_check
def predict(
self,
X: ArrayLike,
validate_features: bool = True,
iteration_range: Optional[Tuple[int, int]] = None,
) -> Tuple[np.ndarray]:
"""Predict all params of distribution of each `X` example.
Parameters
----------
X : ArrayLike
Feature matrix.
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are
identical. Otherwise, it is assumed that the feature_names are the same.
iteration_range :
Specifies which layer of trees are used in prediction. For example, if a
random forest is trained with 100 rounds. Specifying `iteration_range=(10,
20)`, then only the forests built during [10, 20) (half open set) rounds are
used in this prediction.
Returns
-------
predictions : namedtuple
A namedtuple of the distribution parameters. Each parameter is a
numpy array of shape (n_samples,).
"""
with config_context(verbosity=self.verbosity):
check_is_fitted(self, attributes=("_distribution", "_starting_params"))
base_margin = self._get_base_margin(X.shape[0])
params = super().predict(
X=X,
output_margin=True,
validate_features=validate_features,
base_margin=base_margin,
iteration_range=iteration_range,
)
return self._distribution.predict(params)
[docs] def save_model(self, fname: Union[str, os.PathLike]) -> None:
# self._distribution class cannot be saved by `super().save_model`, as it
# attempts to call `json.dumps({"_distribution": self._distribution})`
# Hence we delete, and then reinstantiate
# (this is safe as distributions are by definition stateless)
del self._distribution
super().save_model(fname)
self._distribution = get_distribution(self.distribution)
[docs] def load_model(self, fname: Union[str, bytearray, os.PathLike]) -> None:
super().load_model(fname)
# See above: Currently need to reinstantiate distribution post loading
self.distribution = self._Booster.attr("distribution")
self._distribution = get_distribution(self.distribution)
distribution_module = importlib.import_module(self._distribution.__module__)
self._starting_params = distribution_module.Params(
*json.loads(self._Booster.attr("starting_params"))
)
del distribution_module
def _objective_func(
self,
) -> Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]:
def obj(params: np.ndarray, data: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
y = data.get_label()
grad, hess = self._distribution.gradient_and_hessian(
y=y, params=params, natural_gradient=self.natural_gradient
)
weights = data.get_weight()
if weights.size != 0:
weights = weights.reshape(-1, 1)
grad *= weights
hess *= weights
return grad.flatten(), hess.flatten()
return obj
def _evaluation_func(self) -> Callable[[np.ndarray, DMatrix], Tuple[str, float]]:
def feval(params: np.ndarray, data: DMatrix) -> Tuple[str, float]:
y = data.get_label()
weights = data.get_weight()
if weights.size == 0:
weights = None
loss_name, loss = self._distribution.loss(y=y, params=params)
return loss_name, np.average(loss, weights=weights)
return feval
def _get_base_margin(self, n_samples: int) -> np.ndarray:
return np.ones(shape=(n_samples, 1)) * np.array(self._starting_params)