Source code for xgboost_distribution.distributions.normal

"""Normal distribution
"""
from collections import namedtuple

import numpy as np
from scipy.stats import norm

from xgboost_distribution.distributions.base import BaseDistribution
from xgboost_distribution.distributions.utils import MAX_EXPONENT, MIN_EXPONENT

# Note: due to reparameterization, we need to ensure that the converted
# variance, exp(2 * std), is within bounds of np.float32 arrays
MIN_LOG_SCALE = MIN_EXPONENT / 2
MAX_LOG_SCALE = MAX_EXPONENT / 2


Params = namedtuple("Params", ("loc", "scale"))


[docs]class Normal(BaseDistribution): """Normal distribution with log scoring Definition: f(x) = exp( -[ (x-mean) / std ]^2 / 2 ) / std We reparameterize: a = mean | a = mean b = log ( std ) | e^b = std (Note: reparameterizing to log(std) ensures that std >= 0, regardless of what the xgboost booster internally outputs, as std = e^b > 0.) The gradients are: d/da -log[f(x)] = e^(-2b) * (x-a) = (x-a) / var d/db -log[f(x)] = 1 - e^(-2b) * (x-a)^2 = 1 - (x-a)^2 / var as var = std^2 = e^(2b) The Fisher Information (diagonal): I(mean) = 1 / var I(std) = 2 / var In reparameterized form, we find I_r: 1 / var = I_r [ d/d(mean) mean ]^2 = I 2 / var = I_r [ d/d(std) log(std) ]^2 = I ( 1/(std) )^2 Hence the reparameterized Fisher information: [ 1 / var, 0 ] [ 0, 2 ] Ref: https://www.wolframalpha.com/input/?i=d%2Fda+-log%28%28e%5E%28-%5B%28x-a%29%2Fe%5Eb%29%5D%5E2+%2F+2%29+%2F+e%5Eb%29%29 https://www.wolframalpha.com/input/?i=d%2Fdb+-log%28%28e%5E%28-%5B%28x-a%29%2Fe%5Eb%29%5D%5E2+%2F+2%29+%2F+e%5Eb%29%29 """ @property def params(self): return Params._fields
[docs] def gradient_and_hessian(self, y, params, natural_gradient=True): """Gradient and diagonal hessian""" loc, log_scale = self._safe_params(params) var = np.exp(2 * log_scale) grad = np.zeros(shape=(len(y), 2), dtype="float32") grad[:, 0] = (loc - y) / var grad[:, 1] = 1 - ((y - loc) ** 2) / var if natural_gradient: fisher_matrix = np.zeros(shape=(len(y), 2, 2), dtype="float32") fisher_matrix[:, 0, 0] = 1 / var fisher_matrix[:, 1, 1] = 2 grad = np.linalg.solve(fisher_matrix, grad) hess = np.ones(shape=(len(y), 2), dtype="float32") # constant hessian else: hess = np.zeros(shape=(len(y), 2), dtype="float32") # diagonal elems only hess[:, 0] = 1 / var hess[:, 1] = 2 * ((y - loc) ** 2) / var return grad, hess
[docs] def loss(self, y, params): loc, scale = self.predict(params) return "NormalDistribution-NLL", -norm.logpdf(y, loc=loc, scale=scale)
[docs] def predict(self, params): loc, log_scale = self._safe_params(params) scale = np.exp(log_scale) return Params(loc=loc, scale=scale)
[docs] def starting_params(self, y): return Params(loc=np.mean(y), scale=np.log(np.std(y)))
def _safe_params(self, params): """Return safe loc and log_scale from params""" loc = params[:, 0] log_scale = np.clip(params[:, 1], a_min=MIN_LOG_SCALE, a_max=MAX_LOG_SCALE) return loc, log_scale