Source code for xgboost_distribution.distributions.negative_binomial

"""Negative binomial distribution
"""
from collections import namedtuple

import numpy as np
from scipy.special import digamma, expit
from scipy.stats import nbinom

from xgboost_distribution.distributions.base import BaseDistribution
from xgboost_distribution.distributions.utils import (
    MAX_EXPONENT,
    MIN_EXPONENT,
    check_all_ge_zero,
    check_all_integer,
    safe_exp,
)

Params = namedtuple("Params", ("n", "p"))


[docs]class NegativeBinomial(BaseDistribution): """Negative binomial distribution with log score Definition: f(k) = p^n (1 - p)^k binomial(n + k - 1, n - 1) with parameter (n, p), where n >= 0 and 1 >= p >= 0 We reparameterize: n -> log(n) = a | e^a = n p -> log(p/(1-p)) = b | e^b = p / (1-p) | p = 1 / (1 + e^-b) The gradients are: d/da -log[f(k)] = -e^a [ digamma(k+e^a) - digamma(e^a) + log(p) ] = -n [ digamma(k+n) - digamma(n) + log(p) ] d/db -log[f(k)] = (k e^b - e^a) / (e^b + 1) = (k - e^a e^-b) / (e^-b + 1) = p * (k - e^a e^-b) = p * (k - n e^-b) The Fisher Information: I(n) ~ p / [ n (p+1) ] I(p) = n / [ p (1-p)^2 ] where we used an approximation for I(n) presented here: http://erepository.uonbi.ac.ke:8080/xmlui/handle/123456789/33803 In reparameterized form, we find I_r(n) and I_r(p): p / [ n (p+1) ] = I_r(n) [ d/dn log(n) ]^2 = I_r(n) ( 1/n )^2 -> I_r(n) = np / (p+1) n / [ p (1-p)^2 ] = I_r(p) [ d/dp log(p/(1-p)) ]^2 = I_r(p) ( 1/ [ p (1-p) ] )^2 -> I_r(p) = [ p^2 (1-p)^2 n ] / [ p (1-p)^2 ] = np Hence the reparameterized Fisher information: [ np / (p+1), 0 ] [ 0, np ] Ref: https://www.wolframalpha.com/input/?i=d%2Fda+-log%28+%5B1+%2F+%281+%2B+e%5E%28-b%29%29%5D+%5E%28e%5Ea%29+%281+-+%5B1+%2F+%281+%2B+e%5E%28-b%29%29%5D%29%5Ek+binomial%28%28e%5Ea%29+%2B+k+-+1%2C+%28e%5Ea%29+-+1%29+%29 """ @property def params(self): return Params._fields
[docs] def check_target(self, y): check_all_integer(y) check_all_ge_zero(y)
[docs] def gradient_and_hessian(self, y, params, natural_gradient=True): """Gradient and diagonal hessian""" n, p = self.predict(params) grad = np.zeros(shape=(len(y), 2), dtype="float32") grad[:, 0] = -n * (digamma(y + n) - digamma(n) + np.log(p)) grad[:, 1] = p * (y - n * (1 - p) / p) if natural_gradient: fisher_matrix = np.zeros(shape=(len(y), 2, 2), dtype="float32") fisher_matrix[:, 0, 0] = (n * p) / (p + 1) fisher_matrix[:, 1, 1] = n * p grad = np.linalg.solve(fisher_matrix, grad) hess = np.ones(shape=(len(y), 2), dtype="float32") # constant hessian else: raise NotImplementedError( "Normal gradients are currently not supported by this " "distribution. Please use natural gradients!" ) return grad, hess
[docs] def loss(self, y, params): n, p = self.predict(params) return "NegativeBinomial-NLL", -nbinom.logpmf(y, n=n, p=p)
[docs] def predict(self, params): log_n, logits = params[:, 0], params[:, 1] n = safe_exp(log_n) logits = np.clip(logits, a_min=MIN_EXPONENT, a_max=MAX_EXPONENT) p = expit(logits) return Params(n=n, p=p)
[docs] def starting_params(self, y): # TODO: starting params can matter a lot? return Params(n=np.log(np.mean(y)), p=0) # expit(0) = 0.5