Source code for xgboost_distribution.distributions.negative_binomial

"""Negative binomial distribution
"""
from collections import namedtuple

import numpy as np
from scipy.special import digamma, expit
from scipy.stats import nbinom

from xgboost_distribution.distributions.base import BaseDistribution
from xgboost_distribution.distributions.utils import (
    MAX_EXPONENT,
    MIN_EXPONENT,
    check_all_ge_zero,
    check_all_integer,
    safe_exp,
)

Params = namedtuple("Params", ("n", "p"))


[docs]class NegativeBinomial(BaseDistribution):
    """Negative binomial distribution with log score

    Definition:

        f(k) = p^n (1 - p)^k binomial(n + k - 1, n - 1)

    with parameter (n, p), where n >= 0 and 1 >= p >= 0

    We reparameterize:

        n -> log(n) = a        |  e^a = n
        p -> log(p/(1-p)) = b  |  e^b = p / (1-p)   |  p = 1 / (1 + e^-b)

    The gradients are:

        d/da -log[f(k)] = -e^a [ digamma(k+e^a) - digamma(e^a) + log(p) ]
                        = -n   [ digamma(k+n) - digamma(n) + log(p) ]

        d/db -log[f(k)] = (k e^b - e^a) / (e^b + 1)
                        = (k - e^a e^-b) / (e^-b + 1)
                        = p * (k - e^a e^-b)
                        = p * (k - n e^-b)

    The Fisher Information:

        I(n) ~ p / [ n (p+1) ]
        I(p) = n / [ p (1-p)^2 ]

    where we used an approximation for I(n) presented here:
        http://erepository.uonbi.ac.ke:8080/xmlui/handle/123456789/33803

    In reparameterized form, we find I_r(n) and I_r(p):

        p / [ n (p+1) ] = I_r(n) [ d/dn log(n) ]^2
                        = I_r(n) ( 1/n )^2

        -> I_r(n) = np / (p+1)

        n / [ p (1-p)^2 ] = I_r(p) [ d/dp log(p/(1-p)) ]^2
                          = I_r(p) ( 1/ [ p (1-p) ] )^2

        -> I_r(p) = [ p^2 (1-p)^2 n ] / [ p (1-p)^2 ] = np

    Hence the reparameterized Fisher information:

        [  np / (p+1), 0 ]
        [  0,         np ]

    Ref:

        https://www.wolframalpha.com/input/?i=d%2Fda+-log%28+%5B1+%2F+%281+%2B+e%5E%28-b%29%29%5D+%5E%28e%5Ea%29+%281+-+%5B1+%2F+%281+%2B+e%5E%28-b%29%29%5D%29%5Ek+binomial%28%28e%5Ea%29+%2B+k+-+1%2C+%28e%5Ea%29+-+1%29+%29

    """

    @property
    def params(self):
        return Params._fields

[docs]    def check_target(self, y):
        check_all_integer(y)
        check_all_ge_zero(y)

[docs]    def gradient_and_hessian(self, y, params, natural_gradient=True):
        """Gradient and diagonal hessian"""

        n, p = self.predict(params)

        grad = np.zeros(shape=(len(y), 2), dtype="float32")

        grad[:, 0] = -n * (digamma(y + n) - digamma(n) + np.log(p))
        grad[:, 1] = p * (y - n * (1 - p) / p)

        if natural_gradient:
            fisher_matrix = np.zeros(shape=(len(y), 2, 2), dtype="float32")
            fisher_matrix[:, 0, 0] = (n * p) / (p + 1)
            fisher_matrix[:, 1, 1] = n * p

            grad = np.linalg.solve(fisher_matrix, grad)
            hess = np.ones(shape=(len(y), 2), dtype="float32")  # constant hessian
        else:
            raise NotImplementedError(
                "Normal gradients are currently not supported by this "
                "distribution. Please use natural gradients!"
            )

        return grad, hess

[docs]    def loss(self, y, params):
        n, p = self.predict(params)
        return "NegativeBinomial-NLL", -nbinom.logpmf(y, n=n, p=p)

[docs]    def predict(self, params):
        log_n, logits = params[:, 0], params[:, 1]

        n = safe_exp(log_n)
        logits = np.clip(logits, a_min=MIN_EXPONENT, a_max=MAX_EXPONENT)

        p = expit(logits)
        return Params(n=n, p=p)

[docs]    def starting_params(self, y):
        # TODO: starting params can matter a lot?
        return Params(n=np.log(np.mean(y)), p=0)  # expit(0) = 0.5