Source code for xgboost_distribution.distributions.poisson

"""Poisson distribution
"""
from collections import namedtuple

import numpy as np
from scipy.stats import poisson

from xgboost_distribution.distributions.base import BaseDistribution
from xgboost_distribution.distributions.utils import (
    check_all_ge_zero,
    check_all_integer,
    safe_exp,
)

Params = namedtuple("Params", ("mu"))


[docs]class Poisson(BaseDistribution): """Poisson distribution with log score Definition: f(k) = e^(-mu) mu^k / k! We reparameterize mu -> log(mu) = a to ensure mu >= 0. Gradient: d/da -log[f(k)] = e^a - k = mu - k The Fisher information = 1 / mu, which needs to be expressed in the reparameterized form: 1 / mu = I ( d/dmu log(mu) )^2 = I ( 1/ mu )^2 Hence we find: I = mu """ @property def params(self): return Params._fields
[docs] def check_target(self, y): check_all_integer(y) check_all_ge_zero(y)
[docs] def gradient_and_hessian(self, y, params, natural_gradient=True): """Gradient and diagonal hessian""" (mu,) = self.predict(params) grad = np.zeros(shape=(len(y), 1), dtype="float32") grad[:, 0] = mu - y if natural_gradient: fisher_matrix = np.zeros(shape=(len(y), 1, 1), dtype="float32") fisher_matrix[:, 0, 0] = mu grad = np.linalg.solve(fisher_matrix, grad) hess = np.ones(shape=(len(y), 1), dtype="float32") # constant hessian else: hess = mu return grad, hess
[docs] def loss(self, y, params): (mu,) = self.predict(params) return "Poisson-NLL", -poisson.logpmf(y, mu=mu)
[docs] def predict(self, params): mu = safe_exp(params) return Params(mu=mu)
[docs] def starting_params(self, y): return Params(mu=np.log(np.mean(y)))