"""Defines estimators which estimate a model's ability to represent the data.
All estimators have to implement the `.Estimator` interface.
"""
from typing import Callable, Dict, Mapping, Optional
import numpy as np
from tensorwaves.function._backend import find_function
from tensorwaves.interface import (
DataSample,
Estimator,
ParameterValue,
ParametrizedFunction,
)
def gradient_creator(
    function: Callable[[Mapping[str, ParameterValue]], ParameterValue],
    backend: str,
) -> Callable[[Mapping[str, ParameterValue]], Dict[str, ParameterValue]]:
    """Create a gradient function for ``function`` with a specific backend.

    Args:
        function: Scalar-valued function of a parameter mapping, for which
            the gradient should be computed.
        backend: Computational backend. Only ``"jax"`` supports automatic
            differentiation.

    Returns:
        A callable that maps parameter values to the gradient of
        ``function``. For backends other than ``"jax"``, the returned
        callable raises `NotImplementedError` when invoked.
    """
    # pylint: disable=import-outside-toplevel
    if backend == "jax":
        import jax

        # Enable double precision, otherwise gradients are too imprecise for
        # fitting. Use the jax.config attribute directly: the old
        # ``from jax.config import config`` import path was removed in
        # JAX 0.4.
        jax.config.update("jax_enable_x64", True)
        return jax.grad(function)

    def raise_gradient_not_implemented(
        parameters: Mapping[str, ParameterValue],
    ) -> Dict[str, ParameterValue]:
        raise NotImplementedError(
            f"Gradient not implemented for back-end {backend}."
        )

    return raise_gradient_not_implemented
class ChiSquared(Estimator):
    r"""Chi-squared test estimator.

    .. math:: \chi^2 = \sum_{i=1}^n w_i\left(y_i - f_\mathbf{p}(x_i)\right)^2

    Args:
        function: A `.ParametrizedFunction` :math:`f_\mathbf{p}` with a set
            of free `~.ParametrizedFunction.parameters` :math:`\mathbf{p}`.
        domain: Input data-set :math:`\mathbf{x}` of :math:`n` events
            :math:`x_i` over which to compute :code:`function`
            :math:`f_\mathbf{p}`.
        observed_values: Observed values :math:`y_i`.
        weights: Optional weights :math:`w_i`. Default: :math:`w_i=1`
            (unweighted). A common choice is :math:`w_i = 1/\sigma_i^2`, with
            :math:`\sigma_i` the uncertainty in each measured value of
            :math:`y_i`.
        backend: Computational backend with which to compute the sum
            :math:`\sum_{i=1}^n`.

    .. seealso:: :doc:`/usage/chi-squared`
    """

    def __init__(
        self,
        function: ParametrizedFunction,
        domain: DataSample,
        observed_values: np.ndarray,
        weights: Optional[np.ndarray] = None,
        backend: str = "numpy",
    ) -> None:
        self.__function = function
        self.__domain = domain
        self.__observed_values = observed_values
        if weights is None:
            # One weight per event. Sizing with len() over the DataSample
            # mapping would count its keys (the variables), not the events,
            # so size the default weights after the observed values instead.
            ones = find_function("ones", backend)
            self.__weights = ones(len(observed_values))
        else:
            self.__weights = weights
        self.__gradient = gradient_creator(self.__call__, backend)
        self.__sum = find_function("sum", backend)

    def __call__(self, parameters: Mapping[str, ParameterValue]) -> float:
        """Compute the weighted chi-squared for these parameter values."""
        self.__function.update_parameters(parameters)
        computed_values = self.__function(self.__domain)
        chi_squared = (
            self.__weights * (computed_values - self.__observed_values) ** 2
        )
        return self.__sum(chi_squared)

    def gradient(
        self, parameters: Mapping[str, ParameterValue]
    ) -> Dict[str, ParameterValue]:
        """Gradient of the chi-squared w.r.t. the parameters.

        Only available with an automatic-differentiation backend (``"jax"``);
        other backends raise `NotImplementedError`.
        """
        return self.__gradient(parameters)
class UnbinnedNLL(Estimator):  # pylint: disable=too-many-instance-attributes
    r"""Unbinned negative log likelihood estimator.

    The **log likelihood** :math:`\log\mathcal{L}` for a given function
    :math:`f_\mathbf{p}: X^m \rightarrow \mathbb{R}` over :math:`N` data
    points :math:`\mathbf{x}` and over a (phase space) domain of
    :math:`n_\mathrm{phsp}` points :math:`\mathbf{x}_\mathrm{phsp}`, is
    given by:

    .. math::

        -\log\mathcal{L} = N\log\lambda
        -\sum_{i=1}^N \log\left(f_\mathbf{p}(x_i)\right)

    with :math:`\lambda` the normalization integral over
    :math:`f_\mathbf{p}`. The integral is computed numerically by averaging
    over a significantly large (phase space) domain sample
    :math:`\mathbf{x}_\mathrm{phsp}` of size :math:`n`:

    .. math::

        \lambda = \frac{\sum_{j=1}^n V
        f_\mathbf{p}(x_{\mathrm{phsp},j})}{n}.

    Args:
        function: A `.ParametrizedFunction` :math:`f_\mathbf{p}` that
            describes a distribution over a certain domain.
        data: The `.DataSample` :math:`\mathbf{x}` over which to compute
            :math:`f_\mathbf{p}`.
        phsp: The domain (phase space) with which the likelihood is
            normalized. When correcting for the detector efficiency, use a
            phase space sample that passed the detector reconstruction.
        phsp_volume: Optional phase space volume :math:`V`, used in the
            normalization factor. Default: :math:`V=1`.
        backend: The computational back-end with which the sums and averages
            should be computed.

    .. seealso:: :doc:`/usage/unbinned-fit`
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        function: ParametrizedFunction,
        data: DataSample,
        phsp: DataSample,
        phsp_volume: float = 1.0,
        backend: str = "numpy",
    ) -> None:
        # Convert each variable array up front so the backend functions
        # always receive numpy arrays.
        self.__data = {key: np.array(values) for key, values in data.items()}
        self.__phsp = {key: np.array(values) for key, values in phsp.items()}
        self.__function = function
        self.__gradient = gradient_creator(self.__call__, backend)
        self.__mean_function = find_function("mean", backend)
        self.__sum_function = find_function("sum", backend)
        self.__log_function = find_function("log", backend)
        self.__phsp_volume = phsp_volume

    def __call__(self, parameters: Mapping[str, ParameterValue]) -> float:
        """Evaluate :math:`-\\log\\mathcal{L}` for these parameter values."""
        self.__function.update_parameters(parameters)
        bare_intensities = self.__function(self.__data)
        phsp_intensities = self.__function(self.__phsp)
        # Estimate the normalization integral as the (volume-scaled) average
        # of the intensities over the phase space sample.
        normalization_factor = 1.0 / (
            self.__phsp_volume * self.__mean_function(phsp_intensities)
        )
        likelihoods = normalization_factor * bare_intensities
        return -self.__sum_function(self.__log_function(likelihoods))

    def gradient(
        self, parameters: Mapping[str, ParameterValue]
    ) -> Dict[str, ParameterValue]:
        """Gradient of the negative log likelihood w.r.t. the parameters."""
        return self.__gradient(parameters)