"""Module with some mathematical utilities."""
# stdlib
from typing import Tuple
import math
# external
import numpy as np
import scipy.special
[docs]def kolmogorov_prob(z: float) -> float:
"""Calculate the Kolmogorov distribution function.
See ROOT's implementation in TMath_ (TMath::KolmogorovProb).
.. _TMath: https://root.cern.ch/doc/master/namespaceTMath.html
Parameters
----------
z : float
the value to test
Returns
-------
float
the probability that the test statistic exceeds :math:`z`
(assuming the null hypothesis).
Examples
--------
>>> from tdub.math import kolmogorov_prob
>>> kolmogorov_prob(1.13)
0.15549781841748692
"""
w = 2.50662827
# c1 - -pi**2/8, c2 = 9*c1, c3 = 25*c1
c1 = -1.2337005501361697
c2 = -11.103304951225528
c3 = -30.842513753404244
u = abs(z)
if u < 0.2:
return 1.0
elif u < 0.755:
v = 1.0 / (u * u)
return float(1 - w * (math.exp(c1 * v) + math.exp(c2 * v) + math.exp(c3 * v)) / u)
elif u < 6.8116:
fj = np.array([-2, -8, -18, -32], dtype=np.float64)
r = np.zeros((4,), dtype=np.float64)
v = u * u
maxj = max(1.0, round(3.0 / u))
for j in range(int(maxj)):
r[j] = math.exp(fj[j] * v)
return float(2 * (r[0] - r[1] + r[2] - r[3]))
else:
return float(0.0)
[docs]def ks_twosample_binned(
hist1: np.ndarray, hist2: np.ndarray, err1: np.ndarray, err2: np.ndarray
) -> Tuple[float, float]:
"""Calculate KS statistic and p-value for two binned distributions.
See ROOT's implementation in TH1_ (TH1::KolmogorovTest).
.. _TH1: https://root.cern.ch/doc/master/classTH1.html
Parameters
----------
hist1 : numpy.ndarray
the histogram counts for the first distribution
hist2 : numpy.ndarray
the histogram counts for the second distribution
err1 : numpy.ndarray
the error on the histogram counts for the first distribution
err2 : numpy.ndarray
the error on the histogram counts for the second distribution
Returns
-------
(float, float)
first: the test-statistic; second: the probability of the test
(much less than 1 means distributions are incompatible)
Examples
--------
>>> import pygram11
>>> from tdub.math import ks_twosample_binned
>>> data1, data2, w1, w2 = some_function_to_get_data()
>>> h1, err1 = pygram11.histogram(data1, weights=w1, bins=40, range=(-3, 3))
>>> h2, err2 = pygram11.histogram(data2, weights=w2, bins=40, range=(-3, 3))
>>> kst, ksp = ks_twosample_binned(h1, h2, err1, err2)
"""
sum1 = np.sum(hist1)
sum2 = np.sum(hist2)
w1 = np.sum(err1 * err1)
w2 = np.sum(err2 * err2)
esum1 = sum1 * sum1 / w1
esum2 = sum2 * sum2 / w2
s1 = 1 / sum1
s2 = 1 / sum2
rsum1 = s1 * hist1
rsum2 = s2 * hist2
dfmax = float(np.max(np.abs(rsum1 - rsum2)))
z = float(dfmax * math.sqrt(esum1 * esum2 / (esum1 + esum2)))
return dfmax, kolmogorov_prob(z)
[docs]def chisquared_cdf_c(chi2: float, ndf: float) -> float:
r"""Calculate :math:`\chi^2` probability from the value and NDF.
See ROOT's ``TMath::Prob`` & ``ROOT::Math::chisquared_cdf_c``.
Quoting the ROOT documentation:
Computation of the probability for a certain :math:`\chi^2` and
number of degrees of freedom (ndf). Calculations are based on the
incomplete gamma function :math:`P(a,x)`, where
:math:`a=\mathrm{ndf}/2` and :math:`x=\chi^2/2`.
:math:`P(a,x)` represents the probability that the observed
:math:`\chi^2` for a correct model should be less than the value
:math:`\chi^2`. The returned probability corresponds to
:math:`1-P(a,x)`, which denotes the probability that an observed
:math:`\chi^2` exceeds the value :math:`\chi^2` by chance, even
for a correct model.
Parameters
----------
chi2 : float
the :math:`\chi^2` value
ndf : float
the degrees of freedom
Returns
-------
float
the :math:`\chi^2` probability
"""
return scipy.special.gammaincc(0.5 * ndf, 0.5 * chi2)
[docs]def chisquared_test(
h1: np.ndarray,
err1: np.ndarray,
h2: np.ndarray,
err2: np.ndarray,
) -> Tuple[float, float, float]:
r"""Perform :math:`\chi^2` test on two histograms.
Parameters
----------
h1 : :py:obj:`numpy.ndarray`
the first histogram bin contents
h2 : :py:obj:`numpy.ndarray`
the second histogram bin contents
err1 : :py:obj:`numpy.ndarray`
the first histogram bin errors
err2 : :py:obj:`numpy.ndarray`
the second histogram bin errors
Returns
-------
(float, int, float)
the :math:`\chi^2` test value, the degrees of freedom, and the probability
"""
# remove 0 bin heights
badh1 = h1 * h1 == 0
badh2 = h2 * h2 == 0
good_bins = np.invert(np.logical_or(badh1, badh2))
histo1 = h1[good_bins]
histo2 = h2[good_bins]
error1 = err1[good_bins]
error2 = err2[good_bins]
# degrees of freedom
nbins = histo1.shape[0]
ndf = nbins - 1
# sums
sum1 = np.sum(histo1)
sum2 = np.sum(histo2)
sumw1 = np.sum(error1 * error1)
sumw2 = np.sum(error2 * error2)
# chi2 calculation
error1sq = error1 * error1
error2sq = error2 * error2
sigma = sum1 * sum2 * error2sq + sum2 * sum2 * error1sq
delta = sum2 * histo1 - sum1 * histo2
chi2 = np.sum(delta * delta / sigma)
return (chi2, ndf, chisquared_cdf_c(chi2, ndf))