Source code for tdub.math

"""Module with some mathematical utilities."""

# stdlib
from typing import Tuple
import math

# external
import numpy as np
import scipy.special


[docs]def kolmogorov_prob(z: float) -> float: """Calculate the Kolmogorov distribution function. See ROOT's implementation in TMath_ (TMath::KolmogorovProb). .. _TMath: https://root.cern.ch/doc/master/namespaceTMath.html Parameters ---------- z : float the value to test Returns ------- float the probability that the test statistic exceeds :math:`z` (assuming the null hypothesis). Examples -------- >>> from tdub.math import kolmogorov_prob >>> kolmogorov_prob(1.13) 0.15549781841748692 """ w = 2.50662827 # c1 - -pi**2/8, c2 = 9*c1, c3 = 25*c1 c1 = -1.2337005501361697 c2 = -11.103304951225528 c3 = -30.842513753404244 u = abs(z) if u < 0.2: return 1.0 elif u < 0.755: v = 1.0 / (u * u) return float(1 - w * (math.exp(c1 * v) + math.exp(c2 * v) + math.exp(c3 * v)) / u) elif u < 6.8116: fj = np.array([-2, -8, -18, -32], dtype=np.float64) r = np.zeros((4,), dtype=np.float64) v = u * u maxj = max(1.0, round(3.0 / u)) for j in range(int(maxj)): r[j] = math.exp(fj[j] * v) return float(2 * (r[0] - r[1] + r[2] - r[3])) else: return float(0.0)
[docs]def ks_twosample_binned( hist1: np.ndarray, hist2: np.ndarray, err1: np.ndarray, err2: np.ndarray ) -> Tuple[float, float]: """Calculate KS statistic and p-value for two binned distributions. See ROOT's implementation in TH1_ (TH1::KolmogorovTest). .. _TH1: https://root.cern.ch/doc/master/classTH1.html Parameters ---------- hist1 : numpy.ndarray the histogram counts for the first distribution hist2 : numpy.ndarray the histogram counts for the second distribution err1 : numpy.ndarray the error on the histogram counts for the first distribution err2 : numpy.ndarray the error on the histogram counts for the second distribution Returns ------- (float, float) first: the test-statistic; second: the probability of the test (much less than 1 means distributions are incompatible) Examples -------- >>> import pygram11 >>> from tdub.math import ks_twosample_binned >>> data1, data2, w1, w2 = some_function_to_get_data() >>> h1, err1 = pygram11.histogram(data1, weights=w1, bins=40, range=(-3, 3)) >>> h2, err2 = pygram11.histogram(data2, weights=w2, bins=40, range=(-3, 3)) >>> kst, ksp = ks_twosample_binned(h1, h2, err1, err2) """ sum1 = np.sum(hist1) sum2 = np.sum(hist2) w1 = np.sum(err1 * err1) w2 = np.sum(err2 * err2) esum1 = sum1 * sum1 / w1 esum2 = sum2 * sum2 / w2 s1 = 1 / sum1 s2 = 1 / sum2 rsum1 = s1 * hist1 rsum2 = s2 * hist2 dfmax = float(np.max(np.abs(rsum1 - rsum2))) z = float(dfmax * math.sqrt(esum1 * esum2 / (esum1 + esum2))) return dfmax, kolmogorov_prob(z)
[docs]def chisquared_cdf_c(chi2: float, ndf: float) -> float: r"""Calculate :math:`\chi^2` probability from the value and NDF. See ROOT's ``TMath::Prob`` & ``ROOT::Math::chisquared_cdf_c``. Quoting the ROOT documentation: Computation of the probability for a certain :math:`\chi^2` and number of degrees of freedom (ndf). Calculations are based on the incomplete gamma function :math:`P(a,x)`, where :math:`a=\mathrm{ndf}/2` and :math:`x=\chi^2/2`. :math:`P(a,x)` represents the probability that the observed :math:`\chi^2` for a correct model should be less than the value :math:`\chi^2`. The returned probability corresponds to :math:`1-P(a,x)`, which denotes the probability that an observed :math:`\chi^2` exceeds the value :math:`\chi^2` by chance, even for a correct model. Parameters ---------- chi2 : float the :math:`\chi^2` value ndf : float the degrees of freedom Returns ------- float the :math:`\chi^2` probability """ return scipy.special.gammaincc(0.5 * ndf, 0.5 * chi2)
[docs]def chisquared_test( h1: np.ndarray, err1: np.ndarray, h2: np.ndarray, err2: np.ndarray, ) -> Tuple[float, float, float]: r"""Perform :math:`\chi^2` test on two histograms. Parameters ---------- h1 : :py:obj:`numpy.ndarray` the first histogram bin contents h2 : :py:obj:`numpy.ndarray` the second histogram bin contents err1 : :py:obj:`numpy.ndarray` the first histogram bin errors err2 : :py:obj:`numpy.ndarray` the second histogram bin errors Returns ------- (float, int, float) the :math:`\chi^2` test value, the degrees of freedom, and the probability """ # remove 0 bin heights badh1 = h1 * h1 == 0 badh2 = h2 * h2 == 0 good_bins = np.invert(np.logical_or(badh1, badh2)) histo1 = h1[good_bins] histo2 = h2[good_bins] error1 = err1[good_bins] error2 = err2[good_bins] # degrees of freedom nbins = histo1.shape[0] ndf = nbins - 1 # sums sum1 = np.sum(histo1) sum2 = np.sum(histo2) sumw1 = np.sum(error1 * error1) sumw2 = np.sum(error2 * error2) # chi2 calculation error1sq = error1 * error1 error2sq = error2 * error2 sigma = sum1 * sum2 * error2sq + sum2 * sum2 * error1sq delta = sum2 * histo1 - sum1 * histo2 chi2 = np.sum(delta * delta / sigma) return (chi2, ndf, chisquared_cdf_c(chi2, ndf))