"""Utilities for parsing TRExFitter."""
# stdlib
import io
import logging
import math
import multiprocessing
import os
import random
import sys
from dataclasses import dataclass
from pathlib import PosixPath
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
# external
import matplotlib
matplotlib.use("pdf")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
from uproot.reading import ReadOnlyDirectory
import yaml
# tdub
import tdub.config
from .art import (
canvas_from_counts,
setup_tdub_style,
draw_atlas_label,
draw_impact_barh,
legend_last_to_first,
)
from .root import TGraphAsymmErrors, TH1
setup_tdub_style()
log = logging.getLogger(__name__)
[docs]@dataclass
class FitParam:
"""Fit parameter description as a dataclass.
Attributes
----------
name : str
Technical name of the nuisance parameter.
label : str
Pretty name for plotting.
pre_down : float
Prefit down variation impact on mu.
pre_up : float
Prefit up variation impact on mu.
post_down : float
Postfit down variation impact on mu.
post_up : float
Postfit up variation impact on mu.
central : float
Central value of the NP.
sig_lo : float
Lo error on the NP.
sig_hi : float
Hi error on the NP.
"""
name: str = ""
label: str = ""
pre_down: float = 0.0
pre_up: float = 0.0
post_down: float = 0.0
post_up: float = 0.0
central: float = 0.0
sig_lo: float = 0.0
sig_hi: float = 0.0
post_max: float = 0.0
def __post_init__(self):
"""Execute after init."""
self.post_max = max(abs(self.post_down), abs(self.post_up))
[docs]def available_regions(rex_dir: Union[str, os.PathLike]) -> List[str]:
"""Get a list of available regions from a TRExFitter result directory.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
Returns
-------
list(str)
Regions discovered in the TRExFitter result directory.
"""
root_files = (PosixPath(rex_dir) / "Histograms").glob("*_preFit.root")
return [rf.name[:-12] for rf in root_files if "asimov" not in rf.name]
[docs]def data_histogram(
rex_dir: Union[str, os.PathLike], region: str, fit_name: str = "tW"
) -> TH1:
"""Get the histogram for the Data in a region from a TRExFitter result.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
region : str
TRExFitter region name.
fit_name : str
Name of the Fit
Returns
-------
tdub.root.TH1
Histogram for the Data sample.
"""
root_path = PosixPath(rex_dir) / "Histograms" / f"{fit_name}_{region}_histos.root"
return TH1(uproot.open(root_path).get(f"{region}_Data"))
[docs]def chisq(
rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre"
) -> Tuple[float, int, float]:
r"""Get prefit :math:`\chi^2` information from TRExFitter region.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
region : str
TRExFitter region name.
stage : str
Drawing fit stage, ('pre' or 'post').
Returns
-------
:py:obj:`float`
:math:`\chi^2` value for the region.
:py:obj:`int`
Number of degrees of freedom.
:py:obj:`float`
:math:`\chi^2` probability for the region.
"""
if stage not in ("pre", "post"):
raise ValueError("stage can only be 'pre' or 'post'")
txt_path = PosixPath(rex_dir) / "Histograms" / f"{region}_{stage}Fit_Chi2.txt"
table = yaml.full_load(txt_path.read_text())
return table["chi2"], table["ndof"], table["probability"]
[docs]def chisq_text(rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre") -> None:
r"""Generate nicely formatted text for :math:`\chi^2` information.
Deploys :py:func:`tdub.rex.chisq` for grab the info.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
region : str
TRExFitter region name.
stage : str
Drawing fit stage, ('pre' or 'post').
Returns
-------
str
Formatted string showing the :math:`\chi^2` information.
"""
chi2, ndof, prob = chisq(rex_dir, region, stage=stage)
return (
f"$\\chi^2/\\mathrm{{ndf}} = {chi2:3.2f} / {ndof}$, "
f"$\\chi^2_{{\\mathrm{{prob}}}} = {prob:3.2f}$"
)
[docs]def prefit_histogram(root_file: ReadOnlyDirectory, sample: str, region: str) -> TH1:
"""Get a prefit histogram from a file.
Parameters
----------
root_file : uproot.reading.ReadOnlyDirectory
File containing the desired prefit histogram.
sample : str
Physics sample name.
region : str
TRExFitter region name.
Returns
-------
tdub.root.TH1
Desired histogram.
"""
histname = f"{region}_{sample}"
try:
h = root_file.get(histname)
h = TH1(root_file.get(histname))
return h
except KeyError:
log.fatal(f"{histname} histogram not found in {root_file}")
exit(1)
[docs]def prefit_histograms(
rex_dir: Union[str, os.PathLike],
samples: Iterable[str],
region: str,
fit_name: str = "tW",
) -> Dict[str, TH1]:
"""Retrieve sample prefit histograms for a region.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
samples : Iterable(str)
Physics samples of the desired histograms
region : str
Region to get histograms for
fit_name : str
Name of the Fit
Returns
-------
dict(str, tdub.root.TH1)
Prefit histograms.
"""
root_path = PosixPath(rex_dir) / "Histograms" / f"{fit_name}_{region}_histos.root"
root_file = uproot.open(root_path)
histograms = {}
for samp in samples:
h = prefit_histogram(root_file, samp, region)
if h is None:
log.warn(f"Histogram for sample {samp} in region: {region} not found")
histograms[samp] = h
return histograms
def hepdata(
rex_dir: Union[str, os.PathLike],
region: str,
stage: str = "pre",
) -> Dict[Any, Any]:
"""Parse HEPData information.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
region : str
Region to get histograms for
stage : str
Fitting stage (`"pre"` or `"post"`).
"""
yaml_path = PosixPath(rex_dir) / "Plots" / f"{region}_{stage}fit.yaml"
return yaml.full_load(yaml_path.read_text())
[docs]def prefit_total_and_uncertainty(
rex_dir: Union[str, os.PathLike], region: str
) -> Tuple[TH1, TGraphAsymmErrors]:
"""Get the prefit total MC prediction and uncertainty band for a region.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory.
region : str
Region to get error band for.
Returns
-------
:py:obj:`tdub.root.TH1`
The total MC expectation histogram.
:py:obj:`tdub.root.TGraphAsymmErrors`
The error TGraph.
"""
root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_preFit.root"
root_file = uproot.open(root_path)
err = TGraphAsymmErrors(root_file.get("g_totErr"))
tot = TH1(root_file.get("h_tot"))
return tot, err
[docs]def postfit_available(rex_dir: Union[str, os.PathLike]) -> bool:
"""Check if TRExFitter result directory contains postFit information.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
Returns
-------
bool
True of postFit discovered
"""
histdir = PosixPath(rex_dir) / "Histograms"
for f in histdir.iterdir():
if "postFit" in f.name:
return True
return False
[docs]def postfit_histogram(root_file: ReadOnlyDirectory, sample: str) -> TH1:
"""Get a postfit histogram from a file.
Parameters
----------
root_file : uproot.reading.ReadOnlyDirectory
File containing the desired postfit histogram.
sample : str
Physics sample name.
Returns
-------
tdub.root.TH1
Desired histogram.
"""
histname = f"h_{sample}_postFit"
try:
h = TH1(root_file.get(histname))
return h
except KeyError:
log.fatal(f"{histname} histogram not found in {root_file}")
exit(1)
[docs]def postfit_histograms(
rex_dir: Union[str, os.PathLike], samples: Iterable[str], region: str
) -> Dict[str, TH1]:
"""Retrieve sample postfit histograms for a region.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
region : str
Region to get histograms for
samples : Iterable(str)
Physics samples of the desired histograms
Returns
-------
dict(str, tdub.root.TH1)
Postfit histograms detected in the TRExFitter result directory.
"""
root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_postFit.root"
root_file = uproot.open(root_path)
histograms = {}
for samp in samples:
if samp == "Data":
continue
h = postfit_histogram(root_file, samp)
if h is None:
log.warn(f"Histogram for sample {samp} in region {region} not found")
histograms[samp] = h
return histograms
[docs]def postfit_total_and_uncertainty(
rex_dir: Union[str, os.PathLike], region: str
) -> Tuple[Any, Any]:
"""Get the postfit total MC prediction and uncertainty band for a region.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory.
region : str
Region to get error band for.
Returns
-------
:py:obj:`tdub.root.TH1`
The total MC expectation histogram.
:py:obj:`tdub.root.TGraphAsymmErrors`
The error TGraph.
"""
root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_postFit.root"
root_file = uproot.open(root_path)
err = TGraphAsymmErrors(root_file.get("g_totErr_postFit"))
tot = TH1(root_file.get("h_tot_postFit"))
return tot, err
[docs]def meta_text(region: str, stage: str) -> str:
"""Construct a piece of text based on the region and fit stage.
Parameters
----------
region : str
TRExFitter Region to use.
stage : str
Fitting stage (`"pre"` or `"post"`).
Returns
-------
str
Resulting metadata text
"""
if stage == "pre":
stage = "Pre-fit"
elif stage == "post":
stage = "Post-fit"
else:
raise ValueError("stage can be 'pre' or 'post'")
if "1j1b" in region:
region = "1j1b"
elif "2j1b" in region:
region = "2j1b"
elif "2j2b" in region:
region = "2j2b"
else:
raise ValueError("region must contain '1j1b', '2j1b', or '2j2b'")
return f"$tW$ Dilepton, {region}, {stage}"
[docs]def stack_canvas(
rex_dir: Union[str, os.PathLike],
region: str,
stage: str = "pre",
fit_name: str = "tW",
show_chisq: bool = True,
meta_table: Optional[Dict[str, Any]] = None,
log_patterns: Optional[List[Any]] = None,
thesis: bool = False,
) -> Tuple[plt.Figure, plt.Axes, plt.Axes]:
r"""Create a pre- or post-fit plot canvas for a TRExFitter region.
Parameters
---------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory.
region : str
Region to get error band for.
stage : str
Drawing fit stage, (`"pre"` or `"post"`).
fit_name : str
Name of the Fit
show_chisq : bool
Print :math:`\chi^2` information on ratio canvas.
meta_table : dict, optional
Table of metadata for labeling plotting axes.
log_patterns : list, optional
List of region patterns to use a log scale on y-axis.
thesis: bool
Flag for thesis label.
Returns
-------
:py:obj:`matplotlib.figure.Figure`
Figure for housing the plot.
:py:obj:`matplotlib.axes.Axes`
Main axes for the histogram stack.
:py:obj:`matplotlib.axes.Axes`
Ratio axes to show Data/MC.
"""
samples = ("tW", "ttbar", "Zjets", "Diboson", "MCNP")
if stage == "pre":
histograms = prefit_histograms(rex_dir, samples, region, fit_name=fit_name)
total_mc, uncertainty = prefit_total_and_uncertainty(rex_dir, region)
elif stage == "post":
histograms = postfit_histograms(rex_dir, samples, region)
total_mc, uncertainty = postfit_total_and_uncertainty(rex_dir, region)
else:
raise ValueError("stage must be 'pre' or 'post'")
histograms["Data"] = data_histogram(rex_dir, region)
bin_edges = histograms["Data"].edges
counts = {k: v.counts for k, v in histograms.items()}
errors = {k: v.errors for k, v in histograms.items()}
if log_patterns is None:
log_patterns = tdub.config.PLOTTING_LOGY
logy = False
for pat in log_patterns:
if pat.search(region) is not None:
logy = True
fig, ax0, ax1 = canvas_from_counts(
counts,
errors,
bin_edges,
uncertainty=uncertainty,
total_mc=total_mc,
logy=logy,
)
bw = histograms["Data"].bin_width
xlab, ylab = meta_axis_label(region, bw, meta_table)
# stack axes cosmetics
ax0.set_ylabel(ylab, horizontalalignment="right", y=1.0)
draw_atlas_label(ax0, extra_lines=[meta_text(region, stage)], thesis=thesis)
legend_last_to_first(ax0, ncol=2, loc="upper right")
# ratio axes cosmetics
ax1.set_xlabel(xlab, horizontalalignment="right", x=1.0)
ax1.set_ylabel("Data/MC")
if stage == "post":
ax1.set_ylim([0.9, 1.1])
ax1.set_yticks([0.9, 0.95, 1.0, 1.05])
if show_chisq:
ax1.text(
0.02, 0.8, chisq_text(rex_dir, region, stage), transform=ax1.transAxes, size=11
)
ax1.legend(loc="lower left", fontsize=11)
# return objects
return fig, ax0, ax1
[docs]def plot_region_stage_ff(args):
"""Free (multiprocessing compatible) function to plot a region + stage.
This function is designed to be used internally by
:py:func:`plot_all_regions`, where it is sent to a multiprocessing
pool. Not meant for generic usage.
Parameters
----------
args: list(Any)
Arguments passed to :py:func:`stack_canvas`.
"""
fig, ax0, ax1 = stack_canvas(
rex_dir=args[0],
region=args[1],
stage=args[3],
show_chisq=args[4],
meta_table=args[5],
log_patterns=args[6],
thesis=args[7],
)
output_file = f"{args[2]}/{args[1]}_{args[3]}Fit.pdf"
fig.savefig(output_file)
[docs]def plot_all_regions(
rex_dir: Union[str, os.PathLike],
outdir: Union[str, os.PathLike],
stage: str = "pre",
fit_name: str = "tW",
show_chisq: bool = True,
n_test: int = -1,
thesis: bool = False,
) -> None:
r"""Plot all regions discovered in a TRExFitter result directory.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory
outdir : str or os.PathLike
Path to save resulting files to
stage : str
Fitting stage (`"pre"` or `"post"`).
fit_name : str
Name of the Fit
show_chisq : bool
Print :math:`\chi^2` information on ratio canvas.
n_test : int
Maximum number of regions to plot (for quick tests).
thesis : bool
Flag for thesis label.
"""
PosixPath(outdir).mkdir(parents=True, exist_ok=True)
regions = available_regions(rex_dir)
if n_test > 0:
regions = random.sample(regions, n_test)
meta_table = tdub.config.PLOTTING_META_TABLE.copy()
log_patterns = tdub.config.PLOTTING_LOGY.copy()
args = [
[rex_dir, region, outdir, stage, show_chisq, meta_table, log_patterns, thesis]
for region in regions
]
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
pool.map(plot_region_stage_ff, args)
plt.close("all")
log.info("Done creating %s-fit plots in %s." % (stage, outdir))
[docs]def nuispar_impact(
rex_dir: Union[str, os.PathLike], name: str, label: Optional[str] = None
) -> FitParam:
"""Extract a specific nuisance parameter from a fit.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory.
name : str
Name of the nuisance parameter.
label : str, optional
Give the nuisance parameter a label other than its name.
Returns
-------
tdub.rex.FitParam
Desired nuisance parameter summary.
"""
n, c, su, sd, postup, postdn, preup, predn = (
(PosixPath(rex_dir) / "Fits" / f"NPRanking_{name}.txt").read_text().strip().split()
)
npar = FitParam(
name,
name,
round(float(predn), 6),
round(float(preup), 6),
round(float(postdn), 6),
round(float(postup), 6),
round(float(c), 6),
round(float(sd), 6),
round(float(su), 6),
)
if label is not None:
npar.label = label
return npar
[docs]def nuispar_impacts(rex_dir: Union[str, os.PathLike], sort: bool = True) -> List[FitParam]:
"""Extract a list of nuisance parameter impacts from a fit.
Parameters
----------
rex_dir : str or os.PathLike
Path of the TRExFitter result directory.
Returns
-------
list(tdub.rex.FitParam)
The nuisance parameters.
"""
nuispars = []
np_ranking_yaml = yaml.full_load((PosixPath(rex_dir) / "Ranking.yaml").read_text())
for entry in np_ranking_yaml:
nuispars.append(
FitParam(
entry["Name"],
entry["Name"],
entry["POIdownPreFit"],
entry["POIupPreFit"],
entry["POIdown"],
entry["POIup"],
entry["NPhat"],
entry["NPerrLo"],
entry["NPerrHi"],
)
)
if sort:
return sorted(nuispars, key=lambda par: par.post_max)
return nuispars
[docs]def nuispar_impact_plot_df(nuispars: List[FitParam]) -> pd.DataFrame:
"""Construct a DataFrame to organize impact plot ingredients.
Parameters
----------
nuispars : list(FitParam)
The nuisance parameters.
Returns
-------
pandas.DataFrame
DataFrame describing the plot ingredients.
"""
pre_down = np.array([p.pre_down for p in nuispars])
pre_up = np.array([p.pre_up for p in nuispars])
post_down = np.array([p.post_down for p in nuispars])
post_up = np.array([p.post_up for p in nuispars])
central = np.array([p.central for p in nuispars])
sig_hi = np.array([p.sig_hi for p in nuispars])
sig_lo = np.array([p.sig_lo for p in nuispars])
pre_down_lefts = np.zeros_like(pre_down)
pre_down_lefts[pre_down < 0] = pre_down[pre_down < 0]
pre_up_lefts = np.zeros_like(pre_up)
pre_up_lefts[pre_up < 0] = pre_up[pre_up < 0]
post_down_lefts = np.zeros_like(post_down)
post_down_lefts[post_down < 0] = post_down[post_down < 0]
post_up_lefts = np.zeros_like(post_up)
post_up_lefts[post_up < 0] = post_up[post_up < 0]
ys = np.arange(len(pre_down))
return pd.DataFrame.from_dict(
dict(
pre_down=pre_down,
pre_up=pre_up,
post_down=post_down,
post_up=post_up,
central=central,
sig_hi=sig_hi,
sig_lo=sig_lo,
pre_down_lefts=pre_down_lefts,
pre_up_lefts=pre_up_lefts,
post_down_lefts=post_down_lefts,
post_up_lefts=post_up_lefts,
ys=ys,
)
)
[docs]def prettify_label(label: str) -> str:
"""Fix parameter label to look nice for plots.
Replace underscores with whitespace, TeXify some stuff, remove
unnecessary things, etc.
Parameters
----------
label : str
Original label.
Returns
-------
str
Prettified label.
"""
return (
label.replace("_", " ")
.replace("ttbar", r"$t\bar{t}$")
.replace("tW", r"$tW$")
.replace("muF", r"$\mu_F$")
.replace("muR", r"$\mu_R$")
.replace("AR ", "")
.replace("RhoT", "Rho T")
.replace("hdamp", r"$h_{\mathrm{damp}}$")
.replace("DRDS", "DR vs DS")
.replace("EffectiveNP", "Eff. NP")
.replace("EffNP", "Eff. NP")
.replace("ptreweight", r"top-$p_{\mathrm{T}}$-reweight")
.replace("MET", r"$E_{\mathrm{T}}^{\mathrm{miss}}$")
)
[docs]def nuispar_impact_plot_top20(
rex_dir: Union[str, os.PathLike], thesis: bool = False
) -> None:
"""Plot the top 20 nuisance parameters based on impact.
Parameters
----------
rex_dir : str, os.PathLike
Path of the TRExFitter result directory.
thesis: : bool
Flag for thesis label.
"""
nuispars = nuispar_impacts(rex_dir, sort=True)[-20:]
for npar in nuispars:
npar.label = prettify_label(npar.label)
df = nuispar_impact_plot_df(nuispars)
ys = np.array(df.ys)
# fmt: off
fig, ax = plt.subplots(figsize=(5, 8))
ax, ax2 = draw_impact_barh(ax, df)
ax.legend(ncol=1, loc="upper left", bbox_to_anchor=(-0.75, 1.11))
ax.set_xticks([-0.2, -0.1, 0.0, 0.1, 0.2])
ax.set_ylim([-1, ys[-1] + 2.4])
ax.set_yticklabels([p.label for p in nuispars])
ax2.legend(loc="lower left", bbox_to_anchor=(-0.75, -0.09))
ax2.set_xlabel(r"$\Delta\mu$", labelpad=25)
ax.set_xlabel(r"$(\hat{\theta}-\theta_0)/\Delta\theta$", labelpad=20)
if thesis:
ax.text(
0.10,
0.95,
"D. Davis Thesis",
fontstyle="italic",
fontweight="bold",
size=14,
transform=ax.transAxes
)
else:
ax.text(
0.10, 0.95, "ATLAS", fontstyle="italic", fontweight="bold", size=14, transform=ax.transAxes
)
ax.text(
0.37, 0.95, "Internal", size=14, transform=ax.transAxes
)
ax.text(
0.10, 0.92, "$\\sqrt{s}$ = 13 TeV, $L = {139}$ fb$^{-1}$", size=12, transform=ax.transAxes
)
fig.subplots_adjust(left=0.45, bottom=0.085, top=0.915, right=0.975)
mpl_dir = PosixPath(rex_dir) / "matplotlib"
mpl_dir.mkdir(exist_ok=True)
output_file = str(mpl_dir / "Impact.pdf")
fig.savefig(output_file)
log.info("Created %s" % output_file)
plt.close(fig)
# fmt: on
return 0
[docs]def fit_parameter(fit_file: PosixPath, name: str, prettify: bool = False) -> FitParam:
"""Retrieve a parameter from fit result text file.
Parameters
----------
fit_file : pathlib.PosixPath
Path of the TRExFitter fit result text file.
name : str
Name of desired parameter.
prettify : bool
Prettify the parameter label using
:py:func:`tdub.rex.prettify_label`.
Raises
------
ValueError
If the parameter name isn't discovered.
Returns
-------
tdub.rex.FitParam
Fit parameter description.
"""
with fit_file.open("r") as f:
for line in f.readlines():
if name in line:
n, c, u, d = line.split()
return FitParam(
name=n,
label=prettify_label(name) if prettify else name,
central=float(c),
sig_hi=float(u),
sig_lo=float(d),
)
# if we don't find the name, raise ValueError
raise ValueError("{} parameter not found in {}".format(name, str(fit_file)))
[docs]def delta_poi(
rex_dir1: Union[str, os.PathLike],
rex_dir2: Union[str, os.PathLike],
fit_name1: str = "tW",
fit_name2: str = "tW",
poi: str = "SigXsecOverSM",
) -> Tuple[float, float, float]:
r"""Calculate difference of a POI between two results directories.
The default arguments will perform a calculation of
:math:`\Delta\mu` between two different fits. Standard error
propagation is performed on both the up and down uncertainties.
Parameters
----------
rex_dir1 : str or os.PathLike
Path of the first TRExFitter result directory.
rex_dir2 : str or os.PathLike
Path of the second TRExFitter result directory.
fit_name1 : str
Name of the first fit.
fit_name2 : str
Name of the second fit.
poi : str
Name of the parameter of interest.
Returns
-------
:py:obj:`float`
Central value of delta mu.
:py:obj:`float`
Up uncertainty on delta mu.
:py:obj:`float`
Down uncertainty on delta mu.
"""
fit_file1 = PosixPath(rex_dir1) / "Fits" / f"{fit_name1}.txt"
fit_file2 = PosixPath(rex_dir2) / "Fits" / f"{fit_name2}.txt"
mu1 = fit_parameter(fit_file1, poi)
mu2 = fit_parameter(fit_file2, poi)
# delta_mu = mu1.central - mu2.central
# sig_delta_mu_up = math.sqrt(mu1.sig_hi ** 2 + mu2.sig_hi ** 2)
# sig_delta_mu_dn = math.sqrt(mu1.sig_lo ** 2 + mu2.sig_lo ** 2)
# return delta_mu, sig_delta_mu_up, sig_delta_mu_dn
return delta_param(mu1, mu2)
[docs]def delta_param(param1: FitParam, param2: FitParam) -> Tuple[float, float, float]:
r"""Calculate difference between two fit parameters.
Parameters
----------
param1 : tdub.rex.FitParam
First fit parameter.
param2 : tdub.rex.FitParam
Second fit parameter.
Returns
-------
:py:obj:`float`
Difference between the central values
:py:obj:`float`
Up uncertainty
:py:obj:`float`
Down uncertainty
"""
del_par = param1.central - param2.central
sig_del_par_up = math.sqrt(param1.sig_hi ** 2 + param2.sig_hi ** 2)
sig_del_par_dn = math.sqrt(param1.sig_lo ** 2 + param2.sig_lo ** 2)
return del_par, sig_del_par_up, sig_del_par_dn
[docs]def compare_uncertainty(
rex_dir1: Union[str, os.PathLike],
rex_dir2: Union[str, os.PathLike],
fit_name1: str = "tW",
fit_name2: str = "tW",
label1: Optional[str] = None,
label2: Optional[str] = None,
poi: str = "SigXsecOverSM",
print_to: Optional[io.TextIOBase] = None,
) -> None:
"""Compare uncertainty between two fits.
Parameters
----------
rex_dir1 : str or os.PathLike
Path of the first TRExFitter result directory.
rex_dir2 : str or os.PathLike
Path of the second TRExFitter result directory.
fit_name1 : str
Name of the first fit.
fit_name2 : str
Name of the second fit.
label1 : str, optional
Define label for the first fit (defaults to rex_dir1).
label2 : str, optional
Define label for the second fit (defaults to rex_dir2).
poi : str
Name of the parameter of interest.
print_to : io.TextIOBase, optional
Where to print results (defaults to sys.stdout).
"""
if print_to is None:
print_to = sys.stdout
path1 = PosixPath(rex_dir1).resolve()
path2 = PosixPath(rex_dir2).resolve()
p1 = path1 if label1 is None else label1
p2 = path2 if label2 is None else label2
fit_file1 = path1 / "Fits" / f"{fit_name1}.txt"
fit_file2 = path2 / "Fits" / f"{fit_name2}.txt"
mu1 = fit_parameter(fit_file1, poi)
mu2 = fit_parameter(fit_file2, poi)
up1, down1 = mu1.sig_hi, mu1.sig_lo
up2, down2 = mu2.sig_hi, mu2.sig_lo
if abs(up1) > abs(up2):
print(f"{p1} has a larger up uncertainty on {poi}", file=print_to)
plarger = (abs(up1) - abs(up2)) / abs(up2) * 100.0
else:
print(f"{p2} has a larger up uncertainty on {poi}", file=print_to)
plarger = (abs(up2) - abs(up1)) / abs(up1) * 100.0
print(f"{p1:<24}: {up1}", file=print_to)
print(f"{p2:<24}: {up2}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(down1) > abs(down2):
print(f"{p1} has a larger down uncertainty on {poi}", file=print_to)
plarger = (abs(down1) - abs(down2)) / abs(down2) * 100.0
else:
print(f"{p2} has a larger down uncertainty on {poi}", file=print_to)
plarger = (abs(down2) - abs(down1)) / abs(down1) * 100.0
print(f"{p1:<24}: {down1}", file=print_to)
print(f"{p2:<24}: {down2}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
[docs]def compare_nuispar(
name: str,
rex_dir1: Union[str, os.PathLike],
rex_dir2: Union[str, os.PathLike],
label1: Optional[str] = None,
label2: Optional[str] = None,
np_label: Optional[str] = None,
print_to: Optional[io.TextIOBase] = None,
) -> None:
"""Compare nuisance parameter info between two fits.
Parameters
----------
name : str
Name of the nuisance parameter.
rex_dir1 : str or os.PathLike
Path of the first TRExFitter result directory.
rex_dir2 : str or os.PathLike
Path of the second TRExFitter result directory.
label1 : str, optional
Define label for the first fit (defaults to rex_dir1).
label2 : str, optional
Define label for the second fit (defaults to rex_dir2).
np_label : str, optional
Give the nuisance parameter a label other than its name.
print_to : io.TextIOBase, optional
Where to print results (defaults to sys.stdout).
"""
if print_to is None:
print_to = sys.stdout
path1 = PosixPath(rex_dir1).resolve()
path2 = PosixPath(rex_dir2).resolve()
p1 = path1 if label1 is None else label1
p2 = path2 if label2 is None else label2
np1 = nuispar_impact(rex_dir1, name=name, label=np_label)
np2 = nuispar_impact(rex_dir2, name=name, label=np_label)
print(f"{'=' * 15} Comparison for NP: {name} {'=' * 15}", file=print_to)
if abs(np1.sig_lo) < abs(np2.sig_lo):
print(f"{p1} has more aggressive sig lo {name} constraint", file=print_to)
d = abs(np2.sig_lo) - abs(np1.sig_lo)
else:
print(f"{p2} has more aggresive sig lo {name} constraint", file=print_to)
d = abs(np1.sig_lo) - abs(np2.sig_lo)
print(f"{p1:<24}: {np1.sig_lo}", file=print_to)
print(f"{p2:<24}: {np2.sig_lo}", file=print_to)
print(f"{'Difference':<24}: {d:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(np1.sig_hi) < abs(np2.sig_hi):
print(f"{p1} has a more aggressive sig hi {name} constraint", file=print_to)
d = abs(np2.sig_hi) - abs(np1.sig_hi)
else:
print(f"{p2} has a move aggresive sig hi {name} constraint", file=print_to)
d = abs(np1.sig_hi) - abs(np2.sig_hi)
print(f"{p1:<24}: {np1.sig_hi}", file=print_to)
print(f"{p2:<24}: {np2.sig_hi}", file=print_to)
print(f"{'Difference':<24}: {d:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(np1.pre_up) > abs(np2.pre_up):
print(f"{p1} has larger prefit up variation impact from {name}", file=print_to)
plarger = (abs(np1.pre_up) - abs(np2.pre_up)) / abs(np2.pre_up) * 100.0
else:
print(f"{p2} has larger prefit up variation impact from {name}", file=print_to)
plarger = (abs(np2.pre_up) - abs(np1.pre_up)) / abs(np1.pre_up) * 100.0
print(f"{p1:<24}: {np1.pre_up}", file=print_to)
print(f"{p2:<24}: {np2.pre_up}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(np1.pre_down) > abs(np2.pre_down):
print(f"{p1} has larger prefit down variation impact from {name}", file=print_to)
plarger = (abs(np1.pre_down) - abs(np2.pre_down)) / abs(np2.pre_down) * 100.0
else:
print(f"{p2} has larger prefit down variation impact from {name}", file=print_to)
plarger = (abs(np2.pre_down) - abs(np1.pre_down)) / abs(np1.pre_down) * 100.0
print(f"{p1:<24}: {np1.pre_down}", file=print_to)
print(f"{p2:<24}: {np2.pre_down}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(np1.post_up) > abs(np2.post_up):
print(f"{p1} has larger postfit up variation impact from {name}", file=print_to)
plarger = (abs(np1.post_up) - abs(np2.post_up)) / abs(np2.post_up) * 100.0
else:
print(f"{p2} has larger postfit up variation impact from {name}", file=print_to)
plarger = (abs(np2.post_up) - abs(np1.post_up)) / abs(np1.post_up) * 100.0
print(f"{p1:<24}: {np1.post_up}", file=print_to)
print(f"{p2:<24}: {np2.post_up}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
print(f"{'-' * 60}", file=print_to)
if abs(np1.post_down) > abs(np2.post_down):
print(f"{p1} has larger postfit down variation impact from {name}", file=print_to)
plarger = (abs(np1.post_down) - abs(np2.post_down)) / abs(np2.post_down) * 100.0
else:
print(f"{p2} has larger postfit down variation impact from {name}", file=print_to)
plarger = (abs(np2.post_down) - abs(np1.post_down)) / abs(np1.post_down) * 100.0
print(f"{p1:<24}: {np1.post_down}", file=print_to)
print(f"{p2:<24}: {np2.post_down}", file=print_to)
print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
[docs]def comparison_summary(
rex_dir1,
rex_dir2,
fit_name1: str = "tW",
fit_name2: str = "tW",
label1: Optional[str] = None,
label2: Optional[str] = None,
fit_poi: str = "SigXsecOverSM",
nuispars: Optional[Iterable[str]] = None,
nuispar_labels: Optional[Iterable[str]] = None,
print_to: Optional[io.TextIOBase] = None,
) -> None:
"""Summarize a comparison of two fits.
Parameters
----------
rex_dir1 : str or os.PathLike
Path of the first TRExFitter result directory.
rex_dir2 : str or os.PathLike
Path of the second TRExFitter result directory.
fit_name1 : str
Name of the first fit.
fit_name2 : str
Name of the second fit.
label1 : str, optional
Define label for the first fit (defaults to rex_dir1).
label2 : str, optional
Define label for the second fit (defaults to rex_dir2).
fit_poi : str
Name of the parameter of interest.
nuispars : list(str), optional
Nuisance parameters to compare.
nuispar_labels: list(str), optional
Labels to give each nuisance parameter other than the default
name.
print_to : io.TextIOBase, optional
Where to print results (defaults to sys.stdout).
"""
if print_to is None:
print_to = sys.stdout
print(f"{'*' * 80}", file=print_to)
print("Fit comparison summary", file=print_to)
if label1 is not None and label2 is not None:
print(f"Fit 1: {rex_dir1} as {label1}", file=print_to)
print(f"Fit 2: {rex_dir2} as {label2}", file=print_to)
print(f"{'-' * 60}", file=print_to)
compare_uncertainty(
rex_dir1,
rex_dir2,
fit_name1=fit_name1,
fit_name2=fit_name2,
label1=label1,
label2=label2,
poi=fit_poi,
print_to=print_to,
)
if nuispars is not None:
if nuispar_labels is not None:
pairs = [(np, npl) for np, npl in zip(nuispars, nuispar_labels)]
else:
pairs = [(np, None) for np in nuispars]
for np_name, np_label in pairs:
compare_nuispar(
np_name,
rex_dir1,
rex_dir2,
label1=label1,
label2=label2,
np_label=np_label,
print_to=print_to,
)
print(f"{'*' * 80}", file=print_to)
[docs]def stability_test_standard(
umbrella: PosixPath,
outdir: Optional[PosixPath] = None,
tests: Union[str, List[str]] = "all",
) -> None:
"""Perform a battery of standard stability tests.
This function expects a rigid `umbrella` directory structure,
based on the output of results that are generated by rexpy_.
.. _rexpy: https://github.com/douglasdavis/rexpy
Parameters
----------
umbrella : pathlib.PosixPath
Umbrella directory containing all fits run via rexpy's
standard fits.
outdir : pathlib.PosixPath, optional
Directory to save results (defaults to current working
directory).
tests : str or list(str)
Which tests to execute. (default is "all"). The possible tests
include:
* ``"sys-drops"``, which shows the stability test for dropping
some systematics.
* ``"indiv-camps"``, which shows the stability test for
limiting the fit to individual campaigns.
* ``"regions"``, which shows the stability test for limiting
the fit to subsets of the analysis regions.
* ``"b0-check"``, which shows the stability test for limiting
the fit to individual analysis regions and checking the B0
eigenvector uncertainty.
"""
import tdub.internal.stab_tests as tist
umbrella = umbrella.resolve()
curdir = PosixPath.cwd().resolve()
if outdir is None:
outdir = curdir
else:
outdir.mkdir(parents=True, exist_ok=True)
if tests == "all":
tests = ["sys-drops", "indiv-camps", "regions", "b0-check"]
os.chdir(outdir)
if "sys-drops" in tests:
nom, names, labels, vals = tist.excluded_systematics_delta_mu_summary(
umbrella / "main.force-data.d" / "tW"
)
fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315))
fig.subplots_adjust(left=0.50, right=0.925)
tist.make_delta_mu_plot(
ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels
)
fig.savefig("stability-tests-sys-drops.pdf")
if "indiv-camps" in tests:
nom, names, labels, vals = tist.indiv_camp_delta_mu_summary(umbrella)
fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315))
fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.99)
tist.make_delta_mu_plot(
ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels
)
fig.savefig("stability-tests-indiv-camps.pdf")
if "regions" in tests:
nom, names, labels, vals = tist.region_delta_mu_summary(umbrella)
fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315))
fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.99)
tist.make_delta_mu_plot(
ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels
)
fig.savefig("stability-tests-regions.pdf")
if "b0-check" in tests:
fig, ax = tist.b0_by_year_fig_and_ax(umbrella)
fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.8)
fig.savefig("stability-tests-b0-check.pdf")
os.chdir(curdir)
return None
[docs]def stability_test_parton_shower_impacts(
herwig704: PosixPath,
herwig713: PosixPath,
outdir: Optional[PosixPath] = None,
) -> None:
"""Perform a battery of parton shower impact stability tests.
This function expects a rigid pair of Herwig 7.0.4 and 7.1.3
directories based on the output of results that are generated by
rexpy_.
.. _rexpy: https://github.com/douglasdavis/rexpy
Parameters
----------
herwig704 : pathlib.PosixPath
Path of the Herwig 7.1.4 fit results
herwig713 : pathlib.PosixPath
Path of the Herwig 7.1.3 fit results
outdir : pathlib.PosixPath, optional
Directory to save results (defaults to current working
directory).
"""
import tdub.internal.stab_tests as tist
herwig704 = herwig704.resolve()
herwig713 = herwig713.resolve()
curdir = PosixPath.cwd().resolve()
if outdir is None:
outdir = curdir
else:
outdir = outdir.resolve()
outdir.mkdir(exist_ok=True, parents=True)
os.chdir(outdir)
fig, _ = tist.ps_impact_h704_vs_h713(herwig704, herwig713)
fig.savefig("main_vs_main.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_h704_vs_h713(herwig704, herwig713, sort=True)
fig.savefig("main_vs_main_sorted.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r1j1b(herwig704, "ttbar_PS_1j1b")
fig.savefig("indivpoi_ttbar_PS_1j1b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r1j1b(herwig713, "ttbar_PS_1j1b")
fig.savefig("indivpoi_ttbar_PS_1j1b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j1b(herwig704, "ttbar_PS_2j1b")
fig.savefig("indivpoi_ttbar_PS_2j1b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j1b(herwig713, "ttbar_PS_2j1b")
fig.savefig("indivpoi_ttbar_PS_2j1b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j2b(herwig704, "ttbar_PS_2j2b")
fig.savefig("indivpoi_ttbar_PS_2j2b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j2b(herwig713, "ttbar_PS_2j2b")
fig.savefig("indivpoi_ttbar_PS_2j2b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r1j1b(herwig704, "tW_PS_1j1b")
fig.savefig("indivpoi_tW_PS_1j1b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r1j1b(herwig713, "tW_PS_1j1b")
fig.savefig("indivpoi_tW_PS_1j1b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j1b(herwig704, "tW_PS_2j1b")
fig.savefig("indivpoi_tW_PS_2j1b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j1b(herwig713, "tW_PS_2j1b")
fig.savefig("indivpoi_tW_PS_2j1b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j2b(herwig704, "tW_PS_2j2b")
fig.savefig("indivpoi_tW_PS_2j2b_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_r2j2b(herwig713, "tW_PS_2j2b")
fig.savefig("indivpoi_tW_PS_2j2b_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig704, "ttbar_PS_norm")
fig.savefig("indivpoi_ttbar_PS_norm_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig713, "ttbar_PS_norm")
fig.savefig("indivpoi_ttbar_PS_norm_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig704, "ttbar_PS_migration")
fig.savefig("indivpoi_ttbar_PS_migration_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig713, "ttbar_PS_migration")
fig.savefig("indivpoi_ttbar_PS_migration_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig704, "tW_PS_norm")
fig.savefig("indivpoi_tW_PS_norm_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig713, "tW_PS_norm")
fig.savefig("indivpoi_tW_PS_norm_h713.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig704, "tW_PS_migration")
fig.savefig("indivpoi_tW_PS_migration_h704.pdf")
plt.close(fig)
fig, _ = tist.ps_impact_norm_mig(herwig713, "tW_PS_migration")
fig.savefig("indivpoi_tW_PS_migration_h713.pdf")
plt.close(fig)
return 0