Source code for tdub.rex

"""Utilities for parsing TRExFitter."""

# stdlib
import io
import logging
import math
import multiprocessing
import os
import random
import sys
from dataclasses import dataclass
from pathlib import PosixPath
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

# external
import matplotlib

matplotlib.use("pdf")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
from uproot.reading import ReadOnlyDirectory

import yaml

# tdub
import tdub.config
from .art import (
    canvas_from_counts,
    setup_tdub_style,
    draw_atlas_label,
    draw_impact_barh,
    legend_last_to_first,
)
from .root import TGraphAsymmErrors, TH1

setup_tdub_style()

log = logging.getLogger(__name__)


[docs]@dataclass class FitParam: """Fit parameter description as a dataclass. Attributes ---------- name : str Technical name of the nuisance parameter. label : str Pretty name for plotting. pre_down : float Prefit down variation impact on mu. pre_up : float Prefit up variation impact on mu. post_down : float Postfit down variation impact on mu. post_up : float Postfit up variation impact on mu. central : float Central value of the NP. sig_lo : float Lo error on the NP. sig_hi : float Hi error on the NP. """ name: str = "" label: str = "" pre_down: float = 0.0 pre_up: float = 0.0 post_down: float = 0.0 post_up: float = 0.0 central: float = 0.0 sig_lo: float = 0.0 sig_hi: float = 0.0 post_max: float = 0.0 def __post_init__(self): """Execute after init.""" self.post_max = max(abs(self.post_down), abs(self.post_up))
[docs]def available_regions(rex_dir: Union[str, os.PathLike]) -> List[str]: """Get a list of available regions from a TRExFitter result directory. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory Returns ------- list(str) Regions discovered in the TRExFitter result directory. """ root_files = (PosixPath(rex_dir) / "Histograms").glob("*_preFit.root") return [rf.name[:-12] for rf in root_files if "asimov" not in rf.name]
[docs]def data_histogram( rex_dir: Union[str, os.PathLike], region: str, fit_name: str = "tW" ) -> TH1: """Get the histogram for the Data in a region from a TRExFitter result. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory region : str TRExFitter region name. fit_name : str Name of the Fit Returns ------- tdub.root.TH1 Histogram for the Data sample. """ root_path = PosixPath(rex_dir) / "Histograms" / f"{fit_name}_{region}_histos.root" return TH1(uproot.open(root_path).get(f"{region}_Data"))
[docs]def chisq( rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre" ) -> Tuple[float, int, float]: r"""Get prefit :math:`\chi^2` information from TRExFitter region. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory region : str TRExFitter region name. stage : str Drawing fit stage, ('pre' or 'post'). Returns ------- :py:obj:`float` :math:`\chi^2` value for the region. :py:obj:`int` Number of degrees of freedom. :py:obj:`float` :math:`\chi^2` probability for the region. """ if stage not in ("pre", "post"): raise ValueError("stage can only be 'pre' or 'post'") txt_path = PosixPath(rex_dir) / "Histograms" / f"{region}_{stage}Fit_Chi2.txt" table = yaml.full_load(txt_path.read_text()) return table["chi2"], table["ndof"], table["probability"]
[docs]def chisq_text(rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre") -> None: r"""Generate nicely formatted text for :math:`\chi^2` information. Deploys :py:func:`tdub.rex.chisq` for grab the info. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory region : str TRExFitter region name. stage : str Drawing fit stage, ('pre' or 'post'). Returns ------- str Formatted string showing the :math:`\chi^2` information. """ chi2, ndof, prob = chisq(rex_dir, region, stage=stage) return ( f"$\\chi^2/\\mathrm{{ndf}} = {chi2:3.2f} / {ndof}$, " f"$\\chi^2_{{\\mathrm{{prob}}}} = {prob:3.2f}$" )
[docs]def prefit_histogram(root_file: ReadOnlyDirectory, sample: str, region: str) -> TH1: """Get a prefit histogram from a file. Parameters ---------- root_file : uproot.reading.ReadOnlyDirectory File containing the desired prefit histogram. sample : str Physics sample name. region : str TRExFitter region name. Returns ------- tdub.root.TH1 Desired histogram. """ histname = f"{region}_{sample}" try: h = root_file.get(histname) h = TH1(root_file.get(histname)) return h except KeyError: log.fatal(f"{histname} histogram not found in {root_file}") exit(1)
[docs]def prefit_histograms( rex_dir: Union[str, os.PathLike], samples: Iterable[str], region: str, fit_name: str = "tW", ) -> Dict[str, TH1]: """Retrieve sample prefit histograms for a region. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory samples : Iterable(str) Physics samples of the desired histograms region : str Region to get histograms for fit_name : str Name of the Fit Returns ------- dict(str, tdub.root.TH1) Prefit histograms. """ root_path = PosixPath(rex_dir) / "Histograms" / f"{fit_name}_{region}_histos.root" root_file = uproot.open(root_path) histograms = {} for samp in samples: h = prefit_histogram(root_file, samp, region) if h is None: log.warn(f"Histogram for sample {samp} in region: {region} not found") histograms[samp] = h return histograms
def hepdata( rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre", ) -> Dict[Any, Any]: """Parse HEPData information. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory region : str Region to get histograms for stage : str Fitting stage (`"pre"` or `"post"`). """ yaml_path = PosixPath(rex_dir) / "Plots" / f"{region}_{stage}fit.yaml" return yaml.full_load(yaml_path.read_text())
[docs]def prefit_total_and_uncertainty( rex_dir: Union[str, os.PathLike], region: str ) -> Tuple[TH1, TGraphAsymmErrors]: """Get the prefit total MC prediction and uncertainty band for a region. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory. region : str Region to get error band for. Returns ------- :py:obj:`tdub.root.TH1` The total MC expectation histogram. :py:obj:`tdub.root.TGraphAsymmErrors` The error TGraph. """ root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_preFit.root" root_file = uproot.open(root_path) err = TGraphAsymmErrors(root_file.get("g_totErr")) tot = TH1(root_file.get("h_tot")) return tot, err
[docs]def postfit_available(rex_dir: Union[str, os.PathLike]) -> bool: """Check if TRExFitter result directory contains postFit information. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory Returns ------- bool True of postFit discovered """ histdir = PosixPath(rex_dir) / "Histograms" for f in histdir.iterdir(): if "postFit" in f.name: return True return False
[docs]def postfit_histogram(root_file: ReadOnlyDirectory, sample: str) -> TH1: """Get a postfit histogram from a file. Parameters ---------- root_file : uproot.reading.ReadOnlyDirectory File containing the desired postfit histogram. sample : str Physics sample name. Returns ------- tdub.root.TH1 Desired histogram. """ histname = f"h_{sample}_postFit" try: h = TH1(root_file.get(histname)) return h except KeyError: log.fatal(f"{histname} histogram not found in {root_file}") exit(1)
[docs]def postfit_histograms( rex_dir: Union[str, os.PathLike], samples: Iterable[str], region: str ) -> Dict[str, TH1]: """Retrieve sample postfit histograms for a region. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory region : str Region to get histograms for samples : Iterable(str) Physics samples of the desired histograms Returns ------- dict(str, tdub.root.TH1) Postfit histograms detected in the TRExFitter result directory. """ root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_postFit.root" root_file = uproot.open(root_path) histograms = {} for samp in samples: if samp == "Data": continue h = postfit_histogram(root_file, samp) if h is None: log.warn(f"Histogram for sample {samp} in region {region} not found") histograms[samp] = h return histograms
[docs]def postfit_total_and_uncertainty( rex_dir: Union[str, os.PathLike], region: str ) -> Tuple[Any, Any]: """Get the postfit total MC prediction and uncertainty band for a region. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory. region : str Region to get error band for. Returns ------- :py:obj:`tdub.root.TH1` The total MC expectation histogram. :py:obj:`tdub.root.TGraphAsymmErrors` The error TGraph. """ root_path = PosixPath(rex_dir) / "Histograms" / f"{region}_postFit.root" root_file = uproot.open(root_path) err = TGraphAsymmErrors(root_file.get("g_totErr_postFit")) tot = TH1(root_file.get("h_tot_postFit")) return tot, err
[docs]def meta_text(region: str, stage: str) -> str: """Construct a piece of text based on the region and fit stage. Parameters ---------- region : str TRExFitter Region to use. stage : str Fitting stage (`"pre"` or `"post"`). Returns ------- str Resulting metadata text """ if stage == "pre": stage = "Pre-fit" elif stage == "post": stage = "Post-fit" else: raise ValueError("stage can be 'pre' or 'post'") if "1j1b" in region: region = "1j1b" elif "2j1b" in region: region = "2j1b" elif "2j2b" in region: region = "2j2b" else: raise ValueError("region must contain '1j1b', '2j1b', or '2j2b'") return f"$tW$ Dilepton, {region}, {stage}"
[docs]def meta_axis_label( region: str, bin_width: float, meta_table: Optional[Dict[str, Any]] = None ) -> Tuple[str, str]: """Construct an axis label from metadata table. Parameters ---------- region : str TRExFitter region to use. bin_width : float Bin width for y-axis label. meta_table : dict, optional Table of metadata for labeling plotting axes. If ``None`` (default), the definition stored in the variable ``tdub.config.PLOTTING_META_TABLE`` is used. Returns ------- str x-axis label for the region. str y-axis label for the region. """ if "VRP" in region: region = region[12:] if meta_table is None: if tdub.config.PLOTTING_META_TABLE is None: raise ValueError("tdub.config.PLOTTING_META_TABLE must be defined") else: meta_region = tdub.config.PLOTTING_META_TABLE["titles"][region] else: meta_region = meta_table["titles"][region] main_label = meta_region["mpl"] unit_label = meta_region["unit"] if unit_label: xunit_label = f" [{unit_label}]" yunit_label = f" {unit_label}" else: xunit_label = "" yunit_label = "" xl = f"{main_label}{xunit_label}" if bin_width.is_integer(): yl = f"Events/{int(bin_width)}{yunit_label}" else: yl = f"Events/{bin_width:.2f}{yunit_label}" return xl, yl
[docs]def stack_canvas( rex_dir: Union[str, os.PathLike], region: str, stage: str = "pre", fit_name: str = "tW", show_chisq: bool = True, meta_table: Optional[Dict[str, Any]] = None, log_patterns: Optional[List[Any]] = None, thesis: bool = False, ) -> Tuple[plt.Figure, plt.Axes, plt.Axes]: r"""Create a pre- or post-fit plot canvas for a TRExFitter region. Parameters --------- rex_dir : str or os.PathLike Path of the TRExFitter result directory. region : str Region to get error band for. stage : str Drawing fit stage, (`"pre"` or `"post"`). fit_name : str Name of the Fit show_chisq : bool Print :math:`\chi^2` information on ratio canvas. meta_table : dict, optional Table of metadata for labeling plotting axes. log_patterns : list, optional List of region patterns to use a log scale on y-axis. thesis: bool Flag for thesis label. Returns ------- :py:obj:`matplotlib.figure.Figure` Figure for housing the plot. :py:obj:`matplotlib.axes.Axes` Main axes for the histogram stack. :py:obj:`matplotlib.axes.Axes` Ratio axes to show Data/MC. """ samples = ("tW", "ttbar", "Zjets", "Diboson", "MCNP") if stage == "pre": histograms = prefit_histograms(rex_dir, samples, region, fit_name=fit_name) total_mc, uncertainty = prefit_total_and_uncertainty(rex_dir, region) elif stage == "post": histograms = postfit_histograms(rex_dir, samples, region) total_mc, uncertainty = postfit_total_and_uncertainty(rex_dir, region) else: raise ValueError("stage must be 'pre' or 'post'") histograms["Data"] = data_histogram(rex_dir, region) bin_edges = histograms["Data"].edges counts = {k: v.counts for k, v in histograms.items()} errors = {k: v.errors for k, v in histograms.items()} if log_patterns is None: log_patterns = tdub.config.PLOTTING_LOGY logy = False for pat in log_patterns: if pat.search(region) is not None: logy = True fig, ax0, ax1 = canvas_from_counts( counts, errors, bin_edges, uncertainty=uncertainty, total_mc=total_mc, logy=logy, ) bw = histograms["Data"].bin_width xlab, ylab = meta_axis_label(region, bw, meta_table) # stack axes cosmetics ax0.set_ylabel(ylab, horizontalalignment="right", y=1.0) draw_atlas_label(ax0, extra_lines=[meta_text(region, stage)], thesis=thesis) legend_last_to_first(ax0, ncol=2, loc="upper right") # ratio axes cosmetics ax1.set_xlabel(xlab, horizontalalignment="right", x=1.0) ax1.set_ylabel("Data/MC") if stage == "post": ax1.set_ylim([0.9, 1.1]) ax1.set_yticks([0.9, 0.95, 1.0, 1.05]) if show_chisq: ax1.text( 0.02, 0.8, chisq_text(rex_dir, region, stage), transform=ax1.transAxes, size=11 ) ax1.legend(loc="lower left", fontsize=11) # return objects return fig, ax0, ax1
[docs]def plot_region_stage_ff(args): """Free (multiprocessing compatible) function to plot a region + stage. This function is designed to be used internally by :py:func:`plot_all_regions`, where it is sent to a multiprocessing pool. Not meant for generic usage. Parameters ---------- args: list(Any) Arguments passed to :py:func:`stack_canvas`. """ fig, ax0, ax1 = stack_canvas( rex_dir=args[0], region=args[1], stage=args[3], show_chisq=args[4], meta_table=args[5], log_patterns=args[6], thesis=args[7], ) output_file = f"{args[2]}/{args[1]}_{args[3]}Fit.pdf" fig.savefig(output_file)
[docs]def plot_all_regions( rex_dir: Union[str, os.PathLike], outdir: Union[str, os.PathLike], stage: str = "pre", fit_name: str = "tW", show_chisq: bool = True, n_test: int = -1, thesis: bool = False, ) -> None: r"""Plot all regions discovered in a TRExFitter result directory. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory outdir : str or os.PathLike Path to save resulting files to stage : str Fitting stage (`"pre"` or `"post"`). fit_name : str Name of the Fit show_chisq : bool Print :math:`\chi^2` information on ratio canvas. n_test : int Maximum number of regions to plot (for quick tests). thesis : bool Flag for thesis label. """ PosixPath(outdir).mkdir(parents=True, exist_ok=True) regions = available_regions(rex_dir) if n_test > 0: regions = random.sample(regions, n_test) meta_table = tdub.config.PLOTTING_META_TABLE.copy() log_patterns = tdub.config.PLOTTING_LOGY.copy() args = [ [rex_dir, region, outdir, stage, show_chisq, meta_table, log_patterns, thesis] for region in regions ] pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()) pool.map(plot_region_stage_ff, args) plt.close("all") log.info("Done creating %s-fit plots in %s." % (stage, outdir))
[docs]def nuispar_impact( rex_dir: Union[str, os.PathLike], name: str, label: Optional[str] = None ) -> FitParam: """Extract a specific nuisance parameter from a fit. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory. name : str Name of the nuisance parameter. label : str, optional Give the nuisance parameter a label other than its name. Returns ------- tdub.rex.FitParam Desired nuisance parameter summary. """ n, c, su, sd, postup, postdn, preup, predn = ( (PosixPath(rex_dir) / "Fits" / f"NPRanking_{name}.txt").read_text().strip().split() ) npar = FitParam( name, name, round(float(predn), 6), round(float(preup), 6), round(float(postdn), 6), round(float(postup), 6), round(float(c), 6), round(float(sd), 6), round(float(su), 6), ) if label is not None: npar.label = label return npar
[docs]def nuispar_impacts(rex_dir: Union[str, os.PathLike], sort: bool = True) -> List[FitParam]: """Extract a list of nuisance parameter impacts from a fit. Parameters ---------- rex_dir : str or os.PathLike Path of the TRExFitter result directory. Returns ------- list(tdub.rex.FitParam) The nuisance parameters. """ nuispars = [] np_ranking_yaml = yaml.full_load((PosixPath(rex_dir) / "Ranking.yaml").read_text()) for entry in np_ranking_yaml: nuispars.append( FitParam( entry["Name"], entry["Name"], entry["POIdownPreFit"], entry["POIupPreFit"], entry["POIdown"], entry["POIup"], entry["NPhat"], entry["NPerrLo"], entry["NPerrHi"], ) ) if sort: return sorted(nuispars, key=lambda par: par.post_max) return nuispars
[docs]def nuispar_impact_plot_df(nuispars: List[FitParam]) -> pd.DataFrame: """Construct a DataFrame to organize impact plot ingredients. Parameters ---------- nuispars : list(FitParam) The nuisance parameters. Returns ------- pandas.DataFrame DataFrame describing the plot ingredients. """ pre_down = np.array([p.pre_down for p in nuispars]) pre_up = np.array([p.pre_up for p in nuispars]) post_down = np.array([p.post_down for p in nuispars]) post_up = np.array([p.post_up for p in nuispars]) central = np.array([p.central for p in nuispars]) sig_hi = np.array([p.sig_hi for p in nuispars]) sig_lo = np.array([p.sig_lo for p in nuispars]) pre_down_lefts = np.zeros_like(pre_down) pre_down_lefts[pre_down < 0] = pre_down[pre_down < 0] pre_up_lefts = np.zeros_like(pre_up) pre_up_lefts[pre_up < 0] = pre_up[pre_up < 0] post_down_lefts = np.zeros_like(post_down) post_down_lefts[post_down < 0] = post_down[post_down < 0] post_up_lefts = np.zeros_like(post_up) post_up_lefts[post_up < 0] = post_up[post_up < 0] ys = np.arange(len(pre_down)) return pd.DataFrame.from_dict( dict( pre_down=pre_down, pre_up=pre_up, post_down=post_down, post_up=post_up, central=central, sig_hi=sig_hi, sig_lo=sig_lo, pre_down_lefts=pre_down_lefts, pre_up_lefts=pre_up_lefts, post_down_lefts=post_down_lefts, post_up_lefts=post_up_lefts, ys=ys, ) )
[docs]def prettify_label(label: str) -> str: """Fix parameter label to look nice for plots. Replace underscores with whitespace, TeXify some stuff, remove unnecessary things, etc. Parameters ---------- label : str Original label. Returns ------- str Prettified label. """ return ( label.replace("_", " ") .replace("ttbar", r"$t\bar{t}$") .replace("tW", r"$tW$") .replace("muF", r"$\mu_F$") .replace("muR", r"$\mu_R$") .replace("AR ", "") .replace("RhoT", "Rho T") .replace("hdamp", r"$h_{\mathrm{damp}}$") .replace("DRDS", "DR vs DS") .replace("EffectiveNP", "Eff. NP") .replace("EffNP", "Eff. NP") .replace("ptreweight", r"top-$p_{\mathrm{T}}$-reweight") .replace("MET", r"$E_{\mathrm{T}}^{\mathrm{miss}}$") )
[docs]def nuispar_impact_plot_top20( rex_dir: Union[str, os.PathLike], thesis: bool = False ) -> None: """Plot the top 20 nuisance parameters based on impact. Parameters ---------- rex_dir : str, os.PathLike Path of the TRExFitter result directory. thesis: : bool Flag for thesis label. """ nuispars = nuispar_impacts(rex_dir, sort=True)[-20:] for npar in nuispars: npar.label = prettify_label(npar.label) df = nuispar_impact_plot_df(nuispars) ys = np.array(df.ys) # fmt: off fig, ax = plt.subplots(figsize=(5, 8)) ax, ax2 = draw_impact_barh(ax, df) ax.legend(ncol=1, loc="upper left", bbox_to_anchor=(-0.75, 1.11)) ax.set_xticks([-0.2, -0.1, 0.0, 0.1, 0.2]) ax.set_ylim([-1, ys[-1] + 2.4]) ax.set_yticklabels([p.label for p in nuispars]) ax2.legend(loc="lower left", bbox_to_anchor=(-0.75, -0.09)) ax2.set_xlabel(r"$\Delta\mu$", labelpad=25) ax.set_xlabel(r"$(\hat{\theta}-\theta_0)/\Delta\theta$", labelpad=20) if thesis: ax.text( 0.10, 0.95, "D. Davis Thesis", fontstyle="italic", fontweight="bold", size=14, transform=ax.transAxes ) else: ax.text( 0.10, 0.95, "ATLAS", fontstyle="italic", fontweight="bold", size=14, transform=ax.transAxes ) ax.text( 0.37, 0.95, "Internal", size=14, transform=ax.transAxes ) ax.text( 0.10, 0.92, "$\\sqrt{s}$ = 13 TeV, $L = {139}$ fb$^{-1}$", size=12, transform=ax.transAxes ) fig.subplots_adjust(left=0.45, bottom=0.085, top=0.915, right=0.975) mpl_dir = PosixPath(rex_dir) / "matplotlib" mpl_dir.mkdir(exist_ok=True) output_file = str(mpl_dir / "Impact.pdf") fig.savefig(output_file) log.info("Created %s" % output_file) plt.close(fig) # fmt: on return 0
[docs]def fit_parameter(fit_file: PosixPath, name: str, prettify: bool = False) -> FitParam: """Retrieve a parameter from fit result text file. Parameters ---------- fit_file : pathlib.PosixPath Path of the TRExFitter fit result text file. name : str Name of desired parameter. prettify : bool Prettify the parameter label using :py:func:`tdub.rex.prettify_label`. Raises ------ ValueError If the parameter name isn't discovered. Returns ------- tdub.rex.FitParam Fit parameter description. """ with fit_file.open("r") as f: for line in f.readlines(): if name in line: n, c, u, d = line.split() return FitParam( name=n, label=prettify_label(name) if prettify else name, central=float(c), sig_hi=float(u), sig_lo=float(d), ) # if we don't find the name, raise ValueError raise ValueError("{} parameter not found in {}".format(name, str(fit_file)))
[docs]def delta_poi( rex_dir1: Union[str, os.PathLike], rex_dir2: Union[str, os.PathLike], fit_name1: str = "tW", fit_name2: str = "tW", poi: str = "SigXsecOverSM", ) -> Tuple[float, float, float]: r"""Calculate difference of a POI between two results directories. The default arguments will perform a calculation of :math:`\Delta\mu` between two different fits. Standard error propagation is performed on both the up and down uncertainties. Parameters ---------- rex_dir1 : str or os.PathLike Path of the first TRExFitter result directory. rex_dir2 : str or os.PathLike Path of the second TRExFitter result directory. fit_name1 : str Name of the first fit. fit_name2 : str Name of the second fit. poi : str Name of the parameter of interest. Returns ------- :py:obj:`float` Central value of delta mu. :py:obj:`float` Up uncertainty on delta mu. :py:obj:`float` Down uncertainty on delta mu. """ fit_file1 = PosixPath(rex_dir1) / "Fits" / f"{fit_name1}.txt" fit_file2 = PosixPath(rex_dir2) / "Fits" / f"{fit_name2}.txt" mu1 = fit_parameter(fit_file1, poi) mu2 = fit_parameter(fit_file2, poi) # delta_mu = mu1.central - mu2.central # sig_delta_mu_up = math.sqrt(mu1.sig_hi ** 2 + mu2.sig_hi ** 2) # sig_delta_mu_dn = math.sqrt(mu1.sig_lo ** 2 + mu2.sig_lo ** 2) # return delta_mu, sig_delta_mu_up, sig_delta_mu_dn return delta_param(mu1, mu2)
[docs]def delta_param(param1: FitParam, param2: FitParam) -> Tuple[float, float, float]: r"""Calculate difference between two fit parameters. Parameters ---------- param1 : tdub.rex.FitParam First fit parameter. param2 : tdub.rex.FitParam Second fit parameter. Returns ------- :py:obj:`float` Difference between the central values :py:obj:`float` Up uncertainty :py:obj:`float` Down uncertainty """ del_par = param1.central - param2.central sig_del_par_up = math.sqrt(param1.sig_hi ** 2 + param2.sig_hi ** 2) sig_del_par_dn = math.sqrt(param1.sig_lo ** 2 + param2.sig_lo ** 2) return del_par, sig_del_par_up, sig_del_par_dn
[docs]def compare_uncertainty( rex_dir1: Union[str, os.PathLike], rex_dir2: Union[str, os.PathLike], fit_name1: str = "tW", fit_name2: str = "tW", label1: Optional[str] = None, label2: Optional[str] = None, poi: str = "SigXsecOverSM", print_to: Optional[io.TextIOBase] = None, ) -> None: """Compare uncertainty between two fits. Parameters ---------- rex_dir1 : str or os.PathLike Path of the first TRExFitter result directory. rex_dir2 : str or os.PathLike Path of the second TRExFitter result directory. fit_name1 : str Name of the first fit. fit_name2 : str Name of the second fit. label1 : str, optional Define label for the first fit (defaults to rex_dir1). label2 : str, optional Define label for the second fit (defaults to rex_dir2). poi : str Name of the parameter of interest. print_to : io.TextIOBase, optional Where to print results (defaults to sys.stdout). """ if print_to is None: print_to = sys.stdout path1 = PosixPath(rex_dir1).resolve() path2 = PosixPath(rex_dir2).resolve() p1 = path1 if label1 is None else label1 p2 = path2 if label2 is None else label2 fit_file1 = path1 / "Fits" / f"{fit_name1}.txt" fit_file2 = path2 / "Fits" / f"{fit_name2}.txt" mu1 = fit_parameter(fit_file1, poi) mu2 = fit_parameter(fit_file2, poi) up1, down1 = mu1.sig_hi, mu1.sig_lo up2, down2 = mu2.sig_hi, mu2.sig_lo if abs(up1) > abs(up2): print(f"{p1} has a larger up uncertainty on {poi}", file=print_to) plarger = (abs(up1) - abs(up2)) / abs(up2) * 100.0 else: print(f"{p2} has a larger up uncertainty on {poi}", file=print_to) plarger = (abs(up2) - abs(up1)) / abs(up1) * 100.0 print(f"{p1:<24}: {up1}", file=print_to) print(f"{p2:<24}: {up2}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(down1) > abs(down2): print(f"{p1} has a larger down uncertainty on {poi}", file=print_to) plarger = (abs(down1) - abs(down2)) / abs(down2) * 100.0 else: print(f"{p2} has a larger down uncertainty on {poi}", file=print_to) plarger = (abs(down2) - abs(down1)) / abs(down1) * 100.0 print(f"{p1:<24}: {down1}", file=print_to) print(f"{p2:<24}: {down2}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
[docs]def compare_nuispar( name: str, rex_dir1: Union[str, os.PathLike], rex_dir2: Union[str, os.PathLike], label1: Optional[str] = None, label2: Optional[str] = None, np_label: Optional[str] = None, print_to: Optional[io.TextIOBase] = None, ) -> None: """Compare nuisance parameter info between two fits. Parameters ---------- name : str Name of the nuisance parameter. rex_dir1 : str or os.PathLike Path of the first TRExFitter result directory. rex_dir2 : str or os.PathLike Path of the second TRExFitter result directory. label1 : str, optional Define label for the first fit (defaults to rex_dir1). label2 : str, optional Define label for the second fit (defaults to rex_dir2). np_label : str, optional Give the nuisance parameter a label other than its name. print_to : io.TextIOBase, optional Where to print results (defaults to sys.stdout). """ if print_to is None: print_to = sys.stdout path1 = PosixPath(rex_dir1).resolve() path2 = PosixPath(rex_dir2).resolve() p1 = path1 if label1 is None else label1 p2 = path2 if label2 is None else label2 np1 = nuispar_impact(rex_dir1, name=name, label=np_label) np2 = nuispar_impact(rex_dir2, name=name, label=np_label) print(f"{'=' * 15} Comparison for NP: {name} {'=' * 15}", file=print_to) if abs(np1.sig_lo) < abs(np2.sig_lo): print(f"{p1} has more aggressive sig lo {name} constraint", file=print_to) d = abs(np2.sig_lo) - abs(np1.sig_lo) else: print(f"{p2} has more aggresive sig lo {name} constraint", file=print_to) d = abs(np1.sig_lo) - abs(np2.sig_lo) print(f"{p1:<24}: {np1.sig_lo}", file=print_to) print(f"{p2:<24}: {np2.sig_lo}", file=print_to) print(f"{'Difference':<24}: {d:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(np1.sig_hi) < abs(np2.sig_hi): print(f"{p1} has a more aggressive sig hi {name} constraint", file=print_to) d = abs(np2.sig_hi) - abs(np1.sig_hi) else: print(f"{p2} has a move aggresive sig hi {name} constraint", file=print_to) d = abs(np1.sig_hi) - abs(np2.sig_hi) print(f"{p1:<24}: {np1.sig_hi}", file=print_to) print(f"{p2:<24}: {np2.sig_hi}", file=print_to) print(f"{'Difference':<24}: {d:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(np1.pre_up) > abs(np2.pre_up): print(f"{p1} has larger prefit up variation impact from {name}", file=print_to) plarger = (abs(np1.pre_up) - abs(np2.pre_up)) / abs(np2.pre_up) * 100.0 else: print(f"{p2} has larger prefit up variation impact from {name}", file=print_to) plarger = (abs(np2.pre_up) - abs(np1.pre_up)) / abs(np1.pre_up) * 100.0 print(f"{p1:<24}: {np1.pre_up}", file=print_to) print(f"{p2:<24}: {np2.pre_up}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(np1.pre_down) > abs(np2.pre_down): print(f"{p1} has larger prefit down variation impact from {name}", file=print_to) plarger = (abs(np1.pre_down) - abs(np2.pre_down)) / abs(np2.pre_down) * 100.0 else: print(f"{p2} has larger prefit down variation impact from {name}", file=print_to) plarger = (abs(np2.pre_down) - abs(np1.pre_down)) / abs(np1.pre_down) * 100.0 print(f"{p1:<24}: {np1.pre_down}", file=print_to) print(f"{p2:<24}: {np2.pre_down}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(np1.post_up) > abs(np2.post_up): print(f"{p1} has larger postfit up variation impact from {name}", file=print_to) plarger = (abs(np1.post_up) - abs(np2.post_up)) / abs(np2.post_up) * 100.0 else: print(f"{p2} has larger postfit up variation impact from {name}", file=print_to) plarger = (abs(np2.post_up) - abs(np1.post_up)) / abs(np1.post_up) * 100.0 print(f"{p1:<24}: {np1.post_up}", file=print_to) print(f"{p2:<24}: {np2.post_up}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to) print(f"{'-' * 60}", file=print_to) if abs(np1.post_down) > abs(np2.post_down): print(f"{p1} has larger postfit down variation impact from {name}", file=print_to) plarger = (abs(np1.post_down) - abs(np2.post_down)) / abs(np2.post_down) * 100.0 else: print(f"{p2} has larger postfit down variation impact from {name}", file=print_to) plarger = (abs(np2.post_down) - abs(np1.post_down)) / abs(np1.post_down) * 100.0 print(f"{p1:<24}: {np1.post_down}", file=print_to) print(f"{p2:<24}: {np2.post_down}", file=print_to) print(f"{'Percent larger':<24}: {plarger:3.4f}", file=print_to)
[docs]def comparison_summary( rex_dir1, rex_dir2, fit_name1: str = "tW", fit_name2: str = "tW", label1: Optional[str] = None, label2: Optional[str] = None, fit_poi: str = "SigXsecOverSM", nuispars: Optional[Iterable[str]] = None, nuispar_labels: Optional[Iterable[str]] = None, print_to: Optional[io.TextIOBase] = None, ) -> None: """Summarize a comparison of two fits. Parameters ---------- rex_dir1 : str or os.PathLike Path of the first TRExFitter result directory. rex_dir2 : str or os.PathLike Path of the second TRExFitter result directory. fit_name1 : str Name of the first fit. fit_name2 : str Name of the second fit. label1 : str, optional Define label for the first fit (defaults to rex_dir1). label2 : str, optional Define label for the second fit (defaults to rex_dir2). fit_poi : str Name of the parameter of interest. nuispars : list(str), optional Nuisance parameters to compare. nuispar_labels: list(str), optional Labels to give each nuisance parameter other than the default name. print_to : io.TextIOBase, optional Where to print results (defaults to sys.stdout). """ if print_to is None: print_to = sys.stdout print(f"{'*' * 80}", file=print_to) print("Fit comparison summary", file=print_to) if label1 is not None and label2 is not None: print(f"Fit 1: {rex_dir1} as {label1}", file=print_to) print(f"Fit 2: {rex_dir2} as {label2}", file=print_to) print(f"{'-' * 60}", file=print_to) compare_uncertainty( rex_dir1, rex_dir2, fit_name1=fit_name1, fit_name2=fit_name2, label1=label1, label2=label2, poi=fit_poi, print_to=print_to, ) if nuispars is not None: if nuispar_labels is not None: pairs = [(np, npl) for np, npl in zip(nuispars, nuispar_labels)] else: pairs = [(np, None) for np in nuispars] for np_name, np_label in pairs: compare_nuispar( np_name, rex_dir1, rex_dir2, label1=label1, label2=label2, np_label=np_label, print_to=print_to, ) print(f"{'*' * 80}", file=print_to)
[docs]def stability_test_standard( umbrella: PosixPath, outdir: Optional[PosixPath] = None, tests: Union[str, List[str]] = "all", ) -> None: """Perform a battery of standard stability tests. This function expects a rigid `umbrella` directory structure, based on the output of results that are generated by rexpy_. .. _rexpy: https://github.com/douglasdavis/rexpy Parameters ---------- umbrella : pathlib.PosixPath Umbrella directory containing all fits run via rexpy's standard fits. outdir : pathlib.PosixPath, optional Directory to save results (defaults to current working directory). tests : str or list(str) Which tests to execute. (default is "all"). The possible tests include: * ``"sys-drops"``, which shows the stability test for dropping some systematics. * ``"indiv-camps"``, which shows the stability test for limiting the fit to individual campaigns. * ``"regions"``, which shows the stability test for limiting the fit to subsets of the analysis regions. * ``"b0-check"``, which shows the stability test for limiting the fit to individual analysis regions and checking the B0 eigenvector uncertainty. """ import tdub.internal.stab_tests as tist umbrella = umbrella.resolve() curdir = PosixPath.cwd().resolve() if outdir is None: outdir = curdir else: outdir.mkdir(parents=True, exist_ok=True) if tests == "all": tests = ["sys-drops", "indiv-camps", "regions", "b0-check"] os.chdir(outdir) if "sys-drops" in tests: nom, names, labels, vals = tist.excluded_systematics_delta_mu_summary( umbrella / "main.force-data.d" / "tW" ) fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315)) fig.subplots_adjust(left=0.50, right=0.925) tist.make_delta_mu_plot( ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels ) fig.savefig("stability-tests-sys-drops.pdf") if "indiv-camps" in tests: nom, names, labels, vals = tist.indiv_camp_delta_mu_summary(umbrella) fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315)) fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.99) tist.make_delta_mu_plot( ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels ) fig.savefig("stability-tests-indiv-camps.pdf") if "regions" in tests: nom, names, labels, vals = tist.region_delta_mu_summary(umbrella) fig, ax = plt.subplots(figsize=(5.2, 1.5 + len(names) * 0.315)) fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.99) tist.make_delta_mu_plot( ax, nom.sig_hi, nom.sig_lo, vals["c"], vals["d"], vals["u"], labels ) fig.savefig("stability-tests-regions.pdf") if "b0-check" in tests: fig, ax = tist.b0_by_year_fig_and_ax(umbrella) fig.subplots_adjust(left=0.350, right=0.925, bottom=0.3, top=0.8) fig.savefig("stability-tests-b0-check.pdf") os.chdir(curdir) return None
[docs]def stability_test_parton_shower_impacts( herwig704: PosixPath, herwig713: PosixPath, outdir: Optional[PosixPath] = None, ) -> None: """Perform a battery of parton shower impact stability tests. This function expects a rigid pair of Herwig 7.0.4 and 7.1.3 directories based on the output of results that are generated by rexpy_. .. _rexpy: https://github.com/douglasdavis/rexpy Parameters ---------- herwig704 : pathlib.PosixPath Path of the Herwig 7.1.4 fit results herwig713 : pathlib.PosixPath Path of the Herwig 7.1.3 fit results outdir : pathlib.PosixPath, optional Directory to save results (defaults to current working directory). """ import tdub.internal.stab_tests as tist herwig704 = herwig704.resolve() herwig713 = herwig713.resolve() curdir = PosixPath.cwd().resolve() if outdir is None: outdir = curdir else: outdir = outdir.resolve() outdir.mkdir(exist_ok=True, parents=True) os.chdir(outdir) fig, _ = tist.ps_impact_h704_vs_h713(herwig704, herwig713) fig.savefig("main_vs_main.pdf") plt.close(fig) fig, _ = tist.ps_impact_h704_vs_h713(herwig704, herwig713, sort=True) fig.savefig("main_vs_main_sorted.pdf") plt.close(fig) fig, _ = tist.ps_impact_r1j1b(herwig704, "ttbar_PS_1j1b") fig.savefig("indivpoi_ttbar_PS_1j1b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r1j1b(herwig713, "ttbar_PS_1j1b") fig.savefig("indivpoi_ttbar_PS_1j1b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j1b(herwig704, "ttbar_PS_2j1b") fig.savefig("indivpoi_ttbar_PS_2j1b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j1b(herwig713, "ttbar_PS_2j1b") fig.savefig("indivpoi_ttbar_PS_2j1b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j2b(herwig704, "ttbar_PS_2j2b") fig.savefig("indivpoi_ttbar_PS_2j2b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j2b(herwig713, "ttbar_PS_2j2b") fig.savefig("indivpoi_ttbar_PS_2j2b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_r1j1b(herwig704, "tW_PS_1j1b") fig.savefig("indivpoi_tW_PS_1j1b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r1j1b(herwig713, "tW_PS_1j1b") fig.savefig("indivpoi_tW_PS_1j1b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j1b(herwig704, "tW_PS_2j1b") fig.savefig("indivpoi_tW_PS_2j1b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j1b(herwig713, "tW_PS_2j1b") fig.savefig("indivpoi_tW_PS_2j1b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j2b(herwig704, "tW_PS_2j2b") fig.savefig("indivpoi_tW_PS_2j2b_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_r2j2b(herwig713, "tW_PS_2j2b") fig.savefig("indivpoi_tW_PS_2j2b_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig704, "ttbar_PS_norm") fig.savefig("indivpoi_ttbar_PS_norm_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig713, "ttbar_PS_norm") fig.savefig("indivpoi_ttbar_PS_norm_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig704, "ttbar_PS_migration") fig.savefig("indivpoi_ttbar_PS_migration_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig713, "ttbar_PS_migration") fig.savefig("indivpoi_ttbar_PS_migration_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig704, "tW_PS_norm") fig.savefig("indivpoi_tW_PS_norm_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig713, "tW_PS_norm") fig.savefig("indivpoi_tW_PS_norm_h713.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig704, "tW_PS_migration") fig.savefig("indivpoi_tW_PS_migration_h704.pdf") plt.close(fig) fig, _ = tist.ps_impact_norm_mig(herwig713, "tW_PS_migration") fig.savefig("indivpoi_tW_PS_migration_h713.pdf") plt.close(fig) return 0