Source code for pypesto.profile.util

"""Utility function for profile module."""

import warnings
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Any, Literal

import numpy as np
import scipy.stats

from ..C import GRAD
from ..problem import Problem
from ..result import ProfileResult, ProfilerResult, Result
from .options import ProfileOptions

PROFILE_STEP_PRECHECK_NOMINAL_WARN_THRESHOLD = 500
PROFILE_STEP_PRECHECK_DENSE_WARN_THRESHOLD = 1000


@dataclass(frozen=True)
class ResolvedProfileStepSizes:
    """
    Effective step sizes for one profiled parameter.

    The minimum, default, and maximum values always come from the same
    step-size family.

    Attributes
    ----------
    mode:
        Selected step-size family, either `"absolute"` or `"relative"`.
    default_step_size:
        Resolved default step size.
    min_step_size:
        Resolved minimum step size.
    max_step_size:
        Resolved maximum step size.
    span:
        Parameter span `ub - lb` on the optimization scale.
    """

    mode: Literal["absolute", "relative"]
    default_step_size: float
    min_step_size: float
    max_step_size: float
    span: float


ResolvedProfileStepSizeMap = dict[int, ResolvedProfileStepSizes]



[docs]
def chi2_quantile_to_ratio(alpha: float = 0.95, df: int = 1):
    """
    Compute profile likelihood threshold.

    Transform lower tail probability `alpha` for a chi2 distribution with `df`
    degrees of freedom to a profile likelihood ratio threshold.

    Parameters
    ----------
    alpha:
        Lower tail probability, defaults to 95% interval.
    df:
        Degrees of freedom.

    Returns
    -------
    The computed likelihood ratio threshold.
    """
    quantile = scipy.stats.chi2.ppf(alpha, df=df)
    ratio = np.exp(-quantile / 2)
    return ratio




[docs]
def calculate_approximate_ci(
    xs: np.ndarray, ratios: np.ndarray, confidence_ratio: float
) -> tuple[float, float]:
    """
    Calculate approximate confidence interval based on profile.

    Interval bounds are linearly interpolated.

    Parameters
    ----------
    xs:
        The ordered parameter values along the profile for the coordinate of
        interest.
    ratios:
        The likelihood ratios corresponding to the parameter values.
    confidence_ratio:
        Minimum confidence ratio to base the confidence interval upon, as
        obtained via :func:`pypesto.profile.chi2_quantile_to_ratio`.

    Returns
    -------
    Bounds of the approximate confidence interval.
    """
    # extract indices where the ratio is larger than the minimum ratio
    (indices,) = np.where(ratios >= confidence_ratio)
    l_ind, u_ind = indices[0], indices[-1]

    # lower bound
    if l_ind == 0:
        lb = xs[l_ind]
    else:
        # linear interpolation with next smaller value
        ind = [l_ind - 1, l_ind]
        lb = np.interp(confidence_ratio, ratios[ind], xs[ind])

    # upper bound
    if u_ind == len(ratios) - 1:
        ub = xs[u_ind]
    else:
        # linear interpolation with next larger value
        ind = [u_ind + 1, u_ind]  # flipped as interp expects increasing xs
        ub = np.interp(confidence_ratio, ratios[ind], xs[ind])

    return lb, ub



def validate_profile_parameter_bounds(problem: Problem, i_par: int) -> float:
    """Validate finite profile bounds for one parameter and return its span."""
    lb = float(problem.lb_full[i_par])
    ub = float(problem.ub_full[i_par])
    if not np.isfinite(lb) or not np.isfinite(ub):
        raise ValueError(
            "Profiling requires finite lower and upper bounds for parameter "
            f"'{problem.x_names[i_par]}' (index={i_par})."
        )
    span = ub - lb
    if span <= 0:
        raise ValueError(
            "Profiling requires an upper bound greater than the lower bound "
            f"for parameter '{problem.x_names[i_par]}' (index={i_par})."
        )
    return span


def resolve_profile_step_sizes(
    problem: Problem,
    i_par: int,
    options: ProfileOptions,
) -> ResolvedProfileStepSizes:
    """
    Resolve effective profile step sizes for one parameter.

    Relative step sizes are scaled by the parameter span `ub - lb`. If the
    resolved relative default is at least as large as the absolute default,
    the full relative family is used. Otherwise the full absolute family is
    used.

    Parameters
    ----------
    problem:
        The parameter estimation problem containing bounds and scales.
    i_par:
        Index of the profiled parameter in full dimension.
    options:
        Profile options containing absolute and relative step-size settings.

    Returns
    -------
    resolved_steps:
        Resolved step sizes and selection metadata.
    """
    # Bounds are required here because relative steps are defined from the
    # finite parameter span on the optimization scale.
    span = validate_profile_parameter_bounds(problem, i_par)

    if options.default_step_size_relative > 0:
        relative_default_step_size = options.default_step_size_relative * span
        relative_min_step_size = options.min_step_size_relative * span
        relative_max_step_size = options.max_step_size_relative * span

        # Select one complete step-size family based on the default step size.
        if (
            options.default_step_size_absolute == 0
            or relative_default_step_size >= options.default_step_size_absolute
        ):
            return ResolvedProfileStepSizes(
                mode="relative",
                default_step_size=relative_default_step_size,
                min_step_size=relative_min_step_size,
                max_step_size=relative_max_step_size,
                span=span,
            )

    return ResolvedProfileStepSizes(
        mode="absolute",
        default_step_size=options.default_step_size_absolute,
        min_step_size=options.min_step_size_absolute,
        max_step_size=options.max_step_size_absolute,
        span=span,
    )


def resolve_profile_step_sizes_for_parameters(
    problem: Problem,
    parameter_indices: Iterable[int],
    options: ProfileOptions,
) -> ResolvedProfileStepSizeMap:
    """Resolve effective profile step sizes for multiple parameters."""
    return {
        i_par: resolve_profile_step_sizes(problem, i_par, options)
        for i_par in parameter_indices
    }


def _format_profile_step_size_resolution_summary(
    problem: Problem,
    i_par: int,
    resolved_steps: ResolvedProfileStepSizes,
) -> str:
    """Create a one-line summary of the resolved step-size family."""
    scale = str(problem.x_scales[i_par]).lower()
    parameter_name = problem.x_names[i_par]

    return (
        "Resolved profile step sizes for "
        f"{parameter_name} (index={i_par}): "
        f"family={resolved_steps.mode}, "
        f"scale={scale}, span={resolved_steps.span}, "
        f"min={resolved_steps.min_step_size}, "
        f"default={resolved_steps.default_step_size}, "
        f"max={resolved_steps.max_step_size}."
    )


def precheck_profile_step_size(
    current_profile: ProfilerResult,
    problem: Problem,
    i_par: int,
    par_direction: int,
    options: ProfileOptions,
    resolved_steps: ResolvedProfileStepSizes,
) -> None:
    """
    Warn or raise if the resolved step sizes imply many profile steps.

    Two estimates are formed: a nominal one from the default step size and a
    worst-case one from the minimum step size. In ``"raise"`` mode, an error
    is raised only when the worst-case estimate is excessive; a merely large
    nominal estimate only triggers a warning, so valid runs are not broken.

    Parameters
    ----------
    current_profile:
        Current profile path.
    problem:
        The parameter estimation problem.
    i_par:
        Index of the profiled parameter in full dimension.
    par_direction:
        Profiling direction, either `-1` for descending or `1` for ascending.
    options:
        Profile options.
    resolved_steps:
        Pre-resolved step sizes for the profiled parameter.
    """
    if options.step_size_precheck_mode == "off":
        return

    # Estimate how much of the bounded parameter range is left in this
    # profiling direction.
    x0 = float(current_profile.x_path[i_par, -1])
    if par_direction == -1:
        available_span = x0 - float(problem.lb_full[i_par])
    elif par_direction == 1:
        available_span = float(problem.ub_full[i_par]) - x0
    else:
        raise ValueError("par_direction must be either -1 or 1.")

    if not np.isfinite(available_span) or available_span <= 0:
        return

    # Use the resolved default and minimum steps as nominal and dense estimates.
    nominal_count = available_span / resolved_steps.default_step_size
    dense_count = available_span / resolved_steps.min_step_size

    nominal_warn = nominal_count > PROFILE_STEP_PRECHECK_NOMINAL_WARN_THRESHOLD
    dense_warn = dense_count > PROFILE_STEP_PRECHECK_DENSE_WARN_THRESHOLD
    if not nominal_warn and not dense_warn:
        return

    parameter_name = problem.x_names[i_par]
    message = (
        f"Profiling parameter '{parameter_name}' may require many steps "
        f"({nominal_count:.1f} with the default step size, "
        f"up to {dense_count:.1f} with the minimum step size). "
        "Consider increasing the profile step sizes."
    )
    if not options.whole_path:
        message += (
            " This is a bound-based upper estimate; profiling may stop "
            "earlier at the likelihood-ratio threshold."
        )

    if dense_warn and options.step_size_precheck_mode == "raise":
        raise ValueError(message)

    warnings.warn(message, UserWarning, stacklevel=2)


def initialize_profile(
    problem: Problem,
    result: Result,
    result_index: int,
    profile_index: Iterable[int],
    profile_list: int,
) -> float:
    """
    Initialize profiling based on a previous optimization.

    Parameters
    ----------
    problem:
        The problem to be solved.
    result:
        A result object to initialize profiling and to append the profiling
        results to. For example, one might append more profiling runs to a
        previous profile, in order to merge these.
        The existence of an optimization result is obligatory.
    result_index:
        index from which optimization result profiling should be started
    profile_index:
        array with parameter indices, whether a profile should
        be computed (1) or not (0)
        Default is all profiles should be computed
    profile_list:
        integer which specifies whether a call to the profiler should create
        a new list of profiles (default) or should be added to a specific
        profile list

    Returns
    -------
    global_opt:
        log-posterior at global optimum.
    """
    # Check whether an optimization result is existing
    if result.optimize_result is None:
        raise ValueError(
            "Optimization has to be carried out before profiling can be done."
        )

    tmp_optimize_result = result.optimize_result.as_list()

    # Check if new profile_list is to be created
    if profile_list is None:
        result.profile_result.append_empty_profile_list()

    # get the log-posterior of the global optimum
    global_opt = tmp_optimize_result[0]["fval"]

    # fill the list with optimization results where necessary
    fill_profile_list(
        profile_result=result.profile_result,
        optimizer_result=tmp_optimize_result[result_index],
        profile_index=profile_index,
        profile_list=profile_list,
        problem_dimension=problem.dim_full,
        global_opt=global_opt,
    )

    # return the log-posterior of the global optimum (needed in order to
    # compute the log-posterior-ratio)
    return global_opt


def fill_profile_list(
    profile_result: ProfileResult,
    optimizer_result: dict[str, Any],
    profile_index: Iterable[int],
    profile_list: int,
    problem_dimension: int,
    global_opt: float,
) -> None:
    """Fill a ProfileResult.

    Helper function for `initialize_profile`.

    Parameters
    ----------
    profile_result:
        A list of profiler result objects.
    optimizer_result:
        A local optimization result.
    profile_index:
        array with parameter indices, whether a profile should
        be computed (1) or not (0).
        Default is all profiles should be computed.
    profile_list:
        integer which specifies whether a call to the profiler should
        create a new list of profiles (default) or should be added to a
        specific profile list.
    problem_dimension:
        number of parameters in the unreduced problem.
    global_opt:
        log-posterior at global optimum.
    """
    if optimizer_result[GRAD] is not None:
        gradnorm = np.linalg.norm(optimizer_result[GRAD])
    else:
        gradnorm = np.nan

    # create blank profile
    new_profile = ProfilerResult(
        x_path=optimizer_result["x"][..., np.newaxis],
        fval_path=np.array([optimizer_result["fval"]]),
        ratio_path=np.array([np.exp(global_opt - optimizer_result["fval"])]),
        gradnorm_path=np.array([gradnorm]),
        exitflag_path=np.array([optimizer_result["exitflag"]]),
        time_path=np.array([0.0]),
        color_path=np.array([[1, 0, 0, 1]]),
        time_total=0.0,
        n_fval=0,
        n_grad=0,
        n_hess=0,
        message=None,
    )

    if profile_list is None:
        # All profiles have to be created from scratch
        for i_parameter in range(0, problem_dimension):
            if i_parameter in profile_index:
                # Should we create a profile for this index?
                profile_result.append_profiler_result(new_profile)
            else:
                # if no profile should be computed for this parameter
                profile_result.append_profiler_result(None)

    else:
        for i_parameter in range(0, problem_dimension):
            # We append to an existing list
            if i_parameter in profile_index:
                # Do we have to create a new profile?
                create_new = (
                    profile_result.list[profile_list][i_parameter] is None
                )
                if create_new:
                    profile_result.set_profiler_result(
                        new_profile, i_parameter
                    )