Source code for pypesto.sample.util

"""A set of helper functions."""
import logging
import os
from typing import Tuple

import numpy as np

from ..C import PYPESTO_MAX_N_SAMPLES
from ..result import Result
from .diagnostics import geweke_test

logger = logging.getLogger(__name__)


[docs]def calculate_ci_mcmc_sample(
        result: Result,
        ci_level: float = 0.95,
        exclude_burn_in: bool = True,
) -> Tuple[np.ndarray, np.ndarray]:
    """Calculate parameter credibility intervals based on MCMC samples.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.
    ci_level:
        Lower tail probability, defaults to 95% interval.

    Returns
    -------
    lb, ub:
        Bounds of the MCMC percentile-based confidence interval.
    """
    burn_in = 0
    if exclude_burn_in:
        # Check if burn in index is available
        if result.sample_result.burn_in is None:
            geweke_test(result)

        # Get burn in index
        burn_in = result.sample_result.burn_in

    # Get converged parameter samples as numpy arrays
    chain = np.asarray(result.sample_result.trace_x[0, burn_in:, :])

    lb, ub = calculate_ci(chain, ci_level=ci_level, axis=0)
    return lb, ub


[docs]def calculate_ci_mcmc_sample_prediction(
        simulated_values: np.ndarray,
        ci_level: float = 0.95,
) -> Tuple[np.ndarray, np.ndarray]:
    """Calculate prediction credibility intervals based on MCMC samples.

    Parameters
    ----------
    simulated_values:
        Simulated model states or model observables.
    ci_level:
        Lower tail probability, defaults to 95% interval.

    Returns
    -------
    lb, ub:
        Bounds of the MCMC-based prediction confidence interval.
    """
    lb, ub = calculate_ci(simulated_values, ci_level=ci_level, axis=1)
    return lb, ub


def calculate_ci(
        values: np.ndarray,
        ci_level: float,
        **kwargs,
) -> Tuple[np.ndarray, np.ndarray]:
    """Calculate confidence/credibility levels using percentiles.

    Parameters
    ----------
    values:
        The values used to calculate percentiles.
    ci_level:
        Lower tail probability.
    kwargs:
        Additional keyword arguments are passed to the `numpy.percentile` call.

    Returns
    -------
    lb, ub:
        Bounds of the confidence/credibility interval.
    """
    # Percentile values corresponding to the CI level
    percentiles = 100 * np.array([(1-ci_level)/2, 1-(1-ci_level)/2])
    # Upper and lower bounds
    lb, ub = np.percentile(values, percentiles, **kwargs)
    return lb, ub


def bound_n_samples_from_env(n_samples: int):
    """Bound number of samples from environment variable.

    Uses environment variable `PYPESTO_MAX_N_SAMPLES`.
    This is used to speed up testing, while in application it should not
    be used.

    Parameters
    ----------
    n_samples: Number of samples desired.

    Returns
    -------
    n_samples_new:
        The original number of samples, or the minimum with the environment
        variable, if exists.
    """
    if PYPESTO_MAX_N_SAMPLES not in os.environ:
        return n_samples
    n_samples_new = min(n_samples, int(os.environ[PYPESTO_MAX_N_SAMPLES]))

    logger.info(
        f"Bounding number of samples from {n_samples} to {n_samples_new} via "
        f"environment variable {PYPESTO_MAX_N_SAMPLES}"
    )

    return n_samples_new