Source code for pypesto.sample.diagnostics

"""Calculate different diagnostics of the sampling result."""
import logging

import numpy as np

from ..result import Result
from .auto_correlation import autocorrelation_sokal
from .geweke_test import burn_in_by_sequential_geweke

logger = logging.getLogger(__name__)


[docs]def geweke_test(result: Result, zscore: float = 2.0) -> int:
    """
    Calculate the burn-in of MCMC chains.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.
    zscore:
        The Geweke test threshold.

    Returns
    -------
    burn_in:
        Iteration where the first and the last fraction of the chain
        do not differ significantly regarding Geweke test -> Burn-In

    """
    # Get parameter samples as numpy arrays
    chain = np.asarray(result.sample_result.trace_x[0])

    # Calculate burn in index
    burn_in = burn_in_by_sequential_geweke(chain=chain, zscore=zscore)

    # Log
    logger.info(f'Geweke burn-in index: {burn_in}')

    # Fill in burn-in value into result
    result.sample_result.burn_in = burn_in

    return burn_in


[docs]def auto_correlation(result: Result) -> float:
    """
    Calculate the autocorrelation of the MCMC chains.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.

    Returns
    -------
    auto_correlation:
        Estimate of the integrated autocorrelation time of
        the MCMC chains.
    """
    # Check if burn in index is available
    if result.sample_result.burn_in is None:
        geweke_test(result)

    # Get burn in index
    burn_in = result.sample_result.burn_in

    # Get chain length
    chain_length = result.sample_result.trace_x.shape[1]

    if burn_in == chain_length:
        logger.warning(
            "The autocorrelation can not "
            "be estimated. The chain seems to "
            "not have converged yet.\n"
            "You may want to use a larger number "
            "of samples."
        )
        return None

    # Get converged parameter samples as numpy arrays
    chain = np.asarray(result.sample_result.trace_x[0, burn_in:, :])

    # Calculate autocorrelation
    auto_correlation_vector = autocorrelation_sokal(chain=chain)

    # Take the maximum over all components
    _auto_correlation = max(auto_correlation_vector)

    # Log
    logger.info(f'Estimated chain autocorrelation: {_auto_correlation}')

    # Fill in autocorrelation value into result
    result.sample_result.auto_correlation = _auto_correlation

    return _auto_correlation


[docs]def effective_sample_size(result: Result) -> float:
    """
    Calculate the effective sample size of the MCMC chains.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.

    Returns
    -------
    ess:
        Estimate of the effective sample size of
        the MCMC chains.
    """
    # Check if autocorrelation is available
    if result.sample_result.auto_correlation is None:
        # Calculate autocorrelation
        auto_correlation(result)

    # Get burn in index
    burn_in = result.sample_result.burn_in

    # Get estimated chain autocorrelation
    _auto_correlation = result.sample_result.auto_correlation

    if _auto_correlation is None:
        return None

    # Get converged parameter samples as numpy arrays
    chain = np.asarray(result.sample_result.trace_x[0, burn_in:, :])

    # Get length of the converged chain
    N = chain.shape[0]

    # Calculate effective sample size
    ess = N / (1 + _auto_correlation)

    # Log
    logger.info(f'Estimated effective sample size: {ess}')

    # Fill in effective sample size value into result
    result.sample_result.effective_sample_size = ess

    return ess