Source code for pypesto.util

"""
Utilities
=========

Package-wide utilities.

"""
from typing import Optional, Tuple

import numpy as np
from scipy import cluster


def get_condition_label(condition_id: str) -> str:
    """Build the label that belongs to a condition ID.

    Several places (e.g. ensemble prediction code and visualization code)
    need a label for a condition. Routing all of them through this function
    guarantees that one condition always receives the same label.

    Parameters
    ----------
    condition_id:
        The condition ID from which the label is generated.

    Returns
    -------
    The condition label, i.e. the ID prefixed with ``condition_``.
    """
    label = f'condition_{condition_id}'
    return label


def assign_clusters(vals, threshold: float = 0.1):
    """
    Find clustering.

    The values are clustered hierarchically (scipy ``linkage`` with its
    default settings, followed by ``fcluster`` with the ``'distance'``
    criterion). The resulting cluster labels are renumbered so that clusters
    are numbered ``0, 1, 2, ...`` in the order in which they first appear in
    `vals`.

    Parameters
    ----------
    vals: numeric list or array
        List to be clustered.
    threshold: float
        Distance threshold handed to ``fcluster`` as ``t``. Defaults to
        ``0.1``.

    Returns
    -------
    clust: numeric list
        Indicating the corresponding cluster of each element from
        'vals'.
    clustsize: numeric list
        Size of clusters, length equals number of clusters.

    Notes
    -----
    For empty or ``None`` input two empty lists are returned; for a single
    value the result is ``np.array([0]), np.array([1.0])``. Clustering itself
    needs at least two observations, hence these shortcuts.
    """
    # sanity checks
    if vals is None or len(vals) == 0:
        return [], []
    if len(vals) == 1:
        return np.array([0]), np.array([1.0])

    # linkage requires (n, 1) data array
    vals = np.reshape(vals, (-1, 1))

    # get clustering based on distance
    clust = cluster.hierarchy.fcluster(
        cluster.hierarchy.linkage(vals), t=threshold, criterion='distance'
    )

    # The labels returned by fcluster do not follow the order of the input
    # values. Determine the distinct labels in order of first occurrence, so
    # they can be renumbered consecutively below.
    _, first_occurrence = np.unique(clust, return_index=True)
    unique_clust = clust[np.sort(first_occurrence)]

    cluster_indices = np.zeros(vals.size, dtype=int)
    cluster_size = np.zeros(unique_clust.size, dtype=int)

    # loop over clusters: renumber and count number of entries
    for index, i_clust in enumerate(unique_clust):
        members = clust == i_clust
        cluster_indices[members] = index
        cluster_size[index] = np.count_nonzero(members)

    return cluster_indices, cluster_size


def delete_nan_inf(
    fvals: np.ndarray, x: Optional[np.ndarray] = None, xdim: Optional[int] = 1
) -> Tuple[Optional[np.ndarray], np.ndarray]:
    """
    Delete nan and inf values in fvals.

    If parameters 'x' are passed, also the corresponding entries are deleted.

    Parameters
    ----------
    fvals:
        array of fval
    x:
        array of parameters, or a sequence of parameter vectors in which
        entries belonging to non-finite fvals may be None
    xdim:
        dimension of x, in case x dimension cannot be inferred

    Returns
    -------
    x:
        array of parameters without nan or inf; None if no 'x' was passed.
        If no fval is finite, an empty array of shape (0, dim) is returned.
    fvals:
        array of fval without nan or inf
    """
    fvals = np.asarray(fvals)
    finite = np.isfinite(fvals)

    if x is not None:
        if finite.any():
            keep = np.where(finite)[0]
            if isinstance(x, np.ndarray):
                rows = np.take(x, keep, axis=0)
            else:
                # If we start out with a list of x, the entries belonging to
                # non-finite fvals may be None. Such a ragged list cannot be
                # converted to an array as a whole, so pick the wanted
                # entries first and stack afterwards.
                rows = [x[i] for i in keep]
            x = np.vstack(rows)
        else:
            # None of the fvals is finite, so there is nothing to stack.
            # Construct an empty array with the correct second dimension
            # (other functions rely on x.shape[1] to be of correct dimension).
            if isinstance(x, np.ndarray) and x.ndim == 2:
                dim = x.shape[1]
            else:
                # infer the dimension from the first available entry and
                # fall back to xdim if there is none
                first = next((xi for xi in x if xi is not None), None)
                dim = xdim if first is None else np.atleast_1d(first).shape[0]
            x = np.empty((0, dim))

    return x, fvals[finite]