# Source code for pypesto.util

"""
Utilities
=========

Package-wide utilities.

"""
from typing import Optional, Tuple

import numpy as np
from scipy import cluster


def get_condition_label(condition_id: str) -> str:
    """Build the display label that belongs to a condition ID.

    Several parts of the code base (e.g. ensemble prediction and
    visualization) need a label for a condition. Routing all of them through
    this single function guarantees that a given condition always receives
    the same label.

    Parameters
    ----------
    condition_id:
        The condition ID from which the label is derived.

    Returns
    -------
    The condition label, i.e. the ID prefixed with ``condition_``.
    """
    label = 'condition_{}'.format(condition_id)
    return label


def assign_clusters(vals, threshold: float = 0.1):
    """
    Find clustering.

    The values are grouped by hierarchical clustering
    (``scipy.cluster.hierarchy.linkage`` with its default settings) and the
    tree is cut with the ``'distance'`` criterion at ``threshold``. The
    resulting cluster labels are then renumbered so that they follow the
    order in which the clusters first appear in ``vals``: the first element
    always belongs to cluster 0, the next previously unseen cluster is 1,
    and so on.

    Parameters
    ----------
    vals: numeric list or array
        List to be clustered.
    threshold:
        Distance threshold handed to ``fcluster`` as ``t``. Defaults to
        ``0.1``.

    Returns
    -------
    clust: numeric list
        Indicating the corresponding cluster of each element from 'vals'.
    clustsize: numeric list
        Size of clusters, length equals number of clusters.
    """
    # sanity checks: linkage needs at least two observations, so the empty
    # and the single-element case are answered directly
    if vals is None or len(vals) == 0:
        return [], []
    if len(vals) == 1:
        return np.array([0]), np.array([1.0])

    # linkage requires (n, 1) data array
    vals = np.reshape(vals, (-1, 1))

    # get clustering based on distance
    clust = cluster.hierarchy.fcluster(
        cluster.hierarchy.linkage(vals), t=threshold, criterion='distance'
    )

    # get unique cluster labels, ordered by their first occurrence in `clust`
    # (np.unique alone would return them sorted by label value)
    _, ind_clust = np.unique(clust, return_index=True)
    unique_clust = clust[np.sort(ind_clust)]

    # preallocate the relabeled output and the per-cluster counts
    cluster_indices = np.zeros(vals.size, dtype=int)
    cluster_size = np.zeros(unique_clust.size, dtype=int)

    # loop over clusters: relabel and count number of entries
    for index, i_clust in enumerate(unique_clust):
        members = clust == i_clust
        cluster_indices[members] = index
        cluster_size[index] = np.count_nonzero(members)

    return cluster_indices, cluster_size
def delete_nan_inf(
    fvals: np.ndarray, x: Optional[np.ndarray] = None, xdim: Optional[int] = 1
) -> Tuple[Optional[np.ndarray], np.ndarray]:
    """
    Delete nan and inf values in fvals.

    If parameters 'x' are passed, also the corresponding entries are deleted.

    Parameters
    ----------
    x:
        array of parameters. May also be a plain sequence with one entry per
        fval; entries that belong to non-finite fvals may be ``None``.
    fvals:
        array of fval
    xdim:
        dimension of x, in case x dimension cannot be inferred

    Returns
    -------
    x:
        array of parameters without nan or inf (one row per finite fval), or
        ``None`` if no 'x' was passed
    fvals:
        array of fval without nan or inf
    """
    fvals = np.asarray(fvals)
    # evaluate the mask once; it is needed for both x and fvals
    finite = np.isfinite(fvals)

    if x is not None:
        if finite.any():
            # If we start out with a list of x, the entries corresponding to
            # non-finite fvals may be None. Converting such a ragged list to
            # an array as a whole can fail, so pick the wanted entries by
            # index first and only stack those.
            x = np.vstack([x[i] for i in np.nonzero(finite)[0]])
        else:
            # Nothing to stack: build an empty array that still has the
            # correct second dimension (other functions rely on x.shape[1]
            # to be of correct dimension).
            if getattr(x, 'ndim', None) == 2:
                n_par = x.shape[1]
            elif len(x) > 0 and x[0] is not None:
                # infer from the first entry; a scalar entry counts as one
                # parameter, matching the (k, 1) shape of the stacked case
                n_par = np.atleast_1d(x[0]).shape[0]
            else:
                # dimension cannot be inferred, fall back to the given one
                n_par = xdim
            x = np.empty((0, n_par))

    return x, fvals[finite]