# Source code for pypesto.visualize.waterfall

from typing import List, Optional, Sequence, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import inset_locator

from pypesto.util import delete_nan_inf

from ..result import Result
from .clust_color import RGBA, assign_colors
from .misc import (
    process_offset_y,
    process_result_list,
    process_start_indices,
    process_y_limits,
)
from .reference_points import ReferencePoint, create_references


def waterfall(
    results: Union[Result, Sequence[Result]],
    ax: Optional[plt.Axes] = None,
    size: Optional[Tuple[float]] = (18.5, 10.5),
    y_limits: Optional[Tuple[float]] = None,
    scale_y: Optional[str] = 'log10',
    offset_y: Optional[float] = None,
    start_indices: Optional[Union[Sequence[int], int]] = None,
    n_starts_to_zoom: int = 0,
    reference: Optional[Sequence[ReferencePoint]] = None,
    colors: Optional[Union[RGBA, Sequence[RGBA]]] = None,
    legends: Optional[Union[Sequence[str], str]] = None,
):
    """
    Plot waterfall plot.

    For every result, the (offsetted) function values are stripped of
    non-finite entries, sorted in ascending order and drawn via
    :func:`waterfall_lowlevel`. Reference points and y-limits are applied
    afterwards via :func:`handle_options`.

    Parameters
    ----------
    results:
        Optimization result obtained by 'optimize.py' or list of those
    ax: matplotlib.Axes, optional
        Axes object to use. If None, a new figure and axes are created.
    size:
        Figure size (width, height) in inches. Is only applied when no ax
        object is specified. If None, the figure size is left untouched.
    y_limits: float or ndarray, optional
        Maximum value to be plotted on the y-axis, or y-limits
    scale_y:
        May be logarithmic or linear ('log10' or 'lin')
    offset_y:
        Offset for the y-axis, if it is supposed to be in log10-scale
    start_indices:
        Integers specifying the multistart to be plotted or int specifying
        up to which start index should be plotted
    n_starts_to_zoom:
        Number of best multistarts that should be zoomed in.
        Should be smaller than the total number of multistarts.
        If 0 (default), no inset is created.
    reference:
        Reference points for optimization results, containing at least a
        function value fval
    colors:
        Colors or single color for plotting. If not set, clustering is done
        and colors are assigned automatically
    legends:
        Labels for line plots, one label per result object

    Returns
    -------
    ax: matplotlib.Axes
        The plot axes.
    """
    # axes
    if ax is None:
        fig, ax = plt.subplots()
        # `size` is annotated as optional, so do not unpack None
        if size is not None:
            fig.set_size_inches(*size)

    if n_starts_to_zoom:
        # create zoom in
        inset_axes = inset_locator.inset_axes(
            ax, width="30%", height="30%", loc='center right'
        )
        inset_locator.mark_inset(ax, inset_axes, loc1=2, loc2=4)
    else:
        inset_axes = None

    # parse input
    (results, colors, legends) = process_result_list(results, colors, legends)

    refs = create_references(references=reference)

    # precompute y-offset, if needed and if a list of results was passed
    fvals_all, offset_y = process_offset_for_list(
        offset_y, results, scale_y, start_indices, refs
    )

    # plotting routine needs the maximum number of multistarts.
    # Kept as a plain int: mixing a 1-element array with scalars inside
    # np.max([...]) builds a ragged sequence, which recent NumPy rejects.
    max_len_fvals = 0

    # loop over results
    for j, fvals in enumerate(fvals_all):
        # track the largest dimension seen so far (before filtering)
        max_len_fvals = max((max_len_fvals, *fvals.shape))

        # remove colors where value is infinite if colors were passed on
        if colors[j] is not None and fvals.size == colors[j].shape[0]:
            colors[j] = colors[j][np.isfinite(np.transpose(fvals)).flatten()]
        # parse input
        fvals = np.array(fvals)
        # remove nan or inf values in fvals
        _, fvals = delete_nan_inf(fvals)

        fvals.sort()

        # assign colors
        coloring = assign_colors(fvals, colors=colors[j])

        # call lowlevel plot routine
        ax = waterfall_lowlevel(
            fvals=fvals,
            scale_y=scale_y,
            offset_y=offset_y,
            ax=ax,
            size=size,
            colors=coloring,
            legend_text=legends[j],
        )

        if inset_axes is not None:
            # the inset shows the same offsetted values as the main axes,
            # so forward the offset to get a matching y-label
            inset_axes = waterfall_lowlevel(
                fvals=fvals[:n_starts_to_zoom],
                scale_y=scale_y,
                offset_y=offset_y,
                ax=inset_axes,
                colors=coloring[:n_starts_to_zoom],
            )

    # apply changes specified by the user to the axis object
    ax = handle_options(ax, max_len_fvals, refs, y_limits, offset_y)
    if inset_axes is not None:
        inset_axes = handle_options(
            inset_axes, n_starts_to_zoom, refs, y_limits, offset_y
        )

    if any(legends):
        ax.legend()
    # labels
    ax.set_xlabel('Ordered optimizer run')
    if offset_y == 0.0:
        ax.set_ylabel('Function value')
    else:
        ax.set_ylabel('Offsetted function value (relative to best start)')
    ax.set_title('Waterfall plot')
    return ax


def waterfall_lowlevel(
    fvals,
    ax: Optional[plt.Axes] = None,
    size: Optional[Tuple[float]] = (18.5, 10.5),
    scale_y: str = 'log10',
    offset_y: float = 0.0,
    colors: Optional[Union[RGBA, Sequence[RGBA]]] = None,
    legend_text: Optional[str] = None,
):
    """
    Draw a waterfall plot from a sequence of function values.

    The values are plotted in the order given: first as one grey connecting
    line, then point by point with an individual marker color. Afterwards
    the y-range is widened if it spans less than one unit (one decade on a
    log10 axis), and axis labels and title are set.

    Parameters
    ----------
    fvals: numeric list or array
        Values to be plotted.
    ax: matplotlib.Axes
        Axes object to use. A new figure is created if None.
    size:
        Figure size (width, height) in inches. Is only applied when no ax
        object is specified
    scale_y: str, optional
        'log10' for a logarithmic y-axis; any other value gives a linear
        y-axis.
    offset_y:
        Offset that was applied to the values. Only used to choose the
        y-axis label (a value of 0.0 yields 'Function value').
    colors: list, or RGBA, optional
        Single color or list of colors for plotting. Passed on to
        `assign_colors`.
    legend_text:
        Label for line plots. Attached to the first point only.

    Returns
    -------
    ax: matplotlib.Axes
        The plot axes.
    """
    # create axes on demand
    if ax is None:
        _, ax = plt.subplots()
        plt.gcf().set_size_inches(*size)

    n_points = len(fvals)
    x_positions = range(n_points)

    # one color per point
    point_colors = assign_colors(fvals, colors=colors)

    # integer ticks on the x-axis, since it counts optimizer runs
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # pick the drawing routine once instead of branching at every call
    is_log = scale_y == 'log10'
    draw = ax.semilogy if is_log else ax.plot

    # grey line connecting all points
    draw(x_positions, fvals, color=[0.7, 0.7, 0.7, 0.6])

    # individual markers; only the first one carries the legend label
    for idx in x_positions:
        draw(
            idx,
            fvals[idx],
            color=point_colors[idx],
            marker='o',
            label=legend_text if idx == 0 else None,
            alpha=1.0,
        )

    # widen the y-range to at least one unit / one decade around its center
    lower, upper = ax.get_ylim()
    if is_log:
        if np.log10(upper) - np.log10(lower) < 1.0:
            center = 0.5 * (np.log10(lower) + np.log10(upper))
            ax.set_ylim(10.0 ** (center - 0.5), 10.0 ** (center + 0.5))
    elif upper - lower < 1.0:
        center = 0.5 * (lower + upper)
        ax.set_ylim(center - 0.5, center + 0.5)

    # labels
    if offset_y == 0.0:
        y_label = 'Function value'
    else:
        y_label = 'Offsetted function value (relative to best start)'
    ax.set_xlabel('Ordered optimizer run')
    ax.set_ylabel(y_label)
    ax.set_title('Waterfall plot')
    if legend_text is not None:
        ax.legend()

    return ax


def process_offset_for_list(
    offset_y: Optional[float],
    results: Sequence[Result],
    scale_y: Optional[str],
    start_indices: Optional[Sequence[int]] = None,
    references: Optional[Sequence[ReferencePoint]] = None,
) -> Tuple[List[np.ndarray], float]:
    """
    Compute common offset_y and add it to `fvals` of results.

    The smallest finite function value over all selected starts of all
    results (and over the reference points, if any) is determined and
    handed to `process_offset_y` together with the user-provided offset.

    Parameters
    ----------
    offset_y:
        User provided offset_y
    results:
        Optimization results obtained by 'optimize.py'
    scale_y:
        May be logarithmic or linear ('log10' or 'lin')
    start_indices:
        Integers specifying the multistart to be plotted or int specifying
        up to which start index should be plotted. If None, all starts of
        every result are used.
    references:
        Reference points that will be plotted along with the results

    Returns
    -------
    fvals:
        List of arrays of function values for each result, each of shape
        (1, number of selected starts), with the offset already added
    offset_y:
        offset for the y-axis
    """
    min_val = np.inf
    fvals_all = []
    for result in results:
        # wrap into a 2D array of shape (1, n_starts)
        fvals = np.asarray([np.array(result.optimize_result.fval)])

        # Resolve the indices per result into a local variable. Overwriting
        # `start_indices` here would make the selection for later results
        # depend on the size of earlier ones.
        if start_indices is None:
            indices = np.arange(fvals.size)
        else:
            indices = process_start_indices(start_indices, fvals.size)
        fvals = fvals[:, indices]

        # only finite values may contribute to the minimum
        finite_mask = np.isfinite(fvals)
        if finite_mask.any():
            min_val = min(min_val, np.min(fvals[finite_mask]))

        fvals_all.append(fvals)

    # if there are references, also account for those
    if references:
        min_val = min(min_val, np.nanmin([r['fval'] for r in references]))

    # if no finite value was found at all, fall back to zero so that no
    # infinite minimum is passed on to the offset computation
    if not np.isfinite(min_val):
        min_val = 0.0

    offset_y = process_offset_y(offset_y, scale_y, float(min_val))

    # return offsetted values
    return [fvals + offset_y for fvals in fvals_all], offset_y


def handle_options(ax, max_len_fvals, ref, y_limits, offset_y):
    """
    Apply post-plotting transformations to the axis object.

    Draws every reference point as a dashed horizontal line and then lets
    `process_y_limits` apply the requested y-limits.

    Parameters
    ----------
    ax: matplotlib.Axes
        Axes object to use.
    max_len_fvals: int
        Maximum number of points; reference lines span the x-range
        from 0 to ``max_len_fvals - 1``.
    ref: list
        List of reference points for optimization results, containing at
        least a function value fval
    y_limits: float or ndarray, optional
        Maximum value to be plotted on the y-axis, or y-limits
    offset_y:
        Offset for the y-axis; added to the function value of every
        reference point before plotting.

    Returns
    -------
    ax: matplotlib.Axes
        The plot axes.
    """
    # reference points: one dashed horizontal line each
    for point in ref:
        x_span = [0, max_len_fvals - 1]
        y_span = [point.fval + offset_y, point.fval + offset_y]
        ax.plot(
            x_span,
            y_span,
            '--',
            color=point.color,
            label=point.legend,
        )

        # refresh the legend whenever a labelled reference was added
        if point.legend is not None:
            ax.legend()

    # y-limits are delegated to the shared helper
    return process_y_limits(ax, y_limits)