import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import numpy as np
import pandas as pd
from typing import Optional, Tuple
from ..ensemble import Ensemble
from ..ensemble.constants import (
COLOR_HIT_BOTH_BOUNDS, COLOR_HIT_ONE_BOUND, COLOR_HIT_NO_BOUNDS)
[docs]def ensemble_identifiability(ensemble: Ensemble,
ax: Optional[plt.Axes] = None,
size: Optional[Tuple[float]] = (12, 6)):
"""
Plots an overview about how many parameters hit the parameter bounds based
on a ensemble of parameters. confidence intervals/credible ranges are
computed via the ensemble mean plus/minus 1 standard deviation.
This highlevel routine expects a ensemble object as input.
Parameters
----------
ensemble:
ensemble of parameter vectors (from pypesto.ensemble)
ax:
Axes object to use.
size:
Figure size (width, height) in inches. Is only applied when no ax
object is specified
Returns
-------
ax: matplotlib.Axes
The plot axes.
"""
# first get the data to check identifiability
id_df = ensemble.check_identifiability()
# check how many bounds are actually hit and which ones
none_hit, lb_hit, ub_hit, both_hit = _prepare_identifiability_plot(id_df)
# call lowlevel routine whick works with np arrays only
ax = ensemble_identifiability_lowlevel(none_hit, lb_hit, ub_hit, both_hit,
ax, size)
return ax
def ensemble_identifiability_lowlevel(none_hit: np.ndarray,
lb_hit: np.ndarray,
ub_hit: np.ndarray,
both_hit: np.ndarray,
ax: Optional[plt.Axes] = None,
size: Optional[Tuple[float]] = (16, 10)):
"""
Plots an overview about how many parameters hit the parameter bounds based
on a ensemble of parameters. Confidence intervals/credible ranges are
computed via the ensemble mean plus/minus 1 standard deviation.
This lowlevel routine works with numpy arrays which define the confidence
intervals/credible ranges of each parameter.
Parameters
----------
none_hit:
2-dimensional array of confidence interval/credible ranges for
identifiable parameters
lb_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the lower parameter bound
ub_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the upper parameter bound
both_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit both parameter bounds
ax:
Axes object to use.
size:
Figure size (width, height) in inches. Is only applied when no ax
object is specified
Returns
-------
ax: matplotlib.Axes
The plot axes.
"""
# define some short hands for later plotting
n_par = sum([none_hit.shape[0], lb_hit.shape[0],
ub_hit.shape[0], both_hit.shape[0]])
x_both = len(both_hit) / n_par
x_lb = len(lb_hit) / n_par
x_ub = len(ub_hit) / n_par
x_none = 1. - x_both - x_ub - x_lb
patches_both_hit, patches_lb_hit, patches_ub_hit, patches_none_hit = \
_create_patches(none_hit, lb_hit, ub_hit, both_hit)
# axes
if ax is None:
ax = plt.subplots()[1]
fig = plt.gcf()
fig.set_size_inches(*size)
# create axes object and add patch collections
if patches_both_hit:
ax.add_collection(patches_both_hit)
if patches_lb_hit:
ax.add_collection(patches_lb_hit)
if patches_ub_hit:
ax.add_collection(patches_ub_hit)
if patches_none_hit:
ax.add_collection(patches_none_hit)
# plot dashed lines indicating the number rof non-identifiable parameters
vert = [-.05, 1.05]
ax.plot([x_both, x_both], vert, 'k--', linewidth=1.5)
ax.plot([x_both + x_lb, x_both + x_lb], vert, 'k--', linewidth=1.5)
ax.plot([x_both + x_lb + x_ub, x_both + x_lb + x_ub], vert,
'k--', linewidth=1.5)
# add text
if patches_both_hit:
ax.text(x_both / 2, -.05, 'both bounds hit',
color=COLOR_HIT_BOTH_BOUNDS,
rotation=-90, va='top', ha='center')
if patches_lb_hit:
ax.text(x_both + x_lb / 2, -.05, 'lower bound hit',
color=COLOR_HIT_ONE_BOUND, rotation=-90, va='top', ha='center')
if patches_ub_hit:
ax.text(x_both + x_lb + x_ub / 2, -.05, 'upper bound hit',
color=COLOR_HIT_ONE_BOUND, rotation=-90, va='top', ha='center')
if patches_none_hit:
ax.text(1 - x_none / 2, -.05, 'no bounds hit',
color=COLOR_HIT_NO_BOUNDS, rotation=-90, va='top', ha='center')
ax.text(0, -.7, 'identifiable parameters: {:4.1f}%'.format(x_none * 100),
va='top')
# plot upper and lower bounds
ax.text(-.03, 1., 'upper\nbound', ha='right', va='center')
ax.text(-.03, 0., 'lower\nbound', ha='right', va='center')
ax.plot([-.02, 1.03], [0, 0], 'k:', linewidth=1.5)
ax.plot([-.02, 1.03], [1, 1], 'k:', linewidth=1.5)
plt.xticks([])
plt.yticks([])
# plot frame
ax.plot([0, 0], vert, 'k-', linewidth=1.5)
ax.plot([1, 1], vert, 'k-', linewidth=1.5)
# beautify axes
plt.xlim((-.15, 1.1))
plt.ylim((-.78, 1.15))
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['top'].set_visible(False)
return ax
def _prepare_identifiability_plot(id_df: pd.DataFrame):
"""
This routine groups model parameters based on a ensemble object into
four categories, based on the mean of the parameter ensemble plus/minus
1 standard deviation: Parameters that hit both bounds, parameters that hit
only the lower [or upper] bound, and parameters that hit no bounds.
It returns them as four numpy arrays, together with their confidence
intervals/credible ranges.
Parameters
----------
id_df:
Pandas dataframe with information about parameter identifiability,
as created by pypesto.ensemble.check_identifiability()
Returns
-------
none_hit:
2-dimensional array of confidence interval/credible ranges for
identifiable parameters
lb_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the lower parameter bound
ub_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the upper parameter bound
both_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit both parameter bounds
"""
# prepare
both_hit = []
lb_hit = []
ub_hit = []
none_hit = []
def _affine_transform(par_info):
# rescale parameters to bounds
lb = par_info['lowerBound']
ub = par_info['upperBound']
val_l = par_info['ensemble_mean'] - par_info['ensemble_std']
val_u = par_info['ensemble_mean'] + par_info['ensemble_std']
# check if parameter confidence intervals/credible ranges hit bound
if val_l <= lb:
lower_val = 0.
else:
lower_val = (val_l - lb) / (ub - lb)
if val_u >= ub:
upper_val = 1.
else:
upper_val = (val_u - lb) / (ub - lb)
return lower_val, upper_val
for par_id in list(id_df.index):
# check which of the parameters seems to be identifiable and group them
if id_df.loc[par_id, 'within lb: 1 std'] and \
id_df.loc[par_id, 'within ub: 1 std']:
none_hit.append(_affine_transform(id_df.loc[par_id, :]))
elif id_df.loc[par_id, 'within lb: 1 std']:
ub_hit.append(_affine_transform(id_df.loc[par_id, :]))
elif id_df.loc[par_id, 'within ub: 1 std']:
lb_hit.append(_affine_transform(id_df.loc[par_id, :]))
else:
both_hit.append(_affine_transform(id_df.loc[par_id, :]))
return np.array(none_hit), np.array(lb_hit), np.array(ub_hit), \
np.array(both_hit)
def _create_patches(none_hit: np.ndarray,
lb_hit: np.ndarray,
ub_hit: np.ndarray,
both_hit: np.ndarray):
"""
Creates matplotlib.patches.PatchCollection objects from numpy arrays with
confidence intervals/credible ranges, which visualize identifiability
properties of the model parameters.
Parameters
----------
none_hit:
2-dimensional array of confidence interval/credible ranges for
identifiable parameters
lb_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the lower parameter bound
ub_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit the upper parameter bound
both_hit:
2-dimensional array of confidence interval/credible ranges for
parameters which hit both parameter bounds
Returns
-------
patches_both_hit:
patches showing parameters which hit both parameter bounds in the
ensemble (and are hence non-identifiable)
patches_lb_hit:
patches showing parameters which hit only the lower parameter bounds
in the ensemble (and are hence non-identifiable)
patches_ub_hit:
patches showing parameters which hit only the lower parameter bounds
in the ensemble (and are hence non-identifiable)
patches_none_hit
patches showing parameters which hit no parameter bounds in the
ensemble (and are hence identifiable)
"""
# get total number of parameters
n_par = sum([none_hit.shape[0], lb_hit.shape[0],
ub_hit.shape[0], both_hit.shape[0]])
# start patches at the left end and increment by h = 1/n_par
x = 0.
h = 1. / n_par
# creates patches for parameters which hit both bounds
patches_both_hit = []
if both_hit.size > 0:
for _ in both_hit:
# create a list of rectangles
patches_both_hit.append(Rectangle((x, 0.), h, 1.))
x += h
patches_both_hit = PatchCollection(patches_both_hit,
facecolors=COLOR_HIT_BOTH_BOUNDS)
# creates patches for parameters which hit lower bound
patches_lb_hit = []
# sort by normalizes length of confidence interval/credible range
if lb_hit.size > 0:
tmp_lb = np.sort(lb_hit[:, 1])[::-1]
for lb_par in tmp_lb:
# create a list of rectangles
patches_lb_hit.append(Rectangle((x, 0.), h, lb_par))
x += h
patches_lb_hit = PatchCollection(patches_lb_hit,
facecolors=COLOR_HIT_ONE_BOUND)
# creates patches for parameters which hit upper bound
patches_ub_hit = []
# sort by normalizes length of confidence interval/credible range
if ub_hit.size > 0:
tmp_ub = np.sort(ub_hit[:, 0])
for ub_par in tmp_ub:
# create a list of rectangles
patches_ub_hit.append(Rectangle((x, ub_par), h, 1. - ub_par))
x += h
patches_ub_hit = PatchCollection(patches_ub_hit,
facecolors=COLOR_HIT_ONE_BOUND)
# creates patches for parameters which hit no bounds
patches_none_hit = []
# sort by normalizes length of confidence interval/credible range
if none_hit.size > 0:
tmp_none = np.argsort(none_hit[:, 1] - none_hit[:, 0])[::-1]
for none_par in tmp_none:
patches_none_hit.append(
# create a list of rectangles
Rectangle((x, none_hit[none_par, 0]), h,
none_hit[none_par, 1] - none_hit[none_par, 0]))
x += h
patches_none_hit = PatchCollection(patches_none_hit,
facecolors=COLOR_HIT_NO_BOUNDS)
return patches_both_hit, patches_lb_hit, patches_ub_hit, patches_none_hit