Source code for pypesto.ensemble.utils

import h5py
import numpy as np
import pandas as pd
import os
from typing import Callable, Union
from pathlib import Path

from .constants import (EnsembleType, OUTPUT, UPPER_BOUND, LOWER_BOUND,
                        PREDICTION_RESULTS, PREDICTION_ID, SUMMARY)
from .ensemble import (Ensemble, EnsemblePrediction)


def read_from_csv(path: str,
                  sep: str = '\t',
                  index_col: int = 0,
                  headline_parser: Callable = None,
                  ensemble_type: EnsembleType = None,
                  lower_bound: np.ndarray = None,
                  upper_bound: np.ndarray = None):
    """
    Create an ensemble from the parameter vectors stored in a csv file.

    The file is loaded into a pandas DataFrame and the construction of the
    ensemble is delegated to `read_from_df`.

    Parameters
    ----------
    path:
        path to csv file to read in parameter ensemble
    sep:
        separator in csv file
    index_col:
        index column in csv file
    headline_parser:
        A function which reads in the headline of the csv file and converts
        it into vector_tags (see constructor of Ensemble for more details)
    ensemble_type:
        Ensemble type: representative sample or random ensemble. If None,
        EnsembleType.ensemble is used.
    lower_bound:
        array of potential lower bounds for the parameters
    upper_bound:
        array of potential upper bounds for the parameters

    Returns
    -------
    result:
        Ensemble object of parameter vectors
    """
    # load the raw table from disk
    table = pd.read_csv(path, sep=sep, index_col=index_col)

    # fall back to the default ensemble type if the caller gave none
    chosen_type = (EnsembleType.ensemble
                   if ensemble_type is None else ensemble_type)

    return read_from_df(dataframe=table,
                        headline_parser=headline_parser,
                        ensemble_type=chosen_type,
                        lower_bound=lower_bound,
                        upper_bound=upper_bound)
def read_from_df(dataframe: pd.DataFrame,
                 headline_parser: Callable = None,
                 ensemble_type: EnsembleType = None,
                 lower_bound: np.ndarray = None,
                 upper_bound: np.ndarray = None):
    """
    Create an ensemble from a pandas DataFrame.

    The values of the dataframe are passed on as the parameter vectors and
    its index as the parameter names. The column labels are only used if a
    headline_parser is given.

    Parameters
    ----------
    dataframe:
        pandas.DataFrame to read in parameter ensemble
    headline_parser:
        A function which receives the list of column labels of the
        dataframe and converts it into vector_tags (see constructor of
        Ensemble for more details)
    ensemble_type:
        Ensemble type: representative sample or random ensemble. If None,
        EnsembleType.ensemble is used.
    lower_bound:
        array of potential lower bounds for the parameters
    upper_bound:
        array of potential upper bounds for the parameters

    Returns
    -------
    result:
        Ensemble object of parameter vectors
    """
    # column labels become vector_tags only when a parser is supplied
    if headline_parser is None:
        tags = None
    else:
        tags = headline_parser(list(dataframe.columns))

    # fall back to the default ensemble type if the caller gave none
    chosen_type = (EnsembleType.ensemble
                   if ensemble_type is None else ensemble_type)

    return Ensemble(x_vectors=dataframe.values,
                    x_names=list(dataframe.index),
                    vector_tags=tags,
                    ensemble_type=chosen_type,
                    lower_bound=lower_bound,
                    upper_bound=upper_bound)
def _write_bounds_to_h5(f, base, ensemble_prediction, bounds, bound_key):
    """Write one kind of bound (lower or upper) per condition into ``f``."""
    f.create_group(os.path.join(base, f'{bound_key}s'))
    for i_cond, condition_bounds in enumerate(bounds):
        condition_id = \
            ensemble_prediction.prediction_results[0].condition_ids[i_cond]
        # require_group instead of create_group: lower and upper bounds
        # share the same per-condition group, so it may already exist when
        # the second kind of bound is written (create_group would raise).
        f.require_group(os.path.join(base, condition_id))
        f.create_dataset(os.path.join(base, condition_id, bound_key),
                         data=condition_bounds)


def write_ensemble_prediction_to_h5(ensemble_prediction: EnsemblePrediction,
                                    output_file: str,
                                    base_path: str = None):
    """
    Write an EnsemblePrediction to an HDF5 file.

    Written are, in this order: the prediction ID (if set), every single
    prediction result, the lower and upper bounds per condition (if set),
    and every summary in `prediction_summary` that is not None.

    Parameters
    ----------
    ensemble_prediction:
        the ensemble prediction to be stored
    output_file:
        path of the HDF5 file; it is opened in mode 'w', i.e. an existing
        file of that name is overwritten
    base_path:
        path inside the HDF5 file below which everything is written;
        if None, the root of the file is used
    """
    # parse base path
    base = Path('') if base_path is None else Path(base_path)

    # The context manager guarantees that the file is closed even if one of
    # the writes below raises.
    with h5py.File(output_file, 'w') as f:
        # write prediction ID if available
        if ensemble_prediction.prediction_id is not None:
            f.create_dataset(os.path.join(base, PREDICTION_ID),
                             data=ensemble_prediction.prediction_id)

        # write the single prediction results
        # NOTE(review): write_to_h5 is handed the file *name* while `f` is
        # still open for writing; presumably it re-opens the file in a
        # compatible mode -- confirm against its implementation.
        for i_result, result in enumerate(
                ensemble_prediction.prediction_results):
            tmp_base_path = os.path.join(
                base, f'{PREDICTION_RESULTS}_{i_result}')
            result.write_to_h5(output_file, base_path=tmp_base_path)

        # write lower bounds per condition, if available
        if ensemble_prediction.lower_bound is not None:
            _write_bounds_to_h5(f, base, ensemble_prediction,
                                ensemble_prediction.lower_bound, LOWER_BOUND)

        # write upper bounds per condition, if available
        if ensemble_prediction.upper_bound is not None:
            _write_bounds_to_h5(f, base, ensemble_prediction,
                                ensemble_prediction.upper_bound, UPPER_BOUND)

        # write summary statistics to h5 file
        for i_key, i_summary in ensemble_prediction.prediction_summary.items():
            if i_summary is None:
                continue
            tmp_base_path = os.path.join(base, f'{SUMMARY}_{i_key}')
            f.create_group(tmp_base_path)
            i_summary.write_to_h5(output_file, base_path=tmp_base_path)
def get_prediction_dataset(ens: Union[Ensemble, EnsemblePrediction],
                           prediction_index: int = 0) -> np.ndarray:
    """
    Extract an array of predictions from either an Ensemble object, which
    contains a list of predictions, or from an EnsemblePrediction object.

    Parameters
    ----------
    ens:
        Ensemble object containing a set of parameter vectors and a set of
        predictions, or EnsemblePrediction object containing only
        predictions
    prediction_index:
        index telling which prediction from the list should be analyzed;
        only used if ens is an Ensemble

    Returns
    -------
    dataset:
        numpy array containing the ensemble predictions

    Raises
    ------
    TypeError
        if ens is neither an Ensemble nor an EnsemblePrediction
    """
    if isinstance(ens, Ensemble):
        # NOTE(review): the selected entry of ens.predictions is returned
        # as-is; confirm that it is already an array as annotated.
        return ens.predictions[prediction_index]

    if isinstance(ens, EnsemblePrediction):
        # condense_to_arrays() is called first so that prediction_arrays
        # can be read afterwards
        ens.condense_to_arrays()
        return ens.prediction_arrays[OUTPUT].transpose()

    # TypeError (a subclass of Exception) states the actual problem while
    # remaining catchable by existing `except Exception` handlers.
    raise TypeError('Need either an Ensemble object with predictions or '
                    'an EnsemblePrediction object as input. Stopping.')