import h5py
import numpy as np
import pandas as pd
import os
from typing import Callable, Union
from pathlib import Path
from .constants import (EnsembleType, OUTPUT, UPPER_BOUND, LOWER_BOUND,
PREDICTION_RESULTS, PREDICTION_ID, SUMMARY)
from .ensemble import (Ensemble, EnsemblePrediction)
def read_from_csv(path: str,
                  sep: str = '\t',
                  index_col: int = 0,
                  headline_parser: Callable = None,
                  ensemble_type: EnsembleType = None,
                  lower_bound: np.ndarray = None,
                  upper_bound: np.ndarray = None):
    """
    Load a parameter ensemble from a csv file.

    The file is parsed with ``pandas.read_csv`` and the resulting table is
    handed over to ``read_from_df``, which builds the ensemble.

    Parameters
    ----------
    path:
        Location of the csv file holding the parameter ensemble
    sep:
        Column separator used in the csv file (tab by default)
    index_col:
        Position of the column which is used as index of the table
    headline_parser:
        Optional function which converts the headline of the csv file into
        vector_tags (see constructor of Ensemble for more details)
    ensemble_type:
        Ensemble type: representative sample or random ensemble. If omitted,
        ``EnsembleType.ensemble`` is used.
    lower_bound:
        Array of potential lower bounds for the parameters
    upper_bound:
        Array of potential upper bounds for the parameters

    Returns
    -------
    result:
        Ensemble object of parameter vectors, as created by ``read_from_df``
    """
    # parse the csv file into a table
    table = pd.read_csv(path, sep=sep, index_col=index_col)

    # fall back to the default ensemble type if the caller did not pick one
    chosen_type = ensemble_type
    if chosen_type is None:
        chosen_type = EnsembleType.ensemble

    # everything besides the table itself is passed through unchanged
    forwarded_options = {
        'headline_parser': headline_parser,
        'ensemble_type': chosen_type,
        'lower_bound': lower_bound,
        'upper_bound': upper_bound,
    }
    return read_from_df(dataframe=table, **forwarded_options)
def read_from_df(dataframe: pd.DataFrame,
                 headline_parser: Callable = None,
                 ensemble_type: EnsembleType = None,
                 lower_bound: np.ndarray = None,
                 upper_bound: np.ndarray = None):
    """
    Build an ensemble from a pandas DataFrame.

    The values of the DataFrame are used as parameter vectors and its index
    labels as parameter names.

    Parameters
    ----------
    dataframe:
        pandas.DataFrame holding the parameter ensemble
    headline_parser:
        Optional function which receives the list of column labels of the
        DataFrame and converts it into vector_tags (see constructor of
        Ensemble for more details). Without a parser, no vector_tags are set.
    ensemble_type:
        Ensemble type: representative sample or random ensemble. If omitted,
        ``EnsembleType.ensemble`` is used.
    lower_bound:
        Array of potential lower bounds for the parameters
    upper_bound:
        Array of potential upper bounds for the parameters

    Returns
    -------
    result:
        Ensemble object of parameter vectors
    """
    # vector_tags can only be derived from the column labels via a parser
    tags = (None if headline_parser is None
            else headline_parser(list(dataframe.columns)))

    # fall back to the default ensemble type if the caller did not pick one
    chosen_type = (EnsembleType.ensemble if ensemble_type is None
                   else ensemble_type)

    ensemble_arguments = dict(
        x_vectors=dataframe.values,
        x_names=list(dataframe.index),
        vector_tags=tags,
        ensemble_type=chosen_type,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
    )
    return Ensemble(**ensemble_arguments)
def _write_bounds_to_h5(f,
                        base: Path,
                        ensemble_prediction: EnsemblePrediction,
                        bounds,
                        bound_name: str) -> None:
    """Write one bound array per condition to <base>/<condition_id>/<bound_name>."""
    # this group is part of the existing file layout and is kept for
    # compatibility; the datasets themselves live in the condition groups
    f.create_group(os.path.join(base, f'{bound_name}s'))
    for i_cond, bound in enumerate(bounds):
        condition_id = \
            ensemble_prediction.prediction_results[0].condition_ids[i_cond]
        # lower and upper bounds share the same condition group, so the
        # group must be reused if it exists: create_group would raise on
        # the second call for the same path
        f.require_group(os.path.join(base, condition_id))
        f.create_dataset(os.path.join(base, condition_id, bound_name),
                         data=bound)


def write_ensemble_prediction_to_h5(ensemble_prediction: EnsemblePrediction,
                                    output_file: str,
                                    base_path: str = None):
    """
    Write an ensemble prediction to an HDF5 file.

    Stored are, as far as available: the prediction ID, every single
    prediction result, the lower and upper bounds per condition and the
    summary statistics of the prediction.

    Parameters
    ----------
    ensemble_prediction:
        The EnsemblePrediction object which should be stored
    output_file:
        Path of the HDF5 file. The file is opened in mode 'w', i.e. an
        already existing file of that name is overwritten.
    base_path:
        Optional path inside the HDF5 file below which everything is written
    """
    # parse base path
    base = Path('') if base_path is None else Path(base_path)

    # the context manager closes the file also if one of the writes fails
    with h5py.File(output_file, 'w') as f:
        # write prediction ID if available
        if ensemble_prediction.prediction_id is not None:
            f.create_dataset(os.path.join(base, PREDICTION_ID),
                             data=ensemble_prediction.prediction_id)

        # write the single prediction results
        # NOTE(review): the results (and the summaries below) write
        # themselves via the file name while this handle is still open -
        # confirm that their write_to_h5 copes with that
        for i_result, result in enumerate(
                ensemble_prediction.prediction_results):
            tmp_base_path = os.path.join(
                base, f'{PREDICTION_RESULTS}_{i_result}')
            result.write_to_h5(output_file, base_path=tmp_base_path)

        # write lower bounds per condition, if available
        if ensemble_prediction.lower_bound is not None:
            _write_bounds_to_h5(f, base, ensemble_prediction,
                                ensemble_prediction.lower_bound, LOWER_BOUND)

        # write upper bounds per condition, if available
        if ensemble_prediction.upper_bound is not None:
            _write_bounds_to_h5(f, base, ensemble_prediction,
                                ensemble_prediction.upper_bound, UPPER_BOUND)

        # write summary statistics to h5 file
        for i_key, i_summary in ensemble_prediction.prediction_summary.items():
            if i_summary is None:
                continue
            tmp_base_path = os.path.join(base, f'{SUMMARY}_{i_key}')
            f.create_group(tmp_base_path)
            i_summary.write_to_h5(output_file, base_path=tmp_base_path)
def get_prediction_dataset(ens: Union[Ensemble, EnsemblePrediction],
                           prediction_index: int = 0) -> np.ndarray:
    """
    Extract the predictions from an Ensemble or an EnsemblePrediction.

    Parameters
    ----------
    ens:
        Ensemble object containing a set of parameter vectors and a list of
        predictions, or EnsemblePrediction object containing only predictions
    prediction_index:
        Index telling which prediction from the list of an Ensemble should be
        used. It is ignored if an EnsemblePrediction is passed.

    Returns
    -------
    dataset:
        For an Ensemble, the entry ``ens.predictions[prediction_index]``. For
        an EnsemblePrediction, the transposed output array which is available
        after condensing the prediction to arrays.

    Raises
    ------
    TypeError
        If ``ens`` is neither an Ensemble nor an EnsemblePrediction.
    """
    if isinstance(ens, Ensemble):
        return ens.predictions[prediction_index]

    if isinstance(ens, EnsemblePrediction):
        # the arrays are only read after the prediction has been condensed
        ens.condense_to_arrays()
        return ens.prediction_arrays[OUTPUT].transpose()

    # TypeError is a subclass of Exception, so existing handlers still work
    raise TypeError('Need either an Ensemble object with predictions or '
                    'an EnsemblePrediction object as input. Stopping.')