Source code for pimmslearn.analyzers.compare_predictions
from __future__ import annotations
from pathlib import Path
from typing import List
import pandas as pd
[docs]
def load_predictions(pred_files: List, shared_columns=("observed",)):
    """Load prediction files of several models and join them column-wise.

    Every file is read with ``pd.read_csv(fname, index_col=[0, 1])``, i.e. its
    first two columns form the index. The first file is the base frame; each
    further file is joined onto it with ``DataFrame.join``.

    Parameters
    ----------
    pred_files : List
        Paths of the csv files to load. At least one file is required.
    shared_columns : sequence of str, str or None, optional
        Columns contained in every file which have to hold identical values
        on the shared index, by default ``("observed",)``. They are kept once
        (from the first file) and dropped from all further files before
        joining. ``None`` or an empty sequence disables the check; files are
        then joined as they are.

    Returns
    -------
    pd.DataFrame
        Shared columns and all remaining columns of the loaded files.

    Raises
    ------
    ValueError
        If ``pred_files`` is empty.
    AssertionError
        If a file shares no index entry with the data loaded so far, or if
        the values of the shared columns differ on the shared index.
    """
    pred_files = iter(pred_files)
    try:
        fname = next(pred_files)
    except StopIteration:
        # do not leak StopIteration to callers (e.g. generators)
        raise ValueError("No prediction files to load.") from None
    pred = pd.read_csv(fname, index_col=[0, 1])
    # normalise to a list: None -> no shared columns, a single name -> [name]
    if shared_columns is None:
        shared_columns = []
    elif isinstance(shared_columns, str):
        shared_columns = [shared_columns]
    else:
        shared_columns = list(shared_columns)
    for fname in pred_files:
        _pred_file = pd.read_csv(fname, index_col=[0, 1])
        idx_shared = pred.index.intersection(_pred_file.index)
        assert len(
            idx_shared
        ), f"No shared index between already loaded models {pred.columns} and {fname}"
        if shared_columns:
            loaded = pred.loc[idx_shared, shared_columns]
            new = _pred_file.loc[idx_shared, shared_columns]
            # Element-wise comparison reduced over all cells. The builtin
            # all() on a DataFrame only iterates over its column labels and
            # is therefore always truthy. NaN at the same position counts as
            # equal.
            is_equal = (loaded == new) | (loaded.isna() & new.isna())
            assert (
                is_equal.all().all()
            ), f"Shared columns {shared_columns} differ between loaded models and {fname}"
            pred = pred.join(_pred_file.drop(shared_columns, axis=1))
        else:
            pred = pred.join(_pred_file)
    return pred
[docs]
def load_split_prediction_by_modelkey(
    experiment_folder: Path,
    split: str,
    model_keys: list[str],
    allow_missing=False,
    shared_columns: list[str] | None = None,
):
    """Load predictions from a list of models.

    The file of each model is expected at
    ``experiment_folder / "preds" / f"pred_{split}_{key}.csv"``.

    Parameters
    ----------
    experiment_folder : Path
        Path to experiment folder
    split : str
        which split of simulated data to load
    model_keys : List
        List of model keys to be loaded
    allow_missing : bool, optional
        Ignore missing pred files of requested model, default False
    shared_columns : List, optional
        List of columns that are shared between all models, by default None
        (no shared columns, files are joined as they are)

    Returns
    -------
    pd.DataFrame
        Prediction data frame with shared columns and model predictions

    Raises
    ------
    FileNotFoundError
        If a requested file does not exist and ``allow_missing`` is False, or
        if none of the requested files exists.
    """
    pred_files = [
        experiment_folder / "preds" / f"pred_{split}_{key}.csv" for key in model_keys
    ]
    # Collect the files which exist instead of removing the missing ones:
    # list.remove() takes a single element, not a list of elements.
    available = []
    for file in pred_files:
        if file.exists():
            available.append(file)
        elif allow_missing:
            print(f"WARNING: {file} does not exist")
        else:
            raise FileNotFoundError(f"{file} does not exist")
    if not available:
        raise FileNotFoundError(
            f"No prediction files found for split '{split}' in "
            f"{experiment_folder / 'preds'}"
        )
    # None stands for "no shared columns"; pass an empty list so the loader
    # does not have to cast None to a list.
    if shared_columns is None:
        shared_columns = []
    return load_predictions(available, shared_columns=shared_columns)
[docs]
def load_single_csv_pred_file(
    fname: str | Path, value_name: str = "intensity"
) -> pd.Series:
    """Load a single pred file from a single model.

    The last column holds the measurements, all other columns are used as
    index.

    Parameters
    ----------
    fname : str | Path
        Path to csv file to be loaded
    value_name : str, optional
        name for measurements to be used, by default 'intensity'

    Returns
    -------
    pd.Series
        measurements as a single column with set indices
    """
    pred = pd.read_csv(fname)  # getattr for other file formats
    pred = pred.set_index(pred.columns[:-1].tolist())
    # Squeeze the column axis only: a plain squeeze() would also collapse a
    # file with a single row, returning a scalar instead of a Series.
    pred = pred.squeeze(axis="columns")
    pred.name = value_name
    return pred