Source code for pimmslearn.model
import logging
import numpy as np
import pandas as pd
import torch
import torch.utils.data
logger = logging.getLogger(__name__)
[docs]
def build_df_from_pred_batches(pred, scaler=None, index=None, columns=None):
    """Stack batch-wise predictions into a single DataFrame.

    Parameters
    ----------
    pred : sequence of array-like
        Per-batch prediction arrays; they are stacked row-wise with
        ``np.vstack``.
    scaler : object, optional
        Object exposing an ``inverse_transform`` method. If given, it is
        applied to the stacked predictions, by default None (the stacked
        values are used as they are).
    index : optional
        Passed on as ``index`` to ``pd.DataFrame``, by default None
    columns : optional
        Passed on as ``columns`` to ``pd.DataFrame``, by default None

    Returns
    -------
    pd.DataFrame
        The stacked (and, if a scaler was given, inverse-transformed)
        predictions.
    """
    stacked = np.vstack(pred)
    # Test against None explicitly instead of relying on truthiness: an
    # object that defines __len__ or __bool__ may evaluate as falsy and
    # would otherwise be skipped silently.
    if scaler is not None:
        stacked = scaler.inverse_transform(stacked)
    return pd.DataFrame(stacked, index=index, columns=columns)
[docs]
def get_latent_space(
    model_method_call: callable,
    dl: torch.utils.data.DataLoader,
    dl_index: pd.Index,
    latent_tuple_pos: int = 0,
) -> pd.DataFrame:
    """Create a DataFrame of the latent space based on the model method call
    to be used (here: the model encoder or a latent space helper method)

    Parameters
    ----------
    model_method_call : callable
        A method call on a pytorch.Module to create encodings for a batch of data.
    dl : torch.utils.data.DataLoader
        The DataLoader to use, producing predictions in a non-random fashion.
        The model input is read from position 1 of every batch.
    dl_index : pd.Index
        pandas Index used as the index of the returned DataFrame.
    latent_tuple_pos : int, optional
        if model_method_call returns a tuple from a batch,
        the tensor at the tuple position to select, by default 0

    Returns
    -------
    pd.DataFrame
        DataFrame of latent space indexed with dl_index.

    Raises
    ------
    ValueError
        If the DataLoader does not yield any batch.
    """
    latent_batches = []
    for batch in dl:
        res = model_method_call(batch[1])
        if isinstance(res, tuple):
            res = res[latent_tuple_pos]
        # Copy to host memory before converting: Tensor.numpy() is only
        # available for CPU tensors (.cpu() is a no-op if already on CPU).
        latent_batches.append(res.detach().cpu().numpy())
    if not latent_batches:
        # Without any batch the number of latent dimensions is unknown.
        raise ValueError(
            "DataLoader did not yield any batch: cannot build latent space."
        )
    # Number of latent dimensions is read off the last batch's trailing axis.
    n_latent = latent_batches[-1].shape[-1]
    return build_df_from_pred_batches(
        latent_batches,
        index=dl_index,
        columns=[f"latent dimension {i+1}" for i in range(n_latent)],
    )