Source code for pimmslearn.io.load
import logging
from typing import List, Union
import pandas as pd
logger = logging.getLogger(__name__)
[docs]
def verify_df(
df: pd.DataFrame,
fname: str,
index_col: str, # could be potentially 0 for the first column
verify_fname: bool = False,
usecols=None,
):
if usecols and isinstance(index_col, str):
assert index_col in usecols, "Add index_col to usecols Sequence"
if verify_fname:
if not len(df.shape) == 2:
raise ValueError(
f"Expected 2 -dimensional array, not {len(df.shape)} -dimensional,"
f" of type: {type(df)}"
)
N, M = df.shape
assert f"N{N:05d}" in str(fname) and f"M{M:05d}" in str(fname), (
"Filename number don't match loaded numbers: "
f"{fname} should contain N{N} and M{M}"
)
[docs]
def from_csv(
fname: str,
nrows: int = None,
# could be potentially 0 for the first column
index_col: Union[int, str, List] = "Sample ID",
verify_fname: bool = False,
usecols=None,
**kwargs,
):
logger.warning(f"Passed unknown kwargs: {kwargs}")
df = pd.read_csv(
fname, index_col=index_col, low_memory=False, nrows=nrows, usecols=usecols
).squeeze("columns")
if len(df.shape) == 1:
# unstack all but first column
df = df.unstack(df.index.names[1:])
verify_df(
df=df,
fname=fname,
index_col=index_col,
verify_fname=verify_fname,
usecols=usecols,
)
return df # all __init__ parameters are kwargs
[docs]
def from_pickle(
fname: str,
# could be potentially 0 for the first column
index_col: Union[int, str, List] = "Sample ID",
verify_fname: bool = False,
usecols=None,
**kwargs,
) -> pd.DataFrame:
logger.warning(f"Passed unknown kwargs: {kwargs}")
df = pd.read_pickle(fname).squeeze()
if len(df.shape) == 1:
df = df.unstack(df.index.names[1:])
verify_df(
df=df,
fname=fname,
index_col=index_col,
verify_fname=verify_fname,
usecols=usecols,
)
return df # all __init__ parameters are kwargs