Source code for alphadia.fragcomp.utils
"""Utility methods for fragment competition."""
import logging
import numpy as np
import pandas as pd
logger = logging.getLogger(__name__)
[docs]
def add_frag_start_stop_idx(
psm_df: pd.DataFrame, frag_df: pd.DataFrame
) -> pd.DataFrame:
"""The fragment dataframe is indexed by the precursor index.
This function adds the start and stop indices of the fragments to the PSM dataframe.
Parameters
----------
psm_df: pd.DataFrame
The PSM dataframe.
frag_df: pd.DataFrame
The fragment dataframe.
Returns
-------
pd.DataFrame
The PSM dataframe with the start and stop indices of the fragments.
"""
if "_frag_start_idx" in psm_df.columns and "_frag_stop_idx" in psm_df.columns:
logger.warning(
"Fragment start and stop indices already present in PSM dataframe. Skipping."
)
return psm_df
frag_df["frag_idx"] = np.arange(len(frag_df))
index_df = frag_df.groupby("_candidate_idx", as_index=False).agg(
_frag_start_idx=pd.NamedAgg("frag_idx", "min"),
_frag_stop_idx=pd.NamedAgg("frag_idx", "max"),
)
index_df["_frag_stop_idx"] += 1
return psm_df.merge(index_df, "inner", on="_candidate_idx")
[docs]
def candidate_hash(precursor_idx: np.ndarray, rank: np.ndarray) -> np.ndarray:
"""Create a 64 bit hash (np.ndarray[np.uint64]) from precursor_idx and rank.
The precursor_idx is the lower 32 bits.
The rank is the next 8 bits.
Note: the explicit casting is important to ensure it returns a 64-bit integer.
"""
return (precursor_idx.astype(np.int64) + (rank.astype(np.int64) << 32)).astype(
np.uint64
)