Source code for alphadia.search.selection.config_df

"""Configuration DataFrames for selection parameters."""

import logging

import numba as nb
import numpy as np
import pandas as pd

from alphadia.search.jitclasses.jit_config import JITConfig

logger = logging.getLogger()



[docs]
@nb.experimental.jitclass()
class CandidateSelectionConfigJIT:
    """Numba compatible config object for the HybridCandidate class.

    The class is a plain typed container: the field types are declared by the
    class-level annotations below and the constructor copies every argument
    into the field of the same name.

    ``CandidateSelectionConfig`` names this class as its ``_jit_container_type``
    and its ``__init__`` assigns the default value of every parameter, in the
    same order as the positional parameters of this constructor.
    """

    # tolerances (NOTE(review): units are not stated in this file)
    rt_tolerance: nb.float64
    precursor_mz_tolerance: nb.float64
    fragment_mz_tolerance: nb.float64
    mobility_tolerance: nb.float64
    isotope_tolerance: nb.float64

    # peak length / sigma scale for the rt and mobility dimensions
    # (NOTE(review): exact meaning is not visible here - confirm with the consumer)
    peak_len_rt: nb.float64
    sigma_scale_rt: nb.float64
    peak_len_mobility: nb.float64
    sigma_scale_mobility: nb.float64

    # integer counts / sizes and the shared-ion switch
    candidate_count: nb.int64
    top_k_precursors: nb.int64
    top_k_fragments: nb.int64
    exclude_shared_ions: nb.types.bool_
    kernel_size: nb.int64

    # labelled "parameters used during peak identification" in
    # CandidateSelectionConfig.__init__
    f_mobility: nb.float64
    f_rt: nb.float64
    center_fraction: nb.float64
    min_size_mobility: nb.int64
    min_size_rt: nb.int64
    max_size_mobility: nb.int64
    max_size_rt: nb.int64

    # boolean switches
    group_channels: nb.types.bool_
    use_weighted_score: nb.types.bool_

    # switch plus two float thresholds for joining close candidates
    join_close_candidates: nb.types.bool_
    join_close_candidates_scan_threshold: nb.float64
    join_close_candidates_cycle_threshold: nb.float64

    # 1-D, C-contiguous float64 arrays
    feature_std: nb.float64[::1]
    feature_mean: nb.float64[::1]
    feature_weight: nb.float64[::1]

    def __init__(
        self,
        rt_tolerance,
        precursor_mz_tolerance,
        fragment_mz_tolerance,
        mobility_tolerance,
        isotope_tolerance,
        peak_len_rt,
        sigma_scale_rt,
        peak_len_mobility,
        sigma_scale_mobility,
        candidate_count,
        top_k_precursors,
        top_k_fragments,
        exclude_shared_ions,
        kernel_size,
        f_mobility,
        f_rt,
        center_fraction,
        min_size_mobility,
        min_size_rt,
        max_size_mobility,
        max_size_rt,
        group_channels,
        use_weighted_score,
        join_close_candidates,
        join_close_candidates_scan_threshold,
        join_close_candidates_cycle_threshold,
        feature_std,
        feature_mean,
        feature_weight,
    ):
        """Store every argument in the field of the same name.

        All parameters are required; there are no defaults at this level.
        """
        self.rt_tolerance = rt_tolerance
        self.precursor_mz_tolerance = precursor_mz_tolerance
        self.fragment_mz_tolerance = fragment_mz_tolerance
        self.mobility_tolerance = mobility_tolerance
        self.isotope_tolerance = isotope_tolerance

        self.peak_len_rt = peak_len_rt
        self.sigma_scale_rt = sigma_scale_rt
        self.peak_len_mobility = peak_len_mobility
        self.sigma_scale_mobility = sigma_scale_mobility

        self.candidate_count = candidate_count
        self.top_k_precursors = top_k_precursors
        self.top_k_fragments = top_k_fragments
        self.exclude_shared_ions = exclude_shared_ions
        self.kernel_size = kernel_size

        self.f_mobility = f_mobility
        self.f_rt = f_rt
        self.center_fraction = center_fraction
        self.min_size_mobility = min_size_mobility
        self.min_size_rt = min_size_rt
        self.max_size_mobility = max_size_mobility
        self.max_size_rt = max_size_rt

        self.group_channels = group_channels
        self.use_weighted_score = use_weighted_score

        self.join_close_candidates = join_close_candidates
        self.join_close_candidates_scan_threshold = join_close_candidates_scan_threshold
        self.join_close_candidates_cycle_threshold = (
            join_close_candidates_cycle_threshold
        )

        self.feature_std = feature_std
        self.feature_mean = feature_mean
        self.feature_weight = feature_weight




[docs]
class CandidateSelectionConfig(
    JITConfig
):  # TODO rename to CandidateSelectionHyperparameters
    """Python-side set of candidate selection parameters and their defaults.

    The default values are assigned in ``__init__``. The class names
    ``CandidateSelectionConfigJIT`` as the numba container type it is paired
    with; how the two are connected is implemented in ``JITConfig``, which is
    not part of this file.
    """

    # numba jitclass that carries the same parameters as typed fields
    _jit_container_type = CandidateSelectionConfigJIT


[docs]
    def __init__(self):
        """Populate the config with its default parameter values.

        NOTE(review): the attributes are assigned in exactly the order of the
        positional parameters of ``CandidateSelectionConfigJIT.__init__``.
        Presumably ``JITConfig`` relies on this ordering when it creates the
        jitclass instance - confirm before reordering, adding or removing
        attributes.
        """
        super().__init__()

        # tolerances (NOTE(review): units are not stated in this file)
        self.rt_tolerance = 60.0
        self.precursor_mz_tolerance = 10.0
        self.fragment_mz_tolerance = 15.0
        self.mobility_tolerance = 0.1
        self.isotope_tolerance = 0.01

        # peak length / sigma scale for the rt and mobility dimensions
        self.peak_len_rt = 10.0
        self.sigma_scale_rt = 0.1
        self.peak_len_mobility = 0.013
        self.sigma_scale_mobility = 1.0

        self.candidate_count = 5

        self.top_k_precursors = 3
        self.top_k_fragments = 12
        self.exclude_shared_ions = True
        self.kernel_size = 30

        # parameters used during peak identification
        self.f_mobility = 1.0
        self.f_rt = 0.99
        self.center_fraction = 0.5
        self.min_size_mobility = 8
        self.min_size_rt = 3
        self.max_size_mobility = 30
        self.max_size_rt = 15

        self.group_channels = False
        self.use_weighted_score = True

        self.join_close_candidates = True
        self.join_close_candidates_scan_threshold = 0.01
        self.join_close_candidates_cycle_threshold = 0.6

        # The active defaults below are length-1 float64 arrays (std = 1,
        # mean = 0, weight = 1). The commented-out lines are unused alternative
        # values with 12 entries each.
        # self.feature_std = np.array([ 1.2583724, 0.91052234, 1.2126098, 14.557817, 0.04327635, 0.24623954, 0.03225865, 1.2671406,1.,1,1,1 ], np.float64)
        self.feature_std = np.ones(1, np.float64)
        self.feature_mean = np.zeros(1, np.float64)
        self.feature_weight = np.ones(1, np.float64)

        # self.feature_weight[2] = 1.
        # self.feature_weight[1] = 1.

        # self.feature_weight[11] = 1.
        # self.feature_mean = np.array([ 2.967344, 1.2160938, 1.426444, 13.960179, 0.06620345, 0.44364494, 0.03138363, 3.1453438,1.,1,1,1 ], np.float64)
        # self.feature_weight = np.array([ 0.43898424,  0.97879761,  0.72262148, 0., 0.0,  0.3174245, 0.30102549,  0.44892641, 1.,1,1,1], np.float64)


[docs]
    def validate(self):
        """Do nothing: no validation is performed on this config's values.

        NOTE(review): presumably a hook that ``JITConfig`` expects subclasses
        to provide - confirm against the base class.
        """





[docs]
@nb.experimental.jitclass
class PrecursorFlatContainer:
    """Numba jitclass bundling precursor data as typed, C-contiguous arrays.

    The constructor stores the arrays it is given without copying or
    checking them here.

    NOTE(review): the 1-D arrays are presumably index-aligned with one entry
    per precursor, and the ``*_start_idx`` / ``*_stop_idx`` pairs presumably
    delimit ranges in separate fragment / candidate containers - confirm
    against the callers.
    """

    # 1-D uint32 index arrays
    precursor_idx: nb.uint32[::1]

    frag_start_idx: nb.uint32[::1]
    frag_stop_idx: nb.uint32[::1]
    candidate_start_idx: nb.uint32[::1]
    candidate_stop_idx: nb.uint32[::1]

    # 1-D property arrays (uint8 charge, float32 rt / mobility / mz)
    charge: nb.uint8[::1]
    rt: nb.float32[::1]
    mobility: nb.float32[::1]
    mz: nb.float32[::1]
    # 2-D float32 array, C-contiguous along the last axis
    isotopes: nb.float32[:, ::1]

    def __init__(
        self,
        precursor_idx,
        frag_start_idx,
        frag_stop_idx,
        candidate_start_idx,
        candidate_stop_idx,
        charge,
        rt,
        mobility,
        mz,
        isotopes,
    ):
        """Store every argument in the field of the same name."""
        self.precursor_idx = precursor_idx

        self.frag_start_idx = frag_start_idx
        self.frag_stop_idx = frag_stop_idx
        self.candidate_start_idx = candidate_start_idx
        self.candidate_stop_idx = candidate_stop_idx

        self.charge = charge
        self.rt = rt
        self.mobility = mobility
        self.mz = mz
        self.isotopes = isotopes




[docs]
@nb.experimental.jitclass
class CandidateContainer:
    """Numba jitclass storing candidates in fixed-length, zero-initialised arrays.

    All nine arrays are allocated by ``__init__`` with ``n_candidates`` entries.

    NOTE(review): entry ``i`` of every array presumably describes the same
    candidate - confirm against the code that fills the container.
    """

    # all fields are 1-D, C-contiguous arrays
    precursor_idx: nb.uint32[::1]
    rank: nb.uint8[::1]  # uint8: values above 255 cannot be represented
    score: nb.float32[::1]

    scan_center: nb.uint32[::1]
    scan_start: nb.uint32[::1]
    scan_stop: nb.uint32[::1]

    frame_center: nb.uint32[::1]
    frame_start: nb.uint32[::1]
    frame_stop: nb.uint32[::1]

    def __init__(
        self,
        n_candidates,
    ):
        """Allocate every array with ``n_candidates`` zero-valued entries."""
        self.precursor_idx = np.zeros(n_candidates, dtype=np.uint32)
        self.rank = np.zeros(n_candidates, dtype=np.uint8)
        self.score = np.zeros(n_candidates, dtype=np.float32)

        self.scan_center = np.zeros(n_candidates, dtype=np.uint32)
        self.scan_start = np.zeros(n_candidates, dtype=np.uint32)
        self.scan_stop = np.zeros(n_candidates, dtype=np.uint32)

        self.frame_center = np.zeros(n_candidates, dtype=np.uint32)
        self.frame_start = np.zeros(n_candidates, dtype=np.uint32)
        self.frame_stop = np.zeros(n_candidates, dtype=np.uint32)

    def get_candidate_df_column_names(self) -> list[str]:
        """Get the column names for the candidate DataFrame.

        The names are listed in the same order as the arrays returned by
        ``get_candidate_df_data``.
        """
        return [
            "precursor_idx",
            "rank",
            "score",
            "scan_center",
            "scan_start",
            "scan_stop",
            "frame_center",
            "frame_start",
            "frame_stop",
        ]

    def get_candidate_df_data(self, min_score: float = 0) -> tuple[np.ndarray, ...]:
        """Prepare a tuple with the candidate data, filtering by minimum score.

        Only entries whose score is strictly greater than ``min_score`` are
        kept; the same boolean mask is applied to all nine arrays. Because
        ``score`` starts out as zeros, entries that never received a positive
        score are dropped at the default threshold of 0.

        The arrays are returned in the order given by
        ``get_candidate_df_column_names``.
        """
        # strict comparison: a score equal to min_score is filtered out
        mask = self.score > min_score

        return (
            self.precursor_idx[mask],
            self.rank[mask],
            self.score[mask],
            self.scan_center[mask],
            self.scan_start[mask],
            self.scan_stop[mask],
            self.frame_center[mask],
            self.frame_start[mask],
            self.frame_stop[mask],
        )




[docs]
def candidate_container_to_df(candidate_container: CandidateContainer) -> pd.DataFrame:
    """Convert a CandidateContainer to pd.DataFrame."""
    return pd.DataFrame(
        {
            key: value
            for key, value in zip(
                candidate_container.get_candidate_df_column_names(),
                candidate_container.get_candidate_df_data(),
                strict=True,
            )
        }
    )