Source code for alphadia.search.selection.config_df

"""Configuration DataFrames for selection parameters."""

import logging

import numba as nb
import numpy as np
import pandas as pd

from alphadia.search.jitclasses.jit_config import JITConfig

logger = logging.getLogger()


@nb.experimental.jitclass()
class CandidateSelectionConfigJIT:
    """Numba compatible config object for the HybridCandidate class.

    Please see the documentation of the CandidateSelectionConfig class for more
    information on the parameters and their default values.

    The constructor takes one positional argument per field, in the same order
    as the fields are declared below, and stores each one unchanged under the
    same name.
    """

    # float64 scalars
    rt_tolerance: nb.float64
    precursor_mz_tolerance: nb.float64
    fragment_mz_tolerance: nb.float64
    mobility_tolerance: nb.float64
    isotope_tolerance: nb.float64

    peak_len_rt: nb.float64
    sigma_scale_rt: nb.float64
    peak_len_mobility: nb.float64
    sigma_scale_mobility: nb.float64

    # int64 counts and a boolean switch
    candidate_count: nb.int64
    top_k_precursors: nb.int64
    top_k_fragments: nb.int64
    exclude_shared_ions: nb.types.bool_
    kernel_size: nb.int64

    f_mobility: nb.float64
    f_rt: nb.float64
    center_fraction: nb.float64
    min_size_mobility: nb.int64
    min_size_rt: nb.int64
    max_size_mobility: nb.int64
    max_size_rt: nb.int64

    group_channels: nb.types.bool_
    use_weighted_score: nb.types.bool_

    join_close_candidates: nb.types.bool_
    join_close_candidates_scan_threshold: nb.float64
    join_close_candidates_cycle_threshold: nb.float64

    # one-dimensional, C-contiguous float64 arrays
    feature_std: nb.float64[::1]
    feature_mean: nb.float64[::1]
    feature_weight: nb.float64[::1]

    def __init__(
        self,
        rt_tolerance,
        precursor_mz_tolerance,
        fragment_mz_tolerance,
        mobility_tolerance,
        isotope_tolerance,
        peak_len_rt,
        sigma_scale_rt,
        peak_len_mobility,
        sigma_scale_mobility,
        candidate_count,
        top_k_precursors,
        top_k_fragments,
        exclude_shared_ions,
        kernel_size,
        f_mobility,
        f_rt,
        center_fraction,
        min_size_mobility,
        min_size_rt,
        max_size_mobility,
        max_size_rt,
        group_channels,
        use_weighted_score,
        join_close_candidates,
        join_close_candidates_scan_threshold,
        join_close_candidates_cycle_threshold,
        feature_std,
        feature_mean,
        feature_weight,
    ):
        # Plain field-by-field copy: no validation or conversion is done here.
        self.rt_tolerance = rt_tolerance
        self.precursor_mz_tolerance = precursor_mz_tolerance
        self.fragment_mz_tolerance = fragment_mz_tolerance
        self.mobility_tolerance = mobility_tolerance
        self.isotope_tolerance = isotope_tolerance

        self.peak_len_rt = peak_len_rt
        self.sigma_scale_rt = sigma_scale_rt
        self.peak_len_mobility = peak_len_mobility
        self.sigma_scale_mobility = sigma_scale_mobility

        self.candidate_count = candidate_count
        self.top_k_precursors = top_k_precursors
        self.top_k_fragments = top_k_fragments
        self.exclude_shared_ions = exclude_shared_ions
        self.kernel_size = kernel_size

        self.f_mobility = f_mobility
        self.f_rt = f_rt
        self.center_fraction = center_fraction
        self.min_size_mobility = min_size_mobility
        self.min_size_rt = min_size_rt
        self.max_size_mobility = max_size_mobility
        self.max_size_rt = max_size_rt

        self.group_channels = group_channels
        self.use_weighted_score = use_weighted_score

        self.join_close_candidates = join_close_candidates
        self.join_close_candidates_scan_threshold = join_close_candidates_scan_threshold
        self.join_close_candidates_cycle_threshold = (
            join_close_candidates_cycle_threshold
        )

        self.feature_std = feature_std
        self.feature_mean = feature_mean
        self.feature_weight = feature_weight
class CandidateSelectionConfig(
    JITConfig
):  # TODO rename to CandidateSelectionHyperparameters
    """Default values for the fields of CandidateSelectionConfigJIT.

    ``_jit_container_type`` names the jitclass this configuration corresponds
    to; every attribute set in ``__init__`` has a same-named field there.

    NOTE(review): the attributes are assigned in exactly the order of the
    positional parameters of ``CandidateSelectionConfigJIT.__init__``.
    Presumably ``JITConfig`` relies on that order when it builds the jitclass
    instance -- confirm against ``JITConfig`` before reordering, adding or
    removing attributes.
    """

    _jit_container_type = CandidateSelectionConfigJIT

    def __init__(self):
        """Initialise the base class, then set every parameter to its default."""
        super().__init__()
        self.rt_tolerance = 60.0
        self.precursor_mz_tolerance = 10.0
        self.fragment_mz_tolerance = 15.0
        self.mobility_tolerance = 0.1
        self.isotope_tolerance = 0.01

        self.peak_len_rt = 10.0
        self.sigma_scale_rt = 0.1
        self.peak_len_mobility = 0.013
        self.sigma_scale_mobility = 1.0

        self.candidate_count = 5
        self.top_k_precursors = 3
        self.top_k_fragments = 12
        self.exclude_shared_ions = True
        self.kernel_size = 30

        # parameters used during peak identification
        self.f_mobility = 1.0
        self.f_rt = 0.99
        self.center_fraction = 0.5
        self.min_size_mobility = 8
        self.min_size_rt = 3
        self.max_size_mobility = 30
        self.max_size_rt = 15

        self.group_channels = False
        self.use_weighted_score = True

        self.join_close_candidates = True
        self.join_close_candidates_scan_threshold = 0.01
        self.join_close_candidates_cycle_threshold = 0.6

        # The three feature arrays default to length-1 arrays
        # (std = 1, mean = 0, weight = 1).
        # self.feature_std = np.array([ 1.2583724, 0.91052234, 1.2126098, 14.557817, 0.04327635, 0.24623954, 0.03225865, 1.2671406,1.,1,1,1 ], np.float64)
        self.feature_std = np.ones(1, np.float64)
        self.feature_mean = np.zeros(1, np.float64)
        self.feature_weight = np.ones(1, np.float64)
        # self.feature_weight[2] = 1.
        # self.feature_weight[1] = 1.
        # self.feature_weight[11] = 1.
        # self.feature_mean = np.array([ 2.967344, 1.2160938, 1.426444, 13.960179, 0.06620345, 0.44364494, 0.03138363, 3.1453438,1.,1,1,1 ], np.float64)
        # self.feature_weight = np.array([ 0.43898424, 0.97879761, 0.72262148, 0., 0.0, 0.3174245, 0.30102549, 0.44892641, 1.,1,1,1], np.float64)

    def validate(self):
        """Do nothing: no checks are performed on the configured values."""
        pass
@nb.experimental.jitclass
class PrecursorFlatContainer:
    """Numba jitclass that bundles a set of precursor arrays.

    The constructor stores the arrays it is given without copying or
    validating them.

    NOTE(review): presumably all one-dimensional arrays have the same length
    with one entry per precursor, and ``isotopes`` has one row per precursor --
    confirm against the code that builds this container.
    """

    # one-dimensional, C-contiguous arrays
    precursor_idx: nb.uint32[::1]
    frag_start_idx: nb.uint32[::1]
    frag_stop_idx: nb.uint32[::1]
    candidate_start_idx: nb.uint32[::1]
    candidate_stop_idx: nb.uint32[::1]

    charge: nb.uint8[::1]
    rt: nb.float32[::1]
    mobility: nb.float32[::1]
    mz: nb.float32[::1]
    # two-dimensional, C-contiguous array
    isotopes: nb.float32[:, ::1]

    def __init__(
        self,
        precursor_idx,
        frag_start_idx,
        frag_stop_idx,
        candidate_start_idx,
        candidate_stop_idx,
        charge,
        rt,
        mobility,
        mz,
        isotopes,
    ):
        # Plain field-by-field assignment of the supplied arrays.
        self.precursor_idx = precursor_idx
        self.frag_start_idx = frag_start_idx
        self.frag_stop_idx = frag_stop_idx
        self.candidate_start_idx = candidate_start_idx
        self.candidate_stop_idx = candidate_stop_idx

        self.charge = charge
        self.rt = rt
        self.mobility = mobility
        self.mz = mz
        self.isotopes = isotopes
@nb.experimental.jitclass
class CandidateContainer:
    """Numba jitclass holding one pre-allocated array per candidate attribute.

    All arrays have length ``n_candidates`` and start out zero-filled. The two
    ``get_candidate_df_*`` methods return the column names and the matching
    column arrays in the same order.
    """

    precursor_idx: nb.uint32[::1]
    rank: nb.uint8[::1]
    score: nb.float32[::1]

    scan_center: nb.uint32[::1]
    scan_start: nb.uint32[::1]
    scan_stop: nb.uint32[::1]

    frame_center: nb.uint32[::1]
    frame_start: nb.uint32[::1]
    frame_stop: nb.uint32[::1]

    def __init__(
        self,
        n_candidates,
    ):
        """Allocate zero-filled arrays with ``n_candidates`` entries each."""
        self.precursor_idx = np.zeros(n_candidates, dtype=np.uint32)
        self.rank = np.zeros(n_candidates, dtype=np.uint8)
        self.score = np.zeros(n_candidates, dtype=np.float32)

        self.scan_center = np.zeros(n_candidates, dtype=np.uint32)
        self.scan_start = np.zeros(n_candidates, dtype=np.uint32)
        self.scan_stop = np.zeros(n_candidates, dtype=np.uint32)

        self.frame_center = np.zeros(n_candidates, dtype=np.uint32)
        self.frame_start = np.zeros(n_candidates, dtype=np.uint32)
        self.frame_stop = np.zeros(n_candidates, dtype=np.uint32)

    def get_candidate_df_column_names(self) -> list[str]:
        """Get the column names for the candidate DataFrame.

        The order matches the tuple returned by ``get_candidate_df_data``.
        """
        return [
            "precursor_idx",
            "rank",
            "score",
            "scan_center",
            "scan_start",
            "scan_stop",
            "frame_center",
            "frame_start",
            "frame_stop",
        ]

    def get_candidate_df_data(self, min_score: float = 0) -> tuple[np.ndarray, ...]:
        """Prepare a tuple with the candidate data, filtering by minimum score.

        Parameters
        ----------
        min_score : float, default 0
            Exclusive lower bound: only entries with ``score > min_score`` are
            kept. Scores are stored as float32, so the threshold is annotated
            as ``float``; the default literal is left unchanged.

        Returns
        -------
        tuple[np.ndarray, ...]
            The nine filtered arrays, in the order given by
            ``get_candidate_df_column_names``.
        """
        # Strict comparison: with the default threshold, entries whose score is
        # still the initial 0 are dropped.
        keep = self.score > min_score
        return (
            self.precursor_idx[keep],
            self.rank[keep],
            self.score[keep],
            self.scan_center[keep],
            self.scan_start[keep],
            self.scan_stop[keep],
            self.frame_center[keep],
            self.frame_start[keep],
            self.frame_stop[keep],
        )
[docs] def candidate_container_to_df(candidate_container: CandidateContainer) -> pd.DataFrame: """Convert a CandidateContainer to pd.DataFrame.""" return pd.DataFrame( { key: value for key, value in zip( candidate_container.get_candidate_df_column_names(), candidate_container.get_candidate_df_data(), strict=True, ) } )