import logging
import pandas as pd
from alphadia.calibration.estimator import (
CalibrationEstimator,
calibration_model_provider,
)
from alphadia.constants.keys import CalibCols, ConstantsClass
from alphadia.workflow.managers.base import BaseManager
# NOTE: no name is passed, so this is the root logger rather than a module-specific one.
logger = logging.getLogger()

# Maps calibration group name -> estimator name -> estimator instance.
EstimatorGroups = dict[str, dict[str, CalibrationEstimator]]

# Maps calibration group name -> estimator name -> parameter name -> parameter value.
# A parameter value is a string (e.g. "model"), a list of column names
# (e.g. "input_columns") or a nested dict of keyword arguments ("model_args").
CalibrationConfig = dict[
    str, dict[str, dict[str, str | int | list[str] | dict[str, object]]]
]
class CalibrationGroups(metaclass=ConstantsClass):
    """Names of the calibration groups known to the calibration manager."""

    # group holding the fragment-level estimators
    FRAGMENT = "fragment"

    # group holding the precursor-level estimators
    PRECURSOR = "precursor"
class CalibrationEstimators(metaclass=ConstantsClass):
    """Names of the estimators that can be part of a calibration group."""

    # mass-to-charge ratio
    MZ = "mz"

    # retention time
    RT = "rt"

    # ion mobility
    MOBILITY = "mobility"
# Configuration for the calibration manager.
# Note: The mapping to which columns to actually use is currently done in ColumnNameHandler. # TODO: rethink this coupling
def _loess_estimator_config(
    input_column: str,
    target_column: str,
    output_column: str,
    n_kernels: int,
    transform_deviation: str | None = None,
) -> dict:
    """Build the parameter dict of a single LOESS-based calibration estimator.

    Parameters
    ----------
    input_column : str
        Column used as model input; stored as a one-element list under "input_columns".
    target_column : str
        Column used as model target; stored as a one-element list under "target_columns".
    output_column : str
        Column receiving the calibrated values; stored as a one-element list under "output_columns".
    n_kernels : int
        Value stored as `n_kernels` in the "model_args" of the estimator.
    transform_deviation : str | None, default=None
        If given, stored under "transform_deviation"; otherwise the key is left out entirely.

    Returns
    -------
    dict
        A freshly created parameter dict (no list or dict is shared between calls).
    """
    estimator_config = {
        "input_columns": [input_column],
        "target_columns": [target_column],
        "output_columns": [output_column],
        "model": "LOESSRegression",
        "model_args": {"n_kernels": n_kernels},
    }
    # The key is only added on request, so lookups with a default see it as absent otherwise.
    if transform_deviation is not None:
        estimator_config["transform_deviation"] = transform_deviation
    return estimator_config


# Default layout: an m/z estimator for fragments and m/z, rt and mobility estimators for precursors.
CALIBRATION_GROUPS_CONFIG: CalibrationConfig = {
    CalibrationGroups.FRAGMENT: {
        CalibrationEstimators.MZ: _loess_estimator_config(
            CalibCols.MZ_LIBRARY,
            CalibCols.MZ_OBSERVED,
            CalibCols.MZ_CALIBRATED,
            n_kernels=2,
            transform_deviation="1e6",
        ),
    },
    CalibrationGroups.PRECURSOR: {
        CalibrationEstimators.MZ: _loess_estimator_config(
            CalibCols.MZ_LIBRARY,
            CalibCols.MZ_OBSERVED,
            CalibCols.MZ_CALIBRATED,
            n_kernels=2,
            transform_deviation="1e6",
        ),
        CalibrationEstimators.RT: _loess_estimator_config(
            CalibCols.RT_LIBRARY,
            CalibCols.RT_OBSERVED,
            CalibCols.RT_CALIBRATED,
            n_kernels=6,
        ),
        CalibrationEstimators.MOBILITY: _loess_estimator_config(
            CalibCols.MOBILITY_LIBRARY,
            CalibCols.MOBILITY_OBSERVED,
            CalibCols.MOBILITY_CALIBRATED,
            n_kernels=2,
        ),
    },
}
class CalibrationManager(BaseManager):
    """Manager that holds, fits and applies the calibration estimators of a single run."""

    def __init__(
        self,
        path: None | str = None,
        load_from_file: bool = True,
        has_ms1: bool = True,
        has_mobility: bool = True,
        **kwargs,
    ):
        """Contains, updates and applies all calibrations for a single run.

        Calibrations are organized in calibration groups. Each calibration group is applied
        to a single data structure (precursor dataframe, fragment dataframe, etc.) and holds
        several estimators, each of which calibrates a single property (mz, rt, etc.).
        Each estimator is a `CalibrationEstimator` object wrapping the model used for the calibration.

        Parameters
        ----------
        path : str, default=None
            Path where the current parameter set is saved to and loaded from.

        load_from_file : bool, default=True
            If True, the manager will be loaded from file if it exists.

        has_ms1 : bool, default=True
            If True, the calibration manager will include MS1 calibration,
            i.e. the m/z estimator of the precursor group is set up.

        has_mobility : bool, default=True
            If True, the calibration manager will include mobility calibration,
            i.e. the mobility estimator is set up.

        kwargs :
            Will be passed to the parent class `BaseManager`, need to be valid keyword arguments.
        """
        super().__init__(path=path, load_from_file=load_from_file, **kwargs)

        self._has_mobility = has_mobility
        self._has_ms1 = has_ms1

        manager_name = self.__class__.__name__
        self.reporter.log_string(f"Initializing {manager_name}")
        self.reporter.log_event("initializing", {"name": manager_name})

        # Estimators are only built from the default config when nothing was loaded from file
        # (presumably the parent class restores the state in that case -- confirm in BaseManager).
        if self.is_loaded_from_file:
            return

        self.all_fitted = False
        self.estimator_groups = self.setup_estimator_groups(CALIBRATION_GROUPS_CONFIG)

    @property
    def estimator_groups(self) -> EstimatorGroups:
        """Calibration groups, as a mapping of group name to the estimators of that group."""
        return self._estimator_groups

    @estimator_groups.setter
    def estimator_groups(self, value: EstimatorGroups):
        self._estimator_groups = value

    def setup_estimator_groups(
        self, calibration_config: CalibrationConfig
    ) -> EstimatorGroups:
        """Create the estimators described by a calibration configuration.

        The configuration maps group names to estimators, and estimator names to their parameters.
        For each estimator, `model` is used to request a model class from the
        `calibration_model_provider`, which is then instantiated with `model_args`.
        The model is wrapped in a `CalibrationEstimator` and added to its group.

        The mobility estimator is skipped if the manager was created with `has_mobility=False`,
        the m/z estimator of the precursor group is skipped if it was created with `has_ms1=False`.

        Parameters
        ----------
        calibration_config : CalibrationConfig
            Calibration configuration

        Returns
        -------
        EstimatorGroups
            The initialized estimators, by group name and estimator name.

        Example
        -------
        Set up a single group with a single estimator:

        .. code-block:: python

            calibration_manager = CalibrationManager()
            estimator_groups = calibration_manager.setup_estimator_groups({
                'fragment': {
                    'mz': {
                        'model': 'LOESSRegression',
                        'model_args': { 'n_kernels': 2 },
                        'input_columns': [CalibCols.MZ_LIBRARY],
                        'target_columns': [CalibCols.MZ_OBSERVED],
                        'output_columns': [CalibCols.MZ_CALIBRATED],
                        'transform_deviation': 1e6
                    }
                }
            })

        """
        log = self.reporter.log_string
        log("Setting up calibration estimators ..")

        groups: EstimatorGroups = {}
        for group_name, params_by_estimator in calibration_config.items():
            log(
                f"Found {len(params_by_estimator)} estimator(s) in calibration group '{group_name}'"
            )

            group_estimators: dict[str, CalibrationEstimator] = {}
            for estimator_name, estimator_params in params_by_estimator.items():
                mobility_unavailable = (
                    estimator_name == CalibrationEstimators.MOBILITY
                    and not self._has_mobility
                )
                ms1_unavailable = (
                    group_name == CalibrationGroups.PRECURSOR
                    and estimator_name == CalibrationEstimators.MZ
                    and not self._has_ms1
                )

                # name to report in case the estimator cannot be used for this raw data
                if mobility_unavailable:
                    skipped_name = CalibrationEstimators.MOBILITY
                elif ms1_unavailable:
                    skipped_name = CalibrationEstimators.MZ
                else:
                    skipped_name = None

                if skipped_name is not None:
                    log(
                        f"Skipping estimator '{skipped_name}' in group '{group_name}' as it is not available in the raw data"
                    )
                    continue

                model_class = calibration_model_provider.get_model(
                    estimator_params["model"]
                )
                model_kwargs = estimator_params.get("model_args", {})

                log(
                    f"Initializing estimator '{estimator_name}' in group '{group_name}' with '{estimator_params}' .."
                )

                group_estimators[estimator_name] = CalibrationEstimator(
                    name=estimator_name,
                    model=model_class(**model_kwargs),
                    input_columns=estimator_params["input_columns"],
                    target_columns=estimator_params["target_columns"],
                    output_columns=estimator_params["output_columns"],
                    transform_deviation=estimator_params.get("transform_deviation"),
                )

            # a group is registered even if all of its estimators were skipped
            groups[group_name] = group_estimators

        log("Done setting up calibration estimators.")
        return groups

    def get_estimator(
        self, group_name: str, estimator_name: str
    ) -> CalibrationEstimator | None:
        """Look up a single estimator of a calibration group.

        Parameters
        ----------
        group_name : str
            Name of the calibration group

        estimator_name : str
            Name of the estimator

        Returns
        -------
        CalibrationEstimator | None
            The estimator object, or None if the group or the estimator does not exist

        """
        estimators_in_group = self.estimator_groups.get(group_name, {})
        return estimators_in_group.get(estimator_name)

    def fit(
        self,
        df: pd.DataFrame,
        group_name: str,
        plot: bool = True,
        figure_path: None | str = None,
    ):
        """Fit all estimators of one calibration group and update `all_fitted`.

        Parameters
        ----------
        df : pandas.DataFrame
            Dataframe containing the input and target columns

        group_name : str
            Name of the calibration group

        plot: bool, default=True
            If True, a plot of the calibration is generated.

        figure_path: str, default=None
            If set, the generated plot is saved to the given path.

        Raises
        ------
        KeyError
            If there is no calibration group with the given name.

        """
        log = self.reporter.log_string
        for group_estimator in self.estimator_groups[group_name].values():
            log(
                f"Fitting estimator '{group_estimator.name}' in calibration group '{group_name}' .."
            )
            group_estimator.fit(df, plot=plot, figure_path=figure_path)

        # The flag covers the estimators of *all* groups, not only of the group fitted above.
        fitted_flags = [
            any_estimator.is_fitted
            for estimators_in_group in self.estimator_groups.values()
            for any_estimator in estimators_in_group.values()
        ]
        self.all_fitted = all(fitted_flags)

    def predict(self, df: pd.DataFrame, group_name: str):
        """Apply all estimators of one calibration group to a dataframe.

        Each estimator is called with `inplace=True`, nothing is returned.

        Parameters
        ----------
        df : pandas.DataFrame
            Dataframe containing the input and target columns

        group_name : str
            Name of the calibration group

        Raises
        ------
        KeyError
            If there is no calibration group with the given name.

        """
        log = self.reporter.log_string
        for group_estimator in self.estimator_groups[group_name].values():
            log(
                f"Predicting estimator '{group_estimator.name}' in calibration group '{group_name}' .."
            )
            group_estimator.predict(df, inplace=True)