"""Utility functions for scoring calculations in AlphaDIA.
This module provides numba-accelerated utility functions for various scoring calculations,
including correlation coefficients, profile normalization, and statistical operations.
"""
import numba as nb
import numpy as np
from numba import types
from alphadia.utils import USE_NUMBA_CACHING
[docs]
@nb.njit(
    types.Array(types.float32, 1, "A")(
        types.Array(types.float32, 1, "A"), types.Array(types.float32, 2, "A")
    ),
    cache=USE_NUMBA_CACHING,
)
def correlation_coefficient(x: np.ndarray, ys: np.ndarray) -> np.ndarray:
    """Calculate the Pearson correlation coefficient between x and each row of ys.

    Parameters
    ----------
    x : np.ndarray[float32, ndim=1]
        Base array of shape (n,)
    ys : np.ndarray[float32, ndim=2]
        Array of shape (m, n) containing arrays to correlate with x

    Returns
    -------
    np.ndarray[float32, ndim=1]
        Array of shape (m,) containing correlation coefficients.
        An entry is 0 where either x or the corresponding row of ys has
        zero variance. All entries are 0 when x is empty.
    """
    n = len(x)
    m = len(ys)

    # Results start at zero so degenerate cases can simply be left untouched
    result = np.zeros(m, dtype=np.float32)

    # With no samples there is no mean or variance to compute, and every
    # division by n below would be a division by zero.
    if n == 0:
        return result

    mx = x.mean()

    # Row means are computed one row at a time rather than with an axis
    # argument; they are stored as float32, matching the input dtype.
    my = np.zeros(m, dtype=np.float32)
    for i in range(m):
        my[i] = np.sum(ys[i]) / n

    # Centered x and its variance are shared by every row, so compute them once
    x_minus_mx = x - mx
    var_x = np.sum(x_minus_mx * x_minus_mx) / n

    for i in range(m):
        ys_minus_my = ys[i] - my[i]
        cov = np.sum(x_minus_mx * ys_minus_my) / n
        var_y = np.sum(ys_minus_my * ys_minus_my) / n
        var_xy = var_x * var_y

        # A zero product means x or this row is constant: the correlation is
        # undefined, so the entry keeps its initial value of 0.
        if var_xy != 0:
            result[i] = cov / np.sqrt(var_xy)

    return result
[docs]
@nb.njit(
    types.Array(types.float32, 2, "A")(
        types.Array(types.float32, 2, "A"), types.Optional(types.int64)
    ),
    cache=USE_NUMBA_CACHING,
)
def normalize_profiles(
    intensity_slice: np.ndarray, center_dilations: int = 1
) -> np.ndarray:
    """Normalize each intensity profile by the mean intensity around its center.

    Parameters
    ----------
    intensity_slice : np.ndarray[float32, ndim=2]
        Array of shape (n_profiles, n_points); each row is one profile.
    center_dilations : int, optional
        Number of points on each side of the center index that are included
        in the averaging window. The window is truncated at the start of the
        profile if it would extend past it. Default is 1.

    Returns
    -------
    np.ndarray[float32, ndim=2]
        Array of normalized intensity profiles with same shape as input.
        Profiles whose center window is empty or whose mean center intensity
        is not greater than zero are set to zero.
    """
    center_idx = intensity_slice.shape[1] // 2

    # Clamp the window start at 0: a negative start would be interpreted as an
    # index counted from the end of the row and select the wrong points.
    window_start = max(center_idx - center_dilations, 0)
    window_stop = center_idx + center_dilations + 1

    # Output starts as zeros so profiles that cannot be normalized stay zero
    normalized = np.zeros_like(intensity_slice, dtype=np.float32)

    for i in range(intensity_slice.shape[0]):
        window = intensity_slice[i, window_start:window_stop]

        # An empty window has no mean; skip it instead of dividing by zero
        if window.shape[0] == 0:
            continue

        # Mean is computed manually instead of using an axis parameter
        center_intensity = np.sum(window) / window.shape[0]

        # Only normalize profiles where center intensity > 0
        if center_intensity > 0:
            normalized[i] = intensity_slice[i] / center_intensity

    return normalized