Copy librosa PCEN implementation

2026-01-10 17:19:34 +01:00 · 2025-08-17 18:02:40 +01:00 · 2025-08-17 18:02:40 +01:00 · 4aea3fb2b0
commit 4aea3fb2b0
parent c1945ebdb7
2 changed files with 37 additions and 10 deletions
--- a/src/batdetect2/preprocess/audio.py
+++ b/src/batdetect2/preprocess/audio.py
@ -109,7 +109,7 @@ class AudioConfig(BaseConfig):
    resample: Optional[ResampleConfig] = Field(default_factory=ResampleConfig)
    scale: bool = SCALE_RAW_AUDIO
-    center: bool = True
+    center: bool = False
    duration: Optional[float] = DEFAULT_DURATION
--- a/src/batdetect2/preprocess/spectrogram.py
+++ b/src/batdetect2/preprocess/spectrogram.py
@ -25,6 +25,7 @@ import numpy as np
 import xarray as xr
 from numpy.typing import DTypeLike
 from pydantic import Field
 from scipy import signal
 from soundevent import arrays, audio
 from soundevent.arrays import operations as ops
@ -135,7 +136,7 @@ class PcenConfig(BaseConfig):
        Exponent (r). Controls the compression characteristic.
    """
-    time_constant: float = 0.4
+    time_constant: float = 0.01
    gain: float = 0.98
    bias: float = 2
    power: float = 0.5
@ -513,6 +514,7 @@ def apply_pcen(
    time_constant: float = 0.4,
    gain: float = 0.98,
    bias: float = 2,
    eps: float = 1e-6,
    power: float = 0.5,
 ) -> xr.DataArray:
    """Apply Per-Channel Energy Normalization (PCEN) to a spectrogram.
@ -538,20 +540,45 @@ def apply_pcen(
    xr.DataArray
        PCEN-scaled spectrogram.
    """
    spec = spec * (2**31)
    samplerate = 1 / spec.time.attrs["step"]
    hop_size = spec.attrs["hop_size"]
    hop_length = int(hop_size * samplerate)
-    t_frames = time_constant * samplerate / (float(hop_length) * 10)
+
    t_frames = time_constant * samplerate / hop_length
    smoothing_constant = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
-    return audio.pcen(
+
-        spec * (2**31),
+    axis = spec.get_axis_num("time")
-        smooth=smoothing_constant,
+
-        gain=gain,
+    shape = tuple([1] * spec.ndim)
-        bias=bias,
+    zi = np.empty(shape)
-        power=power,
+    zi[:] = signal.lfilter_zi(
-    ).astype(spec.dtype)
+        [smoothing_constant],
        [1, smoothing_constant - 1],
    )[:]
    # Smooth the input array along the given axis
    smoothed, _ = signal.lfilter(
        [smoothing_constant],
        [1, smoothing_constant - 1],
        spec.data,
        zi=zi,
        axis=axis,  # type: ignore
    )
    smooth = np.exp(-gain * (np.log(eps) + np.log1p(smoothed / eps)))
    data = (bias**power) * np.expm1(
        power * np.log1p(spec.data * smooth / bias)
    )
    return xr.DataArray(
        data,
        dims=spec.dims,
        coords=spec.coords,
        attrs=spec.attrs,
    )
 def scale_log(