Copy librosa PCEN implementation

This commit is contained in:
mbsantiago 2025-08-17 18:02:40 +01:00
parent c1945ebdb7
commit 4aea3fb2b0
2 changed files with 37 additions and 10 deletions

View File

@ -109,7 +109,7 @@ class AudioConfig(BaseConfig):
resample: Optional[ResampleConfig] = Field(default_factory=ResampleConfig)
scale: bool = SCALE_RAW_AUDIO
center: bool = True
center: bool = False
duration: Optional[float] = DEFAULT_DURATION

View File

@ -25,6 +25,7 @@ import numpy as np
import xarray as xr
from numpy.typing import DTypeLike
from pydantic import Field
from scipy import signal
from soundevent import arrays, audio
from soundevent.arrays import operations as ops
@ -135,7 +136,7 @@ class PcenConfig(BaseConfig):
Exponent (r). Controls the compression characteristic.
"""
time_constant: float = 0.4
time_constant: float = 0.01
gain: float = 0.98
bias: float = 2
power: float = 0.5
@ -513,6 +514,7 @@ def apply_pcen(
time_constant: float = 0.4,
gain: float = 0.98,
bias: float = 2,
eps: float = 1e-6,
power: float = 0.5,
) -> xr.DataArray:
"""Apply Per-Channel Energy Normalization (PCEN) to a spectrogram.
@ -538,20 +540,45 @@ def apply_pcen(
xr.DataArray
PCEN-scaled spectrogram.
"""
spec = spec * (2**31)
samplerate = 1 / spec.time.attrs["step"]
hop_size = spec.attrs["hop_size"]
hop_length = int(hop_size * samplerate)
t_frames = time_constant * samplerate / (float(hop_length) * 10)
t_frames = time_constant * samplerate / hop_length
smoothing_constant = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
return audio.pcen(
spec * (2**31),
smooth=smoothing_constant,
gain=gain,
bias=bias,
power=power,
).astype(spec.dtype)
axis = spec.get_axis_num("time")
shape = tuple([1] * spec.ndim)
zi = np.empty(shape)
zi[:] = signal.lfilter_zi(
[smoothing_constant],
[1, smoothing_constant - 1],
)[:]
# Smooth the input array along the given axis
smoothed, _ = signal.lfilter(
[smoothing_constant],
[1, smoothing_constant - 1],
spec.data,
zi=zi,
axis=axis, # type: ignore
)
smooth = np.exp(-gain * (np.log(eps) + np.log1p(smoothed / eps)))
data = (bias**power) * np.expm1(
power * np.log1p(spec.data * smooth / bias)
)
return xr.DataArray(
data,
dims=spec.dims,
coords=spec.coords,
attrs=spec.attrs,
)
def scale_log(