mirror of
https://github.com/macaodha/batdetect2.git
synced 2026-01-10 17:19:34 +01:00
Copy librosa PCEN implementation
This commit is contained in:
parent
c1945ebdb7
commit
4aea3fb2b0
@ -109,7 +109,7 @@ class AudioConfig(BaseConfig):
|
|||||||
|
|
||||||
resample: Optional[ResampleConfig] = Field(default_factory=ResampleConfig)
|
resample: Optional[ResampleConfig] = Field(default_factory=ResampleConfig)
|
||||||
scale: bool = SCALE_RAW_AUDIO
|
scale: bool = SCALE_RAW_AUDIO
|
||||||
center: bool = True
|
center: bool = False
|
||||||
duration: Optional[float] = DEFAULT_DURATION
|
duration: Optional[float] = DEFAULT_DURATION
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -25,6 +25,7 @@ import numpy as np
|
|||||||
import xarray as xr
|
import xarray as xr
|
||||||
from numpy.typing import DTypeLike
|
from numpy.typing import DTypeLike
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
from scipy import signal
|
||||||
from soundevent import arrays, audio
|
from soundevent import arrays, audio
|
||||||
from soundevent.arrays import operations as ops
|
from soundevent.arrays import operations as ops
|
||||||
|
|
||||||
@ -135,7 +136,7 @@ class PcenConfig(BaseConfig):
|
|||||||
Exponent (r). Controls the compression characteristic.
|
Exponent (r). Controls the compression characteristic.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
time_constant: float = 0.4
|
time_constant: float = 0.01
|
||||||
gain: float = 0.98
|
gain: float = 0.98
|
||||||
bias: float = 2
|
bias: float = 2
|
||||||
power: float = 0.5
|
power: float = 0.5
|
||||||
@ -513,6 +514,7 @@ def apply_pcen(
|
|||||||
time_constant: float = 0.4,
|
time_constant: float = 0.4,
|
||||||
gain: float = 0.98,
|
gain: float = 0.98,
|
||||||
bias: float = 2,
|
bias: float = 2,
|
||||||
|
eps: float = 1e-6,
|
||||||
power: float = 0.5,
|
power: float = 0.5,
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
"""Apply Per-Channel Energy Normalization (PCEN) to a spectrogram.
|
"""Apply Per-Channel Energy Normalization (PCEN) to a spectrogram.
|
||||||
@ -538,20 +540,45 @@ def apply_pcen(
|
|||||||
xr.DataArray
|
xr.DataArray
|
||||||
PCEN-scaled spectrogram.
|
PCEN-scaled spectrogram.
|
||||||
"""
|
"""
|
||||||
|
spec = spec * (2**31)
|
||||||
samplerate = 1 / spec.time.attrs["step"]
|
samplerate = 1 / spec.time.attrs["step"]
|
||||||
hop_size = spec.attrs["hop_size"]
|
hop_size = spec.attrs["hop_size"]
|
||||||
|
|
||||||
hop_length = int(hop_size * samplerate)
|
hop_length = int(hop_size * samplerate)
|
||||||
t_frames = time_constant * samplerate / (float(hop_length) * 10)
|
|
||||||
|
t_frames = time_constant * samplerate / hop_length
|
||||||
|
|
||||||
smoothing_constant = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
|
smoothing_constant = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2)
|
||||||
return audio.pcen(
|
|
||||||
spec * (2**31),
|
axis = spec.get_axis_num("time")
|
||||||
smooth=smoothing_constant,
|
|
||||||
gain=gain,
|
shape = tuple([1] * spec.ndim)
|
||||||
bias=bias,
|
zi = np.empty(shape)
|
||||||
power=power,
|
zi[:] = signal.lfilter_zi(
|
||||||
).astype(spec.dtype)
|
[smoothing_constant],
|
||||||
|
[1, smoothing_constant - 1],
|
||||||
|
)[:]
|
||||||
|
|
||||||
|
# Smooth the input array along the given axis
|
||||||
|
smoothed, _ = signal.lfilter(
|
||||||
|
[smoothing_constant],
|
||||||
|
[1, smoothing_constant - 1],
|
||||||
|
spec.data,
|
||||||
|
zi=zi,
|
||||||
|
axis=axis, # type: ignore
|
||||||
|
)
|
||||||
|
|
||||||
|
smooth = np.exp(-gain * (np.log(eps) + np.log1p(smoothed / eps)))
|
||||||
|
data = (bias**power) * np.expm1(
|
||||||
|
power * np.log1p(spec.data * smooth / bias)
|
||||||
|
)
|
||||||
|
|
||||||
|
return xr.DataArray(
|
||||||
|
data,
|
||||||
|
dims=spec.dims,
|
||||||
|
coords=spec.coords,
|
||||||
|
attrs=spec.attrs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def scale_log(
|
def scale_log(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user