mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 22:51:58 +02:00
Added docstrings to audio module
This commit is contained in:
parent
a9f91322d4
commit
23620c2233
@ -1,6 +1,7 @@
|
|||||||
"""Module containing functions for preprocessing audio clips."""
|
"""Module containing functions for preprocessing audio clips."""
|
||||||
|
|
||||||
from typing import Optional
|
from functools import partial
|
||||||
|
from typing import Callable, Optional, Protocol
|
||||||
|
|
||||||
import xarray as xr
|
import xarray as xr
|
||||||
from soundevent import data
|
from soundevent import data
|
||||||
@ -47,6 +48,20 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class AudioPreprocessor(Protocol):
    """Callable interface for turning a clip into a preprocessed array.

    Any callable that accepts a clip, plus an optional audio directory, and
    returns an ``xr.DataArray`` satisfies this protocol.
    """

    def __call__(
        self,
        clip: data.Clip,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Preprocess `clip` and return the resulting array.

        Parameters
        ----------
        clip : data.Clip
            The clip to preprocess.
        audio_dir : PathLike, optional
            Optional directory argument forwarded to the implementation
            (presumably where the clip's audio file is looked up).

        Returns
        -------
        xr.DataArray
            The preprocessed output for the clip.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
def build_preprocessor_from_config(
    config: PreprocessingConfig,
) -> AudioPreprocessor:
    """Create an audio preprocessor bound to a fixed configuration.

    Parameters
    ----------
    config : PreprocessingConfig
        Configuration supplied as the ``config`` keyword on every call of
        the returned preprocessor.

    Returns
    -------
    AudioPreprocessor
        A ``functools.partial`` of `preprocess_audio_clip` with ``config``
        already filled in.
    """
    bound_preprocessor = partial(preprocess_audio_clip, config=config)
    return bound_preprocessor
|
||||||
|
|
||||||
|
|
||||||
def preprocess_audio_clip(
|
def preprocess_audio_clip(
|
||||||
clip: data.Clip,
|
clip: data.Clip,
|
||||||
config: Optional[PreprocessingConfig] = None,
|
config: Optional[PreprocessingConfig] = None,
|
||||||
|
@ -1,4 +1,26 @@
|
|||||||
from typing import Optional
|
"""Handles loading and initial preprocessing of audio waveforms.
|
||||||
|
|
||||||
|
This module provides components for loading audio data associated with
|
||||||
|
`soundevent` objects (Clips, Recordings, or raw files) and applying
|
||||||
|
fundamental waveform processing steps. These steps typically include:
|
||||||
|
|
||||||
|
1. Loading the raw audio data.
|
||||||
|
2. Adjusting the audio clip to a fixed duration (optional).
|
||||||
|
3. Resampling the audio to a target sample rate (optional).
|
||||||
|
4. Centering the waveform (DC offset removal) (optional).
|
||||||
|
5. Scaling the waveform amplitude (optional).
|
||||||
|
|
||||||
|
The processing pipeline is configurable via the `AudioConfig` data structure,
|
||||||
|
allowing for reproducible preprocessing consistent between model training and
|
||||||
|
inference. It uses the `soundevent` library for audio loading and basic array
|
||||||
|
operations, and `scipy` for resampling implementations.
|
||||||
|
|
||||||
|
The primary interface is the `AudioLoader` protocol, with
|
||||||
|
`ConfigurableAudioLoader` providing a concrete implementation driven by the
|
||||||
|
`AudioConfig`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional, Protocol
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import xarray as xr
|
import xarray as xr
|
||||||
@ -10,29 +32,334 @@ from soundevent.arrays import operations as ops
|
|||||||
|
|
||||||
from batdetect2.configs import BaseConfig
|
from batdetect2.configs import BaseConfig
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"AudioLoader",
|
||||||
|
"ResampleConfig",
|
||||||
|
"AudioConfig",
|
||||||
|
"ConfigurableAudioLoader",
|
||||||
|
"build_audio_loader",
|
||||||
|
"load_file_audio",
|
||||||
|
"load_recording_audio",
|
||||||
|
"load_clip_audio",
|
||||||
|
"adjust_audio_duration",
|
||||||
|
"resample_audio",
|
||||||
|
"TARGET_SAMPLERATE_HZ",
|
||||||
|
"SCALE_RAW_AUDIO",
|
||||||
|
"DEFAULT_DURATION",
|
||||||
|
]
|
||||||
|
|
||||||
TARGET_SAMPLERATE_HZ = 256_000
|
TARGET_SAMPLERATE_HZ = 256_000
|
||||||
|
"""Default target sample rate in Hz used if resampling is enabled."""
|
||||||
|
|
||||||
SCALE_RAW_AUDIO = False
|
SCALE_RAW_AUDIO = False
|
||||||
|
"""Default setting for whether to perform peak normalization."""
|
||||||
|
|
||||||
DEFAULT_DURATION = None
|
DEFAULT_DURATION = None
|
||||||
|
"""Default setting for target audio duration in seconds."""
|
||||||
|
|
||||||
|
|
||||||
|
class AudioLoader(Protocol):
    """Interface for components that load and preprocess audio.

    An implementation retrieves the waveform behind a file path, a
    Recording or a Clip and returns it after applying its configured
    initial preprocessing. Code written against this protocol can swap
    loading strategies without further changes.
    """

    def load_file(
        self,
        path: data.PathLike,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Return the preprocessed waveform stored at `path`.

        Parameters
        ----------
        path : PathLike
            Location of the audio file.
        audio_dir : PathLike, optional
            Directory prefix prepended to `path` when `path` is relative.

        Returns
        -------
        xr.DataArray
            The loaded and preprocessed waveform with time coordinates.
            Implementations typically return only the first channel.

        Raises
        ------
        FileNotFoundError
            If the audio file cannot be found.
        Exception
            If the audio file cannot be loaded or processed.
        """
        ...

    def load_recording(
        self,
        recording: data.Recording,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Return the preprocessed waveform of a whole Recording.

        Parameters
        ----------
        recording : data.Recording
            Recording object carrying the metadata of the audio file.
        audio_dir : PathLike, optional
            Directory in which the recording's audio file can be found,
            relevant mainly when the recording stores a relative path.

        Returns
        -------
        xr.DataArray
            The loaded and preprocessed waveform. Implementations
            typically return only the first channel.

        Raises
        ------
        FileNotFoundError
            If the audio file associated with the recording cannot be
            found.
        Exception
            If the audio file cannot be loaded or processed.
        """
        ...

    def load_clip(
        self,
        clip: data.Clip,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Return the preprocessed waveform of the segment given by a Clip.

        Parameters
        ----------
        clip : data.Clip
            Clip object naming the recording and the start/end times of
            the segment to load.
        audio_dir : PathLike, optional
            Directory in which the audio file of the clip's recording can
            be found.

        Returns
        -------
        xr.DataArray
            The loaded and preprocessed waveform covering the clip.
            Implementations typically return only the first channel.

        Raises
        ------
        FileNotFoundError
            If the audio file associated with the clip cannot be found.
        Exception
            If the audio file cannot be loaded or processed.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
class ResampleConfig(BaseConfig):
    """Settings controlling how audio is resampled.

    Attributes
    ----------
    samplerate : int, default=TARGET_SAMPLERATE_HZ
        Sample rate, in Hz, that audio is resampled to. Declared with
        ``gt=0``, so it must be strictly positive.
    mode : str, default="poly"
        Name of the resampling algorithm. Documented options:

        - "poly": polyphase resampling via `scipy.signal.resample_poly`.
          Generally fast.
        - "fourier": Fourier-method resampling via `scipy.signal.resample`.
          May treat non-integer resampling factors differently.
    """

    samplerate: int = Field(default=TARGET_SAMPLERATE_HZ, gt=0)
    mode: str = "poly"
|
||||||
|
|
||||||
|
|
||||||
class AudioConfig(BaseConfig):
    """Settings for loading audio and its initial preprocessing.

    Each field switches on or parametrises one operation applied to the
    raw waveform after loading: resampling, amplitude scaling, centering
    and duration adjustment.

    Attributes
    ----------
    resample : ResampleConfig, optional
        Resampling settings. Defaults to a freshly built `ResampleConfig`,
        in which case audio is resampled to its `samplerate` using its
        `mode`. Set to `None` in the config file to skip resampling.
    scale : bool, default=SCALE_RAW_AUDIO
        When True, the waveform is peak-normalised so that its maximum
        absolute amplitude is approximately 1.0. When False, amplitudes
        are left untouched.
    center : bool, default=True
        When True, the mean of the waveform is subtracted (DC offset
        removal). When False, the waveform is not centered.
    duration : float, optional
        Target duration in seconds. When set, the loaded audio is cropped
        or zero-padded to exactly this length. When None, the original
        duration is kept.
    """

    resample: Optional[ResampleConfig] = Field(default_factory=ResampleConfig)
    scale: bool = SCALE_RAW_AUDIO
    center: bool = True
    duration: Optional[float] = DEFAULT_DURATION
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigurableAudioLoader:
    """`AudioLoader` implementation configured by an `AudioConfig`.

    The configuration given at construction time is kept on the instance
    and forwarded unchanged to the module-level loading functions, which
    carry out the actual loading and preprocessing (resampling, scaling,
    centering, duration adjustment).
    """

    def __init__(
        self,
        config: AudioConfig,
    ):
        """Store the configuration used by every load call.

        Parameters
        ----------
        config : AudioConfig
            Configuration describing the preprocessing steps and their
            parameters.
        """
        self.config = config

    def load_file(
        self,
        path: data.PathLike,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Load and preprocess the audio file at `path`.

        Fulfils `AudioLoader.load_file` by calling `load_file_audio` with
        the stored configuration.

        Parameters
        ----------
        path : PathLike
            Location of the audio file.
        audio_dir : PathLike, optional
            Directory prefix used when `path` is relative.

        Returns
        -------
        xr.DataArray
            Loaded and preprocessed waveform (first channel).
        """
        return load_file_audio(
            path,
            audio_dir=audio_dir,
            config=self.config,
        )

    def load_recording(
        self,
        recording: data.Recording,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Load and preprocess the full audio of `recording`.

        Fulfils `AudioLoader.load_recording` by calling
        `load_recording_audio` with the stored configuration.

        Parameters
        ----------
        recording : data.Recording
            The Recording object to load.
        audio_dir : PathLike, optional
            Directory containing the audio file.

        Returns
        -------
        xr.DataArray
            Loaded and preprocessed waveform (first channel).
        """
        return load_recording_audio(
            recording,
            audio_dir=audio_dir,
            config=self.config,
        )

    def load_clip(
        self,
        clip: data.Clip,
        audio_dir: Optional[data.PathLike] = None,
    ) -> xr.DataArray:
        """Load and preprocess the audio segment described by `clip`.

        Fulfils `AudioLoader.load_clip` by calling `load_clip_audio` with
        the stored configuration.

        Parameters
        ----------
        clip : data.Clip
            The Clip object specifying the segment.
        audio_dir : PathLike, optional
            Directory containing the audio file.

        Returns
        -------
        xr.DataArray
            Loaded and preprocessed waveform segment (first channel).
        """
        return load_clip_audio(
            clip,
            audio_dir=audio_dir,
            config=self.config,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def build_audio_loader(
    config: AudioConfig,
) -> AudioLoader:
    """Build an `AudioLoader` from an audio configuration.

    Constructs a `ConfigurableAudioLoader` around `config`. The declared
    return type is the `AudioLoader` protocol, so callers depend only on
    the interface.

    Parameters
    ----------
    config : AudioConfig
        Configuration describing the preprocessing steps.

    Returns
    -------
    AudioLoader
        A `ConfigurableAudioLoader` instance that loads and processes
        audio according to `config`.
    """
    loader = ConfigurableAudioLoader(config=config)
    return loader
|
||||||
|
|
||||||
|
|
||||||
def load_file_audio(
|
def load_file_audio(
|
||||||
path: data.PathLike,
|
path: data.PathLike,
|
||||||
config: Optional[AudioConfig] = None,
|
config: Optional[AudioConfig] = None,
|
||||||
audio_dir: Optional[data.PathLike] = None,
|
audio_dir: Optional[data.PathLike] = None,
|
||||||
dtype: DTypeLike = np.float32, # type: ignore
|
dtype: DTypeLike = np.float32, # type: ignore
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
|
"""Load and preprocess audio from a file path using specified config.
|
||||||
|
|
||||||
|
Creates a `soundevent.data.Recording` object from the file path and then
|
||||||
|
delegates the loading and processing to `load_recording_audio`.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
path : PathLike
|
||||||
|
Path to the audio file.
|
||||||
|
config : AudioConfig, optional
|
||||||
|
Audio processing configuration. If None, default settings defined
|
||||||
|
in `AudioConfig` are used.
|
||||||
|
audio_dir : PathLike, optional
|
||||||
|
Directory prefix if `path` is relative.
|
||||||
|
dtype : DTypeLike, default=np.float32
|
||||||
|
Target NumPy data type for the loaded audio array.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
xr.DataArray
|
||||||
|
Loaded and preprocessed waveform (first channel only).
|
||||||
|
"""
|
||||||
recording = data.Recording.from_file(path)
|
recording = data.Recording.from_file(path)
|
||||||
return load_recording_audio(
|
return load_recording_audio(
|
||||||
recording,
|
recording,
|
||||||
@ -48,6 +375,28 @@ def load_recording_audio(
|
|||||||
audio_dir: Optional[data.PathLike] = None,
|
audio_dir: Optional[data.PathLike] = None,
|
||||||
dtype: DTypeLike = np.float32, # type: ignore
|
dtype: DTypeLike = np.float32, # type: ignore
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
|
"""Load and preprocess the entire audio content of a recording using config.
|
||||||
|
|
||||||
|
Creates a `soundevent.data.Clip` spanning the full duration of the
|
||||||
|
recording and then delegates the loading and processing to `load_clip_audio`.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
recording : data.Recording
|
||||||
|
The Recording object containing metadata and file path.
|
||||||
|
config : AudioConfig, optional
|
||||||
|
Audio processing configuration. If None, default settings are used.
|
||||||
|
audio_dir : PathLike, optional
|
||||||
|
Directory containing the audio file, used if the path in `recording`
|
||||||
|
is relative.
|
||||||
|
dtype : DTypeLike, default=np.float32
|
||||||
|
Target NumPy data type for the loaded audio array.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
xr.DataArray
|
||||||
|
Loaded and preprocessed waveform (first channel only).
|
||||||
|
"""
|
||||||
clip = data.Clip(
|
clip = data.Clip(
|
||||||
recording=recording,
|
recording=recording,
|
||||||
start_time=0,
|
start_time=0,
|
||||||
@ -67,6 +416,49 @@ def load_clip_audio(
|
|||||||
audio_dir: Optional[data.PathLike] = None,
|
audio_dir: Optional[data.PathLike] = None,
|
||||||
dtype: DTypeLike = np.float32, # type: ignore
|
dtype: DTypeLike = np.float32, # type: ignore
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
|
"""Load and preprocess a specific audio clip segment based on config.
|
||||||
|
|
||||||
|
This is the core function performing the configured processing pipeline:
|
||||||
|
1. Loads the specified clip segment using `soundevent.audio.load_clip`.
|
||||||
|
2. Selects the first audio channel.
|
||||||
|
3. Adjusts duration (crop/pad) if `config.duration` is set.
|
||||||
|
4. Resamples if `config.resample` is configured.
|
||||||
|
5. Centers (DC offset removal) if `config.center` is True.
|
||||||
|
6. Scales (peak normalization) if `config.scale` is True.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
clip : data.Clip
|
||||||
|
The Clip object defining the audio segment and source recording.
|
||||||
|
config : AudioConfig, optional
|
||||||
|
Audio processing configuration. If None, a default `AudioConfig` is
|
||||||
|
used.
|
||||||
|
audio_dir : PathLike, optional
|
||||||
|
Directory containing the source audio file specified in the clip's
|
||||||
|
recording.
|
||||||
|
dtype : DTypeLike, default=np.float32
|
||||||
|
Target NumPy data type for the processed audio array.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
xr.DataArray
|
||||||
|
The loaded and preprocessed waveform segment as an xarray DataArray
|
||||||
|
with time coordinates.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
FileNotFoundError
|
||||||
|
If the underlying audio file cannot be found.
|
||||||
|
Exception
|
||||||
|
If audio loading or processing fails for other reasons (e.g., invalid
|
||||||
|
format, resampling error).
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
- **Mono Processing:** This function currently loads and processes only the
|
||||||
|
**first channel** (channel 0) of the audio file. Any other channels
|
||||||
|
are ignored.
|
||||||
|
"""
|
||||||
config = config or AudioConfig()
|
config = config or AudioConfig()
|
||||||
|
|
||||||
wav = (
|
wav = (
|
||||||
@ -96,6 +488,30 @@ def adjust_audio_duration(
|
|||||||
wave: xr.DataArray,
|
wave: xr.DataArray,
|
||||||
duration: float,
|
duration: float,
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
|
"""Adjust the duration of an audio waveform array via cropping or padding.
|
||||||
|
|
||||||
|
If the current duration is longer than the target, it crops the array
|
||||||
|
from the beginning. If shorter, it pads the array with zeros at the end
|
||||||
|
using `soundevent.arrays.extend_dim`.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
wave : xr.DataArray
|
||||||
|
The input audio waveform with a 'time' dimension and coordinates.
|
||||||
|
duration : float
|
||||||
|
The target duration in seconds.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
xr.DataArray
|
||||||
|
The waveform adjusted to the target duration. Returns the input
|
||||||
|
unmodified if duration already matches or if the wave is empty.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
If `duration` is negative.
|
||||||
|
"""
|
||||||
start_time, end_time = arrays.get_dim_range(wave, dim="time")
|
start_time, end_time = arrays.get_dim_range(wave, dim="time")
|
||||||
current_duration = end_time - start_time
|
current_duration = end_time - start_time
|
||||||
|
|
||||||
@ -124,6 +540,36 @@ def resample_audio(
|
|||||||
mode: str = "poly",
|
mode: str = "poly",
|
||||||
dtype: DTypeLike = np.float32, # type: ignore
|
dtype: DTypeLike = np.float32, # type: ignore
|
||||||
) -> xr.DataArray:
|
) -> xr.DataArray:
|
||||||
|
"""Resample an audio waveform DataArray to a target sample rate.
|
||||||
|
|
||||||
|
Updates the 'time' coordinate axis according to the new sample rate and
|
||||||
|
number of samples. Uses either polyphase (`scipy.signal.resample_poly`)
|
||||||
|
or Fourier method (`scipy.signal.resample`) based on the `mode`.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
wav : xr.DataArray
|
||||||
|
Input audio waveform with 'time' dimension and coordinates.
|
||||||
|
samplerate : int, default=TARGET_SAMPLERATE_HZ
|
||||||
|
Target sample rate in Hz.
|
||||||
|
mode : str, default="poly"
|
||||||
|
Resampling algorithm: "poly" or "fourier".
|
||||||
|
dtype : DTypeLike, default=np.float32
|
||||||
|
Target data type for the resampled array.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
xr.DataArray
|
||||||
|
Resampled waveform with updated time coordinates. Returns the input
|
||||||
|
unmodified (but dtype cast) if the sample rate is already correct or
|
||||||
|
if the input array is empty.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
If `wav` lacks a 'time' dimension, the original sample rate cannot
|
||||||
|
be determined, `samplerate` is non-positive, or `mode` is invalid.
|
||||||
|
"""
|
||||||
if "time" not in wav.dims:
|
if "time" not in wav.dims:
|
||||||
raise ValueError("Audio must have a time dimension")
|
raise ValueError("Audio must have a time dimension")
|
||||||
|
|
||||||
@ -180,6 +626,33 @@ def resample_audio_poly(
|
|||||||
sr_new: int,
|
sr_new: int,
|
||||||
axis: int = -1,
|
axis: int = -1,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
|
"""Resample a numpy array using `scipy.signal.resample_poly`.
|
||||||
|
|
||||||
|
This method is often preferred for signals when the ratio of new
|
||||||
|
to old sample rates can be expressed as a rational number. It uses
|
||||||
|
polyphase filtering.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
array : xr.DataArray
|
||||||
|
The input array to resample; its `.values` are passed to `resample_poly`.
|
||||||
|
sr_orig : int
|
||||||
|
The original sample rate in Hz.
|
||||||
|
sr_new : int
|
||||||
|
The target sample rate in Hz.
|
||||||
|
axis : int, default=-1
|
||||||
|
The axis of `array` along which to resample.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
np.ndarray
|
||||||
|
The array resampled to the target sample rate.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
If sample rates are not positive.
|
||||||
|
"""
|
||||||
gcd = np.gcd(sr_orig, sr_new)
|
gcd = np.gcd(sr_orig, sr_new)
|
||||||
return resample_poly(
|
return resample_poly(
|
||||||
array.values,
|
array.values,
|
||||||
@ -195,5 +668,28 @@ def resample_audio_fourier(
|
|||||||
sr_new: int,
|
sr_new: int,
|
||||||
axis: int = -1,
|
axis: int = -1,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
|
"""Resample a numpy array using `scipy.signal.resample`.
|
||||||
|
|
||||||
|
This method uses FFTs to resample the signal.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
array : np.ndarray
|
||||||
|
The input array to resample.
|
||||||
|
sr_orig : int
|
||||||
|
The original sample rate in Hz.
|
||||||
|
sr_new : int
|
||||||
|
The target sample rate in Hz.
|
||||||
|
axis : int, default=-1
|
||||||
|
The axis of `array` along which to resample.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
np.ndarray
|
||||||
|
The array resampled to `int(array.shape[axis] * sr_new / sr_orig)` samples along `axis`.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
ValueError
|
||||||
|
If the computed number of output samples is negative.
|
||||||
|
"""
|
||||||
ratio = sr_new / sr_orig
|
ratio = sr_new / sr_orig
|
||||||
return resample(array, int(array.shape[axis] * ratio), axis=axis) # type: ignore
|
return resample(array, int(array.shape[axis] * ratio), axis=axis) # type: ignore
|
||||||
|
Loading…
Reference in New Issue
Block a user