Compare commits

..

3 Commits

Author SHA1 Message Date
Kavi
4282e2ae70 Added AudioPath as an alias for the path definition 2025-05-16 15:13:08 +02:00
Kavi
52570738f2 Renamed load_audio_data to load_audio_and_samplerate 2025-05-16 14:56:35 +02:00
Kavi
cbd362d6ea Updated docstrings for tests 2025-05-16 14:53:35 +02:00
6 changed files with 32 additions and 30 deletions

View File

@ -99,6 +99,7 @@ consult the API documentation in the code.
import warnings import warnings
from typing import List, Optional, Tuple, BinaryIO, Any, Union from typing import List, Optional, Tuple, BinaryIO, Any, Union
from .types import AudioPath
import numpy as np import numpy as np
import torch import torch
@ -244,9 +245,7 @@ def generate_spectrogram(
def process_file( def process_file(
path: Union[ path: AudioPath,
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
model: DetectionModel = MODEL, model: DetectionModel = MODEL,
config: Optional[ProcessingConfiguration] = None, config: Optional[ProcessingConfiguration] = None,
device: torch.device = DEVICE, device: torch.device = DEVICE,
@ -256,9 +255,7 @@ def process_file(
Parameters Parameters
---------- ----------
path : Union[ path : AudioPath
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
]
Path to audio data. Path to audio data.
model : DetectionModel, optional model : DetectionModel, optional
Detection model. Uses default model if not specified. Detection model. Uses default model if not specified.

View File

@ -1,6 +1,10 @@
"""Types used in the code base.""" """Types used in the code base."""
from typing import List, NamedTuple, Optional, Union from typing import List, NamedTuple, Optional, Union, Any, BinaryIO
import audioread
import os
import soundfile as sf
import numpy as np import numpy as np
import torch import torch
@ -40,6 +44,9 @@ __all__ = [
"SpectrogramParameters", "SpectrogramParameters",
] ]
AudioPath = Union[
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
]
class SpectrogramParameters(TypedDict): class SpectrogramParameters(TypedDict):
"""Parameters for generating spectrograms.""" """Parameters for generating spectrograms."""

View File

@ -1,6 +1,8 @@
import warnings import warnings
from typing import Optional, Tuple, Union, Any, BinaryIO from typing import Optional, Tuple, Union, Any, BinaryIO
from ..types import AudioPath
import librosa import librosa
import librosa.core.spectrum import librosa.core.spectrum
import numpy as np import numpy as np
@ -9,7 +11,6 @@ import torch
import audioread import audioread
import os import os
import soundfile as sf import soundfile as sf
import io
from batdetect2.detector import parameters from batdetect2.detector import parameters
@ -17,7 +18,7 @@ from . import wavfile
__all__ = [ __all__ = [
"load_audio", "load_audio",
"load_audio_data", "load_audio_and_samplerate",
"generate_spectrogram", "generate_spectrogram",
"pad_audio", "pad_audio",
] ]
@ -147,9 +148,7 @@ def generate_spectrogram(
return spec, spec_for_viz return spec, spec_for_viz
def load_audio( def load_audio(
path: Union[ path: AudioPath,
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
time_exp_fact: float, time_exp_fact: float,
target_samp_rate: int, target_samp_rate: int,
scale: bool = False, scale: bool = False,
@ -174,13 +173,11 @@ def load_audio(
ValueError: If the audio file is stereo. ValueError: If the audio file is stereo.
""" """
sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration) sample_rate, audio_data, _ = load_audio_and_samplerate(path, time_exp_fact, target_samp_rate, scale, max_duration)
return sample_rate, audio_data return sample_rate, audio_data
def load_audio_data( def load_audio_and_samplerate(
path: Union[ path: AudioPath,
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
time_exp_fact: float, time_exp_fact: float,
target_samp_rate: int, target_samp_rate: int,
scale: bool = False, scale: bool = False,
@ -200,6 +197,7 @@ def load_audio_data(
Returns: Returns:
sampling_rate: The sampling rate of the audio. sampling_rate: The sampling rate of the audio.
audio_raw: The audio signal in a numpy array. audio_raw: The audio signal in a numpy array.
file_sampling_rate: The original sampling rate of the audio.
Raises: Raises:
ValueError: If the audio file is stereo. ValueError: If the audio file is stereo.

View File

@ -2,7 +2,8 @@ import json
import os import os
from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO
import librosa from ..types import AudioPath
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import torch import torch
@ -736,9 +737,7 @@ def process_audio_array(
def process_file( def process_file(
path: Union[ path: AudioPath,
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
model: DetectionModel, model: DetectionModel,
config: ProcessingConfiguration, config: ProcessingConfiguration,
device: torch.device, device: torch.device,
@ -751,7 +750,7 @@ def process_file(
Parameters Parameters
---------- ----------
path : str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO path : AudioPath
Path to audio file. Path to audio file.
model : torch.nn.Module model : torch.nn.Module
@ -760,6 +759,9 @@ def process_file(
config : ProcessingConfiguration config : ProcessingConfiguration
Configuration for processing. Configuration for processing.
file_id : Optional[str]
Identifier to assign to the data. Defaults to the basename of the file if path is a str or PathLike; otherwise an MD5 checksum is calculated from the binary data.
Returns Returns
------- -------
results : Results or Any results : Results or Any
@ -773,7 +775,7 @@ def process_file(
spec_slices = [] spec_slices = []
# load audio file # load audio file
sampling_rate, audio_full, file_samp_rate = au.load_audio_data( sampling_rate, audio_full, file_samp_rate = au.load_audio_and_samplerate(
path, path,
time_exp_fact=config.get("time_expansion", 1) or 1, time_exp_fact=config.get("time_expansion", 1) or 1,
target_samp_rate=config["target_samp_rate"], target_samp_rate=config["target_samp_rate"],
@ -857,9 +859,7 @@ def process_file(
return results return results
def _generate_id(path: Union[ def _generate_id(path: AudioPath) -> str:
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
]) -> str:
""" Generate an id based on the path. """ Generate an id based on the path.
If the path is a str or PathLike it will be parsed as the basename. If the path is a str or PathLike it will be parsed as the basename.

View File

@ -284,7 +284,7 @@ def test_process_file_with_empty_predictions_does_not_fail(
assert len(results["pred_dict"]["annotation"]) == 0 assert len(results["pred_dict"]["annotation"]) == 0
def test_process_file_file_id_defaults_to_basename(): def test_process_file_file_id_defaults_to_basename():
"""Test that no detections are made above the nyquist frequency.""" """Test that process_file assigns basename as an id if no file_id is provided."""
# Recording donated by @@kdarras # Recording donated by @@kdarras
basename = "20230322_172000_selec2.wav" basename = "20230322_172000_selec2.wav"
path = os.path.join(DATA_DIR, basename) path = os.path.join(DATA_DIR, basename)
@ -295,7 +295,7 @@ def test_process_file_file_id_defaults_to_basename():
assert id == basename assert id == basename
def test_bytesio_file_id_defaults_to_md5(): def test_bytesio_file_id_defaults_to_md5():
"""Test that no detections are made above the nyquist frequency.""" """Test that process_file assigns an md5 sum as an id if no file_id is provided when using binary data."""
# Recording donated by @@kdarras # Recording donated by @@kdarras
basename = "20230322_172000_selec2.wav" basename = "20230322_172000_selec2.wav"
path = os.path.join(DATA_DIR, basename) path = os.path.join(DATA_DIR, basename)

View File

@ -146,9 +146,9 @@ def test_load_audio_using_bytesio():
with open(path, "rb") as f: with open(path, "rb") as f:
data = io.BytesIO(f.read()) data = io.BytesIO(f.read())
sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_and_samplerate(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_and_samplerate(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
assert expected_sample_rate == sample_rate assert expected_sample_rate == sample_rate
assert exp_file_sample_rate == file_sample_rate assert exp_file_sample_rate == file_sample_rate