From 66ac7e608fdc2ee597aa445acc5621f5eeefccc1 Mon Sep 17 00:00:00 2001 From: Kavi Date: Tue, 25 Feb 2025 14:24:48 +0100 Subject: [PATCH 01/11] Changed the signature of api.process_file, au.load_audio and du.process_file. This allows users to use the same args for processing data as librosa.load() --- batdetect2/api.py | 23 ++++++++++++++++++----- batdetect2/utils/audio_utils.py | 28 ++++++++++++++++++++-------- batdetect2/utils/detector_utils.py | 29 ++++++++++++++++++++--------- tests/test_audio_utils.py | 11 ++++++++++- 4 files changed, 68 insertions(+), 23 deletions(-) diff --git a/batdetect2/api.py b/batdetect2/api.py index 4d04f42..1a5f14c 100644 --- a/batdetect2/api.py +++ b/batdetect2/api.py @@ -97,7 +97,7 @@ consult the API documentation in the code. """ import warnings -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, BinaryIO, Any, Union import numpy as np import torch @@ -120,6 +120,10 @@ from batdetect2.types import ( ) from batdetect2.utils.detector_utils import list_audio_files, load_model +import audioread +import os +import soundfile as sf + # Remove warnings from torch warnings.filterwarnings("ignore", category=UserWarning, module="torch") @@ -238,32 +242,41 @@ def generate_spectrogram( def process_file( - audio_file: str, + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ], model: DetectionModel = MODEL, config: Optional[ProcessingConfiguration] = None, device: torch.device = DEVICE, + file_id: str | None = None ) -> du.RunResults: """Process audio file with model. Parameters ---------- - audio_file : str - Path to audio file. + path : Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ] + Path to audio data. model : DetectionModel, optional Detection model. Uses default model if not specified. config : Optional[ProcessingConfiguration], optional Processing configuration, by default None (uses default parameters). device : torch.device, optional Device to use, by default tries to use GPU if available. + file_id: Optional[str], + Give the data an id. If path is a string path to a file this can be ignored and + the file_id will be the basename of the file. """ if config is None: config = CONFIG return du.process_file( - audio_file, + path, model, config, device, + file_id ) diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index a60ea94..77ba5f5 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -1,11 +1,15 @@ import warnings -from typing import Optional, Tuple +from typing import Optional, Tuple, Union, Any, BinaryIO import librosa import librosa.core.spectrum import numpy as np import torch +import audioread +import os +import soundfile as sf + from batdetect2.detector import parameters from . import wavfile @@ -140,21 +144,29 @@ def generate_spectrogram( return spec, spec_for_viz +def get_samplerate( + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ]): + with sf.SoundFile(path) as f: + return f.samplerate def load_audio( - audio_file: str, + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ], time_exp_fact: float, target_samp_rate: int, scale: bool = False, max_duration: Optional[float] = None, -) -> Tuple[int, np.ndarray]: +) -> Tuple[int, np.ndarray ]: """Load an audio file and resample it to the target sampling rate. The audio is also scaled to [-1, 1] and clipped to the maximum duration. Only mono files are supported. Args: - audio_file (str): Path to the audio file. + path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. target_samp_rate (int): Target sampling rate. scale (bool): Whether to scale the audio to [-1, 1]. max_duration (float): Maximum duration of the audio in seconds. @@ -170,16 +182,16 @@ def load_audio( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=wavfile.WavFileWarning) # sampling_rate, audio_raw = wavfile.read(audio_file) - audio_raw, sampling_rate = librosa.load( - audio_file, + audio_raw, file_sampling_rate = librosa.load( + path, sr=None, dtype=np.float32, ) - + if len(audio_raw.shape) > 1: raise ValueError("Currently does not handle stereo files") - sampling_rate = sampling_rate * time_exp_fact + sampling_rate = file_sampling_rate * time_exp_fact # resample - need to do this after correcting for time expansion sampling_rate_old = sampling_rate diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index 63643b6..f7392f8 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Iterator, List, Optional, Tuple, Union +from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO import librosa import numpy as np @@ -31,6 +31,11 @@ from batdetect2.types import ( SpectrogramParameters, ) +import audioread +import os +import soundfile as sf + + __all__ = [ "load_model", "list_audio_files", @@ -729,10 +734,13 @@ def process_audio_array( def process_file( - audio_file: str, + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ], model: DetectionModel, config: ProcessingConfiguration, device: torch.device, + file_id: str | None = None ) -> Union[RunResults, Any]: """Process a single audio file with detection model. @@ -741,7 +749,7 @@ def process_file( Parameters ---------- - audio_file : str + path : str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO Path to audio file. model : torch.nn.Module @@ -762,18 +770,17 @@ def process_file( cnn_feats = [] spec_slices = [] - # Get original sampling rate - file_samp_rate = librosa.get_samplerate(audio_file) - orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1) - # load audio file sampling_rate, audio_full = au.load_audio( - audio_file, + path, time_exp_fact=config.get("time_expansion", 1) or 1, target_samp_rate=config["target_samp_rate"], scale=config["scale_raw_audio"], max_duration=config.get("max_duration"), ) + file_samp_rate = au.get_samplerate(path) + + orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1) # loop through larger file and split into chunks # TODO: fix so that it overlaps correctly and takes care of @@ -823,9 +830,13 @@ def process_file( spec_slices, ) + _file_id = file_id + if _file_id is None: + _file_id = os.path.basename(path) if isinstance(path, str) else "unknown" + # convert results to a dictionary in the right format results = convert_results( - file_id=os.path.basename(audio_file), + file_id=_file_id, time_exp=config.get("time_expansion", 1) or 1, duration=audio_full.shape[0] / float(sampling_rate), params=config, diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index 1b489bc..ebe2f0f 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -6,7 +6,8 @@ from hypothesis import strategies as st from batdetect2.detector import parameters from batdetect2.utils import audio_utils, detector_utils - +import io +import requests @given(duration=st.floats(min_value=0.1, max_value=2)) def test_can_compute_correct_spectrogram_width(duration: float): @@ -134,3 +135,11 @@ def test_pad_audio_with_fixed_width(duration: float, width: int): resize_factor=params["resize_factor"], ) assert expected_width == width + +def test_get_samplerate_using_bytesio(): + audio_url="https://anon.erda.au.dk/share_redirect/e5c7G2AWmg/F1/20240724/2MU02597/BIOBD01_20240626_231650.wav" + + sample_rate = audio_utils.get_samplerate(io.BytesIO(requests.get(audio_url).content)) + + expected_sample_rate = 256000 + assert expected_sample_rate == sample_rate From 838a1ade0d93b8c0723f1fa138e2ad8dd3730a04 Mon Sep 17 00:00:00 2001 From: Kavi Date: Tue, 25 Feb 2025 14:46:40 +0100 Subject: [PATCH 02/11] Updated get_samplerate test to use example data file. --- tests/test_audio_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index ebe2f0f..c223ecf 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -137,9 +137,10 @@ def test_pad_audio_with_fixed_width(duration: float, width: int): assert expected_width == width def test_get_samplerate_using_bytesio(): - audio_url="https://anon.erda.au.dk/share_redirect/e5c7G2AWmg/F1/20240724/2MU02597/BIOBD01_20240626_231650.wav" + with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f: + audio_bytes = io.BytesIO(f.read()) - sample_rate = audio_utils.get_samplerate(io.BytesIO(requests.get(audio_url).content)) + sample_rate = audio_utils.get_samplerate(audio_bytes) - expected_sample_rate = 256000 + expected_sample_rate = 500000 assert expected_sample_rate == sample_rate From 6af7fef3167c870b950ceea5c64349ec76c4cbfd Mon Sep 17 00:00:00 2001 From: Kavi Date: Wed, 26 Feb 2025 14:11:11 +0100 Subject: [PATCH 03/11] Fix 'unknown' id by providing a _generate_id() function. --- batdetect2/utils/detector_utils.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index f7392f8..4c1eefe 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -33,8 +33,10 @@ from batdetect2.types import ( import audioread import os +import io import soundfile as sf - +import hashlib +import uuid __all__ = [ "load_model", @@ -832,7 +834,7 @@ def process_file( _file_id = file_id if _file_id is None: - _file_id = os.path.basename(path) if isinstance(path, str) else "unknown" + _file_id = _generate_id(path) # convert results to a dictionary in the right format results = convert_results( @@ -856,6 +858,24 @@ def process_file( return results +def _generate_id(path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ]) -> str: + """ Generate an id based on the path. + + If the path is a str or PathLike it will parsed as the basename. + This should ensure backwards compatibility with previous versions. + """ + if isinstance(path, str) or isinstance(path, os.PathLike): + return os.path.basename(path) + elif isinstance(path, (BinaryIO, io.BytesIO)): + path.seek(0) + md5 = hashlib.md5(path.read()).hexdigest() + path.seek(0) + return md5 + else: + return str(uuid.uuid4()) + def summarize_results(results, predictions, config): """Print summary of results.""" From e10e270de49e81daed463ec7d8713e97d2bdeefe Mon Sep 17 00:00:00 2001 From: Kavi Date: Wed, 26 Feb 2025 14:12:09 +0100 Subject: [PATCH 04/11] Fix error in get_samplerate when reading io.BytesIO. --- batdetect2/utils/audio_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index 77ba5f5..a31397c 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -9,6 +9,7 @@ import torch import audioread import os import soundfile as sf +import io from batdetect2.detector import parameters @@ -147,7 +148,10 @@ def generate_spectrogram( def get_samplerate( path: Union[ str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ]): + ]): + if isinstance(path, (BinaryIO, io.BytesIO)): + path.seek(0) + with sf.SoundFile(path) as f: return f.samplerate From 47dbdc79c236867f0d5c25daa5b86e0a1737eb2a Mon Sep 17 00:00:00 2001 From: Kavi Date: Wed, 26 Feb 2025 14:12:42 +0100 Subject: [PATCH 05/11] Added tests for api and load_audio --- tests/test_api.py | 27 +++++++++++++++++++++ tests/test_audio_utils.py | 49 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index e828c9e..51149e1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -10,11 +10,13 @@ import torch from torch import nn from batdetect2 import api +import io PKG_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) TEST_DATA_DIR = os.path.join(PKG_DIR, "example_data", "audio") TEST_DATA = glob(os.path.join(TEST_DATA_DIR, "*.wav")) +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") def test_load_model_with_default_params(): """Test loading model with default parameters.""" @@ -280,3 +282,28 @@ def test_process_file_with_empty_predictions_does_not_fail( assert results is not None assert len(results["pred_dict"]["annotation"]) == 0 + +def test_process_file_file_id_defaults_to_basename(): + """Test that no detections are made above the nyquist frequency.""" + # Recording donated by @@kdarras + basename = "20230322_172000_selec2.wav" + path = os.path.join(DATA_DIR, basename) + + output = api.process_file(path) + predictions = output["pred_dict"] + id = predictions["id"] + assert id == basename + +def test_bytesio_file_id_defaults_to_md5(): + """Test that no detections are made above the nyquist frequency.""" + # Recording donated by @@kdarras + basename = "20230322_172000_selec2.wav" + path = os.path.join(DATA_DIR, basename) + + with open(path, "rb") as f: + data = io.BytesIO(f.read()) + + output = api.process_file(data) + predictions = output["pred_dict"] + id = predictions["id"] + assert id == "7ade9ebf1a9fe5477ff3a2dc57001929" diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index c223ecf..9a2afc0 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -7,7 +7,9 @@ from hypothesis import strategies as st from batdetect2.detector import parameters from batdetect2.utils import audio_utils, detector_utils import io -import requests +import os + +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") @given(duration=st.floats(min_value=0.1, max_value=2)) def test_can_compute_correct_spectrogram_width(duration: float): @@ -144,3 +146,48 @@ def test_get_samplerate_using_bytesio(): expected_sample_rate = 500000 assert expected_sample_rate == sample_rate + + + +def test_load_audio_using_bytes(): + filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav" + + with open(filename, "rb") as f: + audio_bytes = io.BytesIO(f.read()) + + sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + + expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + + assert expected_sample_rate == sample_rate + + assert np.array_equal(audio_data, expected_audio_data) + + + +def test_get_samplerate_using_bytesio_2(): + basename = "20230322_172000_selec2.wav" + path = os.path.join(DATA_DIR, basename) + + with open(path, "rb") as f: + audio_bytes = io.BytesIO(f.read()) + + sample_rate = audio_utils.get_samplerate(audio_bytes) + + expected_sample_rate = 192_000 + assert expected_sample_rate == sample_rate + +def test_load_audio_using_bytes_2(): + basename = "20230322_172000_selec2.wav" + path = os.path.join(DATA_DIR, basename) + + with open(path, "rb") as f: + data = io.BytesIO(f.read()) + + sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + + expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + + assert expected_sample_rate == sample_rate + + assert np.array_equal(audio_data, expected_audio_data) \ No newline at end of file From f62bc99ab24e3c503b8aaa9162c713b3749ba22f Mon Sep 17 00:00:00 2001 From: Kavi Date: Wed, 26 Feb 2025 14:13:21 +0100 Subject: [PATCH 06/11] Added api method to process a URL --- batdetect2/api.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/batdetect2/api.py b/batdetect2/api.py index 1a5f14c..8978b63 100644 --- a/batdetect2/api.py +++ b/batdetect2/api.py @@ -123,6 +123,8 @@ from batdetect2.utils.detector_utils import list_audio_files, load_model import audioread import os import soundfile as sf +import requests +import io # Remove warnings from torch warnings.filterwarnings("ignore", category=UserWarning, module="torch") @@ -279,6 +281,49 @@ def process_file( file_id ) +def process_url( + url: str, + model: DetectionModel = MODEL, + config: Optional[ProcessingConfiguration] = None, + device: torch.device = DEVICE, + file_id: str | None = None +) -> du.RunResults: + """Process audio file with model. + + Parameters + ---------- + url : str + HTTP URL to load the audio data from + model : DetectionModel, optional + Detection model. Uses default model if not specified. + config : Optional[ProcessingConfiguration], optional + Processing configuration, by default None (uses default parameters). + device : torch.device, optional + Device to use, by default tries to use GPU if available. + file_id: Optional[str], + Give the data an id. Defaults to the URL + """ + if config is None: + config = CONFIG + + if file_id is None: + file_id = url + + response = requests.get(url) + + # Raise exception on HTTP error + response.raise_for_status() + + # Retrieve body as raw bytes + raw_audio_data = response.content + + return du.process_file( + io.BytesIO(raw_audio_data), + model, + config, + device, + file_id + ) def process_spectrogram( spec: torch.Tensor, From 230b6167bc5ae7a00eeb718b5b918775f2daa502 Mon Sep 17 00:00:00 2001 From: Kavi Date: Thu, 27 Feb 2025 08:10:27 +0100 Subject: [PATCH 07/11] Added load_audio_data() which returns the original sample rate. Changed load_audio() implementation so that it uses load_audio_data but retains its signature. du.process_file() now does not need to call get_samplerate --- batdetect2/utils/audio_utils.py | 44 +++++++++++++++++++++-------- batdetect2/utils/detector_utils.py | 3 +- tests/test_audio_utils.py | 45 +++--------------------------- 3 files changed, 38 insertions(+), 54 deletions(-) diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index a31397c..66714f0 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -17,6 +17,7 @@ from . import wavfile __all__ = [ "load_audio", + "load_audio_data", "generate_spectrogram", "pad_audio", ] @@ -145,16 +146,6 @@ def generate_spectrogram( return spec, spec_for_viz -def get_samplerate( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ]): - if isinstance(path, (BinaryIO, io.BytesIO)): - path.seek(0) - - with sf.SoundFile(path) as f: - return f.samplerate - def load_audio( path: Union[ str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO @@ -169,6 +160,37 @@ def load_audio( The audio is also scaled to [-1, 1] and clipped to the maximum duration. Only mono files are supported. + Args: + path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. + target_samp_rate (int): Target sampling rate. + scale (bool): Whether to scale the audio to [-1, 1]. + max_duration (float): Maximum duration of the audio in seconds. + + Returns: + sampling_rate: The sampling rate of the audio. + audio_raw: The audio signal in a numpy array. + + Raises: + ValueError: If the audio file is stereo. + + """ + sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration) + return sample_rate, audio_data + +def load_audio_data( + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ], + time_exp_fact: float, + target_samp_rate: int, + scale: bool = False, + max_duration: Optional[float] = None, +) -> Tuple[int, np.ndarray, int | float]: + """Load an audio file and resample it to the target sampling rate. + + The audio is also scaled to [-1, 1] and clipped to the maximum duration. + Only mono files are supported. + Args: path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. target_samp_rate (int): Target sampling rate. @@ -223,7 +245,7 @@ def load_audio( audio_raw = audio_raw - audio_raw.mean() audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6) - return sampling_rate, audio_raw + return sampling_rate, audio_raw, file_sampling_rate def compute_spectrogram_width( diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index 4c1eefe..ffbcf6e 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -773,14 +773,13 @@ def process_file( spec_slices = [] # load audio file - sampling_rate, audio_full = au.load_audio( + sampling_rate, audio_full, file_samp_rate = au.load_audio_data( path, time_exp_fact=config.get("time_expansion", 1) or 1, target_samp_rate=config["target_samp_rate"], scale=config["scale_raw_audio"], max_duration=config.get("max_duration"), ) - file_samp_rate = au.get_samplerate(path) orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1) diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index 9a2afc0..a6c08fb 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int): resize_factor=params["resize_factor"], ) assert expected_width == width - -def test_get_samplerate_using_bytesio(): - with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate = audio_utils.get_samplerate(audio_bytes) - - expected_sample_rate = 500000 - assert expected_sample_rate == sample_rate - -def test_load_audio_using_bytes(): - filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav" - - with open(filename, "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - - expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - - assert expected_sample_rate == sample_rate - - assert np.array_equal(audio_data, expected_audio_data) - - - -def test_get_samplerate_using_bytesio_2(): - basename = "20230322_172000_selec2.wav" - path = os.path.join(DATA_DIR, basename) - - with open(path, "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate = audio_utils.get_samplerate(audio_bytes) - - expected_sample_rate = 192_000 - assert expected_sample_rate == sample_rate - -def test_load_audio_using_bytes_2(): +def test_load_audio_using_bytesio(): basename = "20230322_172000_selec2.wav" path = os.path.join(DATA_DIR, basename) with open(path, "rb") as f: data = io.BytesIO(f.read()) - sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) assert expected_sample_rate == sample_rate + assert exp_file_sample_rate == file_sample_rate assert np.array_equal(audio_data, expected_audio_data) \ No newline at end of file From 54ca55558775c43b72c768f6360c996b58629b9a Mon Sep 17 00:00:00 2001 From: Kavi Date: Thu, 27 Feb 2025 13:51:58 +0100 Subject: [PATCH 08/11] Fixed code to support Python3.9 syntax --- batdetect2/api.py | 4 ++-- batdetect2/utils/audio_utils.py | 2 +- batdetect2/utils/detector_utils.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/batdetect2/api.py b/batdetect2/api.py index 8978b63..da2d106 100644 --- a/batdetect2/api.py +++ b/batdetect2/api.py @@ -250,7 +250,7 @@ def process_file( model: DetectionModel = MODEL, config: Optional[ProcessingConfiguration] = None, device: torch.device = DEVICE, - file_id: str | None = None + file_id: Optional[str] = None ) -> du.RunResults: """Process audio file with model. @@ -286,7 +286,7 @@ def process_url( model: DetectionModel = MODEL, config: Optional[ProcessingConfiguration] = None, device: torch.device = DEVICE, - file_id: str | None = None + file_id: Optional[str] = None ) -> du.RunResults: """Process audio file with model. diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index 66714f0..134fee7 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -185,7 +185,7 @@ def load_audio_data( target_samp_rate: int, scale: bool = False, max_duration: Optional[float] = None, -) -> Tuple[int, np.ndarray, int | float]: +) -> Tuple[int, np.ndarray, Union[float, int]]: """Load an audio file and resample it to the target sampling rate. The audio is also scaled to [-1, 1] and clipped to the maximum duration. diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index ffbcf6e..f6e5776 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -742,7 +742,7 @@ def process_file( model: DetectionModel, config: ProcessingConfiguration, device: torch.device, - file_id: str | None = None + file_id: Optional[str] = None ) -> Union[RunResults, Any]: """Process a single audio file with detection model. From cbd362d6ea82e69d8535424556923edf42619e45 Mon Sep 17 00:00:00 2001 From: Kavi Date: Fri, 16 May 2025 14:53:35 +0200 Subject: [PATCH 09/11] Updated docstrings for tests --- tests/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 51149e1..d46786d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -284,7 +284,7 @@ def test_process_file_with_empty_predictions_does_not_fail( assert len(results["pred_dict"]["annotation"]) == 0 def test_process_file_file_id_defaults_to_basename(): - """Test that no detections are made above the nyquist frequency.""" + """Test that process_file assigns basename as an id if no file_id is provided.""" # Recording donated by @@kdarras basename = "20230322_172000_selec2.wav" path = os.path.join(DATA_DIR, basename) @@ -295,7 +295,7 @@ def test_process_file_file_id_defaults_to_basename(): assert id == basename def test_bytesio_file_id_defaults_to_md5(): - """Test that no detections are made above the nyquist frequency.""" + """Test that process_file assigns an md5 sum as an id if no file_id is provided when using binary data.""" # Recording donated by @@kdarras basename = "20230322_172000_selec2.wav" path = os.path.join(DATA_DIR, basename) From 52570738f27d163c12d0d1ed14c025bee782d54b Mon Sep 17 00:00:00 2001 From: Kavi Date: Fri, 16 May 2025 14:56:35 +0200 Subject: [PATCH 10/11] Renamed load_audio_data to load_audio_and_samplerate --- batdetect2/utils/audio_utils.py | 8 ++++---- batdetect2/utils/detector_utils.py | 6 ++++-- tests/test_audio_utils.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index 134fee7..a2cfe44 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -9,7 +9,6 @@ import torch import audioread import os import soundfile as sf -import io from batdetect2.detector import parameters @@ -17,7 +16,7 @@ from . import wavfile __all__ = [ "load_audio", - "load_audio_data", + "load_audio_and_samplerate", "generate_spectrogram", "pad_audio", ] @@ -174,10 +173,10 @@ def load_audio( ValueError: If the audio file is stereo. """ - sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration) + sample_rate, audio_data, _ = load_audio_and_samplerate(path, time_exp_fact, target_samp_rate, scale, max_duration) return sample_rate, audio_data -def load_audio_data( +def load_audio_and_samplerate( path: Union[ str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO ], @@ -200,6 +199,7 @@ def load_audio_data( Returns: sampling_rate: The sampling rate of the audio. audio_raw: The audio signal in a numpy array. + file_sampling_rate: The original sampling rate of the audio Raises: ValueError: If the audio file is stereo. diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index f6e5776..93155b1 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -2,7 +2,6 @@ import json import os from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO -import librosa import numpy as np import pandas as pd import torch @@ -759,6 +758,9 @@ def process_file( config : ProcessingConfiguration Configuration for processing. + + file_id: Optional[str], + Give the data an id. Defaults to the filename if path is a string. Otherwise Returns ------- @@ -773,7 +775,7 @@ def process_file( spec_slices = [] # load audio file - sampling_rate, audio_full, file_samp_rate = au.load_audio_data( + sampling_rate, audio_full, file_samp_rate = au.load_audio_and_samplerate( path, time_exp_fact=config.get("time_expansion", 1) or 1, target_samp_rate=config["target_samp_rate"], diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index a6c08fb..ed64b15 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -146,9 +146,9 @@ def test_load_audio_using_bytesio(): with open(path, "rb") as f: data = io.BytesIO(f.read()) - sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_and_samplerate(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_and_samplerate(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) assert expected_sample_rate == sample_rate assert exp_file_sample_rate == file_sample_rate From 4282e2ae70a50848012f495fbfbffd82443a368d Mon Sep 17 00:00:00 2001 From: Kavi Date: Fri, 16 May 2025 15:13:08 +0200 Subject: [PATCH 11/11] Added AudioPath as an alias for the path definition --- batdetect2/api.py | 9 +++------ batdetect2/types.py | 9 ++++++++- batdetect2/utils/audio_utils.py | 10 ++++------ batdetect2/utils/detector_utils.py | 14 ++++++-------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/batdetect2/api.py b/batdetect2/api.py index da2d106..86bf55b 100644 --- a/batdetect2/api.py +++ b/batdetect2/api.py @@ -99,6 +99,7 @@ consult the API documentation in the code. import warnings from typing import List, Optional, Tuple, BinaryIO, Any, Union +from .types import AudioPath import numpy as np import torch @@ -244,9 +245,7 @@ def generate_spectrogram( def process_file( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ], + path: AudioPath, model: DetectionModel = MODEL, config: Optional[ProcessingConfiguration] = None, device: torch.device = DEVICE, @@ -256,9 +255,7 @@ def process_file( Parameters ---------- - path : Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ] + path : AudioPath Path to audio data. model : DetectionModel, optional Detection model. Uses default model if not specified. diff --git a/batdetect2/types.py b/batdetect2/types.py index 57a60b4..3f22862 100644 --- a/batdetect2/types.py +++ b/batdetect2/types.py @@ -1,6 +1,10 @@ """Types used in the code base.""" -from typing import List, NamedTuple, Optional, Union +from typing import List, NamedTuple, Optional, Union, Any, BinaryIO + +import audioread +import os +import soundfile as sf import numpy as np import torch @@ -40,6 +44,9 @@ __all__ = [ "SpectrogramParameters", ] +AudioPath = Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ] class SpectrogramParameters(TypedDict): """Parameters for generating spectrograms.""" diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index a2cfe44..b89cdca 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -1,6 +1,8 @@ import warnings from typing import Optional, Tuple, Union, Any, BinaryIO +from ..types import AudioPath + import librosa import librosa.core.spectrum import numpy as np @@ -146,9 +148,7 @@ def generate_spectrogram( return spec, spec_for_viz def load_audio( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ], + path: AudioPath, time_exp_fact: float, target_samp_rate: int, scale: bool = False, @@ -177,9 +177,7 @@ def load_audio( return sample_rate, audio_data def load_audio_and_samplerate( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ], + path: AudioPath, time_exp_fact: float, target_samp_rate: int, scale: bool = False, diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index 93155b1..f96c5d7 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -2,6 +2,8 @@ import json import os from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO +from ..types import AudioPath + import numpy as np import pandas as pd import torch @@ -735,9 +737,7 @@ def process_audio_array( def process_file( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ], + path: AudioPath, model: DetectionModel, config: ProcessingConfiguration, device: torch.device, @@ -750,7 +750,7 @@ def process_file( Parameters ---------- - path : str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + path : AudioPath Path to audio file. model : torch.nn.Module @@ -760,7 +760,7 @@ def process_file( Configuration for processing. file_id: Optional[str], - Give the data an id. Defaults to the filename if path is a string. Otherwise + Give the data an id. Defaults to the filename if path is a string. Otherwise an md5 will be calculated from the binary data. Returns ------- @@ -859,9 +859,7 @@ def process_file( return results -def _generate_id(path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ]) -> str: +def _generate_id(path: AudioPath) -> str: """ Generate an id based on the path. If the path is a str or PathLike it will parsed as the basename.