From 230b6167bc5ae7a00eeb718b5b918775f2daa502 Mon Sep 17 00:00:00 2001 From: Kavi Date: Thu, 27 Feb 2025 08:10:27 +0100 Subject: [PATCH] Added load_audio_data() which returns the original sample rate. Changed load_audio() implementation so that it uses load_audio_data but retains its signature. du.process_file() now does not need to call get_samplerate --- batdetect2/utils/audio_utils.py | 44 +++++++++++++++++++++-------- batdetect2/utils/detector_utils.py | 3 +- tests/test_audio_utils.py | 45 +++--------------------------- 3 files changed, 38 insertions(+), 54 deletions(-) diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py index a31397c..66714f0 100644 --- a/batdetect2/utils/audio_utils.py +++ b/batdetect2/utils/audio_utils.py @@ -17,6 +17,7 @@ from . import wavfile __all__ = [ "load_audio", + "load_audio_data", "generate_spectrogram", "pad_audio", ] @@ -145,16 +146,6 @@ def generate_spectrogram( return spec, spec_for_viz -def get_samplerate( - path: Union[ - str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO - ]): - if isinstance(path, (BinaryIO, io.BytesIO)): - path.seek(0) - - with sf.SoundFile(path) as f: - return f.samplerate - def load_audio( path: Union[ str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO @@ -169,6 +160,37 @@ def load_audio( The audio is also scaled to [-1, 1] and clipped to the maximum duration. Only mono files are supported. + Args: + path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. + target_samp_rate (int): Target sampling rate. + scale (bool): Whether to scale the audio to [-1, 1]. + max_duration (float): Maximum duration of the audio in seconds. + + Returns: + sampling_rate: The sampling rate of the audio. + audio_raw: The audio signal in a numpy array. + + Raises: + ValueError: If the audio file is stereo. 
+ + """ + sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration) + return sample_rate, audio_data + +def load_audio_data( + path: Union[ + str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO + ], + time_exp_fact: float, + target_samp_rate: int, + scale: bool = False, + max_duration: Optional[float] = None, +) -> Tuple[int, np.ndarray, Union[int, float]]: + """Load an audio file and resample it to the target sampling rate. + + The audio is also scaled to [-1, 1] and clipped to the maximum duration. + Only mono files are supported. + Args: path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. target_samp_rate (int): Target sampling rate. @@ -223,7 +245,7 @@ def load_audio( audio_raw = audio_raw - audio_raw.mean() audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6) - return sampling_rate, audio_raw + return sampling_rate, audio_raw, file_sampling_rate def compute_spectrogram_width( diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py index 4c1eefe..ffbcf6e 100644 --- a/batdetect2/utils/detector_utils.py +++ b/batdetect2/utils/detector_utils.py @@ -773,14 +773,13 @@ def process_file( spec_slices = [] # load audio file - sampling_rate, audio_full = au.load_audio( + sampling_rate, audio_full, file_samp_rate = au.load_audio_data( path, time_exp_fact=config.get("time_expansion", 1) or 1, target_samp_rate=config["target_samp_rate"], scale=config["scale_raw_audio"], max_duration=config.get("max_duration"), ) - file_samp_rate = au.get_samplerate(path) orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1) diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py index 9a2afc0..a6c08fb 100644 --- a/tests/test_audio_utils.py +++ b/tests/test_audio_utils.py @@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int): resize_factor=params["resize_factor"], ) assert 
expected_width == width - -def test_get_samplerate_using_bytesio(): - with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate = audio_utils.get_samplerate(audio_bytes) - - expected_sample_rate = 500000 - assert expected_sample_rate == sample_rate - -def test_load_audio_using_bytes(): - filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav" - - with open(filename, "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - - expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - - assert expected_sample_rate == sample_rate - - assert np.array_equal(audio_data, expected_audio_data) - - - -def test_get_samplerate_using_bytesio_2(): - basename = "20230322_172000_selec2.wav" - path = os.path.join(DATA_DIR, basename) - - with open(path, "rb") as f: - audio_bytes = io.BytesIO(f.read()) - - sample_rate = audio_utils.get_samplerate(audio_bytes) - - expected_sample_rate = 192_000 - assert expected_sample_rate == sample_rate - -def test_load_audio_using_bytes_2(): +def test_load_audio_using_bytesio(): basename = "20230322_172000_selec2.wav" path = os.path.join(DATA_DIR, basename) with open(path, "rb") as f: data = io.BytesIO(f.read()) - sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) - expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) + expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, 
target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) assert expected_sample_rate == sample_rate + assert exp_file_sample_rate == file_sample_rate assert np.array_equal(audio_data, expected_audio_data) \ No newline at end of file