Added test to ensure pad_audio function, and utils, are working as expected

2026-01-10 00:59:34 +01:00 · 2024-11-10 22:39:30 +00:00 · 2024-11-10 22:39:30 +00:00 · 7906842a16
commit 7906842a16
parent a4b22d6590
1 changed files with 136 additions and 0 deletions
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@ -0,0 +1,136 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+from hypothesis import given
+from hypothesis import strategies as st
+
+from batdetect2.detector import parameters
+from batdetect2.utils import audio_utils, detector_utils
+
+
+@given(duration=st.floats(min_value=0.1, max_value=2))
+def test_can_compute_correct_spectrogram_width(duration: float):
+    samplerate = parameters.TARGET_SAMPLERATE_HZ
+    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
+
+    length = int(duration * samplerate)
+    audio = np.random.rand(length)
+
+    spectrogram, _ = audio_utils.generate_spectrogram(
+        audio,
+        samplerate,
+        params,
+    )
+
+    # convert to pytorch
+    spectrogram = torch.from_numpy(spectrogram)
+
+    # add batch and channel dimensions
+    spectrogram = spectrogram.unsqueeze(0).unsqueeze(0)
+
+    # resize the spec
+    resize_factor = params["resize_factor"]
+    spec_op_shape = (
+        int(params["spec_height"] * resize_factor),
+        int(spectrogram.shape[-1] * resize_factor),
+    )
+    spectrogram = F.interpolate(
+        spectrogram,
+        size=spec_op_shape,
+        mode="bilinear",
+        align_corners=False,
+    )
+
+    expected_width = audio_utils.compute_spectrogram_width(
+        length,
+        samplerate=parameters.TARGET_SAMPLERATE_HZ,
+        window_duration=params["fft_win_length"],
+        window_overlap=params["fft_overlap"],
+        resize_factor=params["resize_factor"],
+    )
+
+    assert spectrogram.shape[-1] == expected_width
+
+
+@given(duration=st.floats(min_value=0.1, max_value=2))
+def test_pad_audio_without_fixed_size(duration: float):
+    # Test the pad_audio function
+    # This function is used to pad audio with zeros to a specific length
+    # It is used in the generate_spectrogram function
+    # The function is tested with a simplepas
+    samplerate = parameters.TARGET_SAMPLERATE_HZ
+    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
+
+    length = int(duration * samplerate)
+    audio = np.random.rand(length)
+
+    # pad the audio to be divisible by divide factor
+    padded_audio = audio_utils.pad_audio(
+        audio,
+        samplerate=samplerate,
+        window_duration=params["fft_win_length"],
+        window_overlap=params["fft_overlap"],
+        resize_factor=params["resize_factor"],
+        divide_factor=params["spec_divide_factor"],
+    )
+
+    # check that the padded audio is divisible by the divide factor
+    expected_width = audio_utils.compute_spectrogram_width(
+        len(padded_audio),
+        samplerate=parameters.TARGET_SAMPLERATE_HZ,
+        window_duration=params["fft_win_length"],
+        window_overlap=params["fft_overlap"],
+        resize_factor=params["resize_factor"],
+    )
+
+    assert expected_width % params["spec_divide_factor"] == 0
+
+
+@given(duration=st.floats(min_value=0.1, max_value=2))
+def test_computed_spectrograms_are_actually_divisible_by_the_spec_divide_factor(
+    duration: float,
+):
+    samplerate = parameters.TARGET_SAMPLERATE_HZ
+    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
+    length = int(duration * samplerate)
+    audio = np.random.rand(length)
+    _, spectrogram, _ = detector_utils.compute_spectrogram(
+        audio,
+        samplerate,
+        params,
+        torch.device("cpu"),
+    )
+    assert spectrogram.shape[-1] % params["spec_divide_factor"] == 0
+
+
+@given(
+    duration=st.floats(min_value=0.1, max_value=2),
+    width=st.integers(min_value=128, max_value=1024),
+)
+def test_pad_audio_with_fixed_width(duration: float, width: int):
+    samplerate = parameters.TARGET_SAMPLERATE_HZ
+    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
+
+    length = int(duration * samplerate)
+    audio = np.random.rand(length)
+
+    # pad the audio to be divisible by divide factor
+    padded_audio = audio_utils.pad_audio(
+        audio,
+        samplerate=samplerate,
+        window_duration=params["fft_win_length"],
+        window_overlap=params["fft_overlap"],
+        resize_factor=params["resize_factor"],
+        divide_factor=params["spec_divide_factor"],
+        fixed_width=width,
+    )
+
+    # check that the padded audio is divisible by the divide factor
+    expected_width = audio_utils.compute_spectrogram_width(
+        len(padded_audio),
+        samplerate=parameters.TARGET_SAMPLERATE_HZ,
+        window_duration=params["fft_win_length"],
+        window_overlap=params["fft_overlap"],
+        resize_factor=params["resize_factor"],
+    )
+    assert expected_width == width