Added load_audio_data(), which returns the file's original sample rate in addition to the resampled audio. Changed the load_audio() implementation to delegate to load_audio_data() while retaining its signature. du.process_file() no longer needs to call get_samplerate(), which has been removed.

2025-06-29 22:51:58 +02:00 · 2025-02-27 08:10:27 +01:00 · 2025-02-27 08:10:27 +01:00 · 230b6167bc
commit 230b6167bc
parent f62bc99ab2
3 changed files with 38 additions and 54 deletions
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@ -17,6 +17,7 @@ from . import wavfile

 __all__ = [
    "load_audio",
+    "load_audio_data",
    "generate_spectrogram",
    "pad_audio",
 ]
@ -145,16 +146,6 @@ def generate_spectrogram(

    return spec, spec_for_viz

-def get_samplerate(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]):       
-    if isinstance(path, (BinaryIO, io.BytesIO)):
-        path.seek(0)
-    
-    with sf.SoundFile(path) as f:
-        return f.samplerate
-
 def load_audio(
    path:  Union[
        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
@ -169,6 +160,37 @@ def load_audio(
    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
    Only mono files are supported.

+    Args:
+        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
+        target_samp_rate (int): Target sampling rate.
+        scale (bool): Whether to scale the audio to [-1, 1].
+        max_duration (float): Maximum duration of the audio in seconds.
+
+    Returns:
+        sampling_rate: The sampling rate of the audio.
+        audio_raw: The audio signal in a numpy array.
+
+    Raises:
+        ValueError: If the audio file is stereo.
+
+    """
+    sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
+    return sample_rate, audio_data
+
+def load_audio_data(
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
+    time_exp_fact: float,
+    target_samp_rate: int,
+    scale: bool = False,
+    max_duration: Optional[float] = None,
+) -> Tuple[int, np.ndarray, int | float]:
+    """Load an audio file and resample it to the target sampling rate.
+
+    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
+    Only mono files are supported.
+
    Args:
        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
        target_samp_rate (int): Target sampling rate.
@ -223,7 +245,7 @@ def load_audio(
        audio_raw = audio_raw - audio_raw.mean()
        audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)

-    return sampling_rate, audio_raw
+    return sampling_rate, audio_raw, file_sampling_rate


 def compute_spectrogram_width(
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@ -773,14 +773,13 @@ def process_file(
    spec_slices = []

    # load audio file
-    sampling_rate, audio_full = au.load_audio(
+    sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
        path,
        time_exp_fact=config.get("time_expansion", 1) or 1,
        target_samp_rate=config["target_samp_rate"],
        scale=config["scale_raw_audio"],
        max_duration=config.get("max_duration"),
    )
-    file_samp_rate = au.get_samplerate(path)

    orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)

--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
        resize_factor=params["resize_factor"],
    )
    assert expected_width == width
-
-def test_get_samplerate_using_bytesio():
-    with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 500000
-    assert expected_sample_rate == sample_rate
-
    

-def test_load_audio_using_bytes():
-    filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
-    
-    with open(filename, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    assert expected_sample_rate == sample_rate
-
-    assert np.array_equal(audio_data, expected_audio_data)
-
-
-
-def test_get_samplerate_using_bytesio_2():
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    with open(path, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 192_000
-    assert expected_sample_rate == sample_rate
-
-def test_load_audio_using_bytes_2():
+def test_load_audio_using_bytesio():
    basename = "20230322_172000_selec2.wav"
    path = os.path.join(DATA_DIR, basename)

    with open(path, "rb") as f:
        data = io.BytesIO(f.read())
    
-    sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)

-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)

    assert expected_sample_rate == sample_rate
+    assert exp_file_sample_rate == file_sample_rate

    assert np.array_equal(audio_data, expected_audio_data)