From 230b6167bc5ae7a00eeb718b5b918775f2daa502 Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Thu, 27 Feb 2025 08:10:27 +0100
Subject: [PATCH] Add load_audio_data(), which also returns the file's original
 sample rate. load_audio() now delegates to load_audio_data() but retains its
 signature. du.process_file() no longer needs to call get_samplerate(), which
 is removed

---
 batdetect2/utils/audio_utils.py    | 44 +++++++++++++++++++++--------
 batdetect2/utils/detector_utils.py |  3 +-
 tests/test_audio_utils.py          | 45 +++---------------------------
 3 files changed, 38 insertions(+), 54 deletions(-)

diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index a31397c..66714f0 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -17,6 +17,7 @@ from . import wavfile
 
 __all__ = [
     "load_audio",
+    "load_audio_data",
     "generate_spectrogram",
     "pad_audio",
 ]
@@ -145,16 +146,6 @@ def generate_spectrogram(
 
     return spec, spec_for_viz
 
-def get_samplerate(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]):       
-    if isinstance(path, (BinaryIO, io.BytesIO)):
-        path.seek(0)
-    
-    with sf.SoundFile(path) as f:
-        return f.samplerate
-
 def load_audio(
     path:  Union[
         str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
@@ -169,6 +160,37 @@ def load_audio(
     The audio is also scaled to [-1, 1] and clipped to the maximum duration.
     Only mono files are supported.
 
+    Args:
+        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
+        target_samp_rate (int): Target sampling rate.
+        scale (bool): Whether to scale the audio to [-1, 1].
+        max_duration (float): Maximum duration of the audio in seconds.
+
+    Returns:
+        sampling_rate: The sampling rate of the audio.
+        audio_raw: The audio signal in a numpy array.
+
+    Raises:
+        ValueError: If the audio file is stereo.
+
+    """
+    sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
+    return sample_rate, audio_data
+
+def load_audio_data(
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
+    time_exp_fact: float,
+    target_samp_rate: int,
+    scale: bool = False,
+    max_duration: Optional[float] = None,
+) -> Tuple[int, np.ndarray, int | float]:
+    """Load an audio file and resample it to the target sampling rate.
+
+    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
+    Only mono files are supported.
+
     Args:
         path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
         target_samp_rate (int): Target sampling rate.
@@ -223,7 +245,7 @@ def load_audio(
         audio_raw = audio_raw - audio_raw.mean()
         audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)
 
-    return sampling_rate, audio_raw
+    return sampling_rate, audio_raw, file_sampling_rate
 
 
 def compute_spectrogram_width(
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index 4c1eefe..ffbcf6e 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -773,14 +773,13 @@ def process_file(
     spec_slices = []
 
     # load audio file
-    sampling_rate, audio_full = au.load_audio(
+    sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
         path,
         time_exp_fact=config.get("time_expansion", 1) or 1,
         target_samp_rate=config["target_samp_rate"],
         scale=config["scale_raw_audio"],
         max_duration=config.get("max_duration"),
     )
-    file_samp_rate = au.get_samplerate(path)
 
     orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
 
diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index 9a2afc0..a6c08fb 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
         resize_factor=params["resize_factor"],
     )
     assert expected_width == width
-
-def test_get_samplerate_using_bytesio():
-    with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 500000
-    assert expected_sample_rate == sample_rate
-
     
 
-def test_load_audio_using_bytes():
-    filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
-    
-    with open(filename, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    assert expected_sample_rate == sample_rate
-
-    assert np.array_equal(audio_data, expected_audio_data)
-
-
-
-def test_get_samplerate_using_bytesio_2():
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    with open(path, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 192_000
-    assert expected_sample_rate == sample_rate
-
-def test_load_audio_using_bytes_2():
+def test_load_audio_using_bytesio():
     basename = "20230322_172000_selec2.wav"
     path = os.path.join(DATA_DIR, basename)
 
     with open(path, "rb") as f:
         data = io.BytesIO(f.read())
     
-    sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
     assert expected_sample_rate == sample_rate
+    assert exp_file_sample_rate == file_sample_rate
 
     assert np.array_equal(audio_data, expected_audio_data)
\ No newline at end of file