From 66ac7e608fdc2ee597aa445acc5621f5eeefccc1 Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Tue, 25 Feb 2025 14:24:48 +0100
Subject: [PATCH 01/11] Changed the signature of api.process_file,
 au.load_audio and du.process_file. This allows users to use the same args for
 processing data as librosa.load()

---
 batdetect2/api.py                  | 23 ++++++++++++++++++-----
 batdetect2/utils/audio_utils.py    | 28 ++++++++++++++++++++--------
 batdetect2/utils/detector_utils.py | 29 ++++++++++++++++++++---------
 tests/test_audio_utils.py          | 11 ++++++++++-
 4 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/batdetect2/api.py b/batdetect2/api.py
index 4d04f42..1a5f14c 100644
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@@ -97,7 +97,7 @@ consult the API documentation in the code.
 
 """
 import warnings
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, BinaryIO, Any, Union
 
 import numpy as np
 import torch
@@ -120,6 +120,10 @@ from batdetect2.types import (
 )
 from batdetect2.utils.detector_utils import list_audio_files, load_model
 
+import audioread
+import os 
+import soundfile as sf
+
 # Remove warnings from torch
 warnings.filterwarnings("ignore", category=UserWarning, module="torch")
 
@@ -238,32 +242,41 @@ def generate_spectrogram(
 
 
 def process_file(
-    audio_file: str,
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
     model: DetectionModel = MODEL,
     config: Optional[ProcessingConfiguration] = None,
     device: torch.device = DEVICE,
+    file_id: str | None = None
 ) -> du.RunResults:
     """Process audio file with model.
 
     Parameters
     ----------
-    audio_file : str
-        Path to audio file.
+    path :  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ]
+        Path to audio data.
     model : DetectionModel, optional
         Detection model. Uses default model if not specified.
     config : Optional[ProcessingConfiguration], optional
         Processing configuration, by default None (uses default parameters).
     device : torch.device, optional
         Device to use, by default tries to use GPU if available.
+    file_id: Optional[str],
+        Give the data an id. If path is a string path to a file this can be ignored and
+        the file_id will be the basename of the file.
     """
     if config is None:
         config = CONFIG
 
     return du.process_file(
-        audio_file,
+        path,
         model,
         config,
         device,
+        file_id
     )
 
 
diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index a60ea94..77ba5f5 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -1,11 +1,15 @@
 import warnings
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union, Any, BinaryIO
 
 import librosa
 import librosa.core.spectrum
 import numpy as np
 import torch
 
+import audioread
+import os 
+import soundfile as sf
+
 from batdetect2.detector import parameters
 
 from . import wavfile
@@ -140,21 +144,29 @@ def generate_spectrogram(
 
     return spec, spec_for_viz
 
+def get_samplerate(
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ]):
+    with sf.SoundFile(path) as f:
+        return f.samplerate
 
 def load_audio(
-    audio_file: str,
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
     time_exp_fact: float,
     target_samp_rate: int,
     scale: bool = False,
     max_duration: Optional[float] = None,
-) -> Tuple[int, np.ndarray]:
+) -> Tuple[int, np.ndarray ]:
     """Load an audio file and resample it to the target sampling rate.
 
     The audio is also scaled to [-1, 1] and clipped to the maximum duration.
     Only mono files are supported.
 
     Args:
-        audio_file (str): Path to the audio file.
+        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
         target_samp_rate (int): Target sampling rate.
         scale (bool): Whether to scale the audio to [-1, 1].
         max_duration (float): Maximum duration of the audio in seconds.
@@ -170,16 +182,16 @@ def load_audio(
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=wavfile.WavFileWarning)
         # sampling_rate, audio_raw = wavfile.read(audio_file)
-        audio_raw, sampling_rate = librosa.load(
-            audio_file,
+        audio_raw, file_sampling_rate = librosa.load(
+            path,
             sr=None,
             dtype=np.float32,
         )
-
+    
     if len(audio_raw.shape) > 1:
         raise ValueError("Currently does not handle stereo files")
 
-    sampling_rate = sampling_rate * time_exp_fact
+    sampling_rate = file_sampling_rate * time_exp_fact
 
     # resample - need to do this after correcting for time expansion
     sampling_rate_old = sampling_rate
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index 63643b6..f7392f8 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Any, Iterator, List, Optional, Tuple, Union
+from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO
 
 import librosa
 import numpy as np
@@ -31,6 +31,11 @@ from batdetect2.types import (
     SpectrogramParameters,
 )
 
+import audioread
+import os 
+import soundfile as sf
+
+
 __all__ = [
     "load_model",
     "list_audio_files",
@@ -729,10 +734,13 @@ def process_audio_array(
 
 
 def process_file(
-    audio_file: str,
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
     model: DetectionModel,
     config: ProcessingConfiguration,
     device: torch.device,
+    file_id: str | None = None
 ) -> Union[RunResults, Any]:
     """Process a single audio file with detection model.
 
@@ -741,7 +749,7 @@ def process_file(
 
     Parameters
     ----------
-    audio_file : str
+    path : str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
         Path to audio file.
 
     model : torch.nn.Module
@@ -762,18 +770,17 @@ def process_file(
     cnn_feats = []
     spec_slices = []
 
-    # Get original sampling rate
-    file_samp_rate = librosa.get_samplerate(audio_file)
-    orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
-
     # load audio file
     sampling_rate, audio_full = au.load_audio(
-        audio_file,
+        path,
         time_exp_fact=config.get("time_expansion", 1) or 1,
         target_samp_rate=config["target_samp_rate"],
         scale=config["scale_raw_audio"],
         max_duration=config.get("max_duration"),
     )
+    file_samp_rate = au.get_samplerate(path)
+
+    orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
 
     # loop through larger file and split into chunks
     # TODO: fix so that it overlaps correctly and takes care of
@@ -823,9 +830,13 @@ def process_file(
         spec_slices,
     )
 
+    _file_id = file_id
+    if _file_id is None:
+        _file_id = os.path.basename(path) if isinstance(path, str) else "unknown"
+
     # convert results to a dictionary in the right format
     results = convert_results(
-        file_id=os.path.basename(audio_file),
+        file_id=_file_id,
         time_exp=config.get("time_expansion", 1) or 1,
         duration=audio_full.shape[0] / float(sampling_rate),
         params=config,
diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index 1b489bc..ebe2f0f 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -6,7 +6,8 @@ from hypothesis import strategies as st
 
 from batdetect2.detector import parameters
 from batdetect2.utils import audio_utils, detector_utils
-
+import io
+import requests
 
 @given(duration=st.floats(min_value=0.1, max_value=2))
 def test_can_compute_correct_spectrogram_width(duration: float):
@@ -134,3 +135,11 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
         resize_factor=params["resize_factor"],
     )
     assert expected_width == width
+
+def test_get_samplerate_using_bytesio():
+    audio_url="https://anon.erda.au.dk/share_redirect/e5c7G2AWmg/F1/20240724/2MU02597/BIOBD01_20240626_231650.wav"
+    
+    sample_rate = audio_utils.get_samplerate(io.BytesIO(requests.get(audio_url).content))
+
+    expected_sample_rate = 256000
+    assert expected_sample_rate == sample_rate

From 838a1ade0d93b8c0723f1fa138e2ad8dd3730a04 Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Tue, 25 Feb 2025 14:46:40 +0100
Subject: [PATCH 02/11] Updated get_samplerate test to use example data file.

---
 tests/test_audio_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index ebe2f0f..c223ecf 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -137,9 +137,10 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
     assert expected_width == width
 
 def test_get_samplerate_using_bytesio():
-    audio_url="https://anon.erda.au.dk/share_redirect/e5c7G2AWmg/F1/20240724/2MU02597/BIOBD01_20240626_231650.wav"
+    with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
+        audio_bytes = io.BytesIO(f.read())
     
-    sample_rate = audio_utils.get_samplerate(io.BytesIO(requests.get(audio_url).content))
+    sample_rate = audio_utils.get_samplerate(audio_bytes)
 
-    expected_sample_rate = 256000
+    expected_sample_rate = 500000
     assert expected_sample_rate == sample_rate

From 6af7fef3167c870b950ceea5c64349ec76c4cbfd Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Wed, 26 Feb 2025 14:11:11 +0100
Subject: [PATCH 03/11] Fix 'unknown' id by providing a _generate_id()
 function.

---
 batdetect2/utils/detector_utils.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index f7392f8..4c1eefe 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -33,8 +33,10 @@ from batdetect2.types import (
 
 import audioread
 import os 
+import io
 import soundfile as sf
-
+import hashlib
+import uuid
 
 __all__ = [
     "load_model",
@@ -832,7 +834,7 @@ def process_file(
 
     _file_id = file_id
     if _file_id is None:
-        _file_id = os.path.basename(path) if isinstance(path, str) else "unknown"
+        _file_id = _generate_id(path)
 
     # convert results to a dictionary in the right format
     results = convert_results(
@@ -856,6 +858,24 @@ def process_file(
 
     return results
 
+def _generate_id(path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ]) -> str:
+    """ Generate an id based on the path.
+    
+    If the path is a str or PathLike it will parsed as the basename. 
+    This should ensure backwards compatibility with previous versions.     
+    """
+    if isinstance(path, str) or isinstance(path, os.PathLike):
+        return os.path.basename(path)
+    elif isinstance(path, (BinaryIO, io.BytesIO)):
+        path.seek(0)
+        md5 = hashlib.md5(path.read()).hexdigest()
+        path.seek(0)
+        return md5
+    else:
+        return str(uuid.uuid4())
+
 
 def summarize_results(results, predictions, config):
     """Print summary of results."""

From e10e270de49e81daed463ec7d8713e97d2bdeefe Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Wed, 26 Feb 2025 14:12:09 +0100
Subject: [PATCH 04/11] Fix error in get_samplerate when reading io.BytesIO.

---
 batdetect2/utils/audio_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index 77ba5f5..a31397c 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -9,6 +9,7 @@ import torch
 import audioread
 import os 
 import soundfile as sf
+import io
 
 from batdetect2.detector import parameters
 
@@ -147,7 +148,10 @@ def generate_spectrogram(
 def get_samplerate(
     path:  Union[
         str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]):
+    ]):       
+    if isinstance(path, (BinaryIO, io.BytesIO)):
+        path.seek(0)
+    
     with sf.SoundFile(path) as f:
         return f.samplerate
 

From 47dbdc79c236867f0d5c25daa5b86e0a1737eb2a Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Wed, 26 Feb 2025 14:12:42 +0100
Subject: [PATCH 05/11] Added tests for api and load_audio

---
 tests/test_api.py         | 27 +++++++++++++++++++++
 tests/test_audio_utils.py | 49 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/tests/test_api.py b/tests/test_api.py
index e828c9e..51149e1 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -10,11 +10,13 @@ import torch
 from torch import nn
 
 from batdetect2 import api
+import io 
 
 PKG_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 TEST_DATA_DIR = os.path.join(PKG_DIR, "example_data", "audio")
 TEST_DATA = glob(os.path.join(TEST_DATA_DIR, "*.wav"))
 
+DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
 
 def test_load_model_with_default_params():
     """Test loading model with default parameters."""
@@ -280,3 +282,28 @@ def test_process_file_with_empty_predictions_does_not_fail(
 
     assert results is not None
     assert len(results["pred_dict"]["annotation"]) == 0
+
+def test_process_file_file_id_defaults_to_basename():
+    """Test that no detections are made above the nyquist frequency."""
+    # Recording donated by @@kdarras
+    basename = "20230322_172000_selec2.wav"
+    path = os.path.join(DATA_DIR, basename)
+
+    output = api.process_file(path)
+    predictions = output["pred_dict"]
+    id = predictions["id"]
+    assert id == basename
+
+def test_bytesio_file_id_defaults_to_md5():
+    """Test that no detections are made above the nyquist frequency."""
+    # Recording donated by @@kdarras
+    basename = "20230322_172000_selec2.wav"
+    path = os.path.join(DATA_DIR, basename)
+
+    with open(path, "rb") as f:
+        data = io.BytesIO(f.read())
+
+    output = api.process_file(data)
+    predictions = output["pred_dict"]
+    id = predictions["id"]
+    assert id == "7ade9ebf1a9fe5477ff3a2dc57001929"
diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index c223ecf..9a2afc0 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -7,7 +7,9 @@ from hypothesis import strategies as st
 from batdetect2.detector import parameters
 from batdetect2.utils import audio_utils, detector_utils
 import io
-import requests
+import os
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
 
 @given(duration=st.floats(min_value=0.1, max_value=2))
 def test_can_compute_correct_spectrogram_width(duration: float):
@@ -144,3 +146,48 @@ def test_get_samplerate_using_bytesio():
 
     expected_sample_rate = 500000
     assert expected_sample_rate == sample_rate
+
+    
+
+def test_load_audio_using_bytes():
+    filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
+    
+    with open(filename, "rb") as f:
+        audio_bytes = io.BytesIO(f.read())
+    
+    sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+
+    expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+
+    assert expected_sample_rate == sample_rate
+
+    assert np.array_equal(audio_data, expected_audio_data)
+
+
+
+def test_get_samplerate_using_bytesio_2():
+    basename = "20230322_172000_selec2.wav"
+    path = os.path.join(DATA_DIR, basename)
+
+    with open(path, "rb") as f:
+        audio_bytes = io.BytesIO(f.read())
+    
+    sample_rate = audio_utils.get_samplerate(audio_bytes)
+
+    expected_sample_rate = 192_000
+    assert expected_sample_rate == sample_rate
+
+def test_load_audio_using_bytes_2():
+    basename = "20230322_172000_selec2.wav"
+    path = os.path.join(DATA_DIR, basename)
+
+    with open(path, "rb") as f:
+        data = io.BytesIO(f.read())
+    
+    sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+
+    expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+
+    assert expected_sample_rate == sample_rate
+
+    assert np.array_equal(audio_data, expected_audio_data)
\ No newline at end of file

From f62bc99ab24e3c503b8aaa9162c713b3749ba22f Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Wed, 26 Feb 2025 14:13:21 +0100
Subject: [PATCH 06/11] Added api method to process a URL

---
 batdetect2/api.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/batdetect2/api.py b/batdetect2/api.py
index 1a5f14c..8978b63 100644
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@@ -123,6 +123,8 @@ from batdetect2.utils.detector_utils import list_audio_files, load_model
 import audioread
 import os 
 import soundfile as sf
+import requests
+import io
 
 # Remove warnings from torch
 warnings.filterwarnings("ignore", category=UserWarning, module="torch")
@@ -279,6 +281,49 @@ def process_file(
         file_id
     )
 
+def process_url(
+    url: str,
+    model: DetectionModel = MODEL,
+    config: Optional[ProcessingConfiguration] = None,
+    device: torch.device = DEVICE,
+    file_id: str | None = None
+) -> du.RunResults:
+    """Process audio file with model.
+
+    Parameters
+    ----------
+    url : str
+        HTTP URL to load the audio data from
+    model : DetectionModel, optional
+        Detection model. Uses default model if not specified.
+    config : Optional[ProcessingConfiguration], optional
+        Processing configuration, by default None (uses default parameters).
+    device : torch.device, optional
+        Device to use, by default tries to use GPU if available.
+    file_id: Optional[str],
+        Give the data an id. Defaults to the URL
+    """
+    if config is None:
+        config = CONFIG
+
+    if file_id is None:
+        file_id = url
+
+    response = requests.get(url)
+
+    # Raise exception on HTTP error
+    response.raise_for_status()
+
+    # Retrieve body as raw bytes
+    raw_audio_data = response.content
+    
+    return du.process_file(
+        io.BytesIO(raw_audio_data),
+        model,
+        config,
+        device,
+        file_id
+    )
 
 def process_spectrogram(
     spec: torch.Tensor,

From 230b6167bc5ae7a00eeb718b5b918775f2daa502 Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Thu, 27 Feb 2025 08:10:27 +0100
Subject: [PATCH 07/11] Added load_audio_data(), which returns the original
 sample rate. Changed load_audio() implementation so that it uses
 load_audio_data() but retains its signature. du.process_file() no longer
 needs to call get_samplerate().

---
 batdetect2/utils/audio_utils.py    | 44 +++++++++++++++++++++--------
 batdetect2/utils/detector_utils.py |  3 +-
 tests/test_audio_utils.py          | 45 +++---------------------------
 3 files changed, 38 insertions(+), 54 deletions(-)

diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index a31397c..66714f0 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -17,6 +17,7 @@ from . import wavfile
 
 __all__ = [
     "load_audio",
+    "load_audio_data",
     "generate_spectrogram",
     "pad_audio",
 ]
@@ -145,16 +146,6 @@ def generate_spectrogram(
 
     return spec, spec_for_viz
 
-def get_samplerate(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]):       
-    if isinstance(path, (BinaryIO, io.BytesIO)):
-        path.seek(0)
-    
-    with sf.SoundFile(path) as f:
-        return f.samplerate
-
 def load_audio(
     path:  Union[
         str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
@@ -169,6 +160,37 @@ def load_audio(
     The audio is also scaled to [-1, 1] and clipped to the maximum duration.
     Only mono files are supported.
 
+    Args:
+        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
+        target_samp_rate (int): Target sampling rate.
+        scale (bool): Whether to scale the audio to [-1, 1].
+        max_duration (float): Maximum duration of the audio in seconds.
+
+    Returns:
+        sampling_rate: The sampling rate of the audio.
+        audio_raw: The audio signal in a numpy array.
+
+    Raises:
+        ValueError: If the audio file is stereo.
+
+    """
+    sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
+    return sample_rate, audio_data
+
+def load_audio_data(
+    path:  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ],
+    time_exp_fact: float,
+    target_samp_rate: int,
+    scale: bool = False,
+    max_duration: Optional[float] = None,
+) -> Tuple[int, np.ndarray, int | float]:
+    """Load an audio file and resample it to the target sampling rate.
+
+    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
+    Only mono files are supported.
+
     Args:
         path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
         target_samp_rate (int): Target sampling rate.
@@ -223,7 +245,7 @@ def load_audio(
         audio_raw = audio_raw - audio_raw.mean()
         audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)
 
-    return sampling_rate, audio_raw
+    return sampling_rate, audio_raw, file_sampling_rate
 
 
 def compute_spectrogram_width(
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index 4c1eefe..ffbcf6e 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -773,14 +773,13 @@ def process_file(
     spec_slices = []
 
     # load audio file
-    sampling_rate, audio_full = au.load_audio(
+    sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
         path,
         time_exp_fact=config.get("time_expansion", 1) or 1,
         target_samp_rate=config["target_samp_rate"],
         scale=config["scale_raw_audio"],
         max_duration=config.get("max_duration"),
     )
-    file_samp_rate = au.get_samplerate(path)
 
     orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
 
diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index 9a2afc0..a6c08fb 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
         resize_factor=params["resize_factor"],
     )
     assert expected_width == width
-
-def test_get_samplerate_using_bytesio():
-    with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 500000
-    assert expected_sample_rate == sample_rate
-
     
 
-def test_load_audio_using_bytes():
-    filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
-    
-    with open(filename, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    assert expected_sample_rate == sample_rate
-
-    assert np.array_equal(audio_data, expected_audio_data)
-
-
-
-def test_get_samplerate_using_bytesio_2():
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    with open(path, "rb") as f:
-        audio_bytes = io.BytesIO(f.read())
-    
-    sample_rate = audio_utils.get_samplerate(audio_bytes)
-
-    expected_sample_rate = 192_000
-    assert expected_sample_rate == sample_rate
-
-def test_load_audio_using_bytes_2():
+def test_load_audio_using_bytesio():
     basename = "20230322_172000_selec2.wav"
     path = os.path.join(DATA_DIR, basename)
 
     with open(path, "rb") as f:
         data = io.BytesIO(f.read())
     
-    sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
-    expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
     assert expected_sample_rate == sample_rate
+    assert exp_file_sample_rate == file_sample_rate
 
     assert np.array_equal(audio_data, expected_audio_data)
\ No newline at end of file

From 54ca55558775c43b72c768f6360c996b58629b9a Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Thu, 27 Feb 2025 13:51:58 +0100
Subject: [PATCH 08/11] Fixed code to support Python3.9 syntax

---
 batdetect2/api.py                  | 4 ++--
 batdetect2/utils/audio_utils.py    | 2 +-
 batdetect2/utils/detector_utils.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/batdetect2/api.py b/batdetect2/api.py
index 8978b63..da2d106 100644
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@@ -250,7 +250,7 @@ def process_file(
     model: DetectionModel = MODEL,
     config: Optional[ProcessingConfiguration] = None,
     device: torch.device = DEVICE,
-    file_id: str | None = None
+    file_id: Optional[str] = None
 ) -> du.RunResults:
     """Process audio file with model.
 
@@ -286,7 +286,7 @@ def process_url(
     model: DetectionModel = MODEL,
     config: Optional[ProcessingConfiguration] = None,
     device: torch.device = DEVICE,
-    file_id: str | None = None
+    file_id: Optional[str] = None
 ) -> du.RunResults:
     """Process audio file with model.
 
diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index 66714f0..134fee7 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -185,7 +185,7 @@ def load_audio_data(
     target_samp_rate: int,
     scale: bool = False,
     max_duration: Optional[float] = None,
-) -> Tuple[int, np.ndarray, int | float]:
+) -> Tuple[int, np.ndarray, Union[float, int]]:
     """Load an audio file and resample it to the target sampling rate.
 
     The audio is also scaled to [-1, 1] and clipped to the maximum duration.
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index ffbcf6e..f6e5776 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -742,7 +742,7 @@ def process_file(
     model: DetectionModel,
     config: ProcessingConfiguration,
     device: torch.device,
-    file_id: str | None = None
+    file_id: Optional[str] = None
 ) -> Union[RunResults, Any]:
     """Process a single audio file with detection model.
 

From cbd362d6ea82e69d8535424556923edf42619e45 Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Fri, 16 May 2025 14:53:35 +0200
Subject: [PATCH 09/11] Updated docstrings for tests

---
 tests/test_api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_api.py b/tests/test_api.py
index 51149e1..d46786d 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -284,7 +284,7 @@ def test_process_file_with_empty_predictions_does_not_fail(
     assert len(results["pred_dict"]["annotation"]) == 0
 
 def test_process_file_file_id_defaults_to_basename():
-    """Test that no detections are made above the nyquist frequency."""
+    """Test that process_file assigns basename as an id if no file_id is provided."""
     # Recording donated by @@kdarras
     basename = "20230322_172000_selec2.wav"
     path = os.path.join(DATA_DIR, basename)
@@ -295,7 +295,7 @@ def test_process_file_file_id_defaults_to_basename():
     assert id == basename
 
 def test_bytesio_file_id_defaults_to_md5():
-    """Test that no detections are made above the nyquist frequency."""
+    """Test that process_file assigns an md5 sum as an id if no file_id is provided when using binary data."""
     # Recording donated by @@kdarras
     basename = "20230322_172000_selec2.wav"
     path = os.path.join(DATA_DIR, basename)

From 52570738f27d163c12d0d1ed14c025bee782d54b Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Fri, 16 May 2025 14:56:35 +0200
Subject: [PATCH 10/11] Renamed load_audio_data to load_audio_and_samplerate

---
 batdetect2/utils/audio_utils.py    | 8 ++++----
 batdetect2/utils/detector_utils.py | 6 ++++--
 tests/test_audio_utils.py          | 4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index 134fee7..a2cfe44 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -9,7 +9,6 @@ import torch
 import audioread
 import os 
 import soundfile as sf
-import io
 
 from batdetect2.detector import parameters
 
@@ -17,7 +16,7 @@ from . import wavfile
 
 __all__ = [
     "load_audio",
-    "load_audio_data",
+    "load_audio_and_samplerate",
     "generate_spectrogram",
     "pad_audio",
 ]
@@ -174,10 +173,10 @@ def load_audio(
         ValueError: If the audio file is stereo.
 
     """
-    sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
+    sample_rate, audio_data, _ = load_audio_and_samplerate(path, time_exp_fact, target_samp_rate, scale, max_duration)
     return sample_rate, audio_data
 
-def load_audio_data(
+def load_audio_and_samplerate(
     path:  Union[
         str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
     ],
@@ -200,6 +199,7 @@ def load_audio_data(
     Returns:
         sampling_rate: The sampling rate of the audio.
         audio_raw: The audio signal in a numpy array.
+        file_sampling_rate: The original sampling rate of the audio
 
     Raises:
         ValueError: If the audio file is stereo.
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index f6e5776..93155b1 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -2,7 +2,6 @@ import json
 import os
 from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO
 
-import librosa
 import numpy as np
 import pandas as pd
 import torch
@@ -759,6 +758,9 @@ def process_file(
 
     config : ProcessingConfiguration
         Configuration for processing.
+    
+    file_id: Optional[str],
+        Give the data an id. Defaults to the basename if path is a str or PathLike; otherwise an id is generated (MD5 of the data for binary streams, else a random UUID).
 
     Returns
     -------
@@ -773,7 +775,7 @@ def process_file(
     spec_slices = []
 
     # load audio file
-    sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
+    sampling_rate, audio_full, file_samp_rate = au.load_audio_and_samplerate(
         path,
         time_exp_fact=config.get("time_expansion", 1) or 1,
         target_samp_rate=config["target_samp_rate"],
diff --git a/tests/test_audio_utils.py b/tests/test_audio_utils.py
index a6c08fb..ed64b15 100644
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@@ -146,9 +146,9 @@ def test_load_audio_using_bytesio():
     with open(path, "rb") as f:
         data = io.BytesIO(f.read())
     
-    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_and_samplerate(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
-    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
+    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_and_samplerate(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
 
     assert expected_sample_rate == sample_rate
     assert exp_file_sample_rate == file_sample_rate

From 4282e2ae70a50848012f495fbfbffd82443a368d Mon Sep 17 00:00:00 2001
From: Kavi <kavi@ecos.au.dk>
Date: Fri, 16 May 2025 15:13:08 +0200
Subject: [PATCH 11/11] Added AudioPath as an alias for the path definition

---
 batdetect2/api.py                  |  9 +++------
 batdetect2/types.py                |  9 ++++++++-
 batdetect2/utils/audio_utils.py    | 10 ++++------
 batdetect2/utils/detector_utils.py | 14 ++++++--------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/batdetect2/api.py b/batdetect2/api.py
index da2d106..86bf55b 100644
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@@ -99,6 +99,7 @@ consult the API documentation in the code.
 import warnings
 from typing import List, Optional, Tuple, BinaryIO, Any, Union
 
+from .types import AudioPath
 import numpy as np
 import torch
 
@@ -244,9 +245,7 @@ def generate_spectrogram(
 
 
 def process_file(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ],
+    path: AudioPath,
     model: DetectionModel = MODEL,
     config: Optional[ProcessingConfiguration] = None,
     device: torch.device = DEVICE,
@@ -256,9 +255,7 @@ def process_file(
 
     Parameters
     ----------
-    path :  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]
+    path : AudioPath
         Path to audio data.
     model : DetectionModel, optional
         Detection model. Uses default model if not specified.
diff --git a/batdetect2/types.py b/batdetect2/types.py
index 57a60b4..3f22862 100644
--- a/batdetect2/types.py
+++ b/batdetect2/types.py
@@ -1,6 +1,10 @@
 """Types used in the code base."""
 
-from typing import List, NamedTuple, Optional, Union
+from typing import List, NamedTuple, Optional, Union, Any, BinaryIO
+
+import audioread
+import os 
+import soundfile as sf
 
 import numpy as np
 import torch
@@ -40,6 +44,9 @@ __all__ = [
     "SpectrogramParameters",
 ]
 
+AudioPath =  Union[
+        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    ]
 
 class SpectrogramParameters(TypedDict):
     """Parameters for generating spectrograms."""
diff --git a/batdetect2/utils/audio_utils.py b/batdetect2/utils/audio_utils.py
index a2cfe44..b89cdca 100644
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@@ -1,6 +1,8 @@
 import warnings
 from typing import Optional, Tuple, Union, Any, BinaryIO
 
+from ..types import AudioPath
+
 import librosa
 import librosa.core.spectrum
 import numpy as np
@@ -146,9 +148,7 @@ def generate_spectrogram(
     return spec, spec_for_viz
 
 def load_audio(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ],
+    path: AudioPath,
     time_exp_fact: float,
     target_samp_rate: int,
     scale: bool = False,
@@ -177,9 +177,7 @@ def load_audio(
     return sample_rate, audio_data
 
 def load_audio_and_samplerate(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ],
+    path: AudioPath,
     time_exp_fact: float,
     target_samp_rate: int,
     scale: bool = False,
diff --git a/batdetect2/utils/detector_utils.py b/batdetect2/utils/detector_utils.py
index 93155b1..f96c5d7 100644
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@@ -2,6 +2,8 @@ import json
 import os
 from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO
 
+from ..types import AudioPath
+
 import numpy as np
 import pandas as pd
 import torch
@@ -735,9 +737,7 @@ def process_audio_array(
 
 
 def process_file(
-    path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ],
+    path: AudioPath,
     model: DetectionModel,
     config: ProcessingConfiguration,
     device: torch.device,
@@ -750,7 +750,7 @@ def process_file(
 
     Parameters
     ----------
-    path : str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
+    path : AudioPath
         Path to audio file.
 
     model : torch.nn.Module
@@ -760,7 +760,7 @@ def process_file(
         Configuration for processing.
     
     file_id: Optional[str],
-        Give the data an id. Defaults to the filename if path is a string. Otherwise
+        Identifier for the data. Defaults to the filename if path is a string; otherwise an MD5 hash is calculated from the binary data.
 
     Returns
     -------
@@ -859,9 +859,7 @@ def process_file(
 
     return results
 
-def _generate_id(path:  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]) -> str:
+def _generate_id(path: AudioPath) -> str:
     """ Generate an id based on the path.
     
     If the path is a str or PathLike it will parsed as the basename.