2025-06-30 15:12:06 +02:00
33 changed files with 1642 additions and 3205 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@ -1,8 +0,0 @@
-[bumpversion]
-current_version = 1.3.0
-commit = True
-tag = True
-
-[bumpversion:file:batdetect2/__init__.py]
-
-[bumpversion:file:pyproject.toml]
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@ -1,29 +1,34 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
 name: Python package

 on:
  push:
-    branches: ["main"]
+    branches: [ "main" ]
  pull_request:
-    branches: ["main"]
+    branches: [ "main" ]

 jobs:
  build:
+
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10"]

    steps:
-      - uses: actions/checkout@v4
-      - name: Install uv
-        uses: astral-sh/setup-uv@v3
-        with:
-          enable-cache: true
-          cache-dependency-glob: "uv.lock"
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
-      - name: Install the project
-        run: uv sync --all-extras --dev
-      - name: Test with pytest
-        run: uv run pytest
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install pytest
+        pip install .
+    - name: Test with pytest
+      run: |
+        pytest
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@ -1,3 +1,11 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
 name: Upload Python Package

 on:
@ -9,22 +17,23 @@ permissions:

 jobs:
  deploy:
+
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v3
-        with:
-          python-version: "3.x"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-      - name: Build package
-        run: python -m build
-      - name: Publish package
-        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-        with:
-          user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -65,7 +65,7 @@ ipython_config.py
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
-.pdm-python
+.pdm.toml

 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@ -102,11 +102,10 @@ experiments/*
 .virtual_documents
 .ipynb_checkpoints
 *.ipynb
-
-# DO Include
 !batdetect2_notebook.ipynb
+
+# Batdetect Models [Include]
 !batdetect2/models/*.pth.tar
-!tests/data/*.wav
-!tests/data/**/*.wav
-notebooks/lightning_logs
-example_data/preprocessed
+
+# Bump2version
+.bumpversion.cfg
--- a/README.md
+++ b/README.md
@ -29,7 +29,7 @@ pip install batdetect2
 ```

 Alternatively, download this code from the repository (by clicking on the green button on top right) and unzip it.
-Once unzipped, run this from extracted folder.
+Once unzipped, run this from extracted folder.

 ```bash
 pip install .
@ -96,30 +96,9 @@ detections, features = api.process_spectrogram(spec)

 You can integrate the detections or the extracted features to your custom analysis pipeline.

-#### Using the Python API with HTTP
-
-```python
-from batdetect2 import api
-import io
-import requests
-
-AUDIO_URL = "<insert your audio url here>"
-
-# Process a whole file from a url
-results = api.process_url(AUDIO_URL)
-
-# Or, load audio and compute spectrograms
-# 'requests.get(AUDIO_URL).content' fetches the raw bytes. You are free to use other sources to fetch the raw bytes
-audio = api.load_audio(io.BytesIO(requests.get(AUDIO_URL).content))
-spec = api.generate_spectrogram(audio)
-
-# And process the audio or the spectrogram with the model
-detections, features, spec = api.process_audio(audio)
-detections, features = api.process_spectrogram(spec)
-```

 ## Training the model on your own data
-Take a look at the steps outlined in finetuning readme [here](batdetect2/finetune/readme.md) for a description of how to train your own model.
+Take a look at the steps outlined in finetuning readme [here](batdetect2/finetune/readme.md) for a description of how to train your own model.


 ## Data and annotations
--- a/batdetect2/init.py
+++ b/batdetect2/init.py
@ -1,6 +1 @@
-import logging
-
-numba_logger = logging.getLogger("numba")
-numba_logger.setLevel(logging.WARNING)
-
-__version__ = "1.3.0"
+__version__ = '1.0.2'
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@ -97,9 +97,8 @@ consult the API documentation in the code.

 """
 import warnings
-from typing import List, Optional, Tuple, BinaryIO, Any, Union
+from typing import List, Optional, Tuple

-from .types import AudioPath
 import numpy as np
 import torch

@ -121,12 +120,6 @@ from batdetect2.types import (
 )
 from batdetect2.utils.detector_utils import list_audio_files, load_model

-import audioread
-import os 
-import soundfile as sf
-import requests
-import io
-
 # Remove warnings from torch
 warnings.filterwarnings("ignore", category=UserWarning, module="torch")

@ -245,82 +238,34 @@ def generate_spectrogram(


 def process_file(
-    path: AudioPath,
+    audio_file: str,
    model: DetectionModel = MODEL,
    config: Optional[ProcessingConfiguration] = None,
    device: torch.device = DEVICE,
-    file_id: Optional[str] = None
 ) -> du.RunResults:
    """Process audio file with model.

    Parameters
    ----------
-    path : AudioPath
-        Path to audio data.
+    audio_file : str
+        Path to audio file.
    model : DetectionModel, optional
        Detection model. Uses default model if not specified.
    config : Optional[ProcessingConfiguration], optional
        Processing configuration, by default None (uses default parameters).
    device : torch.device, optional
        Device to use, by default tries to use GPU if available.
-    file_id: Optional[str],
-        Give the data an id. If path is a string path to a file this can be ignored and
-        the file_id will be the basename of the file.
    """
    if config is None:
        config = CONFIG

    return du.process_file(
-        path,
+        audio_file,
        model,
        config,
        device,
-        file_id
    )

-def process_url(
-    url: str,
-    model: DetectionModel = MODEL,
-    config: Optional[ProcessingConfiguration] = None,
-    device: torch.device = DEVICE,
-    file_id: Optional[str] = None
-) -> du.RunResults:
-    """Process audio file with model.
-
-    Parameters
-    ----------
-    url : str
-        HTTP URL to load the audio data from
-    model : DetectionModel, optional
-        Detection model. Uses default model if not specified.
-    config : Optional[ProcessingConfiguration], optional
-        Processing configuration, by default None (uses default parameters).
-    device : torch.device, optional
-        Device to use, by default tries to use GPU if available.
-    file_id: Optional[str],
-        Give the data an id. Defaults to the URL
-    """
-    if config is None:
-        config = CONFIG
-
-    if file_id is None:
-        file_id = url
-
-    response = requests.get(url)
-
-    # Raise exception on HTTP error
-    response.raise_for_status()
-
-    # Retrieve body as raw bytes
-    raw_audio_data = response.content
-    
-    return du.process_file(
-        io.BytesIO(raw_audio_data),
-        model,
-        config,
-        device,
-        file_id
-    )

 def process_spectrogram(
    spec: torch.Tensor,
--- a/batdetect2/cli.py
+++ b/batdetect2/cli.py
@ -1,12 +1,10 @@
 """BatDetect2 command line interface."""
-
 import os

 import click

 from batdetect2 import api
 from batdetect2.detector.parameters import DEFAULT_MODEL_PATH
-from batdetect2.types import ProcessingConfiguration
 from batdetect2.utils.detector_utils import save_results_to_file

 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
@ -45,12 +43,6 @@ def cli():
    default=False,
    help="Extracts CNN call features",
 )
-@click.option(
-    "--chunk_size",
-    type=float,
-    default=2,
-    help="Specifies the duration of chunks in seconds. BatDetect2 will divide longer files into smaller chunks and process them independently. Larger chunks increase computation time and memory usage but may provide more contextual information for inference.",
-)
@click.option(
    "--spec_features",
    is_flag=True,
@ -85,8 +77,6 @@ def detect(
    audio_dir: str,
    ann_dir: str,
    detection_threshold: float,
-    time_expansion_factor: int,
-    chunk_size: float,
    **args,
 ):
    """Detect bat calls in files in AUDIO_DIR and save predictions to ANN_DIR.
@ -113,23 +103,16 @@ def detect(
        **{
            **params,
            **args,
-            "time_expansion": time_expansion_factor,
            "spec_slices": False,
-            "chunk_size": chunk_size,
+            "chunk_size": 2,
            "detection_threshold": detection_threshold,
        }
    )

-    if not args["quiet"]:
-        print_config(config)
-
    # process files
    error_files = []
-    for index, audio_file in enumerate(files):
+    for audio_file in files:
        try:
-            if not args["quiet"]:
-                click.echo(f"\n{index} {audio_file}")
-
            results = api.process_file(audio_file, model, config=config)

            if args["save_preds_if_empty"] or (
@ -137,9 +120,10 @@ def detect(
            ):
                results_path = audio_file.replace(audio_dir, ann_dir)
                save_results_to_file(results, results_path)
-        except (RuntimeError, ValueError, LookupError, EOFError) as err:
+        except (RuntimeError, ValueError, LookupError) as err:
            error_files.append(audio_file)
-            click.secho(f"Error processing file {audio_file}: {err}", fg="red")
+            click.secho(f"Error processing file!: {err}", fg="red")
+            raise err

    click.echo(f"\nResults saved to: {ann_dir}")

@ -149,13 +133,5 @@ def detect(
            click.echo(f"  {err}")


-def print_config(config: ProcessingConfiguration):
-    """Print the processing configuration."""
-    click.echo("\nProcessing Configuration:")
-    click.echo(f"Time Expansion Factor: {config.get('time_expansion')}")
-    click.echo(f"Detection Threshold: {config.get('detection_threshold')}")
-    click.echo(f"Chunk Size: {config.get('chunk_size')}s")
-
-
 if __name__ == "__main__":
    cli()
--- a/batdetect2/detector/compute_features.py
+++ b/batdetect2/detector/compute_features.py
@ -1,27 +1,22 @@
-"""Functions to compute features from predictions."""
-from typing import Dict, Optional
-
 import numpy as np

-from batdetect2 import types
-from batdetect2.detector.parameters import MAX_FREQ_HZ, MIN_FREQ_HZ
-

 def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq):
-    """Convert spectrogram index to frequency in Hz.""" ""
    spec_ind = spec_height - spec_ind
    return round(
        (spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2
    )


-def extract_spec_slices(spec, pred_nms):
-    """Extract spectrogram slices from spectrogram.
-
-    The slices are extracted based on detected call locations.
+def extract_spec_slices(spec, pred_nms, params):
    """
+    Extracts spectrogram slices from spectrogram based on detected call locations.
+    """
+
    x_pos = pred_nms["x_pos"]
+    y_pos = pred_nms["y_pos"]
    bb_width = pred_nms["bb_width"]
+    bb_height = pred_nms["bb_height"]
    slices = []

    # add 20% padding either side of call
@ -40,273 +35,100 @@ def extract_spec_slices(spec, pred_nms):
    return slices


-def compute_duration(
-    prediction: types.Prediction,
-    **_,
-) -> float:
-    """Compute duration of call in seconds."""
-    return round(prediction["end_time"] - prediction["start_time"], 5)
+def get_feature_names():
+    feature_names = [
+        "duration",
+        "low_freq_bb",
+        "high_freq_bb",
+        "bandwidth",
+        "max_power_bb",
+        "max_power",
+        "max_power_first",
+        "max_power_second",
+        "call_interval",
+    ]
+    return feature_names


-def compute_low_freq(
-    prediction: types.Prediction,
-    **_,
-) -> float:
-    """Compute lowest frequency in call in Hz."""
-    return int(prediction["low_freq"])
-
-
-def compute_high_freq(
-    prediction: types.Prediction,
-    **_,
-) -> float:
-    """Compute highest frequency in call in Hz."""
-    return int(prediction["high_freq"])
-
-
-def compute_bandwidth(
-    prediction: types.Prediction,
-    **_,
-) -> float:
-    """Compute bandwidth of call in Hz."""
-    return int(prediction["high_freq"] - prediction["low_freq"])
-
-
-def compute_max_power_bb(
-    prediction: types.Prediction,
-    spec: Optional[np.ndarray] = None,
-    min_freq: int = MIN_FREQ_HZ,
-    max_freq: int = MAX_FREQ_HZ,
-    **_,
-) -> float:
-    """Compute frequency with maximum power in call in Hz.
-
-    This is the frequency with the maximum power in the bounding box of the
-    call.
+def get_feats(spec, pred_nms, params):
    """
-    if spec is None:
-        return np.nan
-
-    x_start = max(0, prediction["x_pos"])
-    x_end = min(
-        spec.shape[1] - 1, prediction["x_pos"] + prediction["bb_width"]
-    )
-
-    # y low is the lowest freq but it will have a higher value due to array
-    # starting at 0 at top
-    y_low = min(spec.shape[0] - 1, prediction["y_pos"])
-    y_high = max(0, prediction["y_pos"] - prediction["bb_height"])
-
-    spec_bb = spec[y_high:y_low, x_start:x_end]
-    power_per_freq_band = np.sum(spec_bb, axis=1)
-
-    try:
-        max_power_ind = np.argmax(power_per_freq_band)
-    except ValueError:
-        # If the call is too short, the bounding box might be empty.
-        # In this case, return NaN.
-        return np.nan
-
-    return int(
-        convert_int_to_freq(
-            y_high + max_power_ind,
-            spec.shape[0],
-            min_freq,
-            max_freq,
-        )
-    )
-
-
-def compute_max_power(
-    prediction: types.Prediction,
-    spec: Optional[np.ndarray] = None,
-    min_freq: int = MIN_FREQ_HZ,
-    max_freq: int = MAX_FREQ_HZ,
-    **_,
-) -> float:
-    """Compute frequency with maximum power in during the call in Hz."""
-    if spec is None:
-        return np.nan
-
-    x_start = max(0, prediction["x_pos"])
-    x_end = min(
-        spec.shape[1] - 1, prediction["x_pos"] + prediction["bb_width"]
-    )
-    spec_call = spec[:, x_start:x_end]
-    power_per_freq_band = np.sum(spec_call, axis=1)
-    max_power_ind = np.argmax(power_per_freq_band)
-    return int(
-        convert_int_to_freq(
-            max_power_ind,
-            spec.shape[0],
-            min_freq,
-            max_freq,
-        )
-    )
-
-
-def compute_max_power_first(
-    prediction: types.Prediction,
-    spec: Optional[np.ndarray] = None,
-    min_freq: int = MIN_FREQ_HZ,
-    max_freq: int = MAX_FREQ_HZ,
-    **_,
-) -> float:
-    """Compute frequency with maximum power in first half of call in Hz."""
-    if spec is None:
-        return np.nan
-
-    x_start = max(0, prediction["x_pos"])
-    x_end = min(
-        spec.shape[1] - 1, prediction["x_pos"] + prediction["bb_width"]
-    )
-    spec_call = spec[:, x_start:x_end]
-    first_half = spec_call[:, : int(spec_call.shape[1] / 2)]
-    power_per_freq_band = np.sum(first_half, axis=1)
-    max_power_ind = np.argmax(power_per_freq_band)
-    return int(
-        convert_int_to_freq(
-            max_power_ind,
-            spec.shape[0],
-            min_freq,
-            max_freq,
-        )
-    )
-
-
-def compute_max_power_second(
-    prediction: types.Prediction,
-    spec: Optional[np.ndarray] = None,
-    min_freq: int = MIN_FREQ_HZ,
-    max_freq: int = MAX_FREQ_HZ,
-    **_,
-) -> float:
-    """Compute frequency with maximum power in second half of call in Hz."""
-    if spec is None:
-        return np.nan
-
-    x_start = max(0, prediction["x_pos"])
-    x_end = min(
-        spec.shape[1] - 1, prediction["x_pos"] + prediction["bb_width"]
-    )
-    spec_call = spec[:, x_start:x_end]
-    second_half = spec_call[:, int(spec_call.shape[1] / 2) :]
-    power_per_freq_band = np.sum(second_half, axis=1)
-    max_power_ind = np.argmax(power_per_freq_band)
-    return int(
-        convert_int_to_freq(
-            max_power_ind,
-            spec.shape[0],
-            min_freq,
-            max_freq,
-        )
-    )
-
-
-def compute_call_interval(
-    prediction: types.Prediction,
-    previous: Optional[types.Prediction] = None,
-    **_,
-) -> float:
-    """Compute time between this call and the previous call in seconds."""
-    if previous is None:
-        return np.nan
-    return round(prediction["start_time"] - previous["end_time"], 5)
-
-
-# NOTE: The order of the features in this dictionary is important. The
-# features are extracted in this order and the order of the columns in the
-# output csv file is determined by this order. In order to avoid breaking
-# changes in the output csv file, new features should be added to the end of
-# this dictionary.
-FEATURES: Dict[str, types.FeatureExtractor] = {
-    "duration": compute_duration,
-    "low_freq_bb": compute_low_freq,
-    "high_freq_bb": compute_high_freq,
-    "bandwidth": compute_bandwidth,
-    "max_power_bb": compute_max_power_bb,
-    "max_power": compute_max_power,
-    "max_power_first": compute_max_power_first,
-    "max_power_second": compute_max_power_second,
-    "call_interval": compute_call_interval,
-}
-
-
-def get_feats(
-    spec: np.ndarray,
-    pred_nms: types.PredictionResults,
-    params: types.FeatureExtractionParameters,
-):
-    """Extract features from spectrogram based on detected call locations.
-
-    The features extracted are:
-
-    - duration: duration of call in seconds
-    - low_freq: lowest frequency in call in kHz
-    - high_freq: highest frequency in call in kHz
-    - bandwidth: high_freq - low_freq
-    - max_power_bb: frequency with maximum power in call in kHz
-    - max_power: frequency with maximum power in spectrogram in kHz
-    - max_power_first: frequency with maximum power in first half of call in
-    kHz.
-    - max_power_second: frequency with maximum power in second half of call in
-    kHz.
-    - call_interval: time between this call and the previous call in seconds
-
-    Consider re-extracting spectrogram for this to get better temporal
-    resolution.
+    Extracts features from spectrogram based on detected call locations.
+    Consider re-extracting spectrogram for this to get better temporal resolution.

    For more possible features check out:
    https://github.com/YvesBas/Tadarida-D/blob/master/Manual_Tadarida-D.odt
-
-    Parameters
-    ----------
-    spec : np.ndarray
-        Spectrogram from which to extract features.
-
-    pred_nms : types.PredictionResults
-        Information about detected calls from which to extract features.
-
-    params : types.FeatureExtractionParameters
-        Parameters for feature extraction.
-
-    Returns
-    -------
-    features : np.ndarray
-        Extracted features for each detected call. Shape is
-        (num_detections, num_features).
    """
+
+    x_pos = pred_nms["x_pos"]
+    y_pos = pred_nms["y_pos"]
+    bb_width = pred_nms["bb_width"]
+    bb_height = pred_nms["bb_height"]
+
+    feature_names = get_feature_names()
    num_detections = len(pred_nms["det_probs"])
-    features = np.empty((num_detections, len(FEATURES)), dtype=np.float32)
-    previous = None
+    features = (
+        np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1
+    )

-    for row in range(num_detections):
-        prediction: types.Prediction = {
-            "det_prob": float(pred_nms["det_probs"][row]),
-            "class_prob": pred_nms["class_probs"][:, row],
-            "start_time": float(pred_nms["start_times"][row]),
-            "end_time": float(pred_nms["end_times"][row]),
-            "low_freq": float(pred_nms["low_freqs"][row]),
-            "high_freq": float(pred_nms["high_freqs"][row]),
-            "x_pos": int(pred_nms["x_pos"][row]),
-            "y_pos": int(pred_nms["y_pos"][row]),
-            "bb_width": int(pred_nms["bb_width"][row]),
-            "bb_height": int(pred_nms["bb_height"][row]),
-        }
+    for ff in range(num_detections):
+        x_start = int(np.maximum(0, x_pos[ff]))
+        x_end = int(
+            np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff]))
+        )
+        # y low is the lowest freq but it will have a higher value due to array starting at 0 at top
+        y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff]))
+        y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff])))
+        spec_slice = spec[:, x_start:x_end]

-        for col, feature in enumerate(FEATURES.values()):
-            features[row, col] = feature(
-                prediction,
-                previous=previous,
-                spec=spec,
-                **params,
+        if spec_slice.shape[1] > 1:
+            features[ff, 0] = round(
+                pred_nms["end_times"][ff] - pred_nms["start_times"][ff], 5
+            )
+            features[ff, 1] = int(pred_nms["low_freqs"][ff])
+            features[ff, 2] = int(pred_nms["high_freqs"][ff])
+            features[ff, 3] = int(
+                pred_nms["high_freqs"][ff] - pred_nms["low_freqs"][ff]
+            )
+            features[ff, 4] = int(
+                convert_int_to_freq(
+                    y_high + spec_slice[y_high:y_low, :].sum(1).argmax(),
+                    spec.shape[0],
+                    params["min_freq"],
+                    params["max_freq"],
+                )
+            )
+            features[ff, 5] = int(
+                convert_int_to_freq(
+                    spec_slice.sum(1).argmax(),
+                    spec.shape[0],
+                    params["min_freq"],
+                    params["max_freq"],
+                )
+            )
+            hlf_val = spec_slice.shape[1] // 2
+
+            features[ff, 6] = int(
+                convert_int_to_freq(
+                    spec_slice[:, :hlf_val].sum(1).argmax(),
+                    spec.shape[0],
+                    params["min_freq"],
+                    params["max_freq"],
+                )
+            )
+            features[ff, 7] = int(
+                convert_int_to_freq(
+                    spec_slice[:, hlf_val:].sum(1).argmax(),
+                    spec.shape[0],
+                    params["min_freq"],
+                    params["max_freq"],
+                )
            )

-        previous = prediction
+            if ff > 0:
+                features[ff, 8] = round(
+                    pred_nms["start_times"][ff]
+                    - pred_nms["start_times"][ff - 1],
+                    5,
+                )

    return features
-
-
-def get_feature_names():
-    """Get names of features in the order they are extracted."""
-    return list(FEATURES.keys())
--- a/batdetect2/train/train_utils.py
+++ b/batdetect2/train/train_utils.py
@ -1,5 +1,7 @@
 import glob
 import json
+import os
+import random

 import numpy as np

--- a/batdetect2/types.py
+++ b/batdetect2/types.py
@ -1,10 +1,5 @@
 """Types used in the code base."""
-
-from typing import List, NamedTuple, Optional, Union, Any, BinaryIO
-
-import audioread
-import os 
-import soundfile as sf
+from typing import List, NamedTuple, Optional

 import numpy as np
 import torch
@ -22,7 +17,7 @@ except ImportError:


 try:
-    from typing import NotRequired  # type: ignore
+    from typing import NotRequired
 except ImportError:
    from typing_extensions import NotRequired

@ -30,13 +25,10 @@ except ImportError:
 __all__ = [
    "Annotation",
    "DetectionModel",
-    "FeatureExtractionParameters",
-    "FeatureExtractor",
    "FileAnnotations",
    "ModelOutput",
    "ModelParameters",
    "NonMaximumSuppressionConfig",
-    "Prediction",
    "PredictionResults",
    "ProcessingConfiguration",
    "ResultParams",
@ -44,9 +36,6 @@ __all__ = [
    "SpectrogramParameters",
 ]

-AudioPath =  Union[
-        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
-    ]

 class SpectrogramParameters(TypedDict):
    """Parameters for generating spectrograms."""
@ -323,40 +312,6 @@ class ModelOutput(NamedTuple):
    """Tensor with intermediate features."""


-class Prediction(TypedDict):
-    """Singe prediction."""
-
-    det_prob: float
-    """Detection probability."""
-
-    x_pos: int
-    """X position of the detection in pixels."""
-
-    y_pos: int
-    """Y position of the detection in pixels."""
-
-    bb_width: int
-    """Width of the detection in pixels."""
-
-    bb_height: int
-    """Height of the detection in pixels."""
-
-    start_time: float
-    """Start time of the detection in seconds."""
-
-    end_time: float
-    """End time of the detection in seconds."""
-
-    low_freq: float
-    """Low frequency of the detection in Hz."""
-
-    high_freq: float
-    """High frequency of the detection in Hz."""
-
-    class_prob: np.ndarray
-    """Vector holding the probability of each class."""
-
-
 class PredictionResults(TypedDict):
    """Results of the prediction.

@ -463,16 +418,6 @@ class NonMaximumSuppressionConfig(TypedDict):
    """Threshold for detection probability."""


-class FeatureExtractionParameters(TypedDict):
-    """Parameters that control the feature extraction function."""
-
-    min_freq: int
-    """Minimum frequency to consider in Hz."""
-
-    max_freq: int
-    """Maximum frequency to consider in Hz."""
-
-
 class HeatmapParameters(TypedDict):
    """Parameters that control the heatmap generation function."""

@ -528,11 +473,3 @@ class AnnotationGroup(TypedDict):

    y_inds: NotRequired[np.ndarray]
    """Y coordinate of the annotations in the spectrogram."""
-
-
-class FeatureExtractor(Protocol):
-    """Protocol for feature extractors."""
-
-    def __call__(self, prediction: Prediction, **kwargs) -> Union[float, int]:
-        """Extract features from a prediction."""
-        ...
--- a/batdetect2/utils/audio_utils.py
+++ b/batdetect2/utils/audio_utils.py
@ -1,67 +1,34 @@
 import warnings
-from typing import Optional, Tuple, Union, Any, BinaryIO
-
-from ..types import AudioPath
+from typing import Optional, Tuple

 import librosa
 import librosa.core.spectrum
 import numpy as np
 import torch

-import audioread
-import os 
-import soundfile as sf
-
-from batdetect2.detector import parameters
-
 from . import wavfile

 __all__ = [
    "load_audio",
-    "load_audio_and_samplerate",
    "generate_spectrogram",
    "pad_audio",
 ]


-def time_to_x_coords(
-    time_in_file: float,
-    samplerate: float = parameters.TARGET_SAMPLERATE_HZ,
-    window_duration: float = parameters.FFT_WIN_LENGTH_S,
-    window_overlap: float = parameters.FFT_OVERLAP,
-) -> float:
-    nfft = np.floor(window_duration * samplerate)  # int() uses floor
-    noverlap = np.floor(window_overlap * nfft)
-    return (time_in_file * samplerate - noverlap) / (nfft - noverlap)
+def time_to_x_coords(time_in_file, sampling_rate, fft_win_length, fft_overlap):
+    nfft = np.floor(fft_win_length * sampling_rate)  # int() uses floor
+    noverlap = np.floor(fft_overlap * nfft)
+    return (time_in_file * sampling_rate - noverlap) / (nfft - noverlap)


-def x_coords_to_time(
-    x_pos: int,
-    samplerate: float = parameters.TARGET_SAMPLERATE_HZ,
-    window_duration: float = parameters.FFT_WIN_LENGTH_S,
-    window_overlap: float = parameters.FFT_OVERLAP,
-) -> float:
-    n_fft = np.floor(window_duration * samplerate)
-    n_overlap = np.floor(window_overlap * n_fft)
-    n_step = n_fft - n_overlap
-    return ((x_pos * n_step) + n_overlap) / samplerate
+# NOTE this is also defined in post_process
+def x_coords_to_time(x_pos, sampling_rate, fft_win_length, fft_overlap):
+    nfft = np.floor(fft_win_length * sampling_rate)
+    noverlap = np.floor(fft_overlap * nfft)
+    return ((x_pos * (nfft - noverlap)) + noverlap) / sampling_rate
    # return (1.0 - fft_overlap) * fft_win_length * (x_pos + 0.5)  # 0.5 is for center of temporal window


-def x_coord_to_sample(
-    x_pos: int,
-    samplerate: float = parameters.TARGET_SAMPLERATE_HZ,
-    window_duration: float = parameters.FFT_WIN_LENGTH_S,
-    window_overlap: float = parameters.FFT_OVERLAP,
-    resize_factor: float = parameters.RESIZE_FACTOR,
-) -> int:
-    n_fft = np.floor(window_duration * samplerate)
-    n_overlap = np.floor(window_overlap * n_fft)
-    n_step = n_fft - n_overlap
-    x_pos = int(x_pos / resize_factor)
-    return int((x_pos * n_step) + n_overlap)
-
-
 def generate_spectrogram(
    audio,
    sampling_rate,
@ -147,20 +114,21 @@ def generate_spectrogram(

    return spec, spec_for_viz

+
 def load_audio(
-    path: AudioPath,
+    audio_file: str,
    time_exp_fact: float,
    target_samp_rate: int,
    scale: bool = False,
    max_duration: Optional[float] = None,
-) -> Tuple[int, np.ndarray ]:
+) -> Tuple[int, np.ndarray]:
    """Load an audio file and resample it to the target sampling rate.

    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
    Only mono files are supported.

    Args:
-        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
+        audio_file (str): Path to the audio file.
        target_samp_rate (int): Target sampling rate.
        scale (bool): Whether to scale the audio to [-1, 1].
        max_duration (float): Maximum duration of the audio in seconds.
@ -172,50 +140,20 @@ def load_audio(
    Raises:
        ValueError: If the audio file is stereo.

-    """
-    sample_rate, audio_data, _ = load_audio_and_samplerate(path, time_exp_fact, target_samp_rate, scale, max_duration)
-    return sample_rate, audio_data
-
-def load_audio_and_samplerate(
-    path: AudioPath,
-    time_exp_fact: float,
-    target_samp_rate: int,
-    scale: bool = False,
-    max_duration: Optional[float] = None,
-) -> Tuple[int, np.ndarray, Union[float, int]]:
-    """Load an audio file and resample it to the target sampling rate.
-
-    The audio is also scaled to [-1, 1] and clipped to the maximum duration.
-    Only mono files are supported.
-
-    Args:
-        path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
-        target_samp_rate (int): Target sampling rate.
-        scale (bool): Whether to scale the audio to [-1, 1].
-        max_duration (float): Maximum duration of the audio in seconds.
-
-    Returns:
-        sampling_rate: The sampling rate of the audio.
-        audio_raw: The audio signal in a numpy array.
-        file_sampling_rate: The original sampling rate of the audio
-
-    Raises:
-        ValueError: If the audio file is stereo.
-
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=wavfile.WavFileWarning)
        # sampling_rate, audio_raw = wavfile.read(audio_file)
-        audio_raw, file_sampling_rate = librosa.load(
-            path,
+        audio_raw, sampling_rate = librosa.load(
+            audio_file,
            sr=None,
            dtype=np.float32,
        )
-    
+
    if len(audio_raw.shape) > 1:
        raise ValueError("Currently does not handle stereo files")

-    sampling_rate = file_sampling_rate * time_exp_fact
+    sampling_rate = sampling_rate * time_exp_fact

    # resample - need to do this after correcting for time expansion
    sampling_rate_old = sampling_rate
@ -243,121 +181,58 @@ def load_audio_and_samplerate(
        audio_raw = audio_raw - audio_raw.mean()
        audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)

-    return sampling_rate, audio_raw, file_sampling_rate
-
-
-def compute_spectrogram_width(
-    length: int,
-    samplerate: int = parameters.TARGET_SAMPLERATE_HZ,
-    window_duration: float = parameters.FFT_WIN_LENGTH_S,
-    window_overlap: float = parameters.FFT_OVERLAP,
-    resize_factor: float = parameters.RESIZE_FACTOR,
-) -> int:
-    n_fft = int(window_duration * samplerate)
-    n_overlap = int(window_overlap * n_fft)
-    n_step = n_fft - n_overlap
-    width = (length - n_overlap) // n_step
-    return int(width * resize_factor)
+    return sampling_rate, audio_raw


 def pad_audio(
-    audio: np.ndarray,
-    samplerate: int = parameters.TARGET_SAMPLERATE_HZ,
-    window_duration: float = parameters.FFT_WIN_LENGTH_S,
-    window_overlap: float = parameters.FFT_OVERLAP,
-    resize_factor: float = parameters.RESIZE_FACTOR,
-    divide_factor: int = parameters.SPEC_DIVIDE_FACTOR,
-    fixed_width: Optional[int] = None,
+    audio_raw,
+    fs,
+    ms,
+    overlap_perc,
+    resize_factor,
+    divide_factor,
+    fixed_width=None,
 ):
-    """Pad audio to be evenly divisible by `divide_factor`.
+    # Adds zeros to the end of the raw data so that the generated spectrogram
+    # will be evenly divisible by `divide_factor`
+    # Also deals with very short audio clips and fixed_width during training

-    This function pads the audio signal with zeros to ensure that the
-    generated spectrogram length will be evenly divisible by `divide_factor`.
-    This is important for the model to work correctly.
+    # This code could be clearer, clean up
+    nfft = int(ms * fs)
+    noverlap = int(overlap_perc * nfft)
+    step = nfft - noverlap
+    min_size = int(divide_factor * (1.0 / resize_factor))
+    spec_width = (audio_raw.shape[0] - noverlap) // step
+    spec_width_rs = spec_width * resize_factor

-    This `divide_factor` comes from the model architecture as it downscales
-    the spectrogram by this factor, so the input must be divisible by this
-    integer number.
-
-    Parameters
-    ----------
-    audio : np.ndarray
-        The audio signal.
-    samplerate : int
-        The sampling rate of the audio signal.
-    window_size : float
-        The window size in seconds used for the spectrogram computation.
-    window_overlap : float
-        The overlap between windows in the spectrogram computation.
-    resize_factor : float
-        This factor is used to resize the spectrogram after the STFT
-        computation. Default is 0.5 which means that the spectrogram will be
-        reduced by half. Important to take into account for the final size of
-        the spectrogram.
-    divide_factor : int
-        The factor by which the spectrogram will be divided.
-    fixed_width : int, optional
-        If provided, the audio will be padded or cut so that the resulting
-        spectrogram width will be equal to this value.
-
-    Returns
-    -------
-    np.ndarray
-        The padded audio signal.
-    """
-    spec_width = compute_spectrogram_width(
-        audio.shape[0],
-        samplerate=samplerate,
-        window_duration=window_duration,
-        window_overlap=window_overlap,
-        resize_factor=resize_factor,
-    )
-
-    if fixed_width:
-        target_samples = x_coord_to_sample(
-            fixed_width,
-            samplerate=samplerate,
-            window_duration=window_duration,
-            window_overlap=window_overlap,
-            resize_factor=resize_factor,
+    if fixed_width is not None and spec_width < fixed_width:
+        # too small
+        # used during training to ensure all the batches are the same size
+        diff = fixed_width * step + noverlap - audio_raw.shape[0]
+        audio_raw = np.hstack(
+            (audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
        )

-        if spec_width < fixed_width:
-            # need to be at least min_size
-            diff = target_samples - audio.shape[0]
-            return np.hstack((audio, np.zeros(diff, dtype=audio.dtype)))
+    elif fixed_width is not None and spec_width > fixed_width:
+        # too big
+        # used during training to ensure all the batches are the same size
+        diff = fixed_width * step + noverlap - audio_raw.shape[0]
+        audio_raw = audio_raw[:diff]

-        if spec_width > fixed_width:
-            return audio[:target_samples]
-
-        return audio
-
-    min_width = int(divide_factor / resize_factor)
-
-    if spec_width < min_width:
-        target_samples = x_coord_to_sample(
-            min_width,
-            samplerate=samplerate,
-            window_duration=window_duration,
-            window_overlap=window_overlap,
-            resize_factor=resize_factor,
+    elif (
+        spec_width_rs < min_size
+        or (np.floor(spec_width_rs) % divide_factor) != 0
+    ):
+        # need to be at least min_size
+        div_amt = np.ceil(spec_width_rs / float(divide_factor))
+        div_amt = np.maximum(1, div_amt)
+        target_size = int(div_amt * divide_factor * (1.0 / resize_factor))
+        diff = target_size * step + noverlap - audio_raw.shape[0]
+        audio_raw = np.hstack(
+            (audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
        )
-        diff = target_samples - audio.shape[0]
-        return np.hstack((audio, np.zeros(diff, dtype=audio.dtype)))

-    if (spec_width % divide_factor) == 0:
-        return audio
-
-    target_width = int(np.ceil(spec_width / divide_factor)) * divide_factor
-    target_samples = x_coord_to_sample(
-        target_width,
-        samplerate=samplerate,
-        window_duration=window_duration,
-        window_overlap=window_overlap,
-        resize_factor=resize_factor,
-    )
-    diff = target_samples - audio.shape[0]
-    return np.hstack((audio, np.zeros(diff, dtype=audio.dtype)))
+    return audio_raw


 def gen_mag_spectrogram(x, fs, ms, overlap_perc):
@ -372,11 +247,7 @@ def gen_mag_spectrogram(x, fs, ms, overlap_perc):

    # compute spec
    spec, _ = librosa.core.spectrum._spectrogram(
-        y=x,
-        power=1,
-        n_fft=nfft,
-        hop_length=step,
-        center=False,
+        y=x, power=1, n_fft=nfft, hop_length=step, center=False
    )

    # remove DC component and flip vertical orientation
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@ -1,19 +1,12 @@
 import json
 import os
-from typing import Any, Iterator, List, Optional, Tuple, Union, BinaryIO
-
-from ..types import AudioPath
+from typing import Any, Iterator, List, Optional, Tuple, Union

 import numpy as np
 import pandas as pd
 import torch
 import torch.nn.functional as F

-try:
-    from numpy.exceptions import AxisError
-except ImportError:
-    from numpy import AxisError  # type: ignore
-
 import batdetect2.detector.compute_features as feats
 import batdetect2.detector.post_process as pp
 import batdetect2.utils.audio_utils as au
@ -32,13 +25,6 @@ from batdetect2.types import (
    SpectrogramParameters,
 )

-import audioread
-import os 
-import io
-import soundfile as sf
-import hashlib
-import uuid
-
 __all__ = [
    "load_model",
    "list_audio_files",
@ -80,6 +66,7 @@ def list_audio_files(ip_dir: str) -> List[str]:

    Raises:
        FileNotFoundError: Input directory not found.
+
    """
    matches = []
    for root, _, filenames in os.walk(ip_dir):
@ -93,7 +80,6 @@ def load_model(
    model_path: str = DEFAULT_MODEL_PATH,
    load_weights: bool = True,
    device: Optional[torch.device] = None,
-    weights_only: bool = True,
 ) -> Tuple[DetectionModel, ModelParameters]:
    """Load model from file.

@ -114,11 +100,7 @@ def load_model(
    if not os.path.isfile(model_path):
        raise FileNotFoundError("Model file not found.")

-    net_params = torch.load(
-        model_path,
-        map_location=device,
-        weights_only=weights_only,
-    )
+    net_params = torch.load(model_path, map_location=device)

    params = net_params["params"]

@ -161,19 +143,7 @@ def load_model(


 def _merge_results(predictions, spec_feats, cnn_feats, spec_slices):
-    predictions_m = {
-        "det_probs": np.array([]),
-        "x_pos": np.array([]),
-        "y_pos": np.array([]),
-        "bb_widths": np.array([]),
-        "bb_heights": np.array([]),
-        "start_times": np.array([]),
-        "end_times": np.array([]),
-        "low_freqs": np.array([]),
-        "high_freqs": np.array([]),
-        "class_probs": np.array([]),
-    }
-
+    predictions_m = {}
    num_preds = np.sum([len(pp["det_probs"]) for pp in predictions])

    if num_preds > 0:
@ -181,6 +151,10 @@ def _merge_results(predictions, spec_feats, cnn_feats, spec_slices):
            predictions_m[key] = np.hstack(
                [pp[key] for pp in predictions if pp["det_probs"].shape[0] > 0]
            )
+    else:
+        # hack for the case with no detected calls, since we still need some of
+        # the key names in the dict
+        predictions_m = predictions[0]

    if len(spec_feats) > 0:
        spec_feats = np.vstack(spec_feats)
@ -252,19 +226,11 @@ def format_single_result(
    Returns:
        dict: Results in the format expected by the annotation tool.
    """
-    try:
-        # Get a single class prediction for the file
-        class_overall = pp.overall_class_pred(
-            predictions["det_probs"],
-            predictions["class_probs"],
-        )
-        class_name = class_names[np.argmax(class_overall)]
-        annotations = get_annotations_from_preds(predictions, class_names)
-    except (AxisError, ValueError):
-        # No detections
-        class_overall = np.zeros(len(class_names))
-        class_name = "None"
-        annotations = []
+    # Get a single class prediction for the file
+    class_overall = pp.overall_class_pred(
+        predictions["det_probs"],
+        predictions["class_probs"],
+    )

    return {
        "id": file_id,
@ -273,8 +239,8 @@ def format_single_result(
        "notes": "Automatically generated.",
        "time_exp": time_exp,
        "duration": round(float(duration), 4),
-        "annotation": annotations,
-        "class_name": class_name,
+        "annotation": get_annotations_from_preds(predictions, class_names),
+        "class_name": class_names[np.argmax(class_overall)],
    }


@ -287,7 +253,6 @@ def convert_results(
    spec_feats,
    cnn_feats,
    spec_slices,
-    nyquist_freq: Optional[float] = None,
 ) -> RunResults:
    """Convert results to dictionary as expected by the annotation tool.

@ -303,8 +268,8 @@ def convert_results(

    Returns:
        dict: Dictionary with results.
-    """

+    """
    pred_dict = format_single_result(
        file_id,
        time_exp,
@ -313,14 +278,6 @@ def convert_results(
        params["class_names"],
    )

-    # Remove high frequency detections
-    if nyquist_freq is not None:
-        pred_dict["annotation"] = [
-            pred
-            for pred in pred_dict["annotation"]
-            if pred["high_freq"] <= nyquist_freq
-        ]
-
    # combine into final results dictionary
    results: RunResults = {
        "pred_dict": pred_dict,
@ -353,6 +310,7 @@ def save_results_to_file(results, op_path: str) -> None:
    Args:
        results (dict): Results.
        op_path (str): Output path.
+
    """
    # make directory if it does not exist
    if not os.path.isdir(os.path.dirname(op_path)):
@ -514,6 +472,7 @@ def iterate_over_chunks(
    chunk_start : float
        Start time of chunk in seconds.
    chunk : np.ndarray
+
    """
    nsamples = audio.shape[0]
    duration_full = nsamples / samplerate
@ -719,6 +678,7 @@ def process_audio_array(
        The array is of shape (num_detections, num_features).
    spec : torch.Tensor
        Spectrogram of the audio used as input.
+
    """
    pred_nms, features, spec = _process_audio_array(
        audio,
@ -737,11 +697,10 @@ def process_audio_array(


 def process_file(
-    path: AudioPath,
+    audio_file: str,
    model: DetectionModel,
    config: ProcessingConfiguration,
    device: torch.device,
-    file_id: Optional[str] = None
 ) -> Union[RunResults, Any]:
    """Process a single audio file with detection model.

@ -750,7 +709,7 @@ def process_file(

    Parameters
    ----------
-    path : AudioPath
+    audio_file : str
        Path to audio file.

    model : torch.nn.Module
@ -758,9 +717,6 @@ def process_file(

    config : ProcessingConfiguration
        Configuration for processing.
-    
-    file_id: Optional[str],
-        Give the data an id. Defaults to the filename if path is a string. Otherwise an md5 will be calculated from the binary data.

    Returns
    -------
@ -775,16 +731,15 @@ def process_file(
    spec_slices = []

    # load audio file
-    sampling_rate, audio_full, file_samp_rate = au.load_audio_and_samplerate(
-        path,
+    print("time_exp_fact", config.get("time_expansion", 1) or 1)
+    sampling_rate, audio_full = au.load_audio(
+        audio_file,
        time_exp_fact=config.get("time_expansion", 1) or 1,
        target_samp_rate=config["target_samp_rate"],
        scale=config["scale_raw_audio"],
        max_duration=config.get("max_duration"),
    )

-    orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
-
    # loop through larger file and split into chunks
    # TODO: fix so that it overlaps correctly and takes care of
    # duplicate detections at borders
@ -803,7 +758,7 @@ def process_file(
        )

        # convert to numpy
-        spec_np = spec.detach().cpu().numpy().squeeze()
+        spec_np = spec.detach().cpu().numpy()

        # add chunk time to start and end times
        pred_nms["start_times"] += chunk_time
@ -823,7 +778,9 @@ def process_file(

        if config["spec_slices"]:
            # FIX: This is not currently working. Returns empty slices
-            spec_slices.extend(feats.extract_spec_slices(spec_np, pred_nms))
+            spec_slices.extend(
+                feats.extract_spec_slices(spec_np, pred_nms, config)
+            )

    # Merge results from chunks
    predictions, spec_feats, cnn_feats, spec_slices = _merge_results(
@ -833,13 +790,9 @@ def process_file(
        spec_slices,
    )

-    _file_id = file_id
-    if _file_id is None:
-        _file_id = _generate_id(path)
-
    # convert results to a dictionary in the right format
    results = convert_results(
-        file_id=_file_id,
+        file_id=os.path.basename(audio_file),
        time_exp=config.get("time_expansion", 1) or 1,
        duration=audio_full.shape[0] / float(sampling_rate),
        params=config,
@ -847,7 +800,6 @@ def process_file(
        spec_feats=spec_feats,
        cnn_feats=cnn_feats,
        spec_slices=spec_slices,
-        nyquist_freq=orig_samp_rate / 2,
    )

    # summarize results
@ -859,22 +811,6 @@ def process_file(

    return results

-def _generate_id(path: AudioPath) -> str:
-    """ Generate an id based on the path.
-    
-    If the path is a str or PathLike it will be parsed as the basename.
-    This should ensure backwards compatibility with previous versions.     
-    """
-    if isinstance(path, str) or isinstance(path, os.PathLike):
-        return os.path.basename(path)
-    elif isinstance(path, (BinaryIO, io.BytesIO)):
-        path.seek(0)
-        md5 = hashlib.md5(path.read()).hexdigest()
-        path.seek(0)
-        return md5
-    else:
-        return str(uuid.uuid4())
-

 def summarize_results(results, predictions, config):
    """Print summary of results."""
--- a/pdm.lock
+++ b/pdm.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,82 +1,82 @@
+[tool.pdm]
+[tool.pdm.dev-dependencies]
+dev = [
+    "pytest>=7.2.2",
+]
+
 [project]
 name = "batdetect2"
-version = "1.3.0"
+version = "1.0.2"
 description = "Deep learning model for detecting and classifying bat echolocation calls in high frequency audio recordings."
 authors = [
-  { "name" = "Oisin Mac Aodha", "email" = "oisin.macaodha@ed.ac.uk" },
-  { "name" = "Santiago Martinez Balvanera", "email" = "santiago.balvanera.20@ucl.ac.uk" },
+    { "name" = "Oisin Mac Aodha", "email" = "oisin.macaodha@ed.ac.uk" },
+    { "name" = "Santiago Martinez Balvanera", "email" = "santiago.balvanera.20@ucl.ac.uk" }
 ]
 dependencies = [
-    "click>=8.1.7",
-    "librosa>=0.10.1",
-    "matplotlib>=3.7.1",
-    "numpy>=1.23.5",
-    "pandas>=1.5.3",
-    "scikit-learn>=1.2.2",
-    "scipy>=1.10.1",
-    "torch>=1.13.1,<2.5.0",
-    "torchaudio>=1.13.1,<2.5.0",
-    "torchvision>=0.14.0",
+    "librosa",
+    "matplotlib",
+    "numpy",
+    "pandas",
+    "scikit-learn",
+    "scipy",
+    "torch>=1.13.1,<2",
+    "torchaudio",
+    "torchvision",
+    "click",
 ]
-requires-python = ">=3.9,<3.13"
+requires-python = ">=3.8,<3.11"
 readme = "README.md"
 license = { text = "CC-by-nc-4" }
 classifiers = [
-  "Development Status :: 4 - Beta",
-  "Intended Audience :: Science/Research",
-  "Natural Language :: English",
-  "Operating System :: OS Independent",
-  "Programming Language :: Python :: 3.9",
-  "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11",
-  "Programming Language :: Python :: 3.12",
-  "Topic :: Scientific/Engineering :: Artificial Intelligence",
-  "Topic :: Software Development :: Libraries :: Python Modules",
-  "Topic :: Multimedia :: Sound/Audio :: Analysis",
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Science/Research",
+    "Natural Language :: English",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Topic :: Multimedia :: Sound/Audio :: Analysis",
 ]
 keywords = [
-  "bat",
-  "echolocation",
-  "deep learning",
-  "audio",
-  "machine learning",
-  "classification",
-  "detection",
+    "bat",
+    "echolocation",
+    "deep learning",
+    "audio",
+    "machine learning",
+    "classification",
+    "detection",
 ]

 [build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
+requires = ["pdm-pep517>=1.0.0"]
+build-backend = "pdm.pep517.api"

 [project.scripts]
 batdetect2 = "batdetect2.cli:cli"

-[tool.uv]
-dev-dependencies = [
-    "debugpy>=1.8.8",
-    "hypothesis>=6.118.7",
-    "pyright>=1.1.388",
-    "pytest>=7.2.2",
-    "ruff>=0.7.3",
+[tool.black]
+line-length = 80
+
+[[tool.mypy.overrides]]
+module = [
+    "librosa",
+    "pandas",
 ]
+ignore_missing_imports = true

-[tool.ruff]
-line-length = 79
-target-version = "py39"
+[tool.pylsp-mypy]
+enabled = false
+live_mode = true
+strict = true

-[tool.ruff.format]
-docstring-code-format = true
-docstring-code-line-length = 79
-
-[tool.ruff.lint]
-select = ["E4", "E7", "E9", "F", "B", "Q", "I", "NPY201"]
-
-[tool.ruff.lint.pydocstyle]
+[tool.pydocstyle]
 convention = "numpy"

 [tool.pyright]
-include = ["batdetect2", "tests"]
+include = [
+    "bat_detect",
+    "tests",
+]
 venvPath = "."
 venv = ".venv"
-pythonVersion = "3.9"
-pythonPlatform = "All"
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -1,40 +0,0 @@
-from pathlib import Path
-from typing import List
-
-import pytest
-
-
-@pytest.fixture
-def example_data_dir() -> Path:
-    pkg_dir = Path(__file__).parent.parent
-    example_data_dir = pkg_dir / "example_data"
-    assert example_data_dir.exists()
-    return example_data_dir
-
-
-@pytest.fixture
-def example_audio_dir(example_data_dir: Path) -> Path:
-    example_audio_dir = example_data_dir / "audio"
-    assert example_audio_dir.exists()
-    return example_audio_dir
-
-
-@pytest.fixture
-def example_audio_files(example_audio_dir: Path) -> List[Path]:
-    audio_files = list(example_audio_dir.glob("*.[wW][aA][vV]"))
-    assert len(audio_files) == 3
-    return audio_files
-
-
-@pytest.fixture
-def data_dir() -> Path:
-    dir = Path(__file__).parent / "data"
-    assert dir.exists()
-    return dir
-
-
-@pytest.fixture
-def contrib_dir(data_dir) -> Path:
-    dir = data_dir / "contrib"
-    assert dir.exists()
-    return dir
--- a/tests/data/20230322_172000_selec2.wav
+++ b/tests/data/20230322_172000_selec2.wav
--- a/tests/data/contrib/jeff37/0166_20240531_223911.wav
+++ b/tests/data/contrib/jeff37/0166_20240531_223911.wav
--- a/tests/data/contrib/jeff37/0166_20240602_225340.wav
+++ b/tests/data/contrib/jeff37/0166_20240602_225340.wav
--- a/tests/data/contrib/jeff37/0166_20240603_033731.wav
+++ b/tests/data/contrib/jeff37/0166_20240603_033731.wav
--- a/tests/data/contrib/jeff37/0166_20240603_033937.wav
+++ b/tests/data/contrib/jeff37/0166_20240603_033937.wav
--- a/tests/data/contrib/jeff37/0166_20240604_233500.wav
+++ b/tests/data/contrib/jeff37/0166_20240604_233500.wav
--- a/tests/data/contrib/padpadpadpad/Audiomoth.WAV
+++ b/tests/data/contrib/padpadpadpad/Audiomoth.WAV
--- a/tests/data/contrib/padpadpadpad/AudiomothNoBatCalls.WAV
+++ b/tests/data/contrib/padpadpadpad/AudiomothNoBatCalls.WAV
--- a/tests/data/contrib/padpadpadpad/Echometer.wav
+++ b/tests/data/contrib/padpadpadpad/Echometer.wav
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -2,21 +2,17 @@

 import os
 from glob import glob
-from pathlib import Path

 import numpy as np
-import soundfile as sf
 import torch
 from torch import nn

 from batdetect2 import api
-import io 

 PKG_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 TEST_DATA_DIR = os.path.join(PKG_DIR, "example_data", "audio")
 TEST_DATA = glob(os.path.join(TEST_DATA_DIR, "*.wav"))

-DATA_DIR = os.path.join(os.path.dirname(__file__), "data")

 def test_load_model_with_default_params():
    """Test loading model with default parameters."""
@ -266,44 +262,3 @@ def test_process_file_with_spec_slices():
    assert "spec_slices" in results
    assert isinstance(results["spec_slices"], list)
    assert len(results["spec_slices"]) == len(detections)
-
-
-def test_process_file_with_empty_predictions_does_not_fail(
-    tmp_path: Path,
-):
-    """Test process file with empty predictions does not fail."""
-    # Create empty file
-    empty_file = tmp_path / "empty.wav"
-    empty_wav = np.zeros((0, 1), dtype=np.float32)
-    sf.write(empty_file, empty_wav, 256000)
-
-    # Process file
-    results = api.process_file(str(empty_file))
-
-    assert results is not None
-    assert len(results["pred_dict"]["annotation"]) == 0
-
-def test_process_file_file_id_defaults_to_basename():
-    """Test that process_file assigns basename as an id if no file_id is provided."""
-    # Recording donated by @@kdarras
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    output = api.process_file(path)
-    predictions = output["pred_dict"]
-    id = predictions["id"]
-    assert id == basename
-
-def test_bytesio_file_id_defaults_to_md5():
-    """Test that process_file assigns an md5 sum as an id if no file_id is provided when using binary data."""
-    # Recording donated by @@kdarras
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    with open(path, "rb") as f:
-        data = io.BytesIO(f.read())
-
-    output = api.process_file(data)
-    predictions = output["pred_dict"]
-    id = predictions["id"]
-    assert id == "7ade9ebf1a9fe5477ff3a2dc57001929"
--- a/tests/test_audio_utils.py
+++ b/tests/test_audio_utils.py
@ -1,156 +0,0 @@
-import numpy as np
-import torch
-import torch.nn.functional as F
-from hypothesis import given
-from hypothesis import strategies as st
-
-from batdetect2.detector import parameters
-from batdetect2.utils import audio_utils, detector_utils
-import io
-import os
-
-DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
-
-@given(duration=st.floats(min_value=0.1, max_value=2))
-def test_can_compute_correct_spectrogram_width(duration: float):
-    samplerate = parameters.TARGET_SAMPLERATE_HZ
-    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
-
-    length = int(duration * samplerate)
-    audio = np.random.rand(length)
-
-    spectrogram, _ = audio_utils.generate_spectrogram(
-        audio,
-        samplerate,
-        params,
-    )
-
-    # convert to pytorch
-    spectrogram = torch.from_numpy(spectrogram)
-
-    # add batch and channel dimensions
-    spectrogram = spectrogram.unsqueeze(0).unsqueeze(0)
-
-    # resize the spec
-    resize_factor = params["resize_factor"]
-    spec_op_shape = (
-        int(params["spec_height"] * resize_factor),
-        int(spectrogram.shape[-1] * resize_factor),
-    )
-    spectrogram = F.interpolate(
-        spectrogram,
-        size=spec_op_shape,
-        mode="bilinear",
-        align_corners=False,
-    )
-
-    expected_width = audio_utils.compute_spectrogram_width(
-        length,
-        samplerate=parameters.TARGET_SAMPLERATE_HZ,
-        window_duration=params["fft_win_length"],
-        window_overlap=params["fft_overlap"],
-        resize_factor=params["resize_factor"],
-    )
-
-    assert spectrogram.shape[-1] == expected_width
-
-
-@given(duration=st.floats(min_value=0.1, max_value=2))
-def test_pad_audio_without_fixed_size(duration: float):
-    # Test the pad_audio function
-    # This function is used to pad audio with zeros to a specific length
-    # It is used in the generate_spectrogram function
-    # The function is tested with a simplepas
-    samplerate = parameters.TARGET_SAMPLERATE_HZ
-    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
-
-    length = int(duration * samplerate)
-    audio = np.random.rand(length)
-
-    # pad the audio to be divisible by divide factor
-    padded_audio = audio_utils.pad_audio(
-        audio,
-        samplerate=samplerate,
-        window_duration=params["fft_win_length"],
-        window_overlap=params["fft_overlap"],
-        resize_factor=params["resize_factor"],
-        divide_factor=params["spec_divide_factor"],
-    )
-
-    # check that the padded audio is divisible by the divide factor
-    expected_width = audio_utils.compute_spectrogram_width(
-        len(padded_audio),
-        samplerate=parameters.TARGET_SAMPLERATE_HZ,
-        window_duration=params["fft_win_length"],
-        window_overlap=params["fft_overlap"],
-        resize_factor=params["resize_factor"],
-    )
-
-    assert expected_width % params["spec_divide_factor"] == 0
-
-
-@given(duration=st.floats(min_value=0.1, max_value=2))
-def test_computed_spectrograms_are_actually_divisible_by_the_spec_divide_factor(
-    duration: float,
-):
-    samplerate = parameters.TARGET_SAMPLERATE_HZ
-    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
-    length = int(duration * samplerate)
-    audio = np.random.rand(length)
-    _, spectrogram, _ = detector_utils.compute_spectrogram(
-        audio,
-        samplerate,
-        params,
-        torch.device("cpu"),
-    )
-    assert spectrogram.shape[-1] % params["spec_divide_factor"] == 0
-
-
-@given(
-    duration=st.floats(min_value=0.1, max_value=2),
-    width=st.integers(min_value=128, max_value=1024),
-)
-def test_pad_audio_with_fixed_width(duration: float, width: int):
-    samplerate = parameters.TARGET_SAMPLERATE_HZ
-    params = parameters.DEFAULT_SPECTROGRAM_PARAMETERS
-
-    length = int(duration * samplerate)
-    audio = np.random.rand(length)
-
-    # pad the audio to be divisible by divide factor
-    padded_audio = audio_utils.pad_audio(
-        audio,
-        samplerate=samplerate,
-        window_duration=params["fft_win_length"],
-        window_overlap=params["fft_overlap"],
-        resize_factor=params["resize_factor"],
-        divide_factor=params["spec_divide_factor"],
-        fixed_width=width,
-    )
-
-    # check that the padded audio is divisible by the divide factor
-    expected_width = audio_utils.compute_spectrogram_width(
-        len(padded_audio),
-        samplerate=parameters.TARGET_SAMPLERATE_HZ,
-        window_duration=params["fft_win_length"],
-        window_overlap=params["fft_overlap"],
-        resize_factor=params["resize_factor"],
-    )
-    assert expected_width == width
-    
-
-def test_load_audio_using_bytesio():
-    basename = "20230322_172000_selec2.wav"
-    path = os.path.join(DATA_DIR, basename)
-
-    with open(path, "rb") as f:
-        data = io.BytesIO(f.read())
-    
-    sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_and_samplerate(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_and_samplerate(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
-
-    assert expected_sample_rate == sample_rate
-    assert exp_file_sample_rate == file_sample_rate
-
-    assert np.array_equal(audio_data, expected_audio_data)
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -1,26 +1,20 @@
 """Test the command line interface."""
-
-from pathlib import Path
-
-import pandas as pd
 from click.testing import CliRunner

 from batdetect2.cli import cli

-runner = CliRunner()
-

 def test_cli_base_command():
    """Test the base command."""
+    runner = CliRunner()
    result = runner.invoke(cli, ["--help"])
    assert result.exit_code == 0
-    assert (
-        "BatDetect2 - Bat Call Detection and Classification" in result.output
-    )
+    assert "BatDetect2 - Bat Call Detection and Classification" in result.output


 def test_cli_detect_command_help():
    """Test the detect command help."""
+    runner = CliRunner()
    result = runner.invoke(cli, ["detect", "--help"])
    assert result.exit_code == 0
    assert "Detect bat calls in files in AUDIO_DIR" in result.output
@ -34,6 +28,7 @@ def test_cli_detect_command_on_test_audio(tmp_path):
    if results_dir.exists():
        results_dir.rmdir()

+    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
@ -47,112 +42,3 @@ def test_cli_detect_command_on_test_audio(tmp_path):
    assert results_dir.exists()
    assert len(list(results_dir.glob("*.csv"))) == 3
    assert len(list(results_dir.glob("*.json"))) == 3
-
-
-def test_cli_detect_command_with_non_trivial_time_expansion(tmp_path):
-    """Test the detect command with a non-trivial time expansion factor."""
-    results_dir = tmp_path / "results"
-
-    # Remove results dir if it exists
-    if results_dir.exists():
-        results_dir.rmdir()
-
-    result = runner.invoke(
-        cli,
-        [
-            "detect",
-            "example_data/audio",
-            str(results_dir),
-            "0.3",
-            "--time_expansion_factor",
-            "10",
-        ],
-    )
-
-    assert result.exit_code == 0
-    assert "Time Expansion Factor: 10" in result.stdout
-
-
-def test_cli_detect_command_with_the_spec_feature_flag(tmp_path: Path):
-    """Test the detect command with the spec feature flag."""
-    results_dir = tmp_path / "results"
-
-    # Remove results dir if it exists
-    if results_dir.exists():
-        results_dir.rmdir()
-
-    result = runner.invoke(
-        cli,
-        [
-            "detect",
-            "example_data/audio",
-            str(results_dir),
-            "0.3",
-            "--spec_features",
-        ],
-    )
-    assert result.exit_code == 0
-    assert results_dir.exists()
-
-    csv_files = [path.name for path in results_dir.glob("*.csv")]
-
-    expected_files = [
-        "20170701_213954-MYOMYS-LR_0_0.5.wav_spec_features.csv",
-        "20180530_213516-EPTSER-LR_0_0.5.wav_spec_features.csv",
-        "20180627_215323-RHIFER-LR_0_0.5.wav_spec_features.csv",
-    ]
-
-    for expected_file in expected_files:
-        assert expected_file in csv_files
-
-        df = pd.read_csv(results_dir / expected_file)
-        assert not (df.duration == -1).any()
-
-
-def test_cli_detect_fails_gracefully_on_empty_file(tmp_path: Path):
-    results_dir = tmp_path / "results"
-    target = tmp_path / "audio"
-    target.mkdir()
-
-    # Create an empty file with the .wav extension
-    empty_file = target / "empty.wav"
-    empty_file.touch()
-
-    result = runner.invoke(
-        cli,
-        args=[
-            "detect",
-            str(target),
-            str(results_dir),
-            "0.3",
-            "--spec_features",
-        ],
-    )
-    assert result.exit_code == 0
-    assert f"Error processing file {empty_file}" in result.output
-
-
-def test_can_set_chunk_size(tmp_path: Path):
-    results_dir = tmp_path / "results"
-
-    # Remove results dir if it exists
-    if results_dir.exists():
-        results_dir.rmdir()
-
-    result = runner.invoke(
-        cli,
-        [
-            "detect",
-            "example_data/audio",
-            str(results_dir),
-            "0.3",
-            "--chunk_size",
-            "1",
-        ],
-    )
-
-    assert "Chunk Size: 1.0s" in result.output
-    assert result.exit_code == 0
-    assert results_dir.exists()
-    assert len(list(results_dir.glob("*.csv"))) == 3
-    assert len(list(results_dir.glob("*.json"))) == 3
--- a/tests/test_contrib.py
+++ b/tests/test_contrib.py
@ -1,73 +0,0 @@
-"""Test suite to ensure user provided files are correctly processed."""
-
-from pathlib import Path
-
-from click.testing import CliRunner
-
-from batdetect2.cli import cli
-
-runner = CliRunner()
-
-
-def test_can_process_jeff37_files(
-    contrib_dir: Path,
-    tmp_path: Path,
-):
-    """This test stems from issue #31.
-
-    A user provided a set of files which which batdetect2 cli failed and
-    generated the following error message:
-
-        [2272] "Error processing file!: negative dimensions are not allowed"
-
-    This test ensures that the error message is not generated when running
-    batdetect2 cli with the same set of files.
-    """
-    path = contrib_dir / "jeff37"
-    assert path.exists()
-
-    results_dir = tmp_path / "results"
-    result = runner.invoke(
-        cli,
-        [
-            "detect",
-            str(path),
-            str(results_dir),
-            "0.3",
-        ],
-    )
-    assert result.exit_code == 0
-    assert results_dir.exists()
-    assert len(list(results_dir.glob("*.csv"))) == 5
-    assert len(list(results_dir.glob("*.json"))) == 5
-
-
-def test_can_process_padpadpadpad_files(
-    contrib_dir: Path,
-    tmp_path: Path,
-):
-    """This test stems from issue #29.
-
-    Batdetect2 cli failed on the files provided by the user @padpadpadpad
-    with the following error message:
-
-        AttributeError: module 'numpy' has no attribute 'AxisError'
-
-    This test ensures that the files are processed without any error.
-    """
-    path = contrib_dir / "padpadpadpad"
-    assert path.exists()
-    results_dir = tmp_path / "results"
-    result = runner.invoke(
-        cli,
-        [
-            "detect",
-            str(path),
-            str(results_dir),
-            "0.3",
-        ],
-    )
-    assert result.exit_code == 0
-    assert results_dir.exists()
-    assert len(list(results_dir.glob("*.csv"))) == 2
-    assert len(list(results_dir.glob("*.json"))) == 2
--- a/tests/test_detections.py
+++ b/tests/test_detections.py
@ -1,23 +0,0 @@
-"""Test suite to ensure that model detections are not incorrect."""
-
-import os
-
-from batdetect2 import api
-
-DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
-
-
-def test_no_detections_above_nyquist():
-    """Test that no detections are made above the nyquist frequency."""
-    # Recording donated by @@kdarras
-    path = os.path.join(DATA_DIR, "20230322_172000_selec2.wav")
-
-    # This recording has a sampling rate of 192 kHz
-    nyquist = 192_000 / 2
-
-    output = api.process_file(path)
-    predictions = output["pred_dict"]
-    assert len(predictions["annotation"]) != 0
-    assert all(
-        pred["high_freq"] < nyquist for pred in predictions["annotation"]
-    )
--- a/tests/test_features.py
+++ b/tests/test_features.py
@ -1,291 +0,0 @@
-"""Test suite for feature extraction functions."""
-
-import logging
-
-import librosa
-import numpy as np
-import pytest
-
-import batdetect2.detector.compute_features as feats
-from batdetect2 import api, types
-from batdetect2.utils import audio_utils as au
-
-numba_logger = logging.getLogger("numba")
-numba_logger.setLevel(logging.WARNING)
-
-
-def index_to_freq(
-    index: int,
-    spec_height: int,
-    min_freq: int,
-    max_freq: int,
-) -> float:
-    """Convert spectrogram index to frequency in Hz."""
-    index = spec_height - index
-    return round(
-        (index / float(spec_height)) * (max_freq - min_freq) + min_freq, 2
-    )
-
-
-def index_to_time(
-    index: int,
-    spec_width: int,
-    spec_duration: float,
-) -> float:
-    """Convert spectrogram index to time in seconds."""
-    return round((index / float(spec_width)) * spec_duration, 2)
-
-
-def test_get_feats_function_with_empty_spectrogram():
-    """Test get_feats function with empty spectrogram.
-
-    This tests that the overall flow of the function works, even if the
-    spectrogram is empty.
-    """
-    spec_duration = 3
-    spec_width = 100
-    spec_height = 100
-    min_freq = 10_000
-    max_freq = 120_000
-    spectrogram = np.zeros((spec_height, spec_width))
-
-    x_pos = 20
-    y_pos = 80
-    bb_width = 20
-    bb_height = 20
-
-    start_time = index_to_time(x_pos, spec_width, spec_duration)
-    end_time = index_to_time(x_pos + bb_width, spec_width, spec_duration)
-    low_freq = index_to_freq(y_pos, spec_height, min_freq, max_freq)
-    high_freq = index_to_freq(
-        y_pos - bb_height, spec_height, min_freq, max_freq
-    )
-
-    pred_nms: types.PredictionResults = {
-        "det_probs": np.array([1]),
-        "class_probs": np.array([[1]]),
-        "x_pos": np.array([x_pos]),
-        "y_pos": np.array([y_pos]),
-        "bb_width": np.array([bb_width]),
-        "bb_height": np.array([bb_height]),
-        "start_times": np.array([start_time]),
-        "end_times": np.array([end_time]),
-        "low_freqs": np.array([low_freq]),
-        "high_freqs": np.array([high_freq]),
-    }
-
-    params: types.FeatureExtractionParameters = {
-        "min_freq": min_freq,
-        "max_freq": max_freq,
-    }
-
-    features = feats.get_feats(spectrogram, pred_nms, params)
-    assert low_freq < high_freq
-    assert isinstance(features, np.ndarray)
-    assert features.shape == (len(pred_nms["det_probs"]), 9)
-    assert np.isclose(
-        features[0],
-        np.array(
-            [
-                end_time - start_time,
-                low_freq,
-                high_freq,
-                high_freq - low_freq,
-                high_freq,
-                max_freq,
-                max_freq,
-                max_freq,
-                np.nan,
-            ]
-        ),
-        equal_nan=True,
-    ).all()
-
-
-@pytest.mark.parametrize(
-    "max_power",
-    [
-        30_000,
-        31_000,
-        32_000,
-        33_000,
-        34_000,
-        35_000,
-        36_000,
-        37_000,
-        38_000,
-        39_000,
-        40_000,
-    ],
-)
-def test_compute_max_power_bb(max_power: int):
-    """Test compute_max_power_bb function."""
-    duration = 1
-    samplerate = 256_000
-    min_freq = 0
-    max_freq = 128_000
-
-    start_time = 0.3
-    end_time = 0.6
-    low_freq = 30_000
-    high_freq = 40_000
-
-    audio = np.zeros((int(duration * samplerate),))
-
-    # Add a signal during the time and frequency range of interest
-    audio[
-        int(start_time * samplerate) : int(end_time * samplerate)
-    ] = 0.5 * librosa.tone(
-        max_power, sr=samplerate, duration=end_time - start_time
-    )
-
-    # Add a more powerful signal outside frequency range of interest
-    audio[
-        int(start_time * samplerate) : int(end_time * samplerate)
-    ] += 2 * librosa.tone(
-        80_000, sr=samplerate, duration=end_time - start_time
-    )
-
-    params = api.get_config(
-        min_freq=min_freq,
-        max_freq=max_freq,
-        target_samp_rate=samplerate,
-    )
-
-    spec, _ = au.generate_spectrogram(
-        audio,
-        samplerate,
-        params,
-    )
-
-    x_start = int(
-        au.time_to_x_coords(
-            start_time,
-            samplerate,
-            params["fft_win_length"],
-            params["fft_overlap"],
-        )
-    )
-
-    x_end = int(
-        au.time_to_x_coords(
-            end_time,
-            samplerate,
-            params["fft_win_length"],
-            params["fft_overlap"],
-        )
-    )
-
-    num_freq_bins = spec.shape[0]
-    y_low = num_freq_bins - int(num_freq_bins * low_freq / max_freq)
-    y_high = num_freq_bins - int(num_freq_bins * high_freq / max_freq)
-
-    prediction: types.Prediction = {
-        "det_prob": 1,
-        "class_prob": np.ones((1,)),
-        "x_pos": x_start,
-        "y_pos": int(y_low),
-        "bb_width": int(x_end - x_start),
-        "bb_height": int(y_low - y_high),
-        "start_time": start_time,
-        "end_time": end_time,
-        "low_freq": low_freq,
-        "high_freq": high_freq,
-    }
-
-    print(prediction)
-
-    max_power_bb = feats.compute_max_power_bb(
-        prediction,
-        spec,
-        min_freq=min_freq,
-        max_freq=max_freq,
-    )
-
-    assert abs(max_power_bb - max_power) <= 500
-
-
-def test_compute_max_power():
-    """Test compute_max_power_bb function."""
-    duration = 3
-    samplerate = 16_000
-    min_freq = 0
-    max_freq = 8_000
-
-    start_time = 1
-    end_time = 2
-    low_freq = 3_000
-    high_freq = 4_000
-    max_power = 5_000
-
-    audio = np.zeros((int(duration * samplerate),))
-
-    # Add a signal during the time and frequency range of interest
-    audio[
-        int(start_time * samplerate) : int(end_time * samplerate)
-    ] = 0.5 * librosa.tone(
-        3_500, sr=samplerate, duration=end_time - start_time
-    )
-
-    # Add a more powerful signal outside frequency range of interest
-    audio[
-        int(start_time * samplerate) : int(end_time * samplerate)
-    ] += 2 * librosa.tone(
-        max_power, sr=samplerate, duration=end_time - start_time
-    )
-
-    params = api.get_config(
-        min_freq=min_freq,
-        max_freq=max_freq,
-        target_samp_rate=samplerate,
-    )
-
-    spec, _ = au.generate_spectrogram(
-        audio,
-        samplerate,
-        params,
-    )
-
-    x_start = int(
-        au.time_to_x_coords(
-            start_time,
-            samplerate,
-            params["fft_win_length"],
-            params["fft_overlap"],
-        )
-    )
-
-    x_end = int(
-        au.time_to_x_coords(
-            end_time,
-            samplerate,
-            params["fft_win_length"],
-            params["fft_overlap"],
-        )
-    )
-
-    num_freq_bins = spec.shape[0]
-    y_low = int(num_freq_bins * low_freq / max_freq)
-    y_high = int(num_freq_bins * high_freq / max_freq)
-
-    prediction: types.Prediction = {
-        "det_prob": 1,
-        "class_prob": np.ones((1,)),
-        "x_pos": x_start,
-        "y_pos": int(y_high),
-        "bb_width": int(x_end - x_start),
-        "bb_height": int(y_high - y_low),
-        "start_time": start_time,
-        "end_time": end_time,
-        "low_freq": low_freq,
-        "high_freq": high_freq,
-    }
-
-    computed_max_power = feats.compute_max_power(
-        prediction,
-        spec,
-        min_freq=min_freq,
-        max_freq=max_freq,
-    )
-
-    assert abs(computed_max_power - max_power) < 100
--- a/tests/test_model.py
+++ b/tests/test_model.py
@ -1,78 +0,0 @@
-"""Test suite for model functions."""
-
-import warnings
-from pathlib import Path
-from typing import List
-
-import numpy as np
-from hypothesis import given, settings
-from hypothesis import strategies as st
-
-from batdetect2 import api
-from batdetect2.detector import parameters
-
-
-def test_can_import_model_without_warnings():
-    with warnings.catch_warnings():
-        warnings.simplefilter("error")
-        api.load_model()
-
-
-@settings(deadline=None, max_examples=5)
-@given(duration=st.floats(min_value=0.1, max_value=2))
-def test_can_import_model_without_pickle(duration: float):
-    # NOTE: remove this test once no other issues are found This is a temporary
-    # test to check that change in model loading did not impact model behaviour
-    # in any way.
-
-    samplerate = parameters.TARGET_SAMPLERATE_HZ
-    audio = np.random.rand(int(duration * samplerate))
-
-    model_without_pickle, model_params_without_pickle = api.load_model(
-        weights_only=True
-    )
-    model_with_pickle, model_params_with_pickle = api.load_model(
-        weights_only=False
-    )
-
-    assert model_params_without_pickle == model_params_with_pickle
-
-    predictions_without_pickle, _, _ = api.process_audio(
-        audio,
-        model=model_without_pickle,
-    )
-    predictions_with_pickle, _, _ = api.process_audio(
-        audio,
-        model=model_with_pickle,
-    )
-
-    assert predictions_without_pickle == predictions_with_pickle
-
-
-def test_can_import_model_without_pickle_on_test_data(
-    example_audio_files: List[Path],
-):
-    # NOTE: remove this test once no other issues are found This is a temporary
-    # test to check that change in model loading did not impact model behaviour
-    # in any way.
-
-    model_without_pickle, model_params_without_pickle = api.load_model(
-        weights_only=True
-    )
-    model_with_pickle, model_params_with_pickle = api.load_model(
-        weights_only=False
-    )
-
-    assert model_params_without_pickle == model_params_with_pickle
-
-    for audio_file in example_audio_files:
-        audio = api.load_audio(str(audio_file))
-        predictions_without_pickle, _, _ = api.process_audio(
-            audio,
-            model=model_without_pickle,
-        )
-        predictions_with_pickle, _, _ = api.process_audio(
-            audio,
-            model=model_with_pickle,
-        )
-        assert predictions_without_pickle == predictions_with_pickle
--- a/uv.lock
+++ b/uv.lock