From b4efcfcf0f9c75aeadbab9908896df2ccc278350 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Wed, 6 May 2026 14:06:04 +0100 Subject: [PATCH] docs: refresh api reference guidance --- README.md | 165 +++++++++++++++++++------------ src/batdetect2/api_v2.py | 124 ++++++++++++++++++----- src/batdetect2/cli/finetune.py | 17 ++-- tests/test_api_v2/test_api_v2.py | 9 +- 4 files changed, 216 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index a3f05ad..24ac15f 100644 --- a/README.md +++ b/README.md @@ -1,49 +1,67 @@ # BatDetect2 - Code for detecting and classifying bat echolocation calls in high frequency audio recordings. +Code for + detecting and + classifying bat + echolocation + calls in high + frequency audio + recordings. + +> [!WARNING] +> BatDetect2 2.0.1 is out. +> There are many changes and new recommended workflows. +> We have left the previous `batdetect2.api` module intact, but if you run +> into issues or want to upgrade, see the migration guide in the docs site. +> +> This update also ships with a refreshed default model. +> It was trained in the same way and on the same data as before, but you +> should still expect small output differences in some cases. ## What BatDetect2 is useful for -BatDetect2 can help you screen recordings for bat calls, -find recordings that need expert review, -and compare model outputs across sites or projects with appropriate caution. +BatDetect2 can help you screen recordings for bat calls, find recordings that +need expert review, and compare model outputs across sites or projects with +appropriate caution. -It is best used as a tool to support ecological work, -not as a replacement for validation or expert interpretation. +It is best used as a tool to support ecological work, not as a replacement for +validation or expert interpretation. 
## Start here -If you want the simplest current workflow, -use the documentation site and start with: +If you want the simplest current workflow, use the documentation site and start +with: -- getting started: `docs/source/getting_started.md` -- first tutorial: `docs/source/tutorials/run-inference-on-folder.md` +- getting started: + `docs/source/getting_started.md` +- first tutorial: + `docs/source/tutorials/run-inference-on-folder.md` -The current docs default to: - -- the current command-line workflow: `batdetect2 predict` -- the current Python workflow: `batdetect2.api_v2.BatDetect2API` - -If you need the previous workflow based on `batdetect2 detect` or `batdetect2.api`, -use the legacy docs section and migration guide in the docs site. +If you need the previous workflow based on `batdetect2 detect` or +`batdetect2.api`, use the legacy docs section and migration guide in the docs +site. ## Install BatDetect2 -If you already use Python, -activate the environment where you want BatDetect2 to live. +If you already use Python, activate the environment where you want BatDetect2 to +live. -If not, -create a fresh one first so BatDetect2 stays separate from other software on your machine. +If not, create a fresh one first so BatDetect2 stays separate from other +software on your machine. Two common options are: -* Install the Anaconda Python 3.10 distribution for your operating system from [here](https://www.continuum.io/downloads). Create a new environment and activate it: +* Install the Anaconda Python 3.10 distribution for your operating system from + [here](https://www.continuum.io/downloads). 
+ Create a new environment and activate it: ```bash conda create -y --name batdetect2 python==3.10 conda activate batdetect2 ``` -* If you already have Python installed (version >= 3.10,< 3.14), you can create a fresh environment with: +* If you already have Python installed (version >= 3.10, < 3.14), you can create + a fresh environment with: ```bash python -m venv .venv @@ -57,7 +75,8 @@ You can use pip to install `batdetect2`: pip install batdetect2 ``` -Alternatively, download this code from the repository (by clicking on the green button on top right) and unzip it. +Alternatively, download this code from the repository (by clicking on the green +button at the top right) and unzip it. Once unzipped, run this from extracted folder. ```bash @@ -68,11 +87,11 @@ Make sure you have the environment activated before installing `batdetect2`. ## Run BatDetect2 on a folder of recordings -Once installed, -the simplest current workflow is to run BatDetect2 on a folder of `.wav` files. +Once installed, the simplest current workflow is to run BatDetect2 on a folder +of `.wav` files. -If you are working from this repository checkout, -you can use this example checkpoint path: +If you are working from this repository checkout, you can use this example +checkpoint path: ```text src/batdetect2/models/checkpoints/Net2DFast_UK_same.pth.tar @@ -87,32 +106,37 @@ batdetect2 predict directory \ outputs ``` -This will scan the audio files in `example_data/audio` -and save model outputs to `outputs`. +This will scan the audio files in `example_data/audio` and save model outputs to +`outputs`. -For the full beginner walkthrough, -use `docs/source/tutorials/run-inference-on-folder.md`. +For the full beginner walkthrough, use +`docs/source/tutorials/run-inference-on-folder.md`. ## Legacy workflow -The sections below are kept only for people maintaining older BatDetect2 scripts and analysis pipelines.
+The sections below are kept only for people maintaining older BatDetect2 scripts +and analysis pipelines. -If you are new to BatDetect2, -stop here and use the current docs and command above. +If you are new to BatDetect2, stop here and use the current docs and command +above. -If you really do need the older workflow, -the reference material is below. +If you really do need the older workflow, the reference material is below. ## Try the model -1) You can try a demo of the model (for UK species) on [huggingface](https://huggingface.co/spaces/macaodha/batdetect2). +1) You can try a demo of the model (for UK species) on + [huggingface](https://huggingface.co/spaces/macaodha/batdetect2). -2) Alternatively, click [here](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb) to run the model using Google Colab. You can also run this notebook locally. +2) Alternatively, click + [here](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb) + to run the model using Google Colab. + You can also run this notebook locally. ## Running the model on your own data -After following the above steps to install the code you can run the model on your own data. +After following the above steps to install the code you can run the model on +your own data. The remainder of this section is legacy reference material. @@ -133,23 +157,35 @@ batdetect2 detect example_data/audio/ example_data/anns/ 0.3 ``` `AUDIO_DIR` is the path on your computer to the audio wav files of interest. -`ANN_DIR` is the path on your computer where the model predictions will be saved. The model will output both `.csv` and `.json` results for each audio file. -`DETECTION_THRESHOLD` is a number between 0 and 1 specifying the cut-off threshold applied to the calls. A smaller number will result in more calls detected, but with the chance of introducing more mistakes. 
+`ANN_DIR` is the path on your computer where the model predictions will be +saved. +The model will output both `.csv` and `.json` results for each audio file. +`DETECTION_THRESHOLD` is a number between 0 and 1 specifying the cut-off +threshold applied to the calls. +A smaller number will result in more calls detected, but with the chance of +introducing more mistakes. -There are also optional arguments, e.g. you can request that the model outputs features (i.e. estimated call parameters) such as duration, max_frequency, etc. by setting the flag `--spec_features`. These will be saved as `*_spec_features.csv` files: +There are also optional arguments, e.g. you can request that the model outputs +features (i.e. estimated call parameters) such as duration, max_frequency, etc. +by setting the flag `--spec_features`. +These will be saved as `*_spec_features.csv` files: `batdetect2 detect example_data/audio/ example_data/anns/ 0.3 --spec_features` -You can also specify which model to use by setting the `--model_path` argument. If not specified, it will default to using a model trained on UK data e.g. -`batdetect2 detect example_data/audio/ example_data/anns/ 0.3 --model_path models/Net2DFast_UK_same.pth.tar` +You can also specify which model to use by setting the `--model_path` argument. +If not specified, it will default to using a model trained on UK data e.g. +`batdetect2 detect example_data/audio/ example_data/anns/ 0.3 --model_path +models/Net2DFast_UK_same.pth.tar` ### Using the Python API The examples below describe the legacy Python API. -For new work, prefer `batdetect2.api_v2.BatDetect2API` and the current docs site. +For new work, prefer `batdetect2.api_v2.BatDetect2API` and the current docs +site. -If you prefer to process your data within a Python script then you can use the `batdetect2` Python API. +If you prefer to process your data within a Python script then you can use the +`batdetect2` Python API. 
```python from batdetect2 import api @@ -170,25 +206,32 @@ detections, features = api.process_spectrogram(spec) # Do something else ... ``` -You can integrate the detections or the extracted features to your custom analysis pipeline. +You can integrate the detections or the extracted features into your custom +analysis pipeline. ## Training the model on your own data Take a look at the training tutorial in the docs site first. -If you are working from this repository checkout, -start with `docs/source/tutorials/train-a-custom-model.md`. +If you are working from this repository checkout, start with +`docs/source/tutorials/train-a-custom-model.md`. ## Data and annotations -The raw audio data and annotations used to train the models in the paper will be added soon. -The audio interface used to annotate audio data for training and evaluation is available [here](https://github.com/macaodha/batdetect2_GUI). +The raw audio data and annotations used to train the models in the paper will be +added soon. +The audio interface used to annotate audio data for training and evaluation is +available [here](https://github.com/macaodha/batdetect2_GUI). ## Warning -The models developed and shared as part of this repository should be used with caution. -While they have been evaluated on held out audio data, great care should be taken when using the model outputs for any form of biodiversity assessment. -Your data may differ, and as a result it is very strongly recommended that you validate the model first using data with known species to ensure that the outputs can be trusted. +The models developed and shared as part of this repository should be used with +caution. +While they have been evaluated on held-out audio data, great care should be +taken when using the model outputs for any form of biodiversity assessment.
+Your data may differ, and as a result it is very strongly recommended that you +validate the model first using data with known species to ensure that the +outputs can be trusted. ## FAQ @@ -196,7 +239,9 @@ For more information please consult our [FAQ](docs/source/faq.md). ## Reference -If you find our work useful in your research please consider citing our paper which you can find [here](https://www.biorxiv.org/content/10.1101/2022.12.14.520490v1): +If you find our work useful in your research please consider citing our paper +which you can find +[here](https://www.biorxiv.org/content/10.1101/2022.12.14.520490v1): ``` @article{batdetect2_2022, title = {Towards a General Approach for Bat Echolocation Detection and Classification}, @@ -207,10 +252,6 @@ If you find our work useful in your research please consider citing our paper wh ``` ## Acknowledgements -Thanks to all the contributors who spent time collecting and annotating audio data. - -### TODOs -- [x] Release the code and pretrained model -- [ ] Release the datasets and annotations used the experiments in the paper -- [ ] Add the scripts used to generate the tables and figures from the paper +Thanks to all the contributors who spent time collecting and annotating audio +data. diff --git a/src/batdetect2/api_v2.py b/src/batdetect2/api_v2.py index ec068b1..d2c0a8f 100644 --- a/src/batdetect2/api_v2.py +++ b/src/batdetect2/api_v2.py @@ -54,7 +54,8 @@ class BatDetect2API: evaluate predictions, and train or fine-tune models. In most cases, start with :meth:`from_checkpoint` to load a trained model. - Use :meth:`from_config` when you want to build a new model with custom configs. + Use :meth:`from_config` when you want to build a new model with custom + configs. Examples -------- @@ -93,7 +94,7 @@ class BatDetect2API: ): """Create a fully configured API instance. - This initializer is mainly for internal wiring. + This initializer is mainly for internal use. 
In most cases, users should create the API with :meth:`from_checkpoint` or :meth:`from_config`. @@ -264,7 +265,7 @@ class BatDetect2API: targets_config: TargetConfig, val_annotations: Sequence[data.ClipAnnotation] | None = None, trainable: Literal[ - "all", "heads", "classifier_head", "bbox_head" + "all", "heads", "classifier_head", "size_head" ] = "heads", train_workers: int = 0, val_workers: int = 0, @@ -279,11 +280,11 @@ class BatDetect2API: logger_config: LoggerConfig | None = None, logging_callbacks: Sequence[LoggingCallback[TrainLoggingContext]] = (), ) -> "BatDetect2API": - """Fine-tune the current model with a new target definition. + """Fine-tune the current model for new target sounds. Use this when you want to keep the existing model weights but change the target sounds. You can fine-tune the whole model or just the - classifier heads. + heads. Parameters ---------- @@ -293,7 +294,7 @@ class BatDetect2API: Target definition to train against. val_annotations : Sequence[data.ClipAnnotation] | None, optional Validation annotations. - trainable : {"all", "heads", "classifier_head", "bbox_head"}, optional + trainable : {"all", "heads", "classifier_head", "size_head"}, optional Which model parameters remain trainable. train_workers : int, optional Number of worker processes for training data loading. @@ -509,19 +510,63 @@ class BatDetect2API: return metrics def load_audio(self, path: data.PathLike) -> np.ndarray: - """Load one audio file into a waveform array.""" + """Load one audio file into a waveform array. + + Parameters + ---------- + path : data.PathLike + Path to the audio file. + + Returns + ------- + np.ndarray + Audio waveform loaded from disk. + """ return self.audio_loader.load_file(path) def load_recording(self, recording: data.Recording) -> np.ndarray: - """Load one recording object into a waveform array.""" + """Load one recording object into a waveform array. 
+ + Parameters + ---------- + recording : data.Recording + Recording object describing the audio to load. + + Returns + ------- + np.ndarray + Audio waveform for the requested recording. + """ return self.audio_loader.load_recording(recording) def load_clip(self, clip: data.Clip) -> np.ndarray: - """Load one clip object into a waveform array.""" + """Load one clip object into a waveform array. + + Parameters + ---------- + clip : data.Clip + Clip object describing the section of audio to load. + + Returns + ------- + np.ndarray + Audio waveform for the requested clip. + """ return self.audio_loader.load_clip(clip) def get_top_class_name(self, detection: Detection) -> str: - """Get highest-confidence class name for one detection.""" + """Get the name of the highest-confidence class for one detection. + + Parameters + ---------- + detection : Detection + Detection whose class scores will be inspected. + + Returns + ------- + str + Class name with the highest score. + """ import numpy as np @@ -535,7 +580,22 @@ class BatDetect2API: include_top_class: bool = True, sort_descending: bool = True, ) -> list[tuple[str, float]]: - """Get class score list as ``(class_name, score)`` pairs.""" + """Get class scores as ``(class_name, score)`` pairs. + + Parameters + ---------- + detection : Detection + Detection whose class scores will be returned. + include_top_class : bool, optional + If ``False``, omit the highest-scoring class from the result. + sort_descending : bool, optional + If ``True``, sort scores from highest to lowest. + + Returns + ------- + list[tuple[str, float]] + Class-score pairs for the detection. 
+ """ scores = [ (class_name, float(score)) @@ -559,17 +619,22 @@ class BatDetect2API: if class_name != top_class_name ] - @staticmethod - def get_detection_features(detection: Detection) -> np.ndarray: - """Get extracted feature vector for one detection.""" - - return detection.features - def generate_spectrogram( self, audio: np.ndarray, ) -> torch.Tensor: - """Convert a waveform array into a model spectrogram.""" + """Convert a waveform array into a spectrogram tensor. + + Parameters + ---------- + audio : np.ndarray + Audio waveform. + + Returns + ------- + torch.Tensor + Spectrogram tensor ready for model inference. + """ import torch tensor = torch.tensor(audio).unsqueeze(0) @@ -703,7 +768,20 @@ class BatDetect2API: audio_dir: data.PathLike, detection_threshold: float | None = None, ) -> list[ClipDetections]: - """Run inference on all supported audio files in a directory.""" + """Run inference on all supported audio files in a directory. + + Parameters + ---------- + audio_dir : data.PathLike + Directory containing audio files. + detection_threshold : float | None, optional + Detection score threshold override. + + Returns + ------- + list[ClipDetections] + Predictions for all supported audio files found in the directory. + """ from soundevent.audio.files import get_audio_files files = list(get_audio_files(audio_dir)) @@ -904,8 +982,8 @@ class BatDetect2API: ) -> "BatDetect2API": """Build an API instance from config objects. - Use this when you want to create a new model stack without loading a - saved checkpoint. + Use this when you want to create a new model without loading a saved + checkpoint. 
Parameters ---------- @@ -1161,7 +1239,7 @@ class BatDetect2API: def _set_trainable_parameters( self, - trainable: Literal["all", "heads", "classifier_head", "bbox_head"], + trainable: Literal["all", "heads", "classifier_head", "size_head"], ) -> None: detector = self.model.detector @@ -1177,6 +1255,6 @@ class BatDetect2API: for parameter in detector.classifier_head.parameters(): parameter.requires_grad = True - if trainable in {"heads", "bbox_head"}: + if trainable in {"heads", "size_head"}: for parameter in detector.size_head.parameters(): parameter.requires_grad = True diff --git a/src/batdetect2/cli/finetune.py b/src/batdetect2/cli/finetune.py index f0f4791..53fd1ba 100644 --- a/src/batdetect2/cli/finetune.py +++ b/src/batdetect2/cli/finetune.py @@ -1,11 +1,10 @@ from pathlib import Path -from typing import Literal, cast +from typing import Literal import click from loguru import logger from batdetect2.cli.base import cli -from batdetect2.train.checkpoints import DEFAULT_CHECKPOINT __all__ = ["finetune_command"] @@ -20,8 +19,7 @@ __all__ = ["finetune_command"] type=str, help=( "Path to a checkpoint, bundled checkpoint alias, or a Hugging Face " - "URI to fine-tune from. Defaults to " - f"'{DEFAULT_CHECKPOINT}'." + "URI to fine-tune from. 
Defaults to uk_same" ), ) @click.option( @@ -61,7 +59,7 @@ __all__ = ["finetune_command"] ) @click.option( "--trainable", - type=click.Choice(["all", "heads", "classifier_head", "bbox_head"]), + type=click.Choice(["all", "heads", "classifier_head", "size_head"]), default="heads", show_default=True, help="Which model parameters remain trainable during fine-tuning.", @@ -119,7 +117,9 @@ def finetune_command( training_config: Path | None = None, audio_config: Path | None = None, logging_config: Path | None = None, - trainable: str = "heads", + trainable: Literal[ + "all", "heads", "classifier_head", "size_head" + ] = "heads", seed: int | None = None, num_epochs: int | None = None, train_workers: int = 0, @@ -196,10 +196,7 @@ def finetune_command( train_annotations=train_annotations, val_annotations=val_annotations, targets_config=target_conf, - trainable=cast( - Literal["all", "heads", "classifier_head", "bbox_head"], - trainable, - ), + trainable=trainable, train_workers=train_workers, val_workers=val_workers, checkpoint_dir=ckpt_dir, diff --git a/tests/test_api_v2/test_api_v2.py b/tests/test_api_v2/test_api_v2.py index 9f7a109..dc1794d 100644 --- a/tests/test_api_v2/test_api_v2.py +++ b/tests/test_api_v2/test_api_v2.py @@ -200,13 +200,14 @@ def test_user_can_read_extracted_features_per_detection( ) -> None: """User story: inspect extracted feature vectors per detection.""" + # Given prediction = api_v2.process_file(example_audio_files[0]) - assert len(prediction.detections) > 0 + # When + feature_vectors = [det.features for det in prediction.detections] - feature_vectors = [ - api_v2.get_detection_features(det) for det in prediction.detections - ] + # Then + assert len(prediction.detections) > 0 assert len(feature_vectors) == len(prediction.detections) assert all(vec.ndim == 1 for vec in feature_vectors) assert all(vec.size > 0 for vec in feature_vectors)