Merge pull request #65 from macaodha/train

BatDetect2 Version 2.0 Beta
2026-07-07 21:00:10 +02:00 · 2026-05-07 09:19:36 +01:00 · 2026-05-07 09:19:36 +01:00 · 6784815cd9
commit 6784815cd9
parent 4ae567bc1d 7cdb6221dc
363 changed files with 43024 additions and 6981 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@ -1,8 +1,10 @@
 [bumpversion]
-current_version = 1.3.1
+current_version = 1.1.1
 commit = True
 tag = True

-[bumpversion:file:batdetect2/__init__.py]
+[bumpversion:file:src/batdetect2/__init__.py]

 [bumpversion:file:pyproject.toml]
+
+[bumpversion:file:docs/source/conf.py]
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -0,0 +1,79 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checks:
+    name: Checks
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: uv sync --all-extras --all-groups
+
+      - name: Run formatting, lint, and type checks
+        run: just check
+
+  tests:
+    name: Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: uv sync --all-extras --all-groups
+
+      - name: Run test suite
+        run: just test
--- a/.github/workflows/docs-pages.yml
+++ b/.github/workflows/docs-pages.yml
@ -0,0 +1,69 @@
+name: Docs Pages
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: docs-pages
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Build Docs
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Configure GitHub Pages
+        uses: actions/configure-pages@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: uv sync --all-extras --all-groups
+
+      - name: Build docs
+        run: just check-docs
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: docs/build
+
+  deploy:
+    name: Deploy Docs
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@ -0,0 +1,70 @@
+name: Publish PyPI
+
+on:
+  release:
+    types:
+      - published
+
+permissions:
+  contents: read
+
+concurrency:
+  group: publish-pypi
+  cancel-in-progress: false
+
+jobs:
+  build:
+    name: Build Distributions
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: just install-dev
+
+      - name: Build distributions
+        run: just build-dist
+
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+  publish:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+    environment:
+      name: pypi
+      url: https://pypi.org/p/batdetect2
+
+    steps:
+      - name: Download distributions
+        uses: actions/download-artifact@v5
+        with:
+          name: release-dists
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@ -1,29 +0,0 @@
-name: Python package
-
-on:
-  push:
-    branches: ["main"]
-  pull_request:
-    branches: ["main"]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
-
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install uv
-        uses: astral-sh/setup-uv@v3
-        with:
-          enable-cache: true
-          cache-dependency-glob: "uv.lock"
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
-      - name: Install the project
-        run: uv sync --all-extras --dev
-      - name: Test with pytest
-        run: uv run pytest
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@ -1,30 +0,0 @@
-name: Upload Python Package
-
-on:
-  release:
-    types: [published]
-
-permissions:
-  contents: read
-
-jobs:
-  deploy:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v3
-        with:
-          python-version: "3.x"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-      - name: Build package
-        run: python -m build
-      - name: Publish package
-        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-        with:
-          user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -50,6 +50,7 @@ cover/

 # Sphinx documentation
 docs/_build/
+docs/build/

 # PyBuilder
 .pybuilder/
@ -95,8 +96,15 @@ dmypy.json
 *.json
 plots/*

+!example_data/anns/*.json
+
 # Model experiments
 experiments/*
+DvcLiveLogger/checkpoints
+logs/
+mlruns/
+/outputs/
+notebooks/lightning_logs

 # Jupiter notebooks
 .virtual_documents
@ -105,8 +113,24 @@ experiments/*

 # DO Include
 !batdetect2_notebook.ipynb
-!batdetect2/models/*.pth.tar
+!src/batdetect2/models/checkpoints/*.pth.tar
 !tests/data/*.wav
+!notebooks/*.ipynb
 !tests/data/**/*.wav
-notebooks/lightning_logs
+.aider*
+
+# Intermediate artifacts
 example_data/preprocessed
+
+# Dev notebooks
+notebooks/tmp
+/tmp
+/.agents/skills
+/notebooks
+/AGENTS.md
+/scripts
+/todo.md
+
+# Assets
+!assets/*
+/models
--- a/.pylintrc
+++ b/.pylintrc
@ -1,5 +0,0 @@
-[TYPECHECK]
-
-# List of members which are set dynamically and missed by Pylint inference
-# system, and so shouldn't trigger E1101 when accessed.
-generated-members=torch.*
--- a/README.md
+++ b/README.md
@ -1,161 +1,156 @@
 # BatDetect2
-<img style="display: block-inline;" width="64" height="64" src="ims/bat_icon.png"> Code for detecting and classifying bat echolocation calls in high frequency audio recordings.

-> [!NOTE]
-> We’re actively working to make it easier to train and fine-tune BatDetect2 models using custom data. A major update is coming soon to the main branch—stay tuned! In the meantime, you can follow our progress in the train branch.
+<img style="display:inline-block;" width="64" height="64" src="assets/bat_icon.png">

-## Getting started
-### Python Environment
+Code for detecting and classifying bat echolocation calls in high-frequency
+audio recordings.

-We recommend using an isolated Python environment to avoid dependency issues. Choose one
-of the following options:
+> [!WARNING]
+> `batdetect2` 2.0.0b1 is out.
+> This is a beta release and we are gathering user feedback.
+> If you run into issues or have feedback on the new workflows, please use the
+> GitHub issues page to let us know.
+>
+> There are many changes and new recommended workflows.
+> We have left the previous `batdetect2.api` module intact, but if you run
+> into issues or want to upgrade, see the
+> [migration guide](docs/source/legacy/migration-guide.md) in the docs site.
+>
+> This update also ships with a refreshed default model.
+> It was trained in the same way and on the same data as before, but you should
+> still expect small output differences in some cases.

-* Install the Anaconda Python 3.10 distribution for your operating system from [here](https://www.continuum.io/downloads). Create a new environment and activate it:
+## What is BatDetect2

-```bash
-conda create -y --name batdetect2 python==3.10
-conda activate batdetect2
-```
+BatDetect2 is a deep learning model for detecting and classifying bat
+echolocation calls.
+The model generates multiple predictions for each input recording by providing a
+bounding box and predicted class for each individual call within it.

-* If you already have Python installed (version >= 3.8,< 3.11) and prefer using virtual environments then:
+This repository also holds `batdetect2`, a Python-based tool to run, train,
+finetune and evaluate BatDetect2-type models, including the built-in model for
+detecting UK bat species.
+You can use the tool from the command line (terminal) or from Python as needed.

-```bash
-python -m venv .venv
-source .venv/bin/activate
-```
+## Getting Started
+
+We have [extensive documentation](docs/source/index.md) on how to use
+`batdetect2`.
+
+The docs site is still being built and will be live soon.
+If you want a quick peek for now, see the `docs/` folder in this repository.
+
+See our [getting started](docs/source/getting_started.md) guide and then jump
+into any of our tutorials:
+
+- Run the model on a folder of recordings:
+  `docs/source/tutorials/run-inference-on-folder.md`
+- Train your own model:
+  `docs/source/tutorials/train-a-custom-model.md`
+- Evaluate your model:
+  `docs/source/tutorials/evaluate-on-a-test-set.md`
+- Integrate with a Python pipeline:
+  `docs/source/tutorials/integrate-with-a-python-pipeline.md`
+
+### Try the model
+
+If you want to try the model for UK bat species without installing anything, you
+can try the following:
+
+1. Demo of the model (for UK species) on
+   [huggingface](https://huggingface.co/spaces/macaodha/batdetect2).
+
+2. Alternatively, click
+   [here](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb)
+   to run the model using Google Colab.
+   You can also run this notebook locally.

 ### Installing BatDetect2
-You can use pip to install `batdetect2`:
+
+If you have `uv` installed (if not, we recommend it; follow the instructions
+[here](https://docs.astral.sh/uv/getting-started/installation/)), then you can
+run `batdetect2` one-off with

 ```bash
-pip install batdetect2
+uvx batdetect2
 ```

-Alternatively, download this code from the repository (by clicking on the green button on top right) and unzip it.
-Once unzipped, run this from extracted folder.
+or if you want to install it permanently:

 ```bash
-pip install .
+uv tool install batdetect2
 ```

-Make sure you have the environment activated before installing `batdetect2`.
+and test it with

-
-## Try the model
-1) You can try a demo of the model (for UK species) on [huggingface](https://huggingface.co/spaces/macaodha/batdetect2).
-
-2) Alternatively, click [here](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb) to run the model using Google Colab. You can also run this notebook locally.
-
-
-## Running the model on your own data
-
-After following the above steps to install the code you can run the model on your own data.
-
-
-### Using the command line
-
-You can run the model by opening the command line and typing:
 ```bash
-batdetect2 detect AUDIO_DIR ANN_DIR DETECTION_THRESHOLD
+batdetect2
 ```
-e.g.
+
+### Run BatDetect2 on a folder of recordings
+
+Once installed, you can run BatDetect2 on a folder of `.wav` files.
+By default it will use the model trained on UK data.
+
+Example command:
+
 ```bash
-batdetect2 detect example_data/audio/ example_data/anns/ 0.3
+batdetect2 process directory example_data/audio outputs
 ```

-`AUDIO_DIR` is the path on your computer to the audio wav files of interest.
-`ANN_DIR` is the path on your computer where the model predictions will be saved. The model will output both `.csv` and `.json` results for each audio file.
-`DETECTION_THRESHOLD` is a number between 0 and 1 specifying the cut-off threshold applied to the calls. A smaller number will result in more calls detected, but with the chance of introducing more mistakes.
+This will scan the audio files in `example_data/audio` and save model outputs to
+`outputs`.
+If you have your own model checkpoint, you can use it:

-There are also optional arguments, e.g. you can request that the model outputs features (i.e. estimated call parameters) such as duration, max_frequency, etc. by setting the flag `--spec_features`. These will be saved as `*_spec_features.csv` files:
-`batdetect2 detect example_data/audio/ example_data/anns/ 0.3 --spec_features`
-
-You can also specify which model to use by setting the `--model_path` argument. If not specified, it will default to using a model trained on UK data e.g.
-`batdetect2 detect example_data/audio/ example_data/anns/ 0.3 --model_path models/Net2DFast_UK_same.pth.tar`
-
-
-### Using the Python API
-
-If you prefer to process your data within a Python script then you can use the `batdetect2` Python API.
-
-```python
-from batdetect2 import api
-
-AUDIO_FILE = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
-
-# Process a whole file
-results = api.process_file(AUDIO_FILE)
-
-# Or, load audio and compute spectrograms
-audio = api.load_audio(AUDIO_FILE)
-spec = api.generate_spectrogram(audio)
-
-# And process the audio or the spectrogram with the model
-detections, features, spec = api.process_audio(audio)
-detections, features = api.process_spectrogram(spec)
-
-# Do something else ...
+```bash
+batdetect2 process directory --model path/to/checkpoint.ckpt example_data/audio outputs
 ```

-You can integrate the detections or the extracted features to your custom analysis pipeline.
-
-#### Using the Python API with HTTP
-
-```python
-from batdetect2 import api
-import io
-import requests
-
-AUDIO_URL = "<insert your audio url here>"
-
-# Process a whole file from a url
-results = api.process_url(AUDIO_URL)
-
-# Or, load audio and compute spectrograms
-# 'requests.get(AUDIO_URL).content' fetches the raw bytes. You are free to use other sources to fetch the raw bytes
-audio = api.load_audio(io.BytesIO(requests.get(AUDIO_URL).content))
-spec = api.generate_spectrogram(audio)
-
-# And process the audio or the spectrogram with the model
-detections, features, spec = api.process_audio(audio)
-detections, features = api.process_spectrogram(spec)
-```
-
-## Training the model on your own data
-Take a look at the steps outlined in finetuning readme [here](batdetect2/finetune/readme.md) for a description of how to train your own model.
-
+For the full walkthrough, use
+`docs/source/tutorials/run-inference-on-folder.md`.

 ## Data and annotations
-The raw audio data and annotations used to train the models in the paper will be added soon.
-The audio interface used to annotate audio data for training and evaluation is available [here](https://github.com/macaodha/batdetect2_GUI).

+The raw audio data and annotations used to train the models in the paper will be
+added soon.
+`batdetect2` supports annotations in various formats and is compatible with the
+outputs of [`whombat`](https://github.com/mbsantiago/whombat/) and this
+[earlier version](https://github.com/macaodha/batdetect2_GUI).
+If you're interested in supporting another format, please reach out or submit a
+PR.

 ## Warning
-The models developed and shared as part of this repository should be used with caution.
-While they have been evaluated on held out audio data, great care should be taken when using the model outputs for any form of biodiversity assessment.
-Your data may differ, and as a result it is very strongly recommended that you validate the model first using data with known species to ensure that the outputs can be trusted.

+The models developed and shared as part of this repository should be used with
+caution.
+While they have been evaluated on held-out audio data, great care should be
+taken when using the model outputs for any form of biodiversity assessment.
+Your data may differ, and as a result it is very strongly recommended that you
+validate the model first using data with known species to ensure that the
+outputs can be trusted.
+If you train a model, make the best effort to be transparent about its training
+and evaluation data, and inform downstream users about its limitations.

 ## FAQ
-For more information please consult our [FAQ](faq.md).

+For more information please consult our [FAQ](docs/source/faq.md).

 ## Reference
-If you find our work useful in your research please consider citing our paper which you can find [here](https://www.biorxiv.org/content/10.1101/2022.12.14.520490v1):
+
+If you find our work useful in your research, please consider citing our paper,
+which you can find
+[here](https://www.biorxiv.org/content/10.1101/2022.12.14.520490v1):
+
 ```
@article{batdetect2_2022,
    title     = {Towards a General Approach for Bat Echolocation Detection and Classification},
-    author    = {Mac Aodha, Oisin and  Mart\'{i}nez Balvanera, Santiago and  Damstra, Elise and  Cooke, Martyn and  Eichinski, Philip and  Browning, Ella and  Barataudm, Michel and  Boughey, Katherine and  Coles, Roger and  Giacomini, Giada and MacSwiney G., M. Cristina and  K. Obrist, Martin and Parsons, Stuart and  Sattler, Thomas and  Jones, Kate E.},
+    author    = {Mac Aodha, Oisin and  Mart\'{i}nez Balvanera, Santiago and  Damstra, Elise and  Cooke, Martyn and  Eichinski, Philip and  Browning, Ella and  Barataud, Michel and  Boughey, Katherine and  Coles, Roger and  Giacomini, Giada and MacSwiney G., M. Cristina and  K. Obrist, Martin and Parsons, Stuart and  Sattler, Thomas and  Jones, Kate E.},
    journal   = {bioRxiv},
    year      = {2022}
 }
 ```

 ## Acknowledgements
-Thanks to all the contributors who spent time collecting and annotating audio data.

-
-### TODOs
-- [x] Release the code and pretrained model  
-- [ ] Release the datasets and annotations used the experiments in the paper 
-- [ ] Add the scripts used to generate the tables and figures from the paper 
+Thanks to all the contributors who spent time collecting and annotating audio
+data.
--- a/assets/bat_icon.png
+++ b/assets/bat_icon.png
--- a/batdetect2/__init__.py
+++ b/batdetect2/__init__.py
@ -1,6 +0,0 @@
-import logging
-
-numba_logger = logging.getLogger("numba")
-numba_logger.setLevel(logging.WARNING)
-
-__version__ = "1.3.1"
--- a/batdetect2/detector/parameters.py
+++ b/batdetect2/detector/parameters.py
@ -1,232 +0,0 @@
-import datetime
-import os
-
-from batdetect2.types import ProcessingConfiguration, SpectrogramParameters
-
-TARGET_SAMPLERATE_HZ = 256000
-FFT_WIN_LENGTH_S = 512 / 256000.0
-FFT_OVERLAP = 0.75
-MAX_FREQ_HZ = 120000
-MIN_FREQ_HZ = 10000
-RESIZE_FACTOR = 0.5
-SPEC_DIVIDE_FACTOR = 32
-SPEC_HEIGHT = 256
-SCALE_RAW_AUDIO = False
-DETECTION_THRESHOLD = 0.01
-NMS_KERNEL_SIZE = 9
-NMS_TOP_K_PER_SEC = 200
-SPEC_SCALE = "pcen"
-DENOISE_SPEC_AVG = True
-MAX_SCALE_SPEC = False
-
-
-DEFAULT_MODEL_PATH = os.path.join(
-    os.path.dirname(os.path.dirname(__file__)),
-    "models",
-    "Net2DFast_UK_same.pth.tar",
-)
-
-
-DEFAULT_SPECTROGRAM_PARAMETERS: SpectrogramParameters = {
-    "fft_win_length": FFT_WIN_LENGTH_S,
-    "fft_overlap": FFT_OVERLAP,
-    "spec_height": SPEC_HEIGHT,
-    "resize_factor": RESIZE_FACTOR,
-    "spec_divide_factor": SPEC_DIVIDE_FACTOR,
-    "max_freq": MAX_FREQ_HZ,
-    "min_freq": MIN_FREQ_HZ,
-    "spec_scale": SPEC_SCALE,
-    "denoise_spec_avg": DENOISE_SPEC_AVG,
-    "max_scale_spec": MAX_SCALE_SPEC,
-}
-
-
-DEFAULT_PROCESSING_CONFIGURATIONS: ProcessingConfiguration = {
-    "detection_threshold": DETECTION_THRESHOLD,
-    "spec_slices": False,
-    "chunk_size": 3,
-    "spec_features": False,
-    "cnn_features": False,
-    "quiet": True,
-    "target_samp_rate": TARGET_SAMPLERATE_HZ,
-    "fft_win_length": FFT_WIN_LENGTH_S,
-    "fft_overlap": FFT_OVERLAP,
-    "resize_factor": RESIZE_FACTOR,
-    "spec_divide_factor": SPEC_DIVIDE_FACTOR,
-    "spec_height": SPEC_HEIGHT,
-    "scale_raw_audio": SCALE_RAW_AUDIO,
-    "class_names": [],
-    "time_expansion": 1,
-    "top_n": 3,
-    "return_raw_preds": False,
-    "max_duration": None,
-    "nms_kernel_size": NMS_KERNEL_SIZE,
-    "max_freq": MAX_FREQ_HZ,
-    "min_freq": MIN_FREQ_HZ,
-    "nms_top_k_per_sec": NMS_TOP_K_PER_SEC,
-    "spec_scale": SPEC_SCALE,
-    "denoise_spec_avg": DENOISE_SPEC_AVG,
-    "max_scale_spec": MAX_SCALE_SPEC,
-}
-
-
-def mk_dir(path):
-    if not os.path.isdir(path):
-        os.makedirs(path)
-
-
-def get_params(make_dirs=False, exps_dir="../../experiments/"):
-    params = {}
-
-    params[
-        "model_name"
-    ] = "Net2DFast"  # Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN
-    params["num_filters"] = 128
-
-    now_str = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
-    model_name = now_str + ".pth.tar"
-    params["experiment"] = os.path.join(exps_dir, now_str, "")
-    params["model_file_name"] = os.path.join(params["experiment"], model_name)
-    params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "")
-    params["op_im_dir_test"] = os.path.join(
-        params["experiment"], "op_ims_test", ""
-    )
-    # params['notes']           = ''  # can save notes about an experiment here
-
-    # spec parameters
-    params[
-        "target_samp_rate"
-    ] = TARGET_SAMPLERATE_HZ  # resamples all audio so that it is at this rate
-    params[
-        "fft_win_length"
-    ] = FFT_WIN_LENGTH_S  # in milliseconds, amount of time per stft time step
-    params["fft_overlap"] = FFT_OVERLAP  # stft window overlap
-
-    params[
-        "max_freq"
-    ] = MAX_FREQ_HZ  # in Hz, everything above this will be discarded
-    params[
-        "min_freq"
-    ] = MIN_FREQ_HZ  # in Hz, everything below this will be discarded
-
-    params[
-        "resize_factor"
-    ] = RESIZE_FACTOR  # resize so the spectrogram at the input of the network
-    params[
-        "spec_height"
-    ] = SPEC_HEIGHT  # units are number of frequency bins (before resizing is performed)
-    params[
-        "spec_train_width"
-    ] = 512  # units are number of time steps (before resizing is performed)
-    params[
-        "spec_divide_factor"
-    ] = SPEC_DIVIDE_FACTOR  # spectrogram should be divisible by this amount in width and height
-
-    # spec processing params
-    params[
-        "denoise_spec_avg"
-    ] = DENOISE_SPEC_AVG  # removes the mean for each frequency band
-    params[
-        "scale_raw_audio"
-    ] = SCALE_RAW_AUDIO  # scales the raw audio to [-1, 1]
-    params[
-        "max_scale_spec"
-    ] = MAX_SCALE_SPEC  # scales the spectrogram so that it is max 1
-    params["spec_scale"] = SPEC_SCALE  # 'log', 'pcen', 'none'
-
-    # detection params
-    params[
-        "detection_overlap"
-    ] = 0.01  # has to be within this number of ms to count as detection
-    params[
-        "ignore_start_end"
-    ] = 0.01  # if start of GT calls are within this time from the start/end of file ignore
-    params[
-        "detection_threshold"
-    ] = DETECTION_THRESHOLD  # the smaller this is the better the recall will be
-    params[
-        "nms_kernel_size"
-    ] = NMS_KERNEL_SIZE  # size of the kernel for non-max suppression
-    params[
-        "nms_top_k_per_sec"
-    ] = NMS_TOP_K_PER_SEC  # keep top K highest predictions per second of audio
-    params["target_sigma"] = 2.0
-
-    # augmentation params
-    params[
-        "aug_prob"
-    ] = 0.20  # augmentations will be performed with this probability
-    params["augment_at_train"] = True
-    params["augment_at_train_combine"] = True
-    params[
-        "echo_max_delay"
-    ] = 0.005  # simulate echo by adding copy of raw audio
-    params["stretch_squeeze_delta"] = 0.04  # stretch or squeeze spec
-    params[
-        "mask_max_time_perc"
-    ] = 0.05  # max mask size - here percentage, not ideal
-    params[
-        "mask_max_freq_perc"
-    ] = 0.10  # max mask size - here percentage, not ideal
-    params[
-        "spec_amp_scaling"
-    ] = 2.0  # multiply the "volume" by 0:X times current amount
-    params["aug_sampling_rates"] = [
-        220500,
-        256000,
-        300000,
-        312500,
-        384000,
-        441000,
-        500000,
-    ]
-
-    # loss params
-    params["train_loss"] = "focal"  # mse or focal
-    params["det_loss_weight"] = 1.0  # weight for the detection part of the loss
-    params["size_loss_weight"] = 0.1  # weight for the bbox size loss
-    params["class_loss_weight"] = 2.0  # weight for the classification loss
-    params["individual_loss_weight"] = 0.0  # not used
-    if params["individual_loss_weight"] == 0.0:
-        params[
-            "emb_dim"
-        ] = 0  # number of dimensions used for individual id embedding
-    else:
-        params["emb_dim"] = 3
-
-    # train params
-    params["lr"] = 0.001
-    params["batch_size"] = 8
-    params["num_workers"] = 4
-    params["num_epochs"] = 200
-    params["num_eval_epochs"] = 5  # run evaluation every X epochs
-    params["device"] = "cuda"
-    params["save_test_image_during_train"] = False
-    params["save_test_image_after_train"] = True
-
-    params["convert_to_genus"] = False
-    params["genus_mapping"] = []
-    params["class_names"] = []
-    params["classes_to_ignore"] = ["", " ", "Unknown", "Not Bat"]
-    params["generic_class"] = ["Bat"]
-    params["events_of_interest"] = [
-        "Echolocation"
-    ]  # will ignore all other types of events e.g. social calls
-
-    # the classes in this list are standardized during training so that the same low and high freq are used
-    params["standardize_classs_names"] = []
-
-    # create directories
-    if make_dirs:
-        print("Model name : " + params["model_name"])
-        print("Model file : " + params["model_file_name"])
-        print("Experiment : " + params["experiment"])
-
-        mk_dir(params["experiment"])
-        if params["save_test_image_during_train"]:
-            mk_dir(params["op_im_dir"])
-        if params["save_test_image_after_train"]:
-            mk_dir(params["op_im_dir_test"])
-        mk_dir(os.path.dirname(params["model_file_name"]))
-
-    return params
--- a/batdetect2/finetune/prep_data_finetune.py
+++ b/batdetect2/finetune/prep_data_finetune.py
@ -1,201 +0,0 @@
-import argparse
-import json
-import os
-
-import numpy as np
-
-import batdetect2.train.train_utils as tu
-
-
-def print_dataset_stats(data, split_name, classes_to_ignore):
-    print("\nSplit:", split_name)
-    print("Num files:", len(data))
-
-    class_cnts = {}
-    for dd in data:
-        for aa in dd["annotation"]:
-            if aa["class"] not in classes_to_ignore:
-                if aa["class"] in class_cnts:
-                    class_cnts[aa["class"]] += 1
-                else:
-                    class_cnts[aa["class"]] = 1
-
-    if len(class_cnts) == 0:
-        class_names = []
-    else:
-        class_names = np.sort([*class_cnts]).tolist()
-        print("Class count:")
-        str_len = np.max([len(cc) for cc in class_names]) + 5
-
-        for ii, cc in enumerate(class_names):
-            print(str(ii).ljust(5) + cc.ljust(str_len) + str(class_cnts[cc]))
-
-    return class_names
-
-
-def load_file_names(file_name):
-    if os.path.isfile(file_name):
-        with open(file_name) as da:
-            files = [line.rstrip() for line in da.readlines()]
-        for ff in files:
-            if ff.lower()[-3:] != "wav":
-                print("Error: Filenames need to end in .wav - ", ff)
-                assert False
-    else:
-        print("Error: Input file not found - ", file_name)
-        assert False
-
-    return files
-
-
-if __name__ == "__main__":
-    info_str = "\nBatDetect - Prepare Data for Finetuning\n"
-
-    print(info_str)
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "dataset_name", type=str, help="Name to call your dataset"
-    )
-    parser.add_argument("audio_dir", type=str, help="Input directory for audio")
-    parser.add_argument(
-        "ann_dir",
-        type=str,
-        help="Input directory for where the audio annotations are stored",
-    )
-    parser.add_argument(
-        "op_dir",
-        type=str,
-        help="Path where the train and test splits will be stored",
-    )
-    parser.add_argument(
-        "--percent_val",
-        type=float,
-        default=0.20,
-        help="Hold out this much data for validation. Should be number between 0 and 1",
-    )
-    parser.add_argument(
-        "--rand_seed",
-        type=int,
-        default=2001,
-        help="Random seed used for creating the validation split",
-    )
-    parser.add_argument(
-        "--train_file",
-        type=str,
-        default="",
-        help="Text file where each line is a wav file in train split",
-    )
-    parser.add_argument(
-        "--test_file",
-        type=str,
-        default="",
-        help="Text file where each line is a wav file in test split",
-    )
-    parser.add_argument(
-        "--input_class_names",
-        type=str,
-        default="",
-        help='Specify names of classes that you want to change. Separate with ";"',
-    )
-    parser.add_argument(
-        "--output_class_names",
-        type=str,
-        default="",
-        help='New class names to use instead. One to one mapping with "--input_class_names". \
-                         Separate with ";"',
-    )
-    args = vars(parser.parse_args())
-
-    np.random.seed(args["rand_seed"])
-
-    classes_to_ignore = ["", " ", "Unknown", "Not Bat"]
-    generic_class = ["Bat"]
-    events_of_interest = ["Echolocation"]
-
-    if args["input_class_names"] != "" and args["output_class_names"] != "":
-        # change the names of the classes
-        ip_names = args["input_class_names"].split(";")
-        op_names = args["output_class_names"].split(";")
-        name_dict = dict(zip(ip_names, op_names))
-    else:
-        name_dict = False
-
-    # load annotations
-    data_all, _, _ = tu.load_set_of_anns(
-        {"ann_path": args["ann_dir"], "wav_path": args["audio_dir"]},
-        classes_to_ignore,
-        events_of_interest,
-        False,
-        False,
-        list_of_anns=True,
-        filter_issues=True,
-        name_replace=name_dict,
-    )
-
-    print("Dataset name:         " + args["dataset_name"])
-    print("Audio directory:      " + args["audio_dir"])
-    print("Annotation directory: " + args["ann_dir"])
-    print("Ouput directory:      " + args["op_dir"])
-    print("Num annotated files:  " + str(len(data_all)))
-
-    if args["train_file"] != "" and args["test_file"] != "":
-        # user has specifed the train / test split
-        train_files = load_file_names(args["train_file"])
-        test_files = load_file_names(args["test_file"])
-        file_names_all = [dd["id"] for dd in data_all]
-        train_inds = [
-            file_names_all.index(ff)
-            for ff in train_files
-            if ff in file_names_all
-        ]
-        test_inds = [
-            file_names_all.index(ff)
-            for ff in test_files
-            if ff in file_names_all
-        ]
-
-    else:
-        # split the data into train and test at the file level
-        num_exs = len(data_all)
-        test_inds = np.random.choice(
-            np.arange(num_exs),
-            int(num_exs * args["percent_val"]),
-            replace=False,
-        )
-        test_inds = np.sort(test_inds)
-        train_inds = np.setdiff1d(np.arange(num_exs), test_inds)
-
-    data_train = [data_all[ii] for ii in train_inds]
-    data_test = [data_all[ii] for ii in test_inds]
-
-    if not os.path.isdir(args["op_dir"]):
-        os.makedirs(args["op_dir"])
-    op_name = os.path.join(args["op_dir"], args["dataset_name"])
-    op_name_train = op_name + "_TRAIN.json"
-    op_name_test = op_name + "_TEST.json"
-
-    class_un_train = print_dataset_stats(data_train, "Train", classes_to_ignore)
-    class_un_test = print_dataset_stats(data_test, "Test", classes_to_ignore)
-
-    if len(data_train) > 0 and len(data_test) > 0:
-        if class_un_train != class_un_test:
-            print(
-                '\nError: some classes are not in both the training and test sets.\
-                   \nTry a different random seed "--rand_seed".'
-            )
-            assert False
-
-    print("\n")
-    if len(data_train) == 0:
-        print("No train annotations to save")
-    else:
-        print("Saving: ", op_name_train)
-        with open(op_name_train, "w") as da:
-            json.dump(data_train, da, indent=2)
-
-    if len(data_test) == 0:
-        print("No test annotations to save")
-    else:
-        print("Saving: ", op_name_test)
-        with open(op_name_test, "w") as da:
-            json.dump(data_test, da, indent=2)
--- a/batdetect2/train/audio_dataloader.py
+++ b/batdetect2/train/audio_dataloader.py
@ -1,603 +0,0 @@
-import copy
-from typing import Tuple
-
-import librosa
-import numpy as np
-import torch
-import torch.nn.functional as F
-import torchaudio
-
-import batdetect2.utils.audio_utils as au
-from batdetect2.types import AnnotationGroup, HeatmapParameters
-
-
-def generate_gt_heatmaps(
-    spec_op_shape: Tuple[int, int],
-    sampling_rate: int,
-    ann: AnnotationGroup,
-    params: HeatmapParameters,
-) -> Tuple[np.ndarray, np.ndarray, np.ndarray, AnnotationGroup]:
-    """Generate ground truth heatmaps from annotations.
-
-    Every annotation is anchored at the pixel given by its start time
-    (x axis) and its low frequency (y axis).
-
-    Parameters
-    ----------
-    spec_op_shape : Tuple[int, int]
-        (height, width) of the spectrogram the heatmaps are aligned to.
-    sampling_rate : int
-        Sampling rate of the input audio in Hz.
-    ann : AnnotationGroup
-        Dictionary containing the annotation information.
-    params : HeatmapParameters
-        Parameters controlling the generation of the heatmaps.
-
-    Returns
-    -------
-
-    y_2d_det : np.ndarray
-        Array of shape (1, height, width) with a Gaussian drawn at the
-        anchor pixel of every valid annotation.
-
-    y_2d_size : np.ndarray
-        Array of shape (2, height, width). At each anchor pixel channel 0
-        holds the bounding box width and channel 1 its height, both in
-        pixels; all other entries are zero.
-
-    y_2d_classes : np.ndarray
-        Array of shape (num_classes + 1, height, width) containing the
-        ground-truth class probabilities for each pixel. The last channel
-        is the "background" class.
-
-    ann_aug : AnnotationGroup
-        A dictionary containing the annotation information of the
-        annotations that are within the input spectrogram, augmented with
-        the x and y indices of their pixel location in the input spectrogram.
-
-    """
-    # spec may be resized on input into the network
-    num_classes = len(params["class_names"])
-    op_height = spec_op_shape[0]
-    op_width = spec_op_shape[1]
-    freq_per_bin = (params["max_freq"] - params["min_freq"]) / op_height
-
-    # start and end times
-    # NOTE: the builtin ``int`` is used as dtype because the ``np.int`` alias
-    # no longer exists in current NumPy releases.
-    x_pos_start = au.time_to_x_coords(
-        ann["start_times"],
-        sampling_rate,
-        params["fft_win_length"],
-        params["fft_overlap"],
-    )
-    x_pos_start = (params["resize_factor"] * x_pos_start).astype(int)
-    x_pos_end = au.time_to_x_coords(
-        ann["end_times"],
-        sampling_rate,
-        params["fft_win_length"],
-        params["fft_overlap"],
-    )
-    x_pos_end = (params["resize_factor"] * x_pos_end).astype(int)
-
-    # location on y axis i.e. frequency (row 0 is the highest frequency)
-    y_pos_low = (ann["low_freqs"] - params["min_freq"]) / freq_per_bin
-    y_pos_low = (op_height - y_pos_low).astype(int)
-    y_pos_high = (ann["high_freqs"] - params["min_freq"]) / freq_per_bin
-    y_pos_high = (op_height - y_pos_high).astype(int)
-    bb_widths = x_pos_end - x_pos_start
-    bb_heights = y_pos_low - y_pos_high
-
-    # Only include annotations that are within the input spectrogram
-    valid_inds = np.where(
-        (x_pos_start >= 0)
-        & (x_pos_start < op_width)
-        & (y_pos_low >= 0)
-        & (y_pos_low < (op_height - 1))
-    )[0]
-
-    # indexing with valid_inds copies the data, so ``ann`` is left untouched
-    ann_aug: AnnotationGroup = {
-        "start_times": ann["start_times"][valid_inds],
-        "end_times": ann["end_times"][valid_inds],
-        "high_freqs": ann["high_freqs"][valid_inds],
-        "low_freqs": ann["low_freqs"][valid_inds],
-        "class_ids": ann["class_ids"][valid_inds],
-        "individual_ids": ann["individual_ids"][valid_inds],
-    }
-    ann_aug["x_inds"] = x_pos_start[valid_inds]
-    ann_aug["y_inds"] = y_pos_low[valid_inds]
-
-    # if the number of calls is only 1, then it is unique
-    # TODO would be better if we found these unique calls at the merging stage
-    if len(ann_aug["individual_ids"]) == 1:
-        ann_aug["individual_ids"][0] = 0
-
-    y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32)
-    y_2d_size = np.zeros((2, op_height, op_width), dtype=np.float32)
-    # num classes and "background" class
-    y_2d_classes: np.ndarray = np.zeros(
-        (num_classes + 1, op_height, op_width), dtype=np.float32
-    )
-
-    # create 2D ground truth heatmaps
-    for ii in valid_inds:
-        draw_gaussian(
-            y_2d_det[0, :],
-            (x_pos_start[ii], y_pos_low[ii]),
-            params["target_sigma"],
-        )
-        y_2d_size[0, y_pos_low[ii], x_pos_start[ii]] = bb_widths[ii]
-        y_2d_size[1, y_pos_low[ii], x_pos_start[ii]] = bb_heights[ii]
-
-        # a class id of -1 marks an event whose class is not known
-        cls_id = ann["class_ids"][ii]
-        if cls_id > -1:
-            draw_gaussian(
-                y_2d_classes[cls_id, :],
-                (x_pos_start[ii], y_pos_low[ii]),
-                params["target_sigma"],
-            )
-
-    # be careful as this will have a 1.0 places where we have event but dont know gt class
-    # this will be masked in training anyway
-    y_2d_classes[num_classes, :] = 1.0 - y_2d_classes.sum(0)
-    y_2d_classes = y_2d_classes / y_2d_classes.sum(0)[np.newaxis, ...]
-    y_2d_classes[np.isnan(y_2d_classes)] = 0.0
-
-    return y_2d_det, y_2d_size, y_2d_classes, ann_aug
-
-
-def draw_gaussian(heatmap, center, sigmax, sigmay=None):
-    """Stamp a 2D Gaussian onto ``heatmap`` in place.
-
-    ``center`` is given as (x, y), i.e. (column, row). The Gaussian patch
-    extends three times the larger sigma on each side and is clipped to the
-    heatmap; existing values are combined with it using an element-wise
-    maximum. Returns False (leaving the heatmap untouched) when the patch
-    lies completely outside the heatmap, True otherwise.
-    """
-    sigmay = sigmax if sigmay is None else sigmay
-    radius = np.maximum(sigmax, sigmay) * 3
-    col_c = int(center[0] + 0.5)
-    row_c = int(center[1] + 0.5)
-    n_rows, n_cols = heatmap.shape[0], heatmap.shape[1]
-
-    # bounding box of the patch in heatmap coordinates (upper bound exclusive)
-    col_lo, row_lo = int(col_c - radius), int(row_c - radius)
-    col_hi, row_hi = int(col_c + radius + 1), int(row_c + radius + 1)
-
-    outside = col_lo >= n_cols or row_lo >= n_rows or col_hi < 0 or row_hi < 0
-    if outside:
-        return False
-
-    # evaluate the (unnormalised) Gaussian on a square patch centred on itself
-    patch_len = 2 * radius + 1
-    xs = np.arange(0, patch_len, 1, np.float32)
-    ys = xs[:, np.newaxis]
-    mid = patch_len // 2
-    kernel = np.exp(
-        -((xs - mid) ** 2) / (2 * sigmax**2)
-        - ((ys - mid) ** 2) / (2 * sigmay**2)
-    )
-
-    # part of the kernel that falls inside the heatmap ...
-    k_c0, k_c1 = max(0, -col_lo), min(col_hi, n_cols) - col_lo
-    k_r0, k_r1 = max(0, -row_lo), min(row_hi, n_rows) - row_lo
-    # ... and the heatmap region it lands on
-    h_c0, h_c1 = max(0, col_lo), min(col_hi, n_cols)
-    h_r0, h_r1 = max(0, row_lo), min(row_hi, n_rows)
-
-    current = heatmap[h_r0:h_r1, h_c0:h_c1]
-    heatmap[h_r0:h_r1, h_c0:h_c1] = np.maximum(
-        current, kernel[k_r0:k_r1, k_c0:k_c1]
-    )
-    return True
-
-
-def pad_aray(ip_array, pad_size):
-    """Return ``ip_array`` with ``pad_size`` entries of -1 appended.
-
-    The padding is created with the builtin ``int`` dtype; the ``np.int``
-    alias used previously no longer exists in current NumPy releases.
-    """
-    return np.hstack((ip_array, np.full(pad_size, -1, dtype=int)))
-
-
-def warp_spec_aug(spec, ann, return_spec_for_viz, params):
-    # Time-warp augmentation: randomly stretch or squeeze the spectrogram
-    # along its last axis and resize it back to the original size.
-    # NOTE the "start_times" and "end_times" arrays in ann are rescaled
-    # in place to match.
-
-    # the spectrogram used for visualisation is not handled here
-    assert not return_spec_for_viz
-
-    max_delta = params["stretch_squeeze_delta"]
-    height, width = spec.shape[1], spec.shape[2]
-
-    # factor drawn uniformly from [1 - max_delta, 1 + max_delta)
-    stretch = np.random.rand() * max_delta * 2 - max_delta + 1.0
-    new_width = int(width * stretch)
-
-    if new_width < width:
-        # squeeze: keep only the first new_width columns
-        warped = spec[:, :, :new_width]
-    else:
-        # stretch: append zero columns up to new_width
-        filler = torch.zeros(
-            (1, height, new_width - width), dtype=spec.dtype
-        )
-        warped = torch.cat((spec, filler), 2)
-
-    resized = F.interpolate(
-        warped.unsqueeze(0),
-        size=(height, width),
-        mode="bilinear",
-        align_corners=False,
-    )
-    ann["start_times"] *= 1.0 / stretch
-    ann["end_times"] *= 1.0 / stretch
-    return resized.squeeze(0)
-
-
-def mask_time_aug(spec, params):
-    # Time masking as in "SpecAugment: A Simple Data Augmentation Method for
-    # Automatic Speech Recognition": mask out a random block of time, applied
-    # between one and three times.
-    # NOTE(review): the maximum mask length is derived from spec.shape[1];
-    # confirm that this is the intended axis for a time mask.
-    max_len = int(spec.shape[1] * params["mask_max_time_perc"])
-    time_mask = torchaudio.transforms.TimeMasking(max_len)
-    repeats = np.random.randint(1, 4)
-    for _ in range(repeats):
-        spec = time_mask(spec)
-    return spec
-
-
-def mask_freq_aug(spec, params):
-    # Frequency masking as in "SpecAugment: A Simple Data Augmentation Method
-    # for Automatic Speech Recognition": mask out a random frequency range,
-    # applied between one and three times.
-    max_len = int(spec.shape[1] * params["mask_max_freq_perc"])
-    freq_mask = torchaudio.transforms.FrequencyMasking(max_len)
-    repeats = np.random.randint(1, 4)
-    for _ in range(repeats):
-        spec = freq_mask(spec)
-    return spec
-
-
-def scale_vol_aug(spec, params):
-    # Volume augmentation: scale the spectrogram by a random factor in
-    # [0, 1) and then by params["spec_amp_scaling"].
-    randomly_scaled = spec * np.random.random()
-    return randomly_scaled * params["spec_amp_scaling"]
-
-
-def echo_aug(audio, sampling_rate, params):
-    # Echo augmentation: add a randomly weighted copy of the signal, shifted
-    # by a random offset of at least one sample, onto the signal itself.
-    # NOTE this modifies audio in place and also returns it.
-    delay = params["echo_max_delay"] * np.random.random()
-    sample_offset = int(delay * sampling_rate) + 1
-    echo_gain = np.random.random()
-    audio[:-sample_offset] += echo_gain * audio[sample_offset:]
-    return audio
-
-
-def resample_aug(audio, sampling_rate, params):
-    # Resampling augmentation: resample the audio to a rate picked at random
-    # from params["aug_sampling_rates"], pad it again, and return the new
-    # audio together with the new sampling rate and duration.
-    orig_rate = sampling_rate
-    new_rate = np.random.choice(params["aug_sampling_rates"])
-    resampled = librosa.resample(
-        audio,
-        orig_sr=orig_rate,
-        target_sr=new_rate,
-        res_type="polyphase",
-    )
-
-    padded = au.pad_audio(
-        resampled,
-        new_rate,
-        params["fft_win_length"],
-        params["fft_overlap"],
-        params["resize_factor"],
-        params["spec_divide_factor"],
-        params["spec_train_width"],
-    )
-    return padded, new_rate, padded.shape[0] / float(new_rate)
-
-
-def resample_audio(num_samples, sampling_rate, audio2, sampling_rate2):
-    # Bring audio2 to the given sampling rate and to exactly num_samples
-    # samples: resample if the rates differ, then zero-pad at the end or
-    # truncate. Returns the adjusted audio and its sampling rate.
-    rates_differ = sampling_rate2 != sampling_rate
-    if rates_differ:
-        audio2 = librosa.resample(
-            audio2,
-            orig_sr=sampling_rate2,
-            target_sr=sampling_rate,
-            res_type="polyphase",
-        )
-        sampling_rate2 = sampling_rate
-
-    available = audio2.shape[0]
-    if available > num_samples:
-        audio2 = audio2[:num_samples]
-    elif available < num_samples:
-        silence = np.zeros((num_samples - available), dtype=audio2.dtype)
-        audio2 = np.hstack((audio2, silence))
-    return audio2, sampling_rate2
-
-
-def combine_audio_aug(audio, sampling_rate, ann, audio2, sampling_rate2, ann2):
-    """Mix a second clip into the first and merge their annotations.
-
-    The second clip is first resampled / padded / truncated to match the
-    first one. The clips are only mixed when both are annotated and end up
-    with the same sampling rate and length; otherwise ``audio`` and ``ann``
-    are returned unchanged.
-
-    When mixing, ``ann`` is updated in place with the annotations of both
-    clips ordered by start time, and the non-negative individual ids in
-    ``ann2`` are shifted in place.
-
-    Returns the (possibly mixed) audio and ``ann``.
-    """
-
-    # resample so they are the same
-    audio2, sampling_rate2 = resample_audio(
-        audio.shape[0], sampling_rate, audio2, sampling_rate2
-    )
-
-    # # set mean and std to be the same
-    # audio2 = (audio2 - audio2.mean())
-    # audio2 = (audio2/audio2.std())*audio.std()
-    # audio2 = audio2 + audio.mean()
-
-    if (
-        ann["annotated"]
-        and (ann2["annotated"])
-        and (sampling_rate2 == sampling_rate)
-        and (audio.shape[0] == audio2.shape[0])
-    ):
-        # weight of the first clip, drawn uniformly from [0.3, 0.7)
-        comb_weight = 0.3 + np.random.random() * 0.4
-        audio = comb_weight * audio + (1 - comb_weight) * audio2
-        # ordering of the concatenated annotations by start time; computed
-        # once, before any of the arrays in ann are replaced below
-        inds = np.argsort(np.hstack((ann["start_times"], ann2["start_times"])))
-        for kk in ann.keys():
-
-            # when combining calls from different files, assume they come from different individuals
-            if kk == "individual_ids":
-                if (ann[kk] > -1).sum() > 0:
-                    # offset the ids of the second clip past the largest id
-                    # used in the first clip so that they cannot collide
-                    ann2[kk][ann2[kk] > -1] += np.max(ann[kk][ann[kk] > -1]) + 1
-
-            # the two scalar entries are kept as they are; every other entry
-            # is a per-annotation array that is concatenated and reordered
-            if (kk != "class_id_file") and (kk != "annotated"):
-                ann[kk] = np.hstack((ann[kk], ann2[kk]))[inds]
-
-    return audio, ann
-
-
-class AudioLoader(torch.utils.data.Dataset):
-    """Dataset yielding spectrograms and ground truth heatmaps.
-
-    Each item is built by loading one annotated audio file, optionally
-    augmenting it, converting it to a spectrogram and rendering the
-    annotations as detection, size and class heatmaps.
-    """
-
-    def __init__(self, data_anns_ip, params, dataset_name=None, is_train=False):
-        """Pre-process the annotations and print a short dataset summary.
-
-        Parameters
-        ----------
-        data_anns_ip : list of dict
-            Per-file annotation dictionaries. Each one is deep-copied, so
-            the caller's data is not modified.
-        params : dict
-            Configuration. "class_names" and "classes_to_ignore" are read
-            here; the audio and augmentation settings are read when items
-            are loaded.
-        dataset_name : str, optional
-            Name printed in the summary.
-        is_train : bool
-            When True, items are random crops and augmentation may be
-            applied (see ``params["augment_at_train"]``).
-        """
-
-        self.data_anns = []
-        self.is_train = is_train
-        self.params = params
-        self.return_spec_for_viz = False
-
-        for entry in data_anns_ip:
-            dd = copy.deepcopy(entry)
-
-            # filter out unused annotation here
-            filtered_annotations = []
-            for aa in dd["annotation"]:
-
-                if "individual" in aa.keys():
-                    aa["individual"] = int(aa["individual"])
-
-                    # if only one call labeled it has to be from the same individual
-                    if len(dd["annotation"]) == 1:
-                        aa["individual"] = 0
-
-                # convert class name into class label (-1 for unknown classes)
-                if aa["class"] in self.params["class_names"]:
-                    aa["class_id"] = self.params["class_names"].index(
-                        aa["class"]
-                    )
-                else:
-                    aa["class_id"] = -1
-
-                if aa["class"] not in self.params["classes_to_ignore"]:
-                    filtered_annotations.append(aa)
-
-            dd["annotation"] = filtered_annotations
-            dd["start_times"] = np.array(
-                [aa["start_time"] for aa in dd["annotation"]]
-            )
-            dd["end_times"] = np.array(
-                [aa["end_time"] for aa in dd["annotation"]]
-            )
-            dd["high_freqs"] = np.array(
-                [float(aa["high_freq"]) for aa in dd["annotation"]]
-            )
-            dd["low_freqs"] = np.array(
-                [float(aa["low_freq"]) for aa in dd["annotation"]]
-            )
-            # the builtin ``int`` replaces the ``np.int`` alias, which no
-            # longer exists in current NumPy releases
-            dd["class_ids"] = np.array(
-                [aa["class_id"] for aa in dd["annotation"]]
-            ).astype(int)
-            dd["individual_ids"] = np.array(
-                [aa["individual"] for aa in dd["annotation"]]
-            ).astype(int)
-
-            # file level class name
-            dd["class_id_file"] = -1
-            if "class_name" in dd.keys():
-                if dd["class_name"] in self.params["class_names"]:
-                    dd["class_id_file"] = self.params["class_names"].index(
-                        dd["class_name"]
-                    )
-
-            self.data_anns.append(dd)
-
-        ann_cnt = [len(aa["annotation"]) for aa in self.data_anns]
-        self.max_num_anns = 2 * np.max(
-            ann_cnt
-        )  # x2 because we may be combining files during training
-
-        print("\n")
-        if dataset_name is not None:
-            print("Dataset     : " + dataset_name)
-        if self.is_train:
-            print("Split type  : train")
-        else:
-            print("Split type  : test")
-        print("Num files   : " + str(len(self.data_anns)))
-        print("Num calls   : " + str(np.sum(ann_cnt)))
-
-    def get_file_and_anns(self, index=None):
-        """Load the audio and a copy of the annotations of one file.
-
-        Parameters
-        ----------
-        index : int, optional
-            Index of the file to load. A random file is chosen when None.
-
-        Returns
-        -------
-        audio_raw, sampling_rate, duration, ann
-            The (cropped when training, then padded) audio, its sampling
-            rate, its duration in seconds and the annotations sorted by
-            start time. When training, the annotation times are shifted to
-            be relative to the start of the crop.
-        """
-
-        # if no file specified, choose random one
-        if index is None:
-            index = np.random.randint(0, len(self.data_anns))
-
-        audio_file = self.data_anns[index]["file_path"]
-        sampling_rate, audio_raw = au.load_audio(
-            audio_file,
-            self.data_anns[index]["time_exp"],
-            self.params["target_samp_rate"],
-            self.params["scale_raw_audio"],
-        )
-
-        # copy annotation
-        ann = {}
-        ann["annotated"] = self.data_anns[index]["annotated"]
-        ann["class_id_file"] = self.data_anns[index]["class_id_file"]
-        keys = [
-            "start_times",
-            "end_times",
-            "high_freqs",
-            "low_freqs",
-            "class_ids",
-            "individual_ids",
-        ]
-        for kk in keys:
-            ann[kk] = self.data_anns[index][kk].copy()
-
-        # if train then grab a random crop
-        if self.is_train:
-            nfft = int(self.params["fft_win_length"] * sampling_rate)
-            noverlap = int(self.params["fft_overlap"] * nfft)
-            length_samples = (
-                self.params["spec_train_width"] * (nfft - noverlap) + noverlap
-            )
-
-            if audio_raw.shape[0] - length_samples > 0:
-                sample_crop = np.random.randint(
-                    audio_raw.shape[0] - length_samples
-                )
-            else:
-                sample_crop = 0
-            audio_raw = audio_raw[sample_crop : sample_crop + length_samples]
-            ann["start_times"] = ann["start_times"] - sample_crop / float(
-                sampling_rate
-            )
-            ann["end_times"] = ann["end_times"] - sample_crop / float(
-                sampling_rate
-            )
-
-        # pad audio
-        if self.is_train:
-            op_spec_target_size = self.params["spec_train_width"]
-        else:
-            op_spec_target_size = None
-        audio_raw = au.pad_audio(
-            audio_raw,
-            sampling_rate,
-            self.params["fft_win_length"],
-            self.params["fft_overlap"],
-            self.params["resize_factor"],
-            self.params["spec_divide_factor"],
-            op_spec_target_size,
-        )
-        duration = audio_raw.shape[0] / float(sampling_rate)
-
-        # sort based on time
-        inds = np.argsort(ann["start_times"])
-        for kk in ann.keys():
-            if (kk != "class_id_file") and (kk != "annotated"):
-                ann[kk] = ann[kk][inds]
-
-        return audio_raw, sampling_rate, duration, ann
-
-    def __getitem__(self, index):
-        """Build the training example for the file at ``index``.
-
-        Returns a dictionary with the spectrogram ("spec"), the ground
-        truth heatmaps ("y_2d_det", "y_2d_size", "y_2d_classes"), the
-        per-annotation arrays padded with -1 to a common length, and the
-        scalars "class_id_file", "annotated", "duration", "sampling_rate"
-        and "file_id".
-        """
-
-        # load audio file
-        audio, sampling_rate, duration, ann = self.get_file_and_anns(index)
-
-        # augment on raw audio
-        if self.is_train and self.params["augment_at_train"]:
-            # augment - combine with random audio file
-            if (
-                self.params["augment_at_train_combine"]
-                and np.random.random() < self.params["aug_prob"]
-            ):
-                (
-                    audio2,
-                    sampling_rate2,
-                    duration2,
-                    ann2,
-                ) = self.get_file_and_anns()
-                audio, ann = combine_audio_aug(
-                    audio, sampling_rate, ann, audio2, sampling_rate2, ann2
-                )
-
-            # simulate echo by adding delayed copy of the file
-            if np.random.random() < self.params["aug_prob"]:
-                audio = echo_aug(audio, sampling_rate, self.params)
-
-            # resample the audio
-            # if np.random.random() < self.params['aug_prob']:
-            #   audio, sampling_rate, duration = resample_aug(audio, sampling_rate, self.params)
-
-        # create spectrogram
-        spec, spec_for_viz = au.generate_spectrogram(
-            audio, sampling_rate, self.params, self.return_spec_for_viz
-        )
-        rsf = self.params["resize_factor"]
-        spec_op_shape = (
-            int(self.params["spec_height"] * rsf),
-            int(spec.shape[1] * rsf),
-        )
-
-        # resize the spec
-        spec = torch.from_numpy(spec).unsqueeze(0).unsqueeze(0)
-        spec = F.interpolate(
-            spec, size=spec_op_shape, mode="bilinear", align_corners=False
-        ).squeeze(0)
-
-        # augment spectrogram
-        if self.is_train and self.params["augment_at_train"]:
-
-            if np.random.random() < self.params["aug_prob"]:
-                spec = scale_vol_aug(spec, self.params)
-
-            if np.random.random() < self.params["aug_prob"]:
-                spec = warp_spec_aug(
-                    spec, ann, self.return_spec_for_viz, self.params
-                )
-
-            if np.random.random() < self.params["aug_prob"]:
-                spec = mask_time_aug(spec, self.params)
-
-            if np.random.random() < self.params["aug_prob"]:
-                spec = mask_freq_aug(spec, self.params)
-
-        outputs = {}
-        outputs["spec"] = spec
-        if self.return_spec_for_viz:
-            outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(
-                0
-            )
-
-        # create ground truth heatmaps
-        (
-            outputs["y_2d_det"],
-            outputs["y_2d_size"],
-            outputs["y_2d_classes"],
-            ann_aug,
-        ) = generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, self.params)
-
-        # hack to get around requirement that all vectors are the same length in
-        # the output batch
-        pad_size = self.max_num_anns - len(ann_aug["individual_ids"])
-        outputs["is_valid"] = pad_aray(
-            np.ones(len(ann_aug["individual_ids"])), pad_size
-        )
-        keys = [
-            "class_ids",
-            "individual_ids",
-            "x_inds",
-            "y_inds",
-            "start_times",
-            "end_times",
-            "low_freqs",
-            "high_freqs",
-        ]
-        for kk in keys:
-            outputs[kk] = pad_aray(ann_aug[kk], pad_size)
-
-        # convert to pytorch
-        for kk in outputs.keys():
-            if not isinstance(outputs[kk], torch.Tensor):
-                outputs[kk] = torch.from_numpy(outputs[kk])
-
-        # scalars
-        outputs["class_id_file"] = ann["class_id_file"]
-        outputs["annotated"] = ann["annotated"]
-        outputs["duration"] = duration
-        outputs["sampling_rate"] = sampling_rate
-        outputs["file_id"] = index
-
-        return outputs
-
-    def __len__(self):
-        """Number of annotated files in the dataset."""
-        return len(self.data_anns)
--- a/batdetect2/train/losses.py
+++ b/batdetect2/train/losses.py
@ -1,63 +0,0 @@
-import torch
-import torch.nn.functional as F
-
-
-def bbox_size_loss(pred_size, gt_size):
-    """
-    L1 loss on the bounding box size, restricted to the locations where the
-    ground truth size is positive and averaged over the number of such
-    locations.
-    """
-    has_box = (gt_size > 0).float()
-    masked_pred = pred_size * has_box
-    total_abs_err = F.l1_loss(masked_pred, gt_size, reduction="sum")
-    # the small constant avoids a division by zero when there are no boxes
-    return total_abs_err / (has_box.sum() + 1e-5)
-
-
-def focal_loss(pred, gt, weights=None, valid_mask=None):
-    """
-    Penalty-reduced focal loss, adapted from
-    "CornerNet: Detecting Objects as Paired Keypoints".
-    pred  (batch x c x h x w)
-    gt    (batch x c x h x w)
-
-    Locations where gt equals 1 are positives, all others are negatives.
-    weights, when given, scales the positive term only; valid_mask, when
-    given, scales both terms. The summed loss is divided by the number of
-    positives, unless there are none.
-    """
-    eps, alpha, beta = 1e-5, 2, 4
-
-    is_pos = gt.eq(1).float()
-    is_neg = gt.lt(1).float()
-
-    pos_term = torch.log(pred + eps) * torch.pow(1 - pred, alpha) * is_pos
-    neg_term = (
-        torch.log(1 - pred + eps)
-        * torch.pow(pred, alpha)
-        * torch.pow(1 - gt, beta)
-        * is_neg
-    )
-
-    if weights is not None:
-        pos_term = pos_term * weights
-
-    if valid_mask is not None:
-        pos_term = pos_term * valid_mask
-        neg_term = neg_term * valid_mask
-
-    pos_total = pos_term.sum()
-    neg_total = neg_term.sum()
-    num_pos = is_pos.float().sum()
-
-    if num_pos == 0:
-        return -neg_total
-    return -(pos_total + neg_total) / num_pos
-
-
-def mse_loss(pred, gt, weights=None, valid_mask=None):
-    """
-    Mean squared error loss.
-
-    Without valid_mask this is the plain mean of the squared errors. With
-    valid_mask the squared errors are multiplied by the mask and their sum
-    is divided by the sum of the mask. weights is accepted but not used.
-    """
-    sq_err = (gt - pred) ** 2
-    if valid_mask is not None:
-        return (valid_mask * sq_err).sum() / valid_mask.sum()
-    return sq_err.mean()
--- a/batdetect2/train/train_utils.py
+++ b/batdetect2/train/train_utils.py
@ -1,207 +0,0 @@
-import glob
-import json
-
-import numpy as np
-
-
-def write_notes_file(file_name, text):
-    # Append text, followed by a newline, to the file (created if missing).
-    line = text + "\n"
-    with open(file_name, "a") as notes:
-        notes.write(line)
-
-
-def get_blank_dataset_dict(dataset_name, is_test, ann_path, wav_path):
-    # Build the description of one dataset; "is_binary" always starts False.
-    return dict(
-        dataset_name=dataset_name,
-        is_test=is_test,
-        is_binary=False,
-        ann_path=ann_path,
-        wav_path=wav_path,
-    )
-
-
-def get_short_class_names(class_names, str_len=3):
-    # Abbreviate every class name by truncating each of its space separated
-    # words to at most str_len characters.
-    def shorten(name):
-        return " ".join(word[:str_len] for word in name.split(" "))
-
-    return [shorten(name) for name in class_names]
-
-
-def remove_dupes(data_train, data_test):
-    """Return the training items whose "id" does not occur in the test set.
-
-    The order of the remaining training items is preserved and the number
-    of removed items is printed when it is not zero. The test ids are held
-    in a set so that every membership check is a constant time lookup.
-    """
-    test_ids = {dd["id"] for dd in data_test}
-    data_train_prune = [aa for aa in data_train if aa["id"] not in test_ids]
-    diff = len(data_train) - len(data_train_prune)
-    if diff != 0:
-        print(diff, "items removed from train set")
-    return data_train_prune
-
-
-def get_genus_mapping(class_names):
-    # The genus is taken to be the first space separated word of each class
-    # name. Returns the sorted unique genus names and, for every class, the
-    # index of its genus in that list.
-    genus_per_class = [name.split(" ")[0] for name in class_names]
-    unique_genera, class_to_genus = np.unique(
-        genus_per_class, return_inverse=True
-    )
-    return unique_genera.tolist(), class_to_genus.tolist()
-
-
-def standardize_low_freq(data, class_of_interest):
-    # Low frequency annotations can be highly variable, which often happens
-    # for constant frequency calls. For the class of interest the low
-    # frequency of every annotation is replaced by the dataset mean.
-    # NOTE the annotations in data are modified in place.
-    matching = [
-        aa
-        for dd in data
-        for aa in dd["annotation"]
-        if aa["class"] == class_of_interest
-    ]
-
-    low_mean = np.mean([aa["low_freq"] for aa in matching])
-    high_mean = np.mean([aa["high_freq"] for aa in matching])
-    assert low_mean < high_mean
-
-    print("\nStandardizing low and high frequency for:")
-    print(class_of_interest)
-    print("low:  ", round(low_mean, 2))
-    print("high: ", round(high_mean, 2))
-
-    # the high frequency is only touched when it would otherwise end up
-    # below the new low frequency (relies on low_mean < high_mean)
-    for aa in matching:
-        aa["low_freq"] = low_mean
-        if aa["high_freq"] < low_mean:
-            aa["high_freq"] = high_mean
-
-    return data
-
-
-def load_set_of_anns(
-    data,
-    classes_to_ignore=[],
-    events_of_interest=None,
-    convert_to_genus=False,
-    verbose=True,
-    list_of_anns=False,
-    filter_issues=False,
-    name_replace=False,
-):
-    """Load, filter and clean a set of annotations.
-
-    Parameters
-    ----------
-    data
-        When ``list_of_anns`` is False, an iterable of dataset dictionaries
-        with "ann_path" and "wav_path" entries. When it is True, a single
-        such dictionary whose "ann_path" is a directory of json files.
-    classes_to_ignore : list
-        Class names left out of the class statistics. It is only read,
-        never modified.
-    events_of_interest : list, optional
-        When given, only annotations whose "event" is listed are kept.
-    convert_to_genus : bool
-        Replace every class name by its first space separated word.
-    verbose : bool
-        Print the number of annotations per class.
-    list_of_anns : bool
-        Selects how ``data`` is interpreted (see above).
-    filter_issues : bool
-        Drop files whose "issues" entry is not False.
-    name_replace : dict or False
-        Mapping from class names to their replacement.
-
-    Returns
-    -------
-    The list of annotated files if ``classes_to_ignore`` is empty, otherwise
-    the tuple ``(anns, class_names, class_inv_freq)``.
-    """
-
-    # load the annotations
-    anns = []
-    if list_of_anns:
-        # path to list of individual json files
-        anns.extend(load_anns_from_path(data["ann_path"], data["wav_path"]))
-    else:
-        # dictionary of datasets
-        for dd in data:
-            anns.extend(load_anns(dd["ann_path"], dd["wav_path"]))
-
-    # discarding unannotated files
-    anns = [aa for aa in anns if aa["annotated"] is True]
-
-    # filter files that have annotation issues - if the input is a dictionary
-    # of datasets, this will likely have already been done
-    if filter_issues:
-        anns = [aa for aa in anns if aa["issues"] is False]
-
-    # check for some basic formatting errors with class names
-    for ann in anns:
-        for aa in ann["annotation"]:
-            aa["class"] = aa["class"].strip()
-
-    # only load specified events - i.e. types of calls
-    if events_of_interest is not None:
-        for ann in anns:
-            filtered_events = []
-            for aa in ann["annotation"]:
-                if aa["event"] in events_of_interest:
-                    filtered_events.append(aa)
-            ann["annotation"] = filtered_events
-
-    # change class names
-    # name_replace will be a dictionary mapping input name to output
-    if isinstance(name_replace, dict):
-        for ann in anns:
-            for aa in ann["annotation"]:
-                if aa["class"] in name_replace:
-                    aa["class"] = name_replace[aa["class"]]
-
-    # convert everything to genus name
-    if convert_to_genus:
-        for ann in anns:
-            for aa in ann["annotation"]:
-                aa["class"] = aa["class"].split(" ")[0]
-
-    # get unique class names
-    class_names_all = []
-    for ann in anns:
-        for aa in ann["annotation"]:
-            if aa["class"] not in classes_to_ignore:
-                class_names_all.append(aa["class"])
-
-    class_names, class_cnts = np.unique(class_names_all, return_counts=True)
-    # inverse class frequency, equal to 1 for every class when balanced
-    class_inv_freq = class_cnts.sum() / (
-        len(class_names) * class_cnts.astype(np.float32)
-    )
-
-    if verbose:
-        print("Class count:")
-        # the default keeps this from failing when no class is left
-        str_len = max((len(cc) for cc in class_names), default=0) + 5
-        for cc in range(len(class_names)):
-            print(
-                str(cc).ljust(5)
-                + class_names[cc].ljust(str_len)
-                + str(class_cnts[cc])
-            )
-
-    if len(classes_to_ignore) == 0:
-        return anns
-    else:
-        return anns, class_names.tolist(), class_inv_freq.tolist()
-
-
-def load_anns(ann_file_name, raw_audio_dir):
-    # Read a json file holding a list of per-file annotations and give each
-    # entry a "file_path", formed by joining raw_audio_dir and the entry's
-    # "id" with plain string concatenation.
-    with open(ann_file_name) as fh:
-        records = json.load(fh)
-
-    for record in records:
-        record["file_path"] = raw_audio_dir + record["id"]
-
-    return records
-
-
-def load_anns_from_path(ann_file_dir, raw_audio_dir):
-    # Read every "*.json" file matching the ann_file_dir prefix (one
-    # annotation dictionary per file) and give each a "file_path", formed by
-    # joining raw_audio_dir and the "id" with plain string concatenation.
-    loaded = []
-    for json_path in glob.glob(ann_file_dir + "*.json"):
-        with open(json_path) as fh:
-            record = json.load(fh)
-        record["file_path"] = raw_audio_dir + record["id"]
-        loaded.append(record)
-
-    return loaded
-
-
-class AverageMeter(object):
-    """Keeps the most recent value and the running weighted average"""
-
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        # clear the last value, the average, the weighted sum and the count
-        self.val = self.avg = self.sum = self.count = 0
-
-    def update(self, val, n=1):
-        # val is counted n times towards the running average
-        self.val = val
-        self.sum += val * n
-        self.count += n
-        self.avg = self.sum / self.count
--- a/docs/Makefile
+++ b/docs/Makefile
@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+# Documentation sources and build output, relative to this directory.
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Neither name is a file to be built; "Makefile" is listed so that the
+# catch-all rule below is never applied to this file itself.
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/make.bat
+++ b/docs/make.bat
@ -0,0 +1,35 @@
+@ECHO OFF
+
+REM Work from the directory containing this script so that the relative
+REM source and build directories below resolve; undone by popd at the end.
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+REM Use sphinx-build unless SPHINXBUILD is already set in the environment.
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+REM Run the builder once with all output discarded, only to find out
+REM whether the command can be started at all.
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+REM Without an argument, show the Sphinx help instead of building.
+if "%1" == "" goto help
+
+REM The first argument is passed to Sphinx as the builder name.
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
--- a/docs/source/_static/.gitkeep
+++ b/docs/source/_static/.gitkeep
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -0,0 +1,78 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "batdetect2"
+copyright = "2025, Oisin Mac Aodha, Santiago Martinez Balvanera"
+author = "Oisin Mac Aodha, Santiago Martinez Balvanera"
+release = "1.1.1"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.intersphinx",
+    "sphinxcontrib.autodoc_pydantic",
+    "sphinx_click",
+    "numpydoc",
+    "myst_parser",
+    "sphinx_autodoc_typehints",
+]
+
+templates_path = ["_templates"]
+exclude_patterns = []
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".txt": "markdown",
+    ".md": "markdown",
+}
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "sphinx_book_theme"
+html_static_path = ["_static"]
+html_theme_options = {
+    "home_page_in_toc": True,
+    "show_navbar_depth": 2,
+    "show_toc_level": 2,
+}
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+    "click": ("https://click.palletsprojects.com/en/stable/", None),
+    "librosa": ("https://librosa.org/doc/latest/", None),
+    "lightning": ("https://lightning.ai/docs/pytorch/stable/", None),
+    "loguru": ("https://loguru.readthedocs.io/en/stable/", None),
+    "numpy": ("https://numpy.org/doc/stable/", None),
+    "omegaconf": ("https://omegaconf.readthedocs.io/en/latest/", None),
+    "pytorch": ("https://pytorch.org/docs/stable/", None),
+    "soundevent": ("https://mbsantiago.github.io/soundevent/", None),
+    "pydantic": ("https://docs.pydantic.dev/latest/", None),
+    "xarray": ("https://docs.xarray.dev/en/stable/", None),
+}
+
+# -- Options for autodoc ------------------------------------------------------
+autosummary_generate = False
+autosummary_imported_members = True
+
+autodoc_default_options = {
+    "members": True,
+    "undoc-members": False,
+    "private-members": False,
+    "special-members": False,
+    "inherited-members": False,
+    "show-inheritance": True,
+    "module-first": True,
+}
+
+numpydoc_show_class_members = False
+numpydoc_show_inherited_class_members = False
+numpydoc_class_members_toctree = False
--- a/docs/source/development/index.md
+++ b/docs/source/development/index.md
@ -0,0 +1,34 @@
+# Development and contribution
+
+Thanks for your interest in improving batdetect2.
+
+## Ways to contribute
+
+- Report bugs and request features on
+  [GitHub Issues](https://github.com/macaodha/batdetect2/issues)
+- Improve docs by opening pull requests with clearer examples, fixes, or
+  missing workflows
+- Contribute code for models, data handling, evaluation, or CLI workflows
+
+## Basic contribution workflow
+
+1. Open an issue (or comment on an existing one) so work is visible.
+2. Create a branch for your change.
+3. Run checks locally before opening a PR:
+
+```bash
+just check
+just docs
+```
+
+4. Open a pull request with a clear summary of what changed and why.
+
+## Development environment
+
+Use `uv` for dependency and environment management.
+
+```bash
+uv sync --all-extras --all-groups
+```
+
+For more setup details, see {doc}`../getting_started`.
--- a/docs/source/explanation/evaluation-concepts-and-matching.md
+++ b/docs/source/explanation/evaluation-concepts-and-matching.md
@ -0,0 +1,48 @@
+# Evaluation concepts and matching
+
+Evaluation is not just "run predictions and compute one number".
+
+The reported metric depends on the evaluation task, the matching rule, and the treatment of clip boundaries and generic labels.
+
+## Task families answer different questions
+
+Built-in task families include:
+
+- sound event detection,
+- sound event classification,
+- top-class detection,
+- clip detection,
+- clip classification.
+
+Choose the task that matches the scientific or engineering question.
+
+## Matching matters
+
+For sound-event-style tasks, predictions and annotations are matched using an affinity function.
+
+Important controls include:
+
+- `affinity`,
+- `affinity_threshold`,
+- `strict_match`,
+- `ignore_start_end`.
+
+Small changes here can change the reported metric without changing the underlying predictions.
+
+## Boundary handling matters
+
+The evaluation base task can exclude events near clip boundaries through `ignore_start_end`.
+
+This is useful when clip boundaries make matches ambiguous.
+
+## Generic labels can matter in classification
+
+Classification tasks can include or exclude generic targets depending on configuration.
+
+That affects what counts as a valid class-level comparison.
+
+## Related pages
+
+- Evaluate on a test set: {doc}`../tutorials/evaluate-on-a-test-set`
+- Evaluation config reference: {doc}`../reference/evaluation-config`
+- Model output and validation: {doc}`model-output-and-validation`
--- a/docs/source/explanation/extracted-features-and-embeddings.md
+++ b/docs/source/explanation/extracted-features-and-embeddings.md
@ -0,0 +1,43 @@
+# Extracted features and embeddings
+
+The current API exposes a per-detection `features` vector.
+
+Older BatDetect2 workflows also exposed concepts such as `cnn_feats`,
+`spec_features`, and `spec_slices`.
+
+## What the current feature vector is
+
+In the current stack, each retained detection can carry an internal feature
+representation produced by the model output pipeline.
+
+This is useful for downstream exploration, comparison, and custom analysis.
+
+## What these features are not
+
+They are not automatically human-interpretable ecological variables.
+
+They are also not a substitute for careful validation.
+
+## Why people refer to them as embeddings
+
+In practice, users often treat these feature vectors as embeddings because they
+can be used as dense learned representations of detections.
+
+That usage is reasonable, but you should still treat them as model-derived
+internal representations whose meaning depends on the training setup.
+
+## Legacy terminology versus current terminology
+
+- legacy `cnn_feats` referred to CNN feature outputs in the older workflow,
+- legacy `spec_features` referred to lower-level extracted call features,
+- current `features` are the per-detection vectors attached to `Detection`
+  objects.
+
+These are related ideas, but not necessarily one-to-one replacements.
+
+## Related pages
+
+- Inspect detection features in Python:
+  {doc}`../how_to/inspect-detection-features-in-python`
+- Legacy migration guide:
+  {doc}`../legacy/migration-guide`
--- a/docs/source/explanation/index.md
+++ b/docs/source/explanation/index.md
@ -0,0 +1,19 @@
+# Understanding
+
+Understanding pages explain how BatDetect2 works, what its outputs mean, and how to reason about trade-offs.
+
+Use this section when you want help interpreting the tool, not just running it.
+
+```{toctree}
+:maxdepth: 1
+
+what-batdetect2-predicts
+interpreting-formatted-outputs
+extracted-features-and-embeddings
+model-output-and-validation
+postprocessing-and-thresholds
+pipeline-overview
+preprocessing-consistency
+target-encoding-and-decoding
+evaluation-concepts-and-matching
+```
--- a/docs/source/explanation/interpreting-formatted-outputs.md
+++ b/docs/source/explanation/interpreting-formatted-outputs.md
@ -0,0 +1,36 @@
+# Interpreting formatted outputs
+
+BatDetect2 can write predictions in several output formats.
+
+Those formats are different views of the same underlying detections, not different model behaviors.
+
+## Separate the underlying detection from the serialized file
+
+Internally, the current stack works with clip-level detections containing geometry, detection score, class scores, and features.
+
+Output formatters then serialize those detections in different ways.
+
+## Raw outputs are richest
+
+The `raw` format preserves the broadest structured view of detections and is a good default when you want to inspect or reload predictions later.
+
+## Tabular outputs are for analysis convenience
+
+The `parquet` format is convenient for data analysis workflows, but the tabular representation is only one projection of the underlying detection object.
+
+## Legacy-shaped outputs are mainly for compatibility
+
+The `batdetect2` formatter writes the older BatDetect2-style JSON shape.
+
+Use it when you need compatibility with older downstream tools or workflows.
+
+## The meaning does not come from the file extension
+
+Do not assume that a `.json`, `.parquet`, or `.nc` file changes what the model predicted.
+
+It changes how the prediction is packaged and how much detail is retained.
+
+## Related pages
+
+- Output formats reference: {doc}`../reference/output-formats`
+- Outputs config reference: {doc}`../reference/outputs-config`
--- a/docs/source/explanation/model-output-and-validation.md
+++ b/docs/source/explanation/model-output-and-validation.md
@ -0,0 +1,29 @@
+# Model output and validation
+
+BatDetect2 outputs model predictions, not ground truth. The same configuration
+can behave differently across recording conditions, species compositions, and
+acoustic environments.
+
+## Why threshold choice matters
+
+- Lower detection thresholds increase sensitivity but can increase false
+  positives.
+- Higher thresholds reduce false positives but can miss faint calls.
+
+No threshold is universally correct. The right setting depends on your survey
+objectives and tolerance for false positives versus missed detections.
+
+## Why local validation is required
+
+Model performance depends on how similar your data are to training data.
+Before ecological interpretation, validate predictions on a representative,
+locally reviewed subset.
+
+Recommended validation checks:
+
+1. Compare detection counts against expert-reviewed clips.
+2. Inspect species-level predictions for plausible confusion patterns.
+3. Repeat checks across sites, seasons, and recorder setups.
+
+For practical threshold workflows, see
+{doc}`../how_to/tune-detection-threshold`.
--- a/docs/source/explanation/pipeline-overview.md
+++ b/docs/source/explanation/pipeline-overview.md
@ -0,0 +1,34 @@
+# Pipeline overview
+
+BatDetect2 processes recordings as a sequence of modules. Each stage has a
+clear role and configuration surface.
+
+## End-to-end flow
+
+1. Audio loading
+2. Preprocessing (waveform -> spectrogram)
+3. Detector forward pass
+4. Postprocessing (peaks, decoding, thresholds)
+5. Output formatting and export
+
+## Why the modular design matters
+
+The model, preprocessing, postprocessing, targets, and output formatting are
+configured separately. That makes it easier to:
+
+- swap components without rewriting the whole pipeline,
+- keep experiments reproducible,
+- adapt workflows to new datasets.
+
+## Core objects in the stack
+
+- `BatDetect2API` orchestrates training, inference, and evaluation workflows.
+- `ModelConfig` defines architecture, preprocessing, postprocessing, and
+  targets.
+- `Targets` controls event filtering, class encoding/decoding, and ROI mapping.
+
+## Related pages
+
+- Preprocessing rationale: {doc}`preprocessing-consistency`
+- Postprocessing rationale: {doc}`postprocessing-and-thresholds`
+- Target rationale: {doc}`target-encoding-and-decoding`
--- a/docs/source/explanation/postprocessing-and-thresholds.md
+++ b/docs/source/explanation/postprocessing-and-thresholds.md
@ -0,0 +1,43 @@
+# Postprocessing and thresholds
+
+After the detector runs on a spectrogram, the model output is still a set of
+dense prediction tensors. Postprocessing turns that into a final list of call
+detections with positions, sizes, and class scores.
+
+## What postprocessing does
+
+In broad terms, the pipeline:
+
+1. suppresses nearby duplicate peaks,
+2. extracts candidate detections,
+3. reads size and class values at each detected location,
+4. decodes outputs into call-level predictions.
+
+This is where score thresholds and output density limits are applied.
+
+## Why thresholds matter
+
+Thresholds control the balance between sensitivity and precision.
+
+- Lower thresholds keep more detections, including weaker calls, but may add
+  false positives.
+- Higher thresholds remove low-confidence detections, but may miss faint calls.
+
+You can tune this behavior per run without retraining the model.
+
+## Two common threshold controls
+
+- `detection_threshold`: minimum score required to keep a detection.
+- `classification_threshold`: minimum class score used when assigning class
+  labels.
+
+Both settings shape the final output and should be validated on reviewed local
+data.
+
+## Practical workflow
+
+Tune thresholds on a representative subset first, then lock settings for the
+full analysis run.
+
+- How-to: {doc}`../how_to/tune-detection-threshold`
+- CLI reference: {doc}`../reference/cli/predict`
--- a/docs/source/explanation/preprocessing-consistency.md
+++ b/docs/source/explanation/preprocessing-consistency.md
@ -0,0 +1,36 @@
+# Preprocessing consistency
+
+Preprocessing consistency is one of the biggest factors behind stable model
+performance.
+
+## Why consistency matters
+
+The detector is trained on spectrograms produced by a specific preprocessing
+pipeline. If inference uses different settings, the model can see a shifted
+input distribution and performance may drop.
+
+Typical mismatch sources:
+
+- sample-rate differences,
+- changed frequency crop,
+- changed STFT window/hop,
+- changed spectrogram transforms.
+
+## Practical implication
+
+When possible, keep preprocessing settings aligned between:
+
+- training,
+- evaluation,
+- deployment inference.
+
+If you intentionally change preprocessing, treat this as a new experiment and
+re-validate on reviewed local data.
+
+## Related pages
+
+- Configure audio preprocessing:
+  {doc}`../how_to/configure-audio-preprocessing`
+- Configure spectrogram preprocessing:
+  {doc}`../how_to/configure-spectrogram-preprocessing`
+- Preprocessing config reference: {doc}`../reference/preprocessing-config`
--- a/docs/source/explanation/target-encoding-and-decoding.md
+++ b/docs/source/explanation/target-encoding-and-decoding.md
@ -0,0 +1,40 @@
+# Target encoding and decoding
+
+BatDetect2 turns annotated sound events into training targets, then maps model
+outputs back into interpretable predictions.
+
+## Encoding path (annotations -> model targets)
+
+At training time, the target system:
+
+1. checks whether an event belongs to the configured detection target,
+2. assigns a classification label (or none for non-specific class matches),
+3. maps event geometry into position and size targets.
+
+This behaviour is configured through `TargetConfig`,
+`TargetClassConfig`, and ROI mapper settings.
+
+## Decoding path (model outputs -> tags and geometry)
+
+At inference time, class labels and ROI parameters are decoded back into
+annotation tags and geometry.
+
+This makes outputs interpretable in the same conceptual space as your original
+annotations.
+
+## Why this matters
+
+Target definitions are not just metadata. They directly shape:
+
+- what events are treated as positive examples,
+- which class names the model learns,
+- how geometry is represented and reconstructed.
+
+Small changes here can alter both training outcomes and prediction semantics.
+
+## Related pages
+
+- Configure detection target logic: {doc}`../how_to/configure-target-definitions`
+- Configure class mapping: {doc}`../how_to/define-target-classes`
+- Configure ROI mapping: {doc}`../how_to/configure-roi-mapping`
+- Target config reference: {doc}`../reference/targets-config-workflow`
--- a/docs/source/explanation/what-batdetect2-predicts.md
+++ b/docs/source/explanation/what-batdetect2-predicts.md
@ -0,0 +1,45 @@
+# What BatDetect2 predicts
+
+BatDetect2 predicts call-level events, not recording-level truth.
+
+For each retained detection, the current stack can expose:
+
+- a geometry describing where the event sits in time-frequency space,
+- a detection score,
+- a class-score vector,
+- an internal feature vector.
+
+## Detection score versus class scores
+
+These are different outputs and should not be interpreted as the same thing.
+
+- The detection score is about whether the event is kept as a detection.
+- The class-score vector ranks classes for that detected event.
+
+A detection can be kept while still having uncertain class identity.
+
+## Predictions are conditional on the workflow
+
+The final output also depends on:
+
+- preprocessing,
+- postprocessing,
+- thresholds,
+- target definitions,
+- output transforms.
+
+That is why two runs can differ even when they use the same checkpoint.
+
+## What BatDetect2 does not predict
+
+BatDetect2 does not directly output ecological truth.
+
+It also does not eliminate the need for local validation.
+
+Use reviewed local data before making ecological claims.
+
+## Related pages
+
+- Model output and validation: {doc}`model-output-and-validation`
+- Postprocessing and thresholds: {doc}`postprocessing-and-thresholds`
+- Interpreting formatted outputs: {doc}`interpreting-formatted-outputs`
--- a/docs/source/faq.md
+++ b/docs/source/faq.md
@ -0,0 +1,81 @@
+# FAQ
+
+## Installation and setup
+
+### Do I need Python knowledge to use batdetect2?
+
+Not much.
+If you only want to run the model on your own recordings, you can use the CLI and follow the steps in {doc}`getting_started`.
+
+Some command-line familiarity helps, but you do not need to write Python code for standard inference workflows.
+
+### Are there plans for an R version?
+
+Not currently.
+Output files use standard formats (for example JSON or Parquet), so you can read and analyze them in R or other environments.
+
+### I cannot get installation working. What should I do?
+
+First, re-check {doc}`getting_started` and confirm your environment is active.
+If it still fails, open an issue with your OS, install method, and full error output: [GitHub Issues](https://github.com/macaodha/batdetect2/issues).
+
+## Model behavior and performance
+
+### The model does not perform well on my data
+
+This usually means your data distribution differs from training data.
+The best next step is to validate on reviewed local data and then fine-tune/train on your own annotations if needed.
+
+### The model confuses insects/noise with bats
+
+This can happen, especially when recording conditions differ from training conditions.
+Threshold tuning and training with local annotations can improve results.
+
+See {doc}`how_to/tune-detection-threshold`.
+
+### The model struggles with feeding buzzes or social calls
+
+This is a known limitation of available training data in some settings.
+If you have high-quality annotated examples, they are valuable for improving models.
+
+### Calls in the same sequence are predicted as different species
+
+BatDetect2 currently does not apply any sequence-level postprocessing to the model outputs.
+Each call is returned with its own probability for each species.
+You can use these per-call probabilities to smooth noisy predictions across a sequence of calls.
+
+### Can I trust model outputs for biodiversity conclusions?
+
+The models developed and shared as part of this repository should be used with caution.
+While they have been evaluated on held-out audio data, great care should be taken when using the model outputs for any form of biodiversity assessment.
+Your data may differ, and as a result it is very strongly recommended that you validate the model first using data with known species to ensure that the outputs can be trusted.
+
+### The pipeline is slow
+
+Runtime depends on hardware and recording duration.
+GPU inference is often much faster than CPU.
+
+## Training and scope
+
+### Can I train on my own species set?
+
+Yes.
+You can train/fine-tune with your own annotated data and species labels.
+
+### Does this work on frequency-division or zero-crossing recordings?
+
+Not directly.
+The workflow assumes audio can be converted to spectrograms from the raw waveform.
+
+### Can this be used for non-bat bioacoustics (for example insects or birds)?
+
+Potentially yes, but expect retraining and configuration changes.
+Open an issue if you want guidance for a specific use case.
+
+## Usage and licensing
+
+### Can I use this for commercial purposes?
+
+No.
+This project is currently for non-commercial use.
+See the repository license for details.
--- a/docs/source/getting_started.md
+++ b/docs/source/getting_started.md
@ -0,0 +1,91 @@
+# Getting started
+
+BatDetect2 can be used in two ways: through the `batdetect2` command line interface (CLI), or as the `batdetect2` Python package.
+The CLI route does not require coding.
+You run commands in the terminal and, in some cases, write configuration files.
+The Python route gives you more flexibility and lets you integrate the model into your own workflows or experiments.
+For most common use cases, both routes give you the same results.
+
+## Try it out
+
+If you want to try BatDetect2 before installing anything locally:
+
+- [Hugging Face demo (UK species)](https://huggingface.co/spaces/macaodha/batdetect2)
+- [Google Colab notebook](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb)
+
+## Installation
+
+To use `batdetect2` on your machine, you need to install it first.
+We recommend using `uv` for that.
+`uv` is a tool that helps manage Python software cleanly, without mixing it into the rest of your machine.
+Install `uv` first by following the [installation instructions](https://docs.astral.sh/uv/getting-started/installation/).
+
+### One-off usage
+
+If you are not ready to install `batdetect2` permanently, you can try it with:
+
+```bash
+uvx batdetect2
+```
+
+This still downloads the code and dependencies and runs them on your machine, but the environment is temporary.
+
+### Install the CLI
+
+If you want the `batdetect2` CLI to always be available in your terminal, run:
+
+```bash
+uv tool install batdetect2
+```
+
+If you need to upgrade later:
+
+```bash
+uv tool upgrade batdetect2
+```
+
+Verify the CLI is available:
+
+```bash
+batdetect2
+```
+
+You can then run your first workflow.
+See {doc}`tutorials/run-inference-on-folder` for more details.
+
+### Add it to your Python project
+
+If you are using BatDetect2 from Python code and already manage your projects with `uv`, you can add it with:
+
+```bash
+uv add batdetect2
+```
+
+If you want to upgrade it later:
+
+```bash
+uv add -U batdetect2
+```
+
+#### Alternative with `pip`
+
+If you prefer `pip`, you can use:
+
+```bash
+pip install batdetect2
+```
+
+Before running `pip install`, it is a good idea to create and activate a separate virtual environment so the installation does not interfere with other Python environments:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+## What's next
+
+- Run your first workflow on a folder of recordings: {doc}`tutorials/run-inference-on-folder`
+- If you write code and want the Python route: {doc}`tutorials/integrate-with-a-python-pipeline`
+- For common practical tasks, go to {doc}`how_to/index`
+- For detailed command help, go to {doc}`reference/cli/index`
+- To understand the model and its outputs, go to {doc}`explanation/index`
--- a/docs/source/how_to/choose-a-model.md
+++ b/docs/source/how_to/choose-a-model.md
@ -0,0 +1,112 @@
+# How to choose a model
+
+Use this guide when you want to choose which model checkpoint BatDetect2 loads.
+
+You can choose a model in both the CLI and the Python API.
+
+## Where you can choose the model
+
+In the CLI, use `--model` with commands that load a checkpoint, including:
+
+- `batdetect2 process`
+- `batdetect2 evaluate`
+- `batdetect2 train`
+- `batdetect2 finetune`
+
+In Python, pass the model source to `BatDetect2API.from_checkpoint(...)`.
+
+If you do not choose a model, BatDetect2 uses the built-in default UK model.
+
+## Use a local checkpoint path
+
+Use a local path when you already have a checkpoint file on disk.
+
+CLI example:
+
+```bash
+batdetect2 process directory \
+    path/to/audio \
+    path/to/outputs \
+    --model path/to/model.ckpt
+```
+
+Python example:
+
+```python
+from batdetect2.api_v2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint("path/to/model.ckpt")
+```
+
+## Use a bundled checkpoint alias
+
+BatDetect2 also supports bundled checkpoint aliases.
+
+The built-in UK model is available as `uk_same`.
+The alias `batdetect2_uk_same` also works.
+
+CLI example:
+
+```bash
+batdetect2 process directory \
+    path/to/audio \
+    path/to/outputs \
+    --model uk_same
+```
+
+Python example:
+
+```python
+from batdetect2.api_v2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint("uk_same")
+```
+
+## Use a Hugging Face URI
+
+You can also load a checkpoint from Hugging Face with a URI like:
+
+```text
+hf://owner/repo/path/to/model.ckpt
+```
+
+This needs the optional Hugging Face dependency to be installed.
+For example, install it with `pip install "batdetect2[huggingface]"`.
+
+CLI example:
+
+```bash
+batdetect2 process directory \
+    path/to/audio \
+    path/to/outputs \
+    --model hf://owner/repo/path/to/model.ckpt
+```
+
+Python example:
+
+```python
+from batdetect2.api_v2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint(
+    "hf://owner/repo/path/to/model.ckpt"
+)
+```
+
+## Choose the right source
+
+- Use a local path when you already have a checkpoint file.
+- Use an alias when you want one of the bundled models.
+- Use a Hugging Face URI when the checkpoint lives in a Hugging Face repo.
+
+## Related pages
+
+- Run inference on a folder:
+  {doc}`../tutorials/run-inference-on-folder`
+- `BatDetect2API` reference:
+  {doc}`../reference/api`
+- Process command reference:
+  {doc}`../reference/cli/predict`
+- Train a custom model:
+  {doc}`../tutorials/train-a-custom-model`
+- Fine-tune from a checkpoint:
+  {doc}`fine-tune-from-a-checkpoint`
--- a/docs/source/how_to/choose-an-inference-input-mode.md
+++ b/docs/source/how_to/choose-an-inference-input-mode.md
@ -0,0 +1,71 @@
+# How to choose an inference input mode
+
+Use this guide to decide whether `process directory`, `process file_list`, or
+`process dataset` is the right entry point for your run.
+
+## Use `process directory` when the recordings already live together
+
+This is the simplest choice.
+
+Use it when:
+
+- your recordings are already organized in one directory tree,
+- you want BatDetect2 to discover audio files for you,
+- you are doing a first pass over a folder of recordings.
+
+```bash
+batdetect2 process directory \
+  path/to/audio_dir \
+  path/to/outputs \
+  --model path/to/model.ckpt
+```
+
+## Use `process file_list` when you need explicit control over the file set
+
+Use it when:
+
+- you want to run only a selected subset,
+- your files are spread across directories,
+- another tool has already produced the exact list of recordings to process.
+
+The list file should contain one path per line.
+
+```bash
+batdetect2 process file_list \
+  path/to/audio_files.txt \
+  path/to/outputs \
+  --model path/to/model.ckpt
+```
+
+## Use `process dataset` when your workflow is already annotation-set driven
+
+Use it when:
+
+- your project already has a `soundevent` annotation set,
+- you want prediction runs aligned with that annotation metadata,
+- you want BatDetect2 to resolve recording paths from the annotation set.
+
+```bash
+batdetect2 process dataset \
+  path/to/annotation_set.json \
+  path/to/outputs \
+  --model path/to/model.ckpt
+```
+
+The dataset command reads a `soundevent` annotation set and extracts unique
+recording paths before inference.
+
+## Rule of thumb
+
+- Start with `directory` for the easiest first run.
+- Use `file_list` when selection matters.
+- Use `dataset` when the rest of your workflow is already dataset-based.
+
+## Related pages
+
+- Run batch predictions:
+  {doc}`run-batch-predictions`
+- Tune inference clipping:
+  {doc}`tune-inference-clipping`
+- Process command reference:
+  {doc}`../reference/cli/predict`
--- a/docs/source/how_to/choose-and-configure-evaluation-tasks.md
+++ b/docs/source/how_to/choose-and-configure-evaluation-tasks.md
@ -0,0 +1,74 @@
+# How to choose and configure evaluation tasks
+
+Use this guide when the default evaluation tasks do not match the question you
+want to answer.
+
+## Know the default first
+
+By default, BatDetect2 evaluation starts with:
+
+- sound event detection,
+- sound event classification.
+
+Those are good defaults for many projects, but not for all of them.
+
+## Choose the task that matches the question
+
+Common built-in task families include:
+
+- `sound_event_detection`
+- `sound_event_classification`
+- `top_class_detection`
+- `clip_detection`
+- `clip_classification`
+
+Choose based on the question you care about.
+
+- Use sound-event tasks when you care about individual call events.
+- Use clip tasks when you care about clip-level presence or clip-level class
+  evidence.
+- Use top-class detection when you want matching based on the highest-scoring
+  class per detection.
+
+## Configure tasks in `EvaluationConfig`
+
+Example:
+
+```yaml
+tasks:
+  - name: sound_event_detection
+    prefix: detection
+    affinity_threshold: 0.0
+    strict_match: true
+  - name: clip_classification
+    prefix: clip_classification
+```
+
+Pass the config with:
+
+```bash
+batdetect2 evaluate \
+  path/to/test_dataset.yaml \
+  --model path/to/model.ckpt \
+  --base-dir path/to/project_root \
+  --evaluation-config path/to/evaluation.yaml
+```
+
+Include `--base-dir` when the dataset config resolves recordings through
+relative paths.
+
+## Change one thing at a time
+
+When comparing models or settings, avoid changing task definitions, thresholds,
+matching behavior, and datasets all at once.
+
+Otherwise it becomes hard to explain why the metric changed.
+
+## Related pages
+
+- Evaluation tutorial:
+  {doc}`../tutorials/evaluate-on-a-test-set`
+- Evaluation config reference:
+  {doc}`../reference/evaluation-config`
+- Evaluation concepts:
+  {doc}`../explanation/evaluation-concepts-and-matching`
--- a/docs/source/how_to/configure-aoef-dataset.md
+++ b/docs/source/how_to/configure-aoef-dataset.md
@ -0,0 +1,53 @@
+# How to configure an AOEF dataset source
+
+Use this guide when your annotations are stored in AOEF/soundevent JSON files,
+including exports from Whombat.
+
+## 1) Add an AOEF source entry
+
+In your dataset config, add a source with `format: aoef`.
+
+```yaml
+sources:
+  - name: my_aoef_source
+    format: aoef
+    audio_dir: /path/to/audio
+    annotations_path: /path/to/annotations.soundevent.json
+```
+
+## 2) Choose filtering behavior for annotation projects
+
+If `annotations_path` is an `AnnotationProject`, you can filter by task state.
+
+```yaml
+sources:
+  - name: whombat_verified
+    format: aoef
+    audio_dir: /path/to/audio
+    annotations_path: /path/to/project_export.aoef
+    filter:
+      only_completed: true
+      only_verified: true
+      exclude_issues: true
+```
+
+If you omit `filter`, default project filtering is applied.
+
+To disable filtering for project files:
+
+```yaml
+filter: null
+```
+
+## 3) Check that the source loads
+
+Run a summary on your dataset config:
+
+```bash
+batdetect2 data summary path/to/dataset.yaml
+```
+
+## 4) Continue to training or evaluation
+
+- For training: {doc}`../tutorials/train-a-custom-model`
+- For field-level reference: {doc}`../reference/data-sources`
--- a/docs/source/how_to/configure-audio-preprocessing.md
+++ b/docs/source/how_to/configure-audio-preprocessing.md
@ -0,0 +1,66 @@
+# How to configure audio preprocessing
+
+Use this guide to set sample-rate and waveform-level preprocessing behaviour.
+
+## 1) Set audio loader settings
+
+The audio loader config controls resampling.
+
+```yaml
+samplerate: 256000
+resample:
+  enabled: true
+  method: poly
+```
+
+If your recordings are already at the expected sample rate, you can disable
+resampling.
+
+```yaml
+samplerate: 256000
+resample:
+  enabled: false
+```
+
+## 2) Set waveform transforms in preprocessing config
+
+Waveform transforms are configured in `preprocess.audio_transforms`.
+
+```yaml
+preprocess:
+  audio_transforms:
+    - name: center_audio
+    - name: scale_audio
+    - name: fix_duration
+      duration: 0.5
+```
+
+Available built-ins:
+
+- `center_audio`
+- `scale_audio`
+- `fix_duration`
+
+## 3) Use the config in your workflow
+
+For CLI inference/evaluation, use `--audio-config`.
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs \
+  --audio-config path/to/audio.yaml
+```
+
+## 4) Verify quickly on a small subset
+
+Run on a small folder first and confirm that outputs and runtime are as expected
+before full-batch runs.
+
+## Related pages
+
+- Spectrogram settings:
+  {doc}`configure-spectrogram-preprocessing`
+- Preprocessing config reference:
+  {doc}`../reference/preprocessing-config`
--- a/docs/source/how_to/configure-roi-mapping.md
+++ b/docs/source/how_to/configure-roi-mapping.md
@ -0,0 +1,57 @@
+# How to configure ROI mapping
+
+Use this guide to control how annotation geometry is encoded into training
+targets and decoded back into boxes.
+
+## 1) Set the default ROI mapper
+
+The default mapper is `anchor_bbox`.
+
+```yaml
+roi:
+  default:
+    name: anchor_bbox
+    anchor: bottom-left
+    time_scale: 1000.0
+    frequency_scale: 0.001163
+```
+
+## 2) Choose an anchor strategy
+
+Typical options include `bottom-left` and `center`.
+
+- `bottom-left` is the current default.
+- `center` can be easier to reason about in some workflows.
+
+## 3) Set scale factors intentionally
+
+- `time_scale` controls width scaling.
+- `frequency_scale` controls height scaling.
+
+Use values that are consistent with your model setup and keep them fixed when
+comparing experiments.
+
+## 4) (Optional) override ROI mapping for specific classes
+
+Add class-specific mappers under `roi.overrides`.
+
+```yaml
+roi:
+  default:
+    name: anchor_bbox
+    anchor: bottom-left
+    time_scale: 1000.0
+    frequency_scale: 0.001163
+  overrides:
+    species_x:
+      name: anchor_bbox
+      anchor: center
+      time_scale: 1000.0
+      frequency_scale: 0.001163
+```
+
+## Related pages
+
+- Target definitions: {doc}`configure-target-definitions`
+- Class definitions: {doc}`define-target-classes`
+- Target encoding overview: {doc}`../explanation/target-encoding-and-decoding`
--- a/docs/source/how_to/configure-spectrogram-preprocessing.md
+++ b/docs/source/how_to/configure-spectrogram-preprocessing.md
@ -0,0 +1,59 @@
+# How to configure spectrogram preprocessing
+
+Use this guide to set STFT, frequency range, and spectrogram transforms.
+
+## 1) Configure STFT and frequency range
+
+```yaml
+preprocess:
+  stft:
+    window_duration: 0.002
+    window_overlap: 0.75
+    window_fn: hann
+  frequencies:
+    min_freq: 10000
+    max_freq: 120000
+```
+
+## 2) Configure spectrogram transforms
+
+`spectrogram_transforms` are applied in order.
+
+```yaml
+preprocess:
+  spectrogram_transforms:
+    - name: pcen
+      time_constant: 0.4
+      gain: 0.98
+      bias: 2.0
+      power: 0.5
+    - name: spectral_mean_subtraction
+    - name: scale_amplitude
+      scale: db
+```
+
+Common built-ins:
+
+- `pcen`
+- `spectral_mean_subtraction`
+- `scale_amplitude` (`db` or `power`)
+- `peak_normalize`
+
+## 3) Configure output size
+
+```yaml
+preprocess:
+  size:
+    height: 128
+    resize_factor: 0.5
+```
+
+## 4) Keep train and inference settings aligned
+
+Use the same preprocessing setup for training and prediction whenever possible.
+Large mismatches can degrade model performance.
+
+## Related pages
+
+- Why consistency matters: {doc}`../explanation/preprocessing-consistency`
+- Preprocessing config reference: {doc}`../reference/preprocessing-config`
--- a/docs/source/how_to/configure-target-definitions.md
+++ b/docs/source/how_to/configure-target-definitions.md
@ -0,0 +1,58 @@
+# How to configure target definitions
+
+Use this guide to define which annotated sound events are considered valid
+detection targets.
+
+## 1) Start from a targets config file
+
+```yaml
+detection_target:
+  name: bat
+  match_if:
+    name: has_tag
+    tag:
+      key: call_type
+      value: Echolocation
+  assign_tags:
+    - key: call_type
+      value: Echolocation
+    - key: order
+      value: Chiroptera
+```
+
+`match_if` decides whether an annotation is included in the detection target.
+
+## 2) Use condition combinators when needed
+
+You can combine conditions with `all_of`, `any_of`, and `not`.
+
+```yaml
+detection_target:
+  name: bat
+  match_if:
+    name: all_of
+    conditions:
+      - name: has_tag
+        tag:
+          key: call_type
+          value: Echolocation
+      - name: not
+        condition:
+          name: has_any_tag
+          tags:
+            - key: call_type
+              value: Social
+            - key: class
+              value: Not Bat
+```
+
+## 3) Verify with a small sample first
+
+Before full training, inspect a small annotation subset and confirm that the
+selection logic keeps the events you expect.
+
+## Related pages
+
+- Class mapping: {doc}`define-target-classes`
+- ROI mapping: {doc}`configure-roi-mapping`
+- Targets reference: {doc}`../reference/targets-config-workflow`
--- a/docs/source/how_to/define-target-classes.md
+++ b/docs/source/how_to/define-target-classes.md
@ -0,0 +1,59 @@
+# How to define target classes
+
+Use this guide to map annotations to classification labels used during
+training.
+
+## 1) Add classification target entries
+
+Each entry defines a class name and matching tags.
+
+```yaml
+classification_targets:
+  - name: pippip
+    tags:
+      - key: class
+        value: Pipistrellus pipistrellus
+  - name: pippyg
+    tags:
+      - key: class
+        value: Pipistrellus pygmaeus
+```
+
+## 2) Use `assign_tags` to control decoded output tags
+
+If you want prediction output tags to differ from matching tags, set
+`assign_tags` explicitly.
+
+```yaml
+classification_targets:
+  - name: pipistrelle_group
+    tags:
+      - key: class
+        value: Pipistrellus pipistrellus
+    assign_tags:
+      - key: genus
+        value: Pipistrellus
+```
+
+## 3) Use `match_if` for complex class rules
+
+For advanced conditions, use `match_if` instead of `tags`.
+
+```yaml
+classification_targets:
+  - name: long_call
+    match_if:
+      name: duration
+      operator: gt
+      seconds: 0.02
+```
+
+## 4) Confirm class names are unique
+
+`classification_targets.name` values must be unique.
+
+## Related pages
+
+- Detection-target filtering: {doc}`configure-target-definitions`
+- ROI mapping: {doc}`configure-roi-mapping`
+- Targets config reference: {doc}`../reference/targets-config-workflow`
--- a/docs/source/how_to/fine-tune-from-a-checkpoint.md
+++ b/docs/source/how_to/fine-tune-from-a-checkpoint.md
@ -0,0 +1,45 @@
+# How to fine-tune from a checkpoint
+
+Use this guide when you want to continue from an existing checkpoint instead of training a fresh model config.
+
+## Use `--model` for checkpoint-based training
+
+Pass a checkpoint with `--model`.
+
+Do not combine `--model` with `--model-config`.
+
+```bash
+batdetect2 train \
+  path/to/train_dataset.yaml \
+  --val-dataset path/to/val_dataset.yaml \
+  --model path/to/model.ckpt \
+  --training-config path/to/training.yaml
+```
+
+## Keep targets and preprocessing aligned
+
+If you override targets or audio-related settings while fine-tuning, validate that they still match the checkpoint and your dataset.
+
+Mismatches here can produce confusing failures or invalid comparisons.
+
+## Decide what question the fine-tune should answer
+
+Common fine-tuning goals are:
+
+- adapting to local recording conditions,
+- adapting to a new label set,
+- improving performance on a narrower deployment context.
+
+Make that goal explicit before comparing results.
+
+## Evaluate after fine-tuning
+
+Always compare the fine-tuned checkpoint against a held-out dataset.
+
+Use the same evaluation setup when comparing before and after.
+
+## Related pages
+
+- Training tutorial: {doc}`../tutorials/train-a-custom-model`
+- Evaluate a test set: {doc}`../tutorials/evaluate-on-a-test-set`
+- Train command reference: {doc}`../reference/cli/train`
--- a/docs/source/how_to/import-legacy-batdetect2-annotations.md
+++ b/docs/source/how_to/import-legacy-batdetect2-annotations.md
@ -0,0 +1,66 @@
+# How to import legacy batdetect2 annotations
+
+Use this guide if your annotations are in older batdetect2 JSON formats.
+
+Two legacy formats are supported:
+
+- `batdetect2`: one annotation JSON file per recording
+- `batdetect2_file`: one merged JSON file for many recordings
+
+## 1) Choose the correct source format
+
+Directory-based annotations (`format: batdetect2`):
+
+```yaml
+sources:
+  - name: legacy_per_file
+    format: batdetect2
+    audio_dir: /path/to/audio
+    annotations_dir: /path/to/annotation_json_dir
+```
+
+Merged annotation file (`format: batdetect2_file`):
+
+```yaml
+sources:
+  - name: legacy_merged
+    format: batdetect2_file
+    audio_dir: /path/to/audio
+    annotations_path: /path/to/merged_annotations.json
+```
+
+## 2) Set optional legacy filters
+
+Legacy filters are based on `annotated` and `issues` flags.
+
+```yaml
+filter:
+  only_annotated: true
+  exclude_issues: true
+```
+
+To load all entries regardless of flags:
+
+```yaml
+filter: null
+```
+
+## 3) Validate and convert if needed
+
+Check loaded records:
+
+```bash
+batdetect2 data summary path/to/dataset.yaml
+```
+
+Convert to annotation-set output for downstream tooling:
+
+```bash
+batdetect2 data convert path/to/dataset.yaml --output path/to/output.json
+```
+
+## 4) Continue with current workflows
+
+- Run predictions: {doc}`run-batch-predictions`
+- Train on imported data: {doc}`../tutorials/train-a-custom-model`
+- Field-level reference: {doc}`../reference/data-sources`
--- a/docs/source/how_to/index.md
+++ b/docs/source/how_to/index.md
@ -0,0 +1,30 @@
+# How-to Guides
+
+How-to guides help you answer practical questions once you are past the first
+tutorial.
+
+Use this section when you already know the basic workflow and want help with one
+specific task.
+
+```{toctree}
+:maxdepth: 1
+
+choose-a-model
+choose-an-inference-input-mode
+run-batch-predictions
+tune-inference-clipping
+tune-detection-threshold
+inspect-class-scores-in-python
+inspect-detection-features-in-python
+save-predictions-in-different-output-formats
+fine-tune-from-a-checkpoint
+choose-and-configure-evaluation-tasks
+interpret-evaluation-outputs
+configure-aoef-dataset
+import-legacy-batdetect2-annotations
+configure-audio-preprocessing
+configure-spectrogram-preprocessing
+configure-target-definitions
+define-target-classes
+configure-roi-mapping
+```
--- a/docs/source/how_to/inspect-class-scores-in-python.md
+++ b/docs/source/how_to/inspect-class-scores-in-python.md
@ -0,0 +1,44 @@
+# How to inspect class scores in Python
+
+Use this guide when you need more than the top class label for each detection.
+
+## Get the ranked class scores
+
+`BatDetect2API.get_class_scores` returns `(class_name, score)` pairs for one detection.
+
+```python
+from pathlib import Path
+
+from batdetect2.api_v2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint(Path("path/to/model.ckpt"))
+prediction = api.process_file(Path("path/to/audio.wav"))
+
+for detection in prediction.detections:
+    print("detection score:", detection.detection_score)
+    for class_name, score in api.get_class_scores(detection):
+        print(class_name, score)
+```
+
+## Separate detection confidence from class ranking
+
+Keep these two ideas separate:
+
+- `detection_score` tells you how confident the model is that the event is a real detection,
+- `class_scores` tell you how the model ranked classes for that detected event.
+
+A detection can have a reasonable detection score while still having uncertain class ranking.
+
+## Hide the top class if needed
+
+If you want to inspect only the alternatives, pass `include_top_class=False`.
+
+```python
+api.get_class_scores(detection, include_top_class=False)
+```
+
+## Related pages
+
+- Python tutorial: {doc}`../tutorials/integrate-with-a-python-pipeline`
+- API reference: {doc}`../reference/api`
+- Understanding scores: {doc}`../explanation/what-batdetect2-predicts`
--- a/docs/source/how_to/inspect-detection-features-in-python.md
+++ b/docs/source/how_to/inspect-detection-features-in-python.md
@ -0,0 +1,49 @@
+# How to inspect detection features in Python
+
+Use this guide when you want the per-detection feature vectors exposed by the current API.
+
+## Get the feature vector for one detection
+
+Each detection carries a `features` vector.
+
+The API exposes it through `get_detection_features`.
+
+```python
+from pathlib import Path
+
+from batdetect2.api_v2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint(Path("path/to/model.ckpt"))
+prediction = api.process_file(Path("path/to/audio.wav"))
+
+for detection in prediction.detections:
+    features = api.get_detection_features(detection)
+    print(features.shape)
+```
+
+## Use features for exploration, not as ground truth labels
+
+These features are internal model representations attached to detections.
+
+They can be useful for:
+
+- exploratory visualization,
+- downstream clustering,
+- comparison across detections,
+- building extra analysis pipelines.
+
+They do not replace validation.
+
+They also do not automatically have a one-to-one interpretation as ecological variables.
+
+## Save predictions with features included
+
+If you need features on disk, use an output format that supports them, such as `raw` or `parquet`, and keep feature inclusion enabled.
+
+See {doc}`save-predictions-in-different-output-formats`.
+
+## Related pages
+
+- Understanding features and embeddings: {doc}`../explanation/extracted-features-and-embeddings`
+- Output formats reference: {doc}`../reference/output-formats`
+- API reference: {doc}`../reference/api`
--- a/docs/source/how_to/interpret-evaluation-outputs.md
+++ b/docs/source/how_to/interpret-evaluation-outputs.md
@ -0,0 +1,41 @@
+# How to interpret evaluation outputs
+
+Use this guide after `batdetect2 evaluate` has written metrics and plots to disk.
+
+## Start by identifying the task
+
+Do not interpret a metric until you know which evaluation task produced it.
+
+For example, a detection score and a clip-classification score answer different questions.
+
+## Read the output directory as a bundle
+
+Treat the evaluation output directory as one package:
+
+- metrics,
+- plots,
+- saved predictions,
+- config context.
+
+Do not lift a single number out of context and treat it as the whole story.
+
+## Look for failure patterns, not just overall averages
+
+Check:
+
+- whether errors concentrate in certain taxa,
+- whether specific sites or recorder setups behave differently,
+- whether threshold choices are driving the result,
+- whether predictions are near clip boundaries or matching thresholds.
+
+## Keep validation and deployment questions separate
+
+A model can look good on one task and still be a poor fit for your deployment question.
+
+Interpret the outputs in relation to the real use case, not only the easiest metric to report.
+
+## Related pages
+
+- Evaluation tutorial: {doc}`../tutorials/evaluate-on-a-test-set`
+- Evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
+- Model output and validation: {doc}`../explanation/model-output-and-validation`
--- a/docs/source/how_to/run-batch-predictions.md
+++ b/docs/source/how_to/run-batch-predictions.md
@ -0,0 +1,62 @@
+# How to run batch processing
+
+This guide shows practical command patterns for directory-based and file-list
+processing runs.
+
+Use it after you already know which input mode you want and need concrete
+command templates for a repeatable batch run.
+
+## Process a directory
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs
+```
+
+Use this when BatDetect2 should discover the audio files for you.
+
+## Process a file list
+
+```bash
+batdetect2 process file_list \
+  path/to/model.ckpt \
+  path/to/audio_files.txt \
+  path/to/outputs
+```
+
+Use this when another part of your workflow already produced the exact recording
+list to process.
+
+## Process a dataset (annotation set)
+
+```bash
+batdetect2 process dataset \
+  path/to/model.ckpt \
+  path/to/annotation_set.json \
+  path/to/outputs
+```
+
+Use this when your project already has a `soundevent` annotation set and you
+want to extract unique recording paths from it.
+
+## Useful options
+
+- `--batch-size` to control throughput.
+- `--workers` to set data-loading parallelism.
+- `--format` to select output format.
+- `--inference-config` to control clipping and loader behavior.
+- `--outputs-config` to control serialization and output transforms.
+- `--detection-threshold` to override the detection threshold for a run.
+
+## Practical workflow
+
+For large runs:
+
+1. test the command on a small reviewed subset,
+2. lock the config files and command shape,
+3. write outputs to a dedicated directory per run,
+4. record the checkpoint, config paths, and thresholds used.
+
+For complete option details, see {doc}`../reference/cli/predict`.
--- a/docs/source/how_to/save-predictions-in-different-output-formats.md
+++ b/docs/source/how_to/save-predictions-in-different-output-formats.md
@ -0,0 +1,95 @@
+# How to save predictions in different output formats
+
+Use this guide when you need BatDetect2 outputs in a specific representation for
+downstream tools.
+
+## Choose the format that matches the job
+
+Current built-in output formats include:
+
+- `raw`:
+  one NetCDF file per clip, best for rich structured outputs,
+- `parquet`:
+  tabular storage for data analysis workflows,
+- `soundevent`:
+  prediction-set JSON for soundevent-style tooling,
+- `batdetect2`:
+  legacy-compatible per-recording JSON and CSV outputs.
+
+## Select a format from the CLI
+
+Use `--format` for quick experiments.
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs \
+  --format parquet
+```
+
+## Use an outputs config for repeatable runs
+
+Use an outputs config when you want reproducible control over format and
+transforms.
+
+Example:
+
+```yaml
+format:
+  name: raw
+  include_class_scores: true
+  include_features: true
+  include_geometry: true
+transform:
+  detection_transforms: []
+  clip_transforms: []
+```
+
+Run with:
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs \
+  --outputs-config path/to/outputs.yaml
+```
+
+## Pick the simplest useful format
+
+- Use `raw` if you want the richest output surface and easy round-tripping.
+- Use `parquet` if you want tabular analysis in Python or data-lake workflows.
+- Use `soundevent` if you want prediction-set JSON.
+- Use `batdetect2` when you need legacy BatDetect2-style outputs.
+
+## Enable legacy CNN feature CSVs
+
+The `batdetect2` formatter can also write the legacy CNN feature sidecar CSVs.
+This is controlled through the outputs config.
+
+Example:
+
+```yaml
+format:
+  name: batdetect2
+  write_cnn_features_csv: true
+transform:
+  detection_transforms: []
+  clip_transforms: []
+```
+
+When enabled, BatDetect2 writes:
+
+- one `.json` file per recording,
+- one detection `.csv` file per recording,
+- one `_cnn_features.csv` file per recording when detections are present.
+
+## Related pages
+
+- Outputs config reference:
+  {doc}`../reference/outputs-config`
+- Output formats reference:
+  {doc}`../reference/output-formats`
+- Output transforms reference:
+  {doc}`../reference/output-transforms`
--- a/docs/source/how_to/tune-detection-threshold.md
+++ b/docs/source/how_to/tune-detection-threshold.md
@ -0,0 +1,51 @@
+# How to tune detection threshold
+
+Use this guide to compare detection outputs at different threshold values.
+
+The goal is not to find a universal threshold.
+
+The goal is to choose a threshold that fits your reviewed local data and the
+project trade-off between missed calls and false positives.
+
+## 1) Start with a baseline run
+
+Run an initial prediction workflow and keep outputs in a dedicated folder.
+
+## 2) Sweep threshold values
+
+Run `process` multiple times with different thresholds (for example `0.1`,
+`0.3`, `0.5`) and compare output counts and quality on the same validation
+subset.
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs_thr_03 \
+  --detection-threshold 0.3
+```
+
+Keep each threshold run in a separate output directory.
+
+That makes it easier to compare counts and inspect example files without mixing
+results.
+
+## 3) Validate against known calls
+
+Use files with trusted annotations or expert review to select a threshold that
+fits your project goals.
+
+Check both:
+
+- obvious false positives,
+- obvious missed calls.
+
+If class interpretation matters downstream, inspect class ranking behavior as
+well, not just detection counts.
+
+## 4) Record your chosen setting
+
+Write down the chosen threshold and rationale so analyses are reproducible.
+
+For conceptual trade-offs, see
+{doc}`../explanation/model-output-and-validation`.
--- a/docs/source/how_to/tune-inference-clipping.md
+++ b/docs/source/how_to/tune-inference-clipping.md
@ -0,0 +1,73 @@
+# How to tune inference clipping
+
+Use this guide when long recordings need to be split into smaller clips during
+inference.
+
+## What clipping controls
+
+`InferenceConfig.clipping` controls how recordings are split before batching.
+
+Key fields are:
+
+- `duration`:
+  clip duration in seconds,
+- `overlap`:
+  overlap between adjacent clips,
+- `max_empty`:
+  how much empty padding is allowed,
+- `discard_empty`:
+  whether empty clips are dropped.
+
+## Start from the defaults
+
+Use the built-in clipping behavior first unless you already know you need
+something else.
+
+Only tune clipping when:
+
+- recordings are much longer than your normal working set,
+- you are seeing edge effects around calls,
+- you need tighter control over throughput or padding behavior.
+
+## Override clipping with an inference config
+
+Create an inference config file and pass it to `process` or `evaluate`.
+
+Example:
+
+```yaml
+clipping:
+  enabled: true
+  duration: 0.5
+  overlap: 0.1
+  max_empty: 0.0
+  discard_empty: true
+loader:
+  batch_size: 8
+```
+
+Run with:
+
+```bash
+batdetect2 process directory \
+  path/to/model.ckpt \
+  path/to/audio_dir \
+  path/to/outputs \
+  --inference-config path/to/inference.yaml
+```
+
+## Validate clipping changes on a small reviewed subset
+
+Changing clipping changes what the model sees per batch and can change how
+events near clip boundaries behave.
+
+Check a reviewed subset before applying clipping changes to a full project.
+
+## Related pages
+
+- Inference config reference:
+  {doc}`../reference/inference-config`
+- Run batch predictions:
+  {doc}`run-batch-predictions`
+- Understanding the pipeline:
+  {doc}`../explanation/pipeline-overview`
--- a/docs/source/index.md
+++ b/docs/source/index.md
@ -0,0 +1,114 @@
+# Home
+
+Welcome to the BatDetect2 documentation.
+
+## What is BatDetect2?
+
+`batdetect2` is a deep learning model and software package for detecting and
+classifying bat echolocation calls in high-frequency audio recordings.
+
+You can use it from the command line or from Python, depending on how much
+control you need.
+
+In practice, BatDetect2 scans a recording, finds sounds that look like bat
+calls, and returns one result for each detected call.
+Each result can include where the call appears in the recording (a box defined
+by its start and end time and its lowest and highest frequency), how confident
+the model is that it found a call, and how strongly the call matches each of the
+available classes.
+
+The built-in default model is trained for 17 UK species.
+The package also supports custom training, fine-tuning, evaluation, and more
+advanced workflows from Python.
+
+For more detail on the underlying approach, see the pre-print:
+[Towards a General Approach for Bat Echolocation Detection and Classification](https://www.biorxiv.org/content/10.1101/2022.12.14.520490v1)
+
+```{warning}
+Treat outputs as model predictions, not ground truth.
+Always validate on reviewed local data before using results for ecological inference.
+```
+
+## What can I do with it?
+
+- I want to run the model on my recordings:
+  {doc}`tutorials/run-inference-on-folder`
+- I write code and want to use it from Python:
+  {doc}`tutorials/integrate-with-a-python-pipeline`
+- I want to train or fine-tune a custom model:
+  {doc}`tutorials/train-a-custom-model`
+- I want to evaluate a trained model on held-out data:
+  {doc}`tutorials/evaluate-on-a-test-set`
+
+```{note}
+Looking for the previous BatDetect2 workflow?
+See {doc}`legacy/index`.
+The legacy docs are still available, but new workflows should use `batdetect2 process` and `BatDetect2API`.
+```
+
+## How to use this site
+
+Start with {doc}`getting_started` if you are new.
+
+Then choose the section that matches what you need.
+
+If you are here mainly to run the model on recordings, start with Tutorials.
+
+| Section       | Best for                                      | Start here               |
+| ------------- | --------------------------------------------- | ------------------------ |
+| Tutorials     | Step-by-step routes for the most common tasks | {doc}`tutorials/index`   |
+| How-to guides | Answers to specific practical questions       | {doc}`how_to/index`      |
+| Reference     | Detailed command and settings help            | {doc}`reference/index`   |
+| Understanding | Concepts, interpretation, and trade-offs      | {doc}`explanation/index` |
+| Legacy        | Previous workflow and migration guidance      | {doc}`legacy/index`      |
+
+## Get in touch
+
+- GitHub repository:
+  [macaodha/batdetect2](https://github.com/macaodha/batdetect2)
+- Questions, bug reports, and feature requests:
+  [GitHub Issues](https://github.com/macaodha/batdetect2/issues)
+- Common questions:
+  {doc}`faq`
+- Want to contribute?
+  See {doc}`development/index`
+
+## Cite this work
+
+If you use BatDetect2 in research, please cite:
+
+Mac Aodha, O., Martinez Balvanera, S., Damstra, E., et al.
+(2022).
+_Towards a General Approach for Bat Echolocation Detection and Classification_.
+bioRxiv.
+
+or use the BibTeX entry:
+
+```bibtex
+@article{batdetect2_2022,
+  title         = {Towards a General Approach for Bat Echolocation Detection and Classification},
+  author        = {Mac Aodha, Oisin and Mart\'{i}nez Balvanera, Santiago and Damstra, Elise and Cooke, Martyn and Eichinski, Philip and Browning, Ella and Barataud, Michel and Boughey, Katherine and Coles, Roger and Giacomini, Giada and MacSwiney G., M. Cristina and K. Obrist, Martin and Parsons, Stuart and Sattler, Thomas and Jones, Kate E.},
+  journal       = {bioRxiv},
+  year          = {2022}
+}
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Get Started
+
+getting_started
+faq
+tutorials/index
+how_to/index
+reference/index
+explanation/index
+legacy/index
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Contributing
+
+development/index
+```
--- a/docs/source/legacy/cli-detect.md
+++ b/docs/source/legacy/cli-detect.md
@ -0,0 +1,53 @@
+# CLI workflow: `batdetect2 detect`
+
+This page documents the previous CLI workflow based on `batdetect2 detect`.
+
+```{warning}
+This is documentation for a previous version of batdetect2.
+For new workflows, use `batdetect2 process directory` instead.
+If you are migrating, start with {doc}`migration-guide`.
+```
+
+## Processing a folder of audio files
+
+```bash
+batdetect2 detect AUDIO_DIR ANN_DIR DETECTION_THRESHOLD
+```
+
+Example:
+
+```bash
+batdetect2 detect example_data/audio/ example_data/anns/ 0.3
+```
+
+This command scans a directory of audio files, runs the BatDetect2 detector on
+each file, and writes BatDetect2-style outputs into `ANN_DIR`.
+Those outputs usually include one JSON file and one CSV file per recording, and
+can optionally include extra feature CSVs.
+
+`AUDIO_DIR` is the folder containing the input `.wav` files.
+`ANN_DIR` is the folder where model outputs are written.
+
+`DETECTION_THRESHOLD` controls which detections are kept.
+Predictions below this score are discarded.
+Smaller values keep more detections, but usually also increase mistakes.
+
+Common options:
+
+- `--cnn_features` Write extra CNN feature CSV files for each recording.
+- `--spec_features` Extract and write traditional acoustic spectrogram feature
+  CSV files.
+  These are saved as `*_spec_features.csv` files.
+- `--time_expansion_factor` Set the time expansion factor used for all files in
+  the run.
+- `--save_preds_if_empty` Save output files even when no detections are found.
+- `--model_path` Use a specific checkpoint instead of the included default
+  model.
+  If omitted, the command uses the default model trained on UK data.
+
+## Related pages
+
+- Migration guide:
+  {doc}`migration-guide`
+- Current process docs:
+  {doc}`../reference/cli/predict`
--- a/docs/source/legacy/index.md
+++ b/docs/source/legacy/index.md
@ -0,0 +1,28 @@
+# BatDetect2 v1.0 documentation
+
+This section documents the BatDetect2 workflow for version 1.
+
+Use these pages if you need to keep working with the older `batdetect2 detect` command or the older `batdetect2.api` interface.
+
+For new projects, we recommend the current workflow:
+
+- CLI:
+  `batdetect2 process`
+- Python:
+  `batdetect2.api_v2.BatDetect2API`
+
+If you are moving from the older workflow, start with {doc}`migration-guide`.
+
+```{warning}
+These pages describe the previous workflow.
+They are kept for continuity and migration support.
+New users should start with {doc}`../getting_started` and {doc}`../tutorials/index`.
+```
+
+```{toctree}
+:maxdepth: 1
+
+cli-detect
+python-api
+migration-guide
+```
--- a/docs/source/legacy/migration-guide.md
+++ b/docs/source/legacy/migration-guide.md
@ -0,0 +1,123 @@
+# BatDetect2 2.0 migration guide
+
+Use this guide when moving from BatDetect2 1.x workflows to the CLI and API in
+2.x.
+
+## Why migrate
+
+You get access to newer features.
+The codebase changed quite a bit and now gives you much more control over the
+workflow through config files, improved training and fine-tuning code, and a
+more flexible sound target definition system.
+
+You can also run newer or improved models.
+That includes updated versions of the UK model, plus other models trained with
+the newer codebase.
+
+We are no longer actively supporting version 1.
+No new enhancements are planned there, and only major bug fixes may still be
+considered.
+Future work is focused on version 2, including compatibility with newer Python
+versions.
+
+## Deprecation plan
+
+We have kept the `batdetect2.api` module and the `batdetect2 detect` CLI command
+in place for now.
+You can keep using them without changing your current workflow.
+However, many of the internal functions were relocated, removed or modified.
+If your code relied on anything outside of the `api` module, it may break.
+It is worth checking the new docs first, since there may already be a newer
+feature that covers your use case.
+If not, please open an issue.
+
+Because the old `api` and CLI command are now redundant with the newer stack, we
+plan to remove them in about a year.
+If you maintain long-running pipelines and want to keep them up to date, it is
+a good idea to migrate to version 2.
+
+## How to migrate
+
+If you are only using the `batdetect2 detect` CLI command or the
+`batdetect2.api` module, the migration should be fairly simple.
+This guide only covers these two entry points.
+
+### CLI mapping
+
+- `batdetect2 detect AUDIO_DIR ANN_DIR DETECTION_THRESHOLD` -> `batdetect2
+  process directory AUDIO_DIR OUTPUT_PATH --detection-threshold
+  DETECTION_THRESHOLD ...`
+
+Main changes:
+
+- outputs can be written in different formats.
+  See {doc}`../reference/output-formats` for the available options.
+- the detection threshold is now an option instead of a required positional
+  argument.
+- options like saving CNN features are now controlled through config rather than
+  command flags.
+- there are separate subcommands for processing a directory, file list, or
+  dataset.
+
+### Python API mapping
+
+- old:
+  `import batdetect2.api as api`
+- current:
+  `from batdetect2 import BatDetect2API`
+
+Typical migration shape:
+
+```python
+from pathlib import Path
+
+from batdetect2 import BatDetect2API
+
+# If no checkpoint is provided, the default UK model is loaded
+api = BatDetect2API.from_checkpoint()
+prediction = api.process_file(Path("path/to/audio.wav"))
+```
+
+Useful replacements:
+
+- `batdetect2.api.process_file` -> current `BatDetect2API.process_file`
+- `batdetect2.api.process_audio` -> current `BatDetect2API.process_audio`
+- `batdetect2.api.process_spectrogram` -> current
+  `BatDetect2API.process_spectrogram`
+- one-off batch loops -> `BatDetect2API.process_files` or CLI `process`
+
+### Model changes
+
+The default checkpoint used by the new CLI `process` commands and by
+`BatDetect2API` is a newer model trained from scratch using the updated training
+code, but with the same model architecture, training procedure, and data.
+Performance did not change substantially, but some differences are still
+expected.
+
+### Species names
+
+For the default UK model there are two naming changes:
+
+1. The original model had a typo and instead of `Barbastella barbastellus` it
+   used `Barbastellus barbastellus`.
+   This has now been corrected.
+2. There has been a recent change in name for `Eptesicus serotinus` to
+   `Cnephaeus serotinus`.
+
+## Stay on version 1
+
+If you prefer not to migrate to version 2 yet, you can keep using version 1.
+In that case, it is a good idea to pin your dependency:
+
+```bash
+pip install "batdetect2>=1.3.1,<2"
+```
+
+## Related pages
+
+- Getting started:
+  {doc}`../getting_started`
+- Tutorials:
+  {doc}`../tutorials/index`
+- API reference:
+  {doc}`../reference/api`
--- a/docs/source/legacy/python-api.md
+++ b/docs/source/legacy/python-api.md
@ -0,0 +1,55 @@
+# Legacy Python API: `batdetect2.api`
+
+This page documents the previous Python API workflow based on `batdetect2.api`.
+
+```{warning}
+This is documentation for a previous version of BatDetect2.
+For new workflows, use `batdetect2.BatDetect2API`.
+If you are migrating, start with {doc}`migration-guide`.
+```
+
+## Using BatDetect2 in Python
+
+If you prefer to process data inside a Python script, you can use the `batdetect2.api` module.
+
+This interface gives you a simple entry point for running the built-in BatDetect2 model and also exposes the default model and default configuration more directly than the current API.
+
+You can process a whole file in one step, or load audio, generate a spectrogram, and work with lower-level functions yourself.
+
+Common functions:
+
+- `process_file` Load an audio file, run the model, and return BatDetect2-style results for that recording.
+- `process_audio` Run inference on an audio array that is already loaded in memory.
+- `process_spectrogram` Run inference starting from a spectrogram tensor instead of raw audio.
+- `load_audio` Load and resample audio using the legacy preprocessing path.
+- `generate_spectrogram` Convert audio into the spectrogram representation expected by the model.
+- `postprocess` Convert raw model outputs into detections and extracted features.
+
+Typical usage:
+
+```python
+import batdetect2.api as api
+
+AUDIO_FILE = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
+
+# Process a whole file
+results = api.process_file(AUDIO_FILE)
+annotations = results["pred_dict"]["annotation"]
+
+# Or, load audio and compute spectrograms
+audio = api.load_audio(AUDIO_FILE)
+spec = api.generate_spectrogram(audio)
+
+# And process the audio or the spectrogram with the model
+detections, features, spec = api.process_audio(audio)
+detections, features = api.process_spectrogram(spec)
+
+# Integrate the detections or extracted features into your own analysis
+```
+
+This interface is most useful when you want to work directly with detections, features, spectrograms, or intermediate arrays inside your own code.
+
+## Related pages
+
+- Migration guide: {doc}`migration-guide`
+- Current API reference: {doc}`../reference/api`
--- a/docs/source/reference/api.md
+++ b/docs/source/reference/api.md
@ -0,0 +1,39 @@
+# `BatDetect2API` reference
+
+`BatDetect2API` is the main Python entry point for BatDetect2.
+
+Use it when you want to load a model, run prediction, inspect detections,
+evaluate results, or train from Python.
+
+Defined in `batdetect2.api_v2`.
+
+## Main ways to create it
+
+- `BatDetect2API.from_checkpoint(path, ...)`
+  - load a trained checkpoint, a bundled checkpoint alias, or a Hugging Face
+    checkpoint.
+- `BatDetect2API.from_config(model_config=..., targets_config=..., ...)`
+  - build a full model stack from config objects.
+
+## Common tasks
+
+- Load a checkpoint and run prediction on one file.
+- Run prediction on many files or clips.
+- Save predictions in one of the supported output formats.
+- Evaluate a model on labelled data.
+- Fine-tune an existing checkpoint on new targets.
+
+## Generated reference
+
+```{eval-rst}
+.. autoclass:: batdetect2.api_v2.BatDetect2API
+```
+
+## Related pages
+
+- Python tutorial:
+  {doc}`../tutorials/integrate-with-a-python-pipeline`
+- Outputs config reference:
+  {doc}`outputs-config`
+- Output formats reference:
+  {doc}`output-formats`
--- a/docs/source/reference/cli/base.rst
+++ b/docs/source/reference/cli/base.rst
@ -0,0 +1,8 @@
+Base command
+============
+
+The options on this page apply to all subcommands.
+
+.. click:: batdetect2.cli:cli
+   :prog: batdetect2
+   :nested: none
--- a/docs/source/reference/cli/data.rst
+++ b/docs/source/reference/cli/data.rst
@ -0,0 +1,8 @@
+Data command
+============
+
+Inspect and convert dataset config files.
+
+.. click:: batdetect2.cli.data:data
+   :prog: batdetect2 data
+   :nested: full
--- a/docs/source/reference/cli/detect_legacy.rst
+++ b/docs/source/reference/cli/detect_legacy.rst
@ -0,0 +1,18 @@
+Legacy detect command
+=====================
+
+.. warning::
+
+   ``batdetect2 detect`` is a legacy compatibility command.
+   Prefer ``batdetect2 process directory`` for new workflows.
+
+Migration at a glance
+---------------------
+
+- Legacy: ``batdetect2 detect AUDIO_DIR ANN_DIR DETECTION_THRESHOLD``
+- Current: ``batdetect2 process directory AUDIO_DIR OUTPUT_PATH``
+  with optional ``--detection-threshold``
+
+.. click:: batdetect2.cli.compat:detect
+   :prog: batdetect2 detect
+   :nested: none
--- a/docs/source/reference/cli/evaluate.rst
+++ b/docs/source/reference/cli/evaluate.rst
@ -0,0 +1,11 @@
+Evaluate command
+================
+
+Use ``batdetect2 evaluate`` to compare a checkpoint against labelled test data.
+
+This command writes metrics and any configured artifacts to the output
+directory.
+
+.. click:: batdetect2.cli.evaluate:evaluate_command
+   :prog: batdetect2 evaluate
+   :nested: none
--- a/docs/source/reference/cli/finetune.rst
+++ b/docs/source/reference/cli/finetune.rst
@ -0,0 +1,11 @@
+Finetune command
+================
+
+Use ``batdetect2 finetune`` to adapt an existing checkpoint to a new target
+definition.
+
+If you do not pass ``--model``, the bundled ``uk_same`` checkpoint is used.
+
+.. click:: batdetect2.cli.finetune:finetune_command
+   :prog: batdetect2 finetune
+   :nested: none
--- a/docs/source/reference/cli/index.md
+++ b/docs/source/reference/cli/index.md
@ -0,0 +1,50 @@
+# CLI reference
+
+Use this section to find the right command quickly, then open the command page
+for the full option list.
+
+## Command map
+
+| Command | Use it for | Required positional args |
+| --- | --- | --- |
+| `batdetect2 process` | Run inference on audio | Depends on subcommand (`directory`, `file_list`, `dataset`) |
+| `batdetect2 data` | Inspect and convert dataset configs | Depends on subcommand (`summary`, `convert`) |
+| `batdetect2 train` | Train a new model or continue from a checkpoint | `TRAIN_DATASET` |
+| `batdetect2 finetune` | Fine-tune a checkpoint on new targets | `TRAIN_DATASET` plus `--targets` |
+| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `TEST_DATASET` |
+| `batdetect2 detect` | Legacy compatibility workflow | `AUDIO_DIR`, `ANN_DIR`, `DETECTION_THRESHOLD` |
+
+## Notes
+
+- Global CLI options are documented in {doc}`base`.
+- Paths with spaces should be wrapped in quotes.
+- Input audio is expected to be mono.
+- `process` uses the optional `--detection-threshold` override.
+- `evaluate` takes `TEST_DATASET` as a positional argument and uses `--model`
+  for the checkpoint override.
+- `finetune` defaults to the bundled `uk_same` checkpoint if `--model` is not
+  provided.
+
+```{warning}
+`batdetect2 detect` is a legacy command.
+Prefer `batdetect2 process directory` for new workflows.
+```
+
+## Related pages
+
+- {doc}`../../tutorials/run-inference-on-folder`
+- {doc}`../../how_to/run-batch-predictions`
+- {doc}`../../how_to/tune-detection-threshold`
+- {doc}`../configs`
+
+```{toctree}
+:maxdepth: 1
+
+Base command and global options <base>
+Process command group <predict>
+Data command group <data>
+Train command <train>
+Finetune command <finetune>
+Evaluate command <evaluate>
+Legacy detect command <detect_legacy>
+```
--- a/docs/source/reference/cli/predict.rst
+++ b/docs/source/reference/cli/predict.rst
@ -0,0 +1,17 @@
+Process command
+===============
+
+Use ``batdetect2 process`` to run inference on audio.
+
+Choose a subcommand based on how you want to provide the input:
+
+- ``directory`` for all supported audio files in one folder
+- ``file_list`` for a text file with one audio path per line
+- ``dataset`` for recordings referenced by a dataset file
+
+Use ``--detection-threshold`` when you want to override the configured
+threshold for one run.
+
+.. click:: batdetect2.cli.inference:process
+   :prog: batdetect2 process
+   :nested: full
--- a/docs/source/reference/cli/train.rst
+++ b/docs/source/reference/cli/train.rst
@ -0,0 +1,12 @@
+Train command
+=============
+
+Use ``batdetect2 train`` to start from a fresh model config or continue from an
+existing checkpoint.
+
+If you want to adapt an existing checkpoint to a new target definition, use
+``batdetect2 finetune`` instead.
+
+.. click:: batdetect2.cli.train:train_command
+   :prog: batdetect2 train
+   :nested: none
--- a/docs/source/reference/configs.rst
+++ b/docs/source/reference/configs.rst
@ -0,0 +1,18 @@
+Config reference
+================
+
+BatDetect2 uses separate config objects for different workflow surfaces.
+
+Use the dedicated reference pages for each config family:
+
+- model config
+- training config
+- logging config
+- inference config
+- evaluation config
+- outputs config
+- preprocessing config
+- postprocess config
+- targets config workflow
+
+Example config files live under ``example_data/configs/``.
--- a/docs/source/reference/data-sources.md
+++ b/docs/source/reference/data-sources.md
@ -0,0 +1,76 @@
+# Data source reference
+
+This page summarizes dataset source formats and their config fields.
+
+## Supported source formats
+
+| Format | Description |
+| --- | --- |
+| `aoef` | AOEF/soundevent annotation files (`AnnotationSet` or `AnnotationProject`) |
+| `batdetect2` | Legacy format with one JSON annotation file per recording |
+| `batdetect2_file` | Legacy format with one merged JSON annotation file |
+
+## AOEF (`format: aoef`)
+
+Required fields:
+
+- `name`
+- `format`
+- `audio_dir`
+- `annotations_path`
+
+Optional fields:
+
+- `description`
+- `filter`
+
+`filter` is only used when `annotations_path` points to an
+`AnnotationProject`.
+
+AOEF filter options:
+
+- `only_completed` (default: `true`)
+- `only_verified` (default: `false`)
+- `exclude_issues` (default: `true`)
+
+Use `filter: null` to disable project filtering.
+
+## Legacy per-file (`format: batdetect2`)
+
+Required fields:
+
+- `name`
+- `format`
+- `audio_dir`
+- `annotations_dir`
+
+Optional fields:
+
+- `description`
+- `filter`
+
+## Legacy merged file (`format: batdetect2_file`)
+
+Required fields:
+
+- `name`
+- `format`
+- `audio_dir`
+- `annotations_path`
+
+Optional fields:
+
+- `description`
+- `filter`
+
+Legacy filter options:
+
+- `only_annotated` (default: `true`)
+- `exclude_issues` (default: `true`)
+
+Use `filter: null` to disable filtering.
+
+## Related guides
+
+- {doc}`../how_to/configure-aoef-dataset`
+- {doc}`../how_to/import-legacy-batdetect2-annotations`
--- a/docs/source/reference/detections.md
+++ b/docs/source/reference/detections.md
@ -0,0 +1,42 @@
+# Detections reference
+
+These are the main prediction objects returned by BatDetect2 inference methods.
+
+Defined in `batdetect2.postprocess.types`.
+
+## `ClipDetections`
+
+`ClipDetections` represents the predictions for one clip or one full recording.
+
+Fields:
+
+- `clip`
+  - the `soundevent` clip metadata for the processed audio.
+- `detections`
+  - list of `Detection` objects for that clip.
+
+## `Detection`
+
+`Detection` represents one detected event.
+
+Fields:
+
+- `geometry`
+  - time-frequency geometry for the detected event.
+- `detection_score`
+  - confidence that there is an event at this location.
+- `class_scores`
+  - class ranking scores for the detected event.
+- `features`
+  - per-detection feature vector from the model.
+
+## Related pages
+
+- Python tutorial:
+  {doc}`../tutorials/integrate-with-a-python-pipeline`
+- API reference:
+  {doc}`api`
+- What BatDetect2 predicts:
+  {doc}`../explanation/what-batdetect2-predicts`
+- Features and embeddings:
+  {doc}`../explanation/extracted-features-and-embeddings`
--- a/docs/source/reference/evaluation-config.md
+++ b/docs/source/reference/evaluation-config.md
@ -0,0 +1,46 @@
+# Evaluation config reference
+
+`EvaluationConfig` defines which evaluation tasks run and which plots they generate.
+
+Defined in `batdetect2.evaluate.config`.
+
+## Top-level fields
+
+- `tasks`
+  - list of task configs.
+
+## Built-in task families
+
+Current built-in tasks include:
+
+- `sound_event_detection`
+- `sound_event_classification`
+- `top_class_detection`
+- `clip_detection`
+- `clip_classification`
+
+## Shared task controls
+
+Common task-level controls include:
+
+- `prefix`
+- `ignore_start_end`
+
+Sound-event-style tasks also support:
+
+- `affinity`
+- `affinity_threshold`
+- `strict_match`
+
+## Default behavior
+
+The default evaluation config starts with:
+
+- sound event detection,
+- sound event classification.
+
+## Related pages
+
+- Choose and configure evaluation tasks: {doc}`../how_to/choose-and-configure-evaluation-tasks`
+- Evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
+- Evaluate CLI reference: {doc}`cli/evaluate`
--- a/docs/source/reference/index.md
+++ b/docs/source/reference/index.md
@ -0,0 +1,28 @@
+# Reference documentation
+
+Reference pages are the detailed lookup pages.
+
+Use this section when you need exact command options, setting names, output
+details, or Python API entries.
+
+```{toctree}
+:maxdepth: 1
+
+cli/index
+api
+detections
+model-config
+training-config
+logging-config
+inference-config
+evaluation-config
+outputs-config
+output-formats
+output-transforms
+data-sources
+preprocessing-config
+postprocess-config
+targets-config-workflow
+configs
+targets
+```
--- a/docs/source/reference/inference-config.md
+++ b/docs/source/reference/inference-config.md
@ -0,0 +1,41 @@
+# Inference config reference
+
+`InferenceConfig` controls how files are clipped and batched during prediction-time workflows.
+
+Defined in `batdetect2.inference.config`.
+
+## Top-level fields
+
+- `loader`
+  - data-loader settings for inference.
+- `clipping`
+  - controls how recordings are split into clips before batching.
+
+## `loader`
+
+Current built-in loader field:
+
+- `batch_size` (int, default `8`)
+
+## `clipping`
+
+Fields:
+
+- `enabled` (bool)
+- `duration` (float, seconds)
+- `overlap` (float, seconds)
+- `max_empty` (float)
+- `discard_empty` (bool)
+
+## When to override this config
+
+Override `InferenceConfig` when:
+
+- long recordings need different clipping behavior,
+- you want to tune batch size for your hardware,
+- you need reproducible prediction settings across runs.
+
+## Related pages
+
+- Tune inference clipping: {doc}`../how_to/tune-inference-clipping`
+- Process CLI reference: {doc}`cli/predict`
--- a/docs/source/reference/logging-config.md
+++ b/docs/source/reference/logging-config.md
@ -0,0 +1,46 @@
+# Logging config reference
+
+`AppLoggingConfig` controls which logger backend BatDetect2 uses for training,
+evaluation, and inference.
+
+Defined in `batdetect2.logging`.
+
+## Top-level fields
+
+- `train`
+  - logger config for training runs.
+- `evaluation`
+  - logger config for evaluation runs.
+- `inference`
+  - logger config for inference runs.
+
+## Built-in logger backends
+
+Current built-in logger backends are:
+
+- `csv`
+- `tensorboard`
+- `mlflow`
+- `dvclive`
+
+## Default behaviour
+
+By default:
+
+- training uses `csv`,
+- evaluation uses `csv`,
+- inference uses `csv`.
+
+With the CSV logger, training writes a `metrics.csv` file in the log folder.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/logging.yaml`.
+
+## Related pages
+
+- Train command reference:
+  {doc}`cli/train`
+- Evaluate command reference:
+  {doc}`cli/evaluate`
+- Run inference on a folder:
+  {doc}`../tutorials/run-inference-on-folder`
--- a/docs/source/reference/model-config.md
+++ b/docs/source/reference/model-config.md
@ -0,0 +1,37 @@
+# Model config reference
+
+`ModelConfig` defines the model stack used for training or fresh model
+construction.
+
+Defined in `batdetect2.models`.
+
+## Top-level fields
+
+- `samplerate`
+  - expected input sample rate.
+- `architecture`
+  - backbone network settings.
+- `preprocess`
+  - spectrogram preprocessing settings.
+- `postprocess`
+  - decoding and output filtering settings.
+
+## What this config controls
+
+Use `ModelConfig` when you want to change things like:
+
+- the backbone architecture,
+- the spectrogram settings used by the model,
+- postprocessing settings stored with the model.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/model.yaml`.
+
+## Related pages
+
+- Preprocessing config:
+  {doc}`preprocessing-config`
+- Postprocess config:
+  {doc}`postprocess-config`
+- Train command reference:
+  {doc}`cli/train`
--- a/docs/source/reference/output-formats.md
+++ b/docs/source/reference/output-formats.md
@ -0,0 +1,75 @@
+# Output formats reference
+
+BatDetect2 currently supports several built-in output formatters.
+
+## `raw`
+
+Defined by `RawOutputConfig`.
+
+Best for rich structured outputs and round-tripping.
+
+Key fields:
+
+- `include_class_scores`
+- `include_features`
+- `include_geometry`
+
+Writes one NetCDF `.nc` file per clip.
+
+## `parquet`
+
+Defined by `ParquetOutputConfig`.
+
+Best for tabular analysis workflows.
+
+Key fields:
+
+- `include_class_scores`
+- `include_features`
+- `include_geometry`
+
+Writes a parquet table, typically `predictions.parquet`.
+
+## `soundevent`
+
+Defined by `SoundEventOutputConfig`.
+
+Best when you want a `PredictionSet` JSON workflow.
+
+Key fields:
+
+- `top_k`
+- `min_score`
+
+Writes a prediction-set JSON file.
+
+## `batdetect2`
+
+Defined by `BatDetect2OutputConfig`.
+
+This is the legacy-compatible BatDetect2 formatter.
+
+Key fields:
+
+- `event_name`
+- `annotation_note`
+- `write_detection_csv`
+- `write_cnn_features_csv`
+- `save_if_empty`
+- `preserve_audio_tree`
+- `include_file_path`
+
+By default it writes one `.json` file and one detection `.csv` file per
+recording, preserving the input audio directory layout under the output root.
+
+It can also write legacy `_cnn_features.csv` sidecars when
+`write_cnn_features_csv` is enabled.
+
+## Related pages
+
+- Outputs config:
+  {doc}`outputs-config`
+- Save predictions in different output formats:
+  {doc}`../how_to/save-predictions-in-different-output-formats`
+- Understanding formatted outputs:
+  {doc}`../explanation/interpreting-formatted-outputs`
--- a/docs/source/reference/output-transforms.md
+++ b/docs/source/reference/output-transforms.md
@ -0,0 +1,37 @@
+# Output transforms reference
+
+Output transforms operate after decoding and before formatting.
+
+Defined in `batdetect2.outputs.transforms`.
+
+## Top-level config
+
+`OutputTransformConfig` contains:
+
+- `detection_transforms`
+- `clip_transforms`
+
+## Detection transforms
+
+Detection transforms operate on one detection at a time.
+
+Built-in examples include:
+
+- filtering by frequency,
+- filtering by duration.
+
+These transforms can remove a detection entirely when it fails the filter.
+
+## Clip transforms
+
+Clip transforms operate on the list of detections for one clip.
+
+Built-in examples include:
+
+- removing detections above Nyquist,
+- removing detections at clip edges.
+
+## Related pages
+
+- Outputs config: {doc}`outputs-config`
+- Understanding outputs: {doc}`../explanation/interpreting-formatted-outputs`
--- a/docs/source/reference/outputs-config.md
+++ b/docs/source/reference/outputs-config.md
@ -0,0 +1,41 @@
+# Outputs config reference
+
+`OutputsConfig` controls two layers of prediction handling:
+
+- how detections are transformed before formatting,
+- how formatted outputs are written to disk.
+
+Defined in `batdetect2.outputs.config`.
+
+## Fields
+
+- `format`
+  - output format config.
+- `transform`
+  - output transform config.
+
+## Mental model
+
+The output workflow is:
+
+1. model outputs are decoded into detections,
+2. optional output transforms filter or adjust those detections,
+3. a formatter serializes them to disk.
+
+## Default behavior
+
+By default, the current stack uses the raw output formatter unless you override
+it.
+
+For CLI processing commands, omitting `--format` now leaves format selection to
+the loaded outputs config.
+If no outputs config is provided, the CLI still uses its command defaults.
+
+## Related pages
+
+- Output formats:
+  {doc}`output-formats`
+- Output transforms:
+  {doc}`output-transforms`
+- Save predictions in different output formats:
+  {doc}`../how_to/save-predictions-in-different-output-formats`
--- a/docs/source/reference/postprocess-config.md
+++ b/docs/source/reference/postprocess-config.md
@ -0,0 +1,31 @@
+# Postprocess config reference
+
+`PostprocessConfig` controls how raw detector outputs are converted into final
+detections.
+
+Defined in `batdetect2.postprocess.config`.
+
+## Fields
+
+- `nms_kernel_size` (int > 0)
+  - neighborhood size for non-maximum suppression.
+- `detection_threshold` (float >= 0)
+  - minimum detection score to keep a candidate event.
+- `classification_threshold` (float >= 0)
+  - minimum class score used when assigning class tags.
+- `top_k_per_sec` (int > 0)
+  - maximum detection density per second.
+
+## Defaults
+
+- `detection_threshold`: `0.01`
+- `classification_threshold`: `0.1`
+- `top_k_per_sec`: `100`
+
+`nms_kernel_size` defaults to the library constant used by the NMS module.
+
+## Related pages
+
+- Threshold behaviour: {doc}`../explanation/postprocessing-and-thresholds`
+- Threshold tuning workflow: {doc}`../how_to/tune-detection-threshold`
+- CLI process options: {doc}`cli/predict`
--- a/docs/source/reference/preprocessing-config.md
+++ b/docs/source/reference/preprocessing-config.md
@ -0,0 +1,61 @@
+# Preprocessing config reference
+
+This page summarizes preprocessing-related config objects used by BatDetect2.
+
+## Audio loader config (`AudioConfig`)
+
+Defined in `batdetect2.audio.loader`.
+
+Fields:
+
+- `samplerate` (int): target audio sample rate in Hz.
+- `resample.enabled` (bool): whether to resample loaded audio.
+- `resample.method` (`poly` or `fourier`): resampling method.
+
+## Model preprocessing config (`PreprocessingConfig`)
+
+Defined in `batdetect2.preprocess.config`.
+
+Top-level fields:
+
+- `audio_transforms`: ordered waveform transforms.
+- `stft`: STFT parameters.
+- `frequencies`: spectrogram frequency range.
+- `spectrogram_transforms`: ordered spectrogram transforms.
+- `size`: final resize settings.
+
+### `audio_transforms` built-ins
+
+- `center_audio`
+- `scale_audio`
+- `fix_duration` (`duration` in seconds)
+
+### `stft` fields
+
+- `window_duration`
+- `window_overlap`
+- `window_fn`
+
+### `frequencies` fields
+
+- `min_freq`
+- `max_freq`
+
+### `spectrogram_transforms` built-ins
+
+- `pcen`
+- `scale_amplitude` (`scale: db|power`)
+- `spectral_mean_subtraction`
+- `peak_normalize`
+
+### `size` fields
+
+- `height`
+- `resize_factor`
+
+## Related pages
+
+- Audio preprocessing how-to: {doc}`../how_to/configure-audio-preprocessing`
+- Spectrogram preprocessing how-to:
+  {doc}`../how_to/configure-spectrogram-preprocessing`
+- Why consistency matters: {doc}`../explanation/preprocessing-consistency`
--- a/docs/source/reference/targets-config-workflow.md
+++ b/docs/source/reference/targets-config-workflow.md
@ -0,0 +1,67 @@
+# Targets config workflow reference
+
+This page summarizes the target-definition configuration used by BatDetect2.
+
+## `TargetConfig`
+
+Defined in `batdetect2.targets.config`.
+
+Fields:
+
+- `detection_target`: one `TargetClassConfig` defining detection eligibility.
+- `classification_targets`: list of `TargetClassConfig` entries for class
+  encoding/decoding.
+- `roi`: ROI mapping config with `default` mapper and optional per-class
+  `overrides`.
+
+## `TargetClassConfig`
+
+Defined in `batdetect2.targets.classes`.
+
+Fields:
+
+- `name`: class label name.
+- `tags`: tag list used for matching (shortcut for `match_if`).
+- `match_if`: explicit condition config (`match_if` is accepted as alias).
+- `assign_tags`: tags used when decoding this class.
+
+`tags` and `match_if` are mutually exclusive.
+
+## Supported condition config types
+
+Built from `batdetect2.data.conditions`.
+
+- `has_tag`
+- `has_all_tags`
+- `has_any_tag`
+- `duration`
+- `frequency`
+- `all_of`
+- `any_of`
+- `not`
+
+## ROI mapper config
+
+`roi.default` and each `roi.overrides.<class_name>` entry support built-in
+mappers including:
+
+- `anchor_bbox`
+- `peak_energy_bbox`
+
+Key `anchor_bbox` fields:
+
+- `anchor`
+- `time_scale`
+- `frequency_scale`
+
+Top-level ROI mapping shape:
+
+- `default`: fallback mapper used for all classes.
+- `overrides`: optional mapping from class name to mapper config.
+
+## Related pages
+
+- Detection target setup: {doc}`../how_to/configure-target-definitions`
+- Class setup: {doc}`../how_to/define-target-classes`
+- ROI setup: {doc}`../how_to/configure-roi-mapping`
+- Concept overview: {doc}`../explanation/target-encoding-and-decoding`
--- a/docs/source/reference/targets.rst
+++ b/docs/source/reference/targets.rst
@ -0,0 +1,5 @@
+Targets reference
+=================
+
+.. automodule:: batdetect2.targets
+   :members:
--- a/docs/source/reference/training-config.md
+++ b/docs/source/reference/training-config.md
@ -0,0 +1,50 @@
+# Training config reference
+
+`TrainingConfig` controls the training loop, optimisation, data loading, losses,
+and validation tasks.
+
+Defined in `batdetect2.train.config`.
+
+## Top-level fields
+
+- `train_loader`
+  - training data loading and clipping settings.
+- `val_loader`
+  - validation data loading and clipping settings.
+- `optimizer`
+  - optimiser type and learning rate settings.
+- `scheduler`
+  - learning-rate schedule settings.
+- `loss`
+  - detection, classification, and size loss settings.
+- `trainer`
+  - PyTorch Lightning trainer settings such as `max_epochs`.
+- `labels`
+  - target label generation settings.
+- `validation`
+  - evaluation tasks used during validation.
+- `checkpoints`
+  - checkpoint saving settings.
+
+## What this config controls
+
+Use `TrainingConfig` when you want to change things like:
+
+- batch size,
+- augmentation,
+- optimiser and scheduler settings,
+- number of epochs,
+- validation frequency,
+- checkpoint behaviour.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/training.yaml`.
+
+## Related pages
+
+- Evaluation config:
+  {doc}`evaluation-config`
+- Train command reference:
+  {doc}`cli/train`
+- Fine-tune from a checkpoint:
+  {doc}`../how_to/fine-tune-from-a-checkpoint`
--- a/docs/source/tutorials/evaluate-on-a-test-set.md
+++ b/docs/source/tutorials/evaluate-on-a-test-set.md
@ -0,0 +1,138 @@
+# Evaluate on a test set
+
+This tutorial shows how to evaluate a trained checkpoint on a held-out dataset
+and inspect the output metrics.
+
+Use it when you want to measure how a model performs on labelled data that was
+kept aside for testing.
+
+## Before you start
+
+You need:
+
+- a test dataset config,
+- a trained checkpoint or model alias.
+
+```{note}
+This page is for model evaluation.
+If you only want to run BatDetect2 on recordings, start with
+{doc}`run-inference-on-folder` instead.
+```
+
+## What you will do
+
+By the end of this tutorial you will have:
+
+- prepared a test dataset config,
+- run `batdetect2 evaluate`,
+- written evaluation metrics and result files,
+- identified the next pages for model choice and evaluation configuration.
+
+## 1. Create a test dataset config
+
+Evaluation needs a dataset config that points to the labelled data you want to
+use for testing.
+
+This is the same kind of dataset config used for training.
+It explicitly declares which data sources BatDetect2 should read, including the
+audio files and their annotations.
+
+For an example, see `example_data/dataset.yaml`.
+
+If you need help creating the dataset config, follow the dataset section in
+{doc}`train-a-custom-model`.
+For more detail on dataset source formats, see {doc}`../reference/data-sources`.
+
+Use a dataset that was not used for training or tuning.
+
+## 2. Run evaluation
+
+For a simple run, use:
+
+```bash
+batdetect2 evaluate \
+  path/to/test_dataset.yaml
+```
+
+If you do not pass `--model`, BatDetect2 uses the built-in default UK model.
+If you want to choose a different checkpoint, alias, or Hugging Face model, see
+{doc}`../how_to/choose-a-model`.
+
+If you want to save the results somewhere else, add `--output-dir`:
+
+```bash
+batdetect2 evaluate \
+  path/to/test_dataset.yaml \
+  --model path/to/model.ckpt \
+  --output-dir path/to/eval_outputs
+```
+
+This command loads the model, runs prediction on the test dataset, applies the
+evaluation tasks, and writes the results to the output directory.
+
+## 3. Check the output files
+
+By default, the CLI writes evaluation outputs to `outputs/evaluation`.
+
+With the default evaluation config, a run will usually create a folder like
+this:
+
+```text
+outputs/evaluation/
+  version_0/
+    metrics.csv
+    hparams.yaml
+```
+
+The most important file is `metrics.csv`.
+It contains the metric values computed for the evaluation run.
+
+A file like this might start like:
+
+```csv
+classification/average_precision/barbar,classification/average_precision/cneser,...,detection/average_precision
+0.898695170879364,0.9408193826675415,...,0.851219117641449
+```
+
+The exact columns depend on the evaluation tasks you run.
+
+The `hparams.yaml` file records the config used for the evaluation run.
+
+## 4. Expect extra plots and files when configs enable them
+
+You may also see extra outputs such as plots and saved predictions.
+
+For example, if you run evaluation with `example_data/configs/evaluation.yaml`,
+you should expect a richer output folder with:
+
+- `metrics.csv`
+- `hparams.yaml`
+- a `plots/` directory
+- a `predictions/` directory
+
+That config enables more evaluation tasks and plots than the default setup.
+
+So, depending on your evaluation config, you may see files such as:
+
+- precision-recall plots,
+- ROC curves,
+- confusion matrices,
+- example detection plots,
+- saved prediction files.
+
+If you want to control which tasks run and which plots are generated, see
+{doc}`../reference/evaluation-config` and
+{doc}`../how_to/choose-and-configure-evaluation-tasks`.
+
+## Common next steps
+
+- Choose a different model:
+  {doc}`../how_to/choose-a-model`
+- Configure evaluation tasks:
+  {doc}`../how_to/choose-and-configure-evaluation-tasks`
+- Interpret evaluation artifacts:
+  {doc}`../how_to/interpret-evaluation-outputs`
+- Learn the evaluation concepts:
+  {doc}`../explanation/evaluation-concepts-and-matching`
+- Check full evaluate options:
+  {doc}`../reference/cli/evaluate`
--- a/docs/source/tutorials/index.md
+++ b/docs/source/tutorials/index.md
@ -0,0 +1,20 @@
+# Tutorials
+
+Welcome to the `batdetect2` tutorials.
+
+These tutorials walk you step by step through the most common use cases and
+workflows.
+They follow the simplest route and are a good place to start with `batdetect2`.
+
+Use {doc}`../how_to/index` for focused guides on specific tasks, or
+{doc}`../explanation/index` if you want to understand the concepts in more
+depth.
+
+```{toctree}
+:maxdepth: 1
+
+run-inference-on-folder
+train-a-custom-model
+evaluate-on-a-test-set
+integrate-with-a-python-pipeline
+```
--- a/docs/source/tutorials/integrate-with-a-python-pipeline.md
+++ b/docs/source/tutorials/integrate-with-a-python-pipeline.md
@ -0,0 +1,160 @@
+# Integrate with a Python pipeline
+
+This tutorial shows a simple Python workflow for loading audio, running BatDetect2, and inspecting the detections.
+
+Use it when you want to work directly in Python rather than through the CLI.
+
+If you mainly want to run the model on recordings, start with {doc}`run-inference-on-folder` instead.
+
+## Before you start
+
+You need:
+
+- BatDetect2 installed in your Python environment,
+- at least one input audio file.
+
+## What you will do
+
+By the end of this tutorial you will have:
+
+- created a `BatDetect2API` object,
+- run inference on one file,
+- inspected detections, scores, and features,
+- used lower-level audio and spectrogram methods for more control,
+- identified the next API workflows for batch processing, training, fine-tuning, and evaluation.
+
+## 1. Create the API instance
+
+For a first run, use the built-in default UK model:
+
+```python
+from batdetect2 import BatDetect2API
+
+# If you don't specify a checkpoint the default model will be loaded
+api = BatDetect2API.from_checkpoint()
+```
+
+If you want to use a different checkpoint later, see {doc}`../how_to/choose-a-model`.
+
+## 2. Run inference on one file
+
+`process_file` is the simplest Python entry point when you want one prediction object per recording.
+
+```python
+from batdetect2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint()
+prediction = api.process_file("path/to/audio.wav")
+
+for detection in prediction.detections:
+    top_class = api.get_top_class_name(detection)
+    score = detection.detection_score
+    print(top_class, score)
+```
+
+## 3. Understand the prediction objects
+
+`prediction` is a `ClipDetections` object.
+See {doc}`../reference/detections` for the full reference.
+
+Very briefly, `ClipDetections` represents all detections for one processed clip or recording.
+It includes:
+
+- the clip metadata,
+- the list of detections for that clip.
+
+Each item in `prediction.detections` is a `Detection` object.
+
+Each `Detection` includes:
+
+- the time-frequency geometry of the event,
+- a detection score,
+- the class scores,
+- a feature vector.
+
+## 4. Inspect detection score and class scores
+
+The detection score and the class scores answer different questions.
+
+- `detection_score` is about whether the model thinks there is a call at that time-frequency location.
+- `class_scores` are about which class the model prefers for that detected event.
+
+So a detection can have a fairly strong detection score, but still have a more uncertain class ranking.
+
+```python
+for detection in prediction.detections:
+    print("top class:", api.get_top_class_name(detection))
+    print("detection score:", detection.detection_score)
+    print("class scores:")
+    for class_name, score in api.get_class_scores(detection):
+        print(f"  {class_name}: {score:.3f}")
+```
+
+If you want more detail on class-score inspection, see {doc}`../how_to/inspect-class-scores-in-python`.
+
+## 5. Inspect the detection features
+
+Each detection also carries a `features` vector.
+
+These are internal model features attached to the detection.
+They can be useful for things like:
+
+- exploratory visualisation,
+- clustering similar detections,
+- comparing detections across files,
+- building downstream analysis pipelines.
+
+They are useful descriptors, but they are not direct ecological labels by themselves.
+
+For more detail, see {doc}`../how_to/inspect-detection-features-in-python` and {doc}`../explanation/extracted-features-and-embeddings`.
+
+## 6. Use lower-level audio and spectrogram methods for more control
+
+If you want finer control over what gets processed and when, the API also lets you work step by step.
+
+For example, you can load the audio yourself, inspect the waveform length, generate the spectrogram, and then run detection on that spectrogram:
+
+```python
+from batdetect2 import BatDetect2API
+
+api = BatDetect2API.from_checkpoint()
+
+audio = api.load_audio("path/to/audio.wav")
+print(audio.shape)
+
+spec = api.generate_spectrogram(audio)
+print(spec.shape)
+
+detections = api.process_spectrogram(spec)
+print(len(detections))
+```
+
+This is helpful when you want to:
+
+- inspect the loaded audio before inference,
+- inspect the generated spectrogram,
+- control which audio segment is processed,
+- run only part of the pipeline in custom code.
+
+You can also call `process_audio(audio)` directly if you already have the waveform array in memory.
+
+## 7. Use the wider API workflows
+
+The Python API is not only for single-file inference.
+It also exposes methods for batch processing, training, evaluation, and fine-tuning.
+
+Examples:
+
+- `process_files(...)` for batch processing from Python,
+- `train(...)` for training,
+- `evaluate(...)` for evaluation,
+- `finetune(...)` for fine-tuning.
+
+Useful next pages:
+
+- Choose a different model: {doc}`../how_to/choose-a-model`
+- Run batch predictions: {doc}`../how_to/run-batch-predictions`
+- Train a custom model: {doc}`train-a-custom-model`
+- Evaluate on a test set: {doc}`evaluate-on-a-test-set`
+- Fine-tune from a checkpoint: {doc}`../how_to/fine-tune-from-a-checkpoint`
+- API reference: {doc}`../reference/api`
--- a/docs/source/tutorials/run-inference-on-folder.md
+++ b/docs/source/tutorials/run-inference-on-folder.md
@ -0,0 +1,217 @@
+# Run BatDetect2 on a folder of audio files
+
+This tutorial shows how to run BatDetect2 on a folder of recordings from the command line.
+
+Use it when you want a first pass over a folder of audio recordings and want to see what BatDetect2 finds.
+
+If you want to follow the tutorial exactly, you can use the example recordings that come with the repository.
+
+## Before you start
+
+You need:
+
+- BatDetect2 installed.
+- A folder containing supported audio files.
+- A place to save the results.
+
+If you have not installed BatDetect2 yet, start with {doc}`../getting_started`.
+
+## Optional: use the repository example files
+
+If you want to follow the steps with the same paths shown here, clone the repository and move into it:
+
+```bash
+git clone https://github.com/macaodha/batdetect2.git
+cd batdetect2
+```
+
+Then you can use the example paths shown in this tutorial from the repository root.
+
+## What you will do
+
+By the end of this tutorial you will have:
+
+- run `batdetect2 process directory`,
+- saved predictions to disk,
+- checked that BatDetect2 wrote the files you expected,
+- tried a second run with a higher detection threshold,
+- identified the next pages to use if you want to customise the run.
+
+## 1. Choose your input and output folders
+
+Pick:
+
+- the folder containing your audio files,
+- an output folder where BatDetect2 should save results.
+
+Example layout:
+
+```text
+project/
+  audio/
+    file_001.wav
+    file_002.wav
+  outputs/
+```
+
+If `outputs/` does not exist yet, that is fine.
+BatDetect2 can create it.
+
+If you are using the repository example files, your layout already looks like this:
+
+```text
+batdetect2/
+  example_data/
+    audio/
+      20170701_213954-MYOMYS-LR_0_0.5.wav
+      20180530_213516-EPTSER-LR_0_0.5.wav
+      20180627_215323-RHIFER-LR_0_0.5.wav
+```
+
+## 2. Run BatDetect2 on the folder
+
+For a first run, use the built-in default UK model:
+
+```bash
+batdetect2 process directory \
+  path/to/audio \
+  path/to/outputs
+```
+
+If you are using the repository example files, run:
+
+```bash
+batdetect2 process directory \
+  example_data/audio \
+  example_outputs/first_run
+```
+
+What this does:
+
+- looks for supported audio files in `path/to/audio`,
+- runs the model on each recording,
+- saves the results in `path/to/outputs`.
+
+You do not need to choose a model for this first run.
+If you do nothing, BatDetect2 uses the built-in default UK model.
+
+If you want to use a different model later, see {doc}`../how_to/choose-a-model`.
+
+## 3. Check the output files
+
+After the command finishes, look in your output folder.
+
+By default, the CLI writes predictions in the `batdetect2` output format.
+This is a JSON-based format used for BatDetect2-style outputs.
+
+With the default settings, you will usually see one `.json` file and one `_detections.csv` file per recording.
+
+For the repository example run, that means files like:
+
+```text
+example_outputs/first_run/
+  20170701_213954-MYOMYS-LR_0_0.5.wav.json
+  20170701_213954-MYOMYS-LR_0_0.5.wav_detections.csv
+  20180530_213516-EPTSER-LR_0_0.5.wav.json
+  20180530_213516-EPTSER-LR_0_0.5.wav_detections.csv
+  20180627_215323-RHIFER-LR_0_0.5.wav.json
+  20180627_215323-RHIFER-LR_0_0.5.wav_detections.csv
+```
+
+One of the JSON files will look roughly like this:
+
+```json
+{
+  "annotated": false,
+  "annotation": [
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.889,
+      "det_prob": 0.889,
+      "end_time": 0.0668,
+      "event": "Echolocation",
+      "high_freq": 84857,
+      "individual": "-1",
+      "low_freq": 67578,
+      "start_time": 0.0
+    }
+  ]
+}
+```
+
+Very briefly:
+
+- `annotated: false` means this is a prediction file, not a reviewed annotation file.
+- `annotation` holds the list of detections.
+- Each detection includes a predicted class, detection score, class score, time bounds, and frequency bounds.
+
+For more detail, see {doc}`../explanation/interpreting-formatted-outputs`.
+If you want to save results in another format, see {doc}`../how_to/save-predictions-in-different-output-formats`.
+
+## 4. Run the same folder with a higher threshold
+
+If you want, you can also run the same folder again with a higher detection threshold and save that run in a separate output folder.
+
+```bash
+batdetect2 process directory \
+    path/to/audio \
+    path/to/outputs_threshold_05 \
+    --detection-threshold 0.5
+```
+
+Concrete example:
+
+```bash
+batdetect2 process directory \
+    example_data/audio \
+    example_outputs/threshold_05 \
+    --detection-threshold 0.5
+```
+
+Keeping this in a separate folder makes it easy to compare runs later.
+
+## 5. Run the model on a list of recordings
+
+If you only want to process selected recordings, use `file_list`.
+The list file should contain one recording path per line.
+
+Example `audio_files.txt`:
+
+```text
+path/to/audio/file_001.wav
+path/to/audio/file_002.wav
+path/to/audio/file_010.wav
+```
+
+Repository example, saved as `example_data/audio_files.txt`:
+
+```text
+example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav
+example_data/audio/20180530_213516-EPTSER-LR_0_0.5.wav
+```
+
+Then run:
+
+```bash
+batdetect2 process file_list \
+    path/to/audio_files.txt \
+    path/to/selected_outputs
+```
+
+Concrete example:
+
+```bash
+batdetect2 process file_list \
+    example_data/audio_files.txt \
+    example_outputs/selected_outputs
+```
+
+This is useful when your recordings are spread across folders, or when you only want to run a chosen subset.
+
+## Common next steps
+
+- If your recordings are not all in one folder, or you want to compare input modes, see {doc}`../how_to/choose-an-inference-input-mode`.
+- If you want to save results in another format, see {doc}`../how_to/save-predictions-in-different-output-formats`.
+- If you want to choose a different model, see {doc}`../how_to/choose-a-model`.
+- If you already write code and want more control from Python, see {doc}`integrate-with-a-python-pipeline`.
+- If you want the full command reference, including `--model`, see {doc}`../reference/cli/predict`.
--- a/docs/source/tutorials/train-a-custom-model.md
+++ b/docs/source/tutorials/train-a-custom-model.md
@ -0,0 +1,208 @@
+# Train a custom model
+
+This tutorial walks through a first custom training run using your own annotations.
+
+Use it when you already have labelled recordings and want to train a model for your own data.
+
+## Before you start
+
+You need:
+
+- BatDetect2 installed.
+- Labelled recordings and annotations.
+
+```{note}
+This is not the page to start with if you only want to run the existing
+model on recordings.
+Use {doc}`run-inference-on-folder` for that.
+```
+
+## Optional: use the repository example files
+
+If you want to follow the steps with the same files shown here, clone the repository and move into it:
+
+```bash
+git clone https://github.com/macaodha/batdetect2.git
+cd batdetect2
+```
+
+## What you will do
+
+By the end of this tutorial you will have:
+
+- created a dataset config,
+- defined a targets config,
+- started a training run,
+- checked the checkpoint and log outputs,
+- identified the next pages for evaluation and customisation.
+
+## 1. Create a dataset config
+
+The dataset config explicitly declares what data you want to use for training.
+It is a YAML file.
+If YAML is new to you, see [Learn YAML in Y Minutes](https://learnxinyminutes.com/yaml/).
+
+In the dataset config, you list one or more data sources.
+Each source tells `batdetect2` where the audio recordings live and where the matching annotations are stored.
+
+BatDetect2 can read annotations from different source formats.
+In this example, we use the example data in the `batdetect2` format.
+
+Use `example_data/dataset.yaml` as a reference:
+
+```yaml
+name: example dataset
+description: Only for demonstration purposes
+sources:
+  - format: batdetect2
+    name: Example Data
+    description: Examples included for testing batdetect2
+    annotations_dir: example_data/anns
+    audio_dir: example_data/audio
+```
+
+For your own project, the main thing to change is the file paths.
+If you have several collections of recordings, you can add more than one source to the same dataset config.
+That lets you describe the full training data you want to use in one place.
+
+If you need more detail on dataset source formats, see {doc}`../reference/data-sources`.
+
+## 2. Define a targets config
+
+The targets config tells BatDetect2 how to turn your annotations into training targets.
+
+It defines two main things:
+
+- what should count as a detection,
+- which classes the model should learn to predict.
+
+In practice, this means the targets config maps the labels in your annotations to the detection and classification outputs used during training.
+
+Use `example_data/targets.yaml` as a reference:
+
+```yaml
+detection_target:
+  name: bat
+  match_if:
+    name: all_of
+    conditions:
+      - name: has_tag
+        tag: { key: event, value: Echolocation }
+      - name: not
+        condition:
+          name: has_tag
+          tag: { key: class, value: Unknown }
+  assign_tags:
+    - key: class
+      value: Bat
+
+classification_targets:
+  - name: myomys
+    tags:
+      - key: class
+        value: Myotis mystacinus
+  - name: pippip
+    tags:
+      - key: class
+        value: Pipistrellus pipistrellus
+```
+
+For your own project, update the matching rules and class definitions so they fit your labels.
+
+In this example:
+
+- `detection_target` says that echolocation calls should be treated as detections,
+- `classification_targets` define the classes the model should predict.
+
+It is worth taking a bit of time over this file, because your targets config decides what the model is actually being asked to learn.
+
+If you need help with that, see {doc}`../how_to/configure-target-definitions` and {doc}`../reference/targets-config-workflow`.
+
+## 3. Run a first training command
+
+For a first run, keep the command simple:
+
+```bash
+batdetect2 train \
+  path/to/train_dataset.yaml \
+  --val-dataset path/to/val_dataset.yaml \
+  --targets path/to/targets.yaml
+```
+
+If you are using the repository example files, run:
+
+```bash
+batdetect2 train \
+  example_data/dataset.yaml \
+  --val-dataset example_data/dataset.yaml \
+  --targets example_data/targets.yaml
+```
+
+This uses the same dataset for training and validation only to keep the example simple.
+For real training runs, you usually want separate training and validation datasets.
+
+The training command uses the built-in default model and training settings.
+If you want to change the model architecture later, see {doc}`../reference/model-config`.
+If you want to change optimiser settings, batch size, epochs, or checkpoint behaviour, see {doc}`../reference/training-config`.
+
+## 4. Check the training outputs
+
+After the run starts, `batdetect2` should write checkpoints and logs.
+
+By default, training logs are written with the CSV logger.
+That means you should see a log folder with a `metrics.csv` file.
+
+A typical layout looks like this:
+
+```text
+outputs/
+  checkpoints/
+    epoch=19-step=20.ckpt
+  logs/
+    version_0/
+      metrics.csv
+      hparams.yaml
+    training_artifacts/
+      train_dataset.yaml
+      val_dataset.yaml
+      targets.yaml
+      train_class_summary.csv
+      val_class_summary.csv
+```
+
+The checkpoint is the trained model you can use later for inference, evaluation, or sharing with someone else.
+
+The files in `training_artifacts/` record which datasets and targets were used for the run.
+The `hparams.yaml` file records the full training setup, including the configs used for the model, training, and other parts of the run.
+
+The `metrics.csv` file stores one row per validation epoch.
+It includes training losses as well as validation losses and metrics such as:
+
+```csv
+classification/mean_average_precision,detection/average_precision,epoch,total_loss/val
+0.10041624307632446,0.3697187900543213,0,4070.3515625
+0.11328697204589844,0.346899151802063,1,3941.6455078125
+0.1388484090566635,0.36171725392341614,2,3776.323974609375
+```
+
+You may also see class-specific metrics in extra columns.
+
+The more detailed metrics are computed from the validation set.
+If you do not provide `--val-dataset`, those validation metrics will not appear.
+
+Other logger backends are also supported, including TensorBoard, MLflow, and DVCLive.
+See {doc}`../reference/logging-config` if you want to change that.
+
+## Use the trained model
+
+You can now use the trained checkpoint in BatDetect2, or share it with someone else to use in their own runs.
+If you want to load it for inference or evaluation, see {doc}`../how_to/choose-a-model`.
+
+## Common next steps
+
+- Evaluate the trained checkpoint: {doc}`evaluate-on-a-test-set`
+- Fine-tune from a checkpoint: {doc}`../how_to/fine-tune-from-a-checkpoint`
+- Configure targets in more detail: {doc}`../how_to/configure-target-definitions`
+- Configure audio preprocessing: {doc}`../how_to/configure-audio-preprocessing`
+- Configure spectrogram preprocessing: {doc}`../how_to/configure-spectrogram-preprocessing`
+- Check full train options: {doc}`../reference/cli/train`
--- a/environment.yml
+++ b/environment.yml
@ -1,17 +0,0 @@
-name: batdetect2
-channels:
-  - defaults
-  - conda-forge
-  - pytorch
-  - nvidia
-dependencies:
-  - python==3.10
-  - matplotlib
-  - pandas
-  - scikit-learn
-  - numpy
-  - pytorch
-  - scipy
-  - torchvision
-  - librosa
-  - torchaudio
--- a/example_data/anns/20170701_213954-MYOMYS-LR_0_0.5.wav.json
+++ b/example_data/anns/20170701_213954-MYOMYS-LR_0_0.5.wav.json
@ -0,0 +1,177 @@
+{
+  "annotated": true,
+  "annotation": [
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.55,
+      "det_prob": 0.658,
+      "end_time": 0.028,
+      "event": "Echolocation",
+      "high_freq": 107492,
+      "individual": "-1",
+      "low_freq": 33203,
+      "start_time": 0.0225
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.679,
+      "det_prob": 0.742,
+      "end_time": 0.0583,
+      "event": "Echolocation",
+      "high_freq": 113192,
+      "individual": "-1",
+      "low_freq": 28046,
+      "start_time": 0.0525
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.488,
+      "det_prob": 0.585,
+      "end_time": 0.1211,
+      "event": "Echolocation",
+      "high_freq": 107008,
+      "individual": "-1",
+      "low_freq": 33203,
+      "start_time": 0.1155
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.46,
+      "det_prob": 0.503,
+      "end_time": 0.145,
+      "event": "Echolocation",
+      "high_freq": 59621,
+      "individual": "-1",
+      "low_freq": 48671,
+      "start_time": 0.1385
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.656,
+      "det_prob": 0.704,
+      "end_time": 0.1513,
+      "event": "Echolocation",
+      "high_freq": 113493,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.1445
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.549,
+      "det_prob": 0.63,
+      "end_time": 0.2076,
+      "event": "Echolocation",
+      "high_freq": 108573,
+      "individual": "-1",
+      "low_freq": 34062,
+      "start_time": 0.2025
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.503,
+      "det_prob": 0.528,
+      "end_time": 0.224,
+      "event": "Echolocation",
+      "high_freq": 57361,
+      "individual": "-1",
+      "low_freq": 48671,
+      "start_time": 0.2195
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.672,
+      "det_prob": 0.737,
+      "end_time": 0.2374,
+      "event": "Echolocation",
+      "high_freq": 116415,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.2315
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.65,
+      "det_prob": 0.736,
+      "end_time": 0.3058,
+      "event": "Echolocation",
+      "high_freq": 56624,
+      "individual": "-1",
+      "low_freq": 48671,
+      "start_time": 0.2995
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.687,
+      "det_prob": 0.724,
+      "end_time": 0.3312,
+      "event": "Echolocation",
+      "high_freq": 116522,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.3245
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.547,
+      "det_prob": 0.599,
+      "end_time": 0.3762,
+      "event": "Echolocation",
+      "high_freq": 108530,
+      "individual": "-1",
+      "low_freq": 34062,
+      "start_time": 0.3705
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.664,
+      "det_prob": 0.711,
+      "end_time": 0.4184,
+      "event": "Echolocation",
+      "high_freq": 115775,
+      "individual": "-1",
+      "low_freq": 28906,
+      "start_time": 0.4125
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.544,
+      "det_prob": 0.598,
+      "end_time": 0.4423,
+      "event": "Echolocation",
+      "high_freq": 104197,
+      "individual": "-1",
+      "low_freq": 36640,
+      "start_time": 0.4365
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.73,
+      "det_prob": 0.78,
+      "end_time": 0.4803,
+      "event": "Echolocation",
+      "high_freq": 58290,
+      "individual": "-1",
+      "low_freq": 48671,
+      "start_time": 0.4745
+    },
+    {
+      "class": "Myotis mystacinus",
+      "class_prob": 0.404,
+      "det_prob": 0.449,
+      "end_time": 0.4947,
+      "event": "Echolocation",
+      "high_freq": 111336,
+      "individual": "-1",
+      "low_freq": 36640,
+      "start_time": 0.4895
+    }
+  ],
+  "class_name": "Myotis mystacinus",
+  "duration": 0.5,
+  "id": "20170701_213954-MYOMYS-LR_0_0.5.wav",
+  "issues": false,
+  "notes": "Automatically generated. Example data do not assume correct!",
+  "time_exp": 1
+}
+
--- a/example_data/anns/20180530_213516-EPTSER-LR_0_0.5.wav.json
+++ b/example_data/anns/20180530_213516-EPTSER-LR_0_0.5.wav.json
@ -0,0 +1,231 @@
+{
+  "annotated": true,
+  "annotation": [
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.744,
+      "det_prob": 0.77,
+      "end_time": 0.0162,
+      "event": "Echolocation",
+      "high_freq": 65592,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.0085
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.453,
+      "det_prob": 0.459,
+      "end_time": 0.0255,
+      "event": "Echolocation",
+      "high_freq": 59730,
+      "individual": "-1",
+      "low_freq": 46093,
+      "start_time": 0.0205
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.668,
+      "det_prob": 0.68,
+      "end_time": 0.0499,
+      "event": "Echolocation",
+      "high_freq": 57080,
+      "individual": "-1",
+      "low_freq": 46953,
+      "start_time": 0.0445
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.729,
+      "det_prob": 0.739,
+      "end_time": 0.109,
+      "event": "Echolocation",
+      "high_freq": 62808,
+      "individual": "-1",
+      "low_freq": 44375,
+      "start_time": 0.1025
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.591,
+      "det_prob": 0.602,
+      "end_time": 0.1311,
+      "event": "Echolocation",
+      "high_freq": 56848,
+      "individual": "-1",
+      "low_freq": 46953,
+      "start_time": 0.1255
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.696,
+      "det_prob": 0.735,
+      "end_time": 0.1694,
+      "event": "Echolocation",
+      "high_freq": 67238,
+      "individual": "-1",
+      "low_freq": 28046,
+      "start_time": 0.1625
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.617,
+      "det_prob": 0.643,
+      "end_time": 0.2031,
+      "event": "Echolocation",
+      "high_freq": 57047,
+      "individual": "-1",
+      "low_freq": 46093,
+      "start_time": 0.1975
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.507,
+      "det_prob": 0.515,
+      "end_time": 0.2222,
+      "event": "Echolocation",
+      "high_freq": 58214,
+      "individual": "-1",
+      "low_freq": 47812,
+      "start_time": 0.2175
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.201,
+      "det_prob": 0.372,
+      "end_time": 0.2839,
+      "event": "Echolocation",
+      "high_freq": 55667,
+      "individual": "-1",
+      "low_freq": 33203,
+      "start_time": 0.2775
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.749,
+      "det_prob": 0.78,
+      "end_time": 0.2918,
+      "event": "Echolocation",
+      "high_freq": 60611,
+      "individual": "-1",
+      "low_freq": 45234,
+      "start_time": 0.2855
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.239,
+      "det_prob": 0.325,
+      "end_time": 0.3148,
+      "event": "Echolocation",
+      "high_freq": 54100,
+      "individual": "-1",
+      "low_freq": 30625,
+      "start_time": 0.3085
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.621,
+      "det_prob": 0.652,
+      "end_time": 0.3227,
+      "event": "Echolocation",
+      "high_freq": 63504,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.3155
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.32,
+      "det_prob": 0.414,
+      "end_time": 0.3546,
+      "event": "Echolocation",
+      "high_freq": 37589,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.3455
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.69,
+      "det_prob": 0.697,
+      "end_time": 0.3776,
+      "event": "Echolocation",
+      "high_freq": 57262,
+      "individual": "-1",
+      "low_freq": 46093,
+      "start_time": 0.3735
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.34,
+      "det_prob": 0.415,
+      "end_time": 0.4069,
+      "event": "Echolocation",
+      "high_freq": 52025,
+      "individual": "-1",
+      "low_freq": 31484,
+      "start_time": 0.4005
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.386,
+      "det_prob": 0.445,
+      "end_time": 0.4178,
+      "event": "Echolocation",
+      "high_freq": 53951,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.4115
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.393,
+      "det_prob": 0.517,
+      "end_time": 0.4359,
+      "event": "Echolocation",
+      "high_freq": 51724,
+      "individual": "-1",
+      "low_freq": 30625,
+      "start_time": 0.4305
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.332,
+      "det_prob": 0.396,
+      "end_time": 0.4502,
+      "event": "Echolocation",
+      "high_freq": 58310,
+      "individual": "-1",
+      "low_freq": 27187,
+      "start_time": 0.4435
+    },
+    {
+      "class": "Pipistrellus pipistrellus",
+      "class_prob": 0.45,
+      "det_prob": 0.456,
+      "end_time": 0.4638,
+      "event": "Echolocation",
+      "high_freq": 55714,
+      "individual": "-1",
+      "low_freq": 46093,
+      "start_time": 0.4575
+    },
+    {
+      "class": "Eptesicus serotinus",
+      "class_prob": 0.719,
+      "det_prob": 0.766,
+      "end_time": 0.4824,
+      "event": "Echolocation",
+      "high_freq": 66101,
+      "individual": "-1",
+      "low_freq": 28046,
+      "start_time": 0.4755
+    }
+  ],
+  "class_name": "Pipistrellus pipistrellus",
+  "duration": 0.5,
+  "id": "20180530_213516-EPTSER-LR_0_0.5.wav",
+  "issues": false,
+  "notes": "Automatically generated. Example data do not assume correct!",
+  "time_exp": 1
+}
--- a/example_data/anns/20180627_215323-RHIFER-LR_0_0.5.wav.json
+++ b/example_data/anns/20180627_215323-RHIFER-LR_0_0.5.wav.json
@ -0,0 +1,111 @@
+{
+  "annotated": true,
+  "annotation": [
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.407,
+      "det_prob": 0.407,
+      "end_time": 0.066,
+      "event": "Echolocation",
+      "high_freq": 84254,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.0245
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.759,
+      "det_prob": 0.76,
+      "end_time": 0.1576,
+      "event": "Echolocation",
+      "high_freq": 84048,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.0955
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.754,
+      "det_prob": 0.755,
+      "end_time": 0.269,
+      "event": "Echolocation",
+      "high_freq": 83768,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.2095
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.495,
+      "det_prob": 0.495,
+      "end_time": 0.2869,
+      "event": "Echolocation",
+      "high_freq": 84055,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.2425
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.73,
+      "det_prob": 0.73,
+      "end_time": 0.3631,
+      "event": "Echolocation",
+      "high_freq": 84280,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.3055
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.648,
+      "det_prob": 0.649,
+      "end_time": 0.3798,
+      "event": "Echolocation",
+      "high_freq": 83030,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.3215
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.678,
+      "det_prob": 0.678,
+      "end_time": 0.4611,
+      "event": "Echolocation",
+      "high_freq": 84020,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.4065
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.717,
+      "det_prob": 0.718,
+      "end_time": 0.4987,
+      "event": "Echolocation",
+      "high_freq": 83603,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.4365
+    },
+    {
+      "class": "Rhinolophus ferrumequinum",
+      "class_prob": 0.662,
+      "det_prob": 0.662,
+      "end_time": 0.5503,
+      "event": "Echolocation",
+      "high_freq": 83710,
+      "individual": "-1",
+      "low_freq": 68437,
+      "start_time": 0.4975
+    }
+  ],
+  "class_name": "Rhinolophus ferrumequinum",
+  "duration": 0.5,
+  "id": "20180627_215323-RHIFER-LR_0_0.5.wav",
+  "issues": false,
+  "notes": "Automatically generated. Example data do not assume correct!",
+  "time_exp": 1
+}
+
--- a/example_data/audio_files.txt
+++ b/example_data/audio_files.txt
@ -0,0 +1,2 @@
+example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav
+example_data/audio/20180530_213516-EPTSER-LR_0_0.5.wav
--- a/example_data/configs/audio.yaml
+++ b/example_data/configs/audio.yaml
@ -0,0 +1,4 @@
+samplerate: 256000
+resample:
+  enabled: true
+  method: poly
--- a/example_data/configs/evaluation.yaml
+++ b/example_data/configs/evaluation.yaml
@ -0,0 +1,37 @@
+tasks:
+  - name: sound_event_detection
+    metrics:
+      - name: average_precision
+      - name: roc_auc
+    plots:
+      - name: pr_curve
+      - name: score_distribution
+      - name: example_detection
+  - name: sound_event_classification
+    metrics:
+      - name: average_precision
+      - name: roc_auc
+    plots:
+      - name: pr_curve
+  - name: top_class_detection
+    metrics:
+      - name: average_precision
+    plots:
+      - name: pr_curve
+      - name: confusion_matrix
+      - name: example_classification
+  - name: clip_detection
+    metrics:
+      - name: average_precision
+      - name: roc_auc
+    plots:
+      - name: pr_curve
+      - name: roc_curve
+      - name: score_distribution
+  - name: clip_classification
+    metrics:
+      - name: average_precision
+      - name: roc_auc
+    plots:
+      - name: pr_curve
+      - name: roc_curve
--- a/example_data/configs/inference.yaml
+++ b/example_data/configs/inference.yaml
@ -0,0 +1,9 @@
+loader:
+  batch_size: 8
+
+clipping:
+  enabled: true
+  duration: 0.5
+  overlap: 0.0
+  max_empty: 0.0
+  discard_empty: true
--- a/example_data/configs/logging.yaml
+++ b/example_data/configs/logging.yaml
@ -0,0 +1,2 @@
+train:
+  name: csv
--- a/example_data/configs/model.yaml
+++ b/example_data/configs/model.yaml
@ -0,0 +1,59 @@
+samplerate: 256000
+
+preprocess:
+  stft:
+    window_duration: 0.002
+    window_overlap: 0.75
+    window_fn: hann
+  frequencies:
+    max_freq: 120000
+    min_freq: 10000
+  size:
+    height: 128
+    resize_factor: 0.5
+  spectrogram_transforms:
+    - name: pcen
+      time_constant: 0.1
+      gain: 0.98
+      bias: 2
+      power: 0.5
+    - name: spectral_mean_subtraction
+
+architecture:
+  name: UNetBackbone
+  input_height: 128
+  in_channels: 1
+  encoder:
+    layers:
+      - name: FreqCoordConvDown
+        out_channels: 32
+      - name: FreqCoordConvDown
+        out_channels: 64
+      - name: LayerGroup
+        layers:
+          - name: FreqCoordConvDown
+            out_channels: 128
+          - name: ConvBlock
+            out_channels: 256
+  bottleneck:
+    channels: 256
+    layers:
+      - name: SelfAttention
+        attention_channels: 256
+  decoder:
+    layers:
+      - name: FreqCoordConvUp
+        out_channels: 64
+      - name: FreqCoordConvUp
+        out_channels: 32
+      - name: LayerGroup
+        layers:
+          - name: FreqCoordConvUp
+            out_channels: 32
+          - name: ConvBlock
+            out_channels: 32
+
+postprocess:
+  nms_kernel_size: 9
+  detection_threshold: 0.01
+  top_k_per_sec: 200
--- a/example_data/configs/outputs.yaml
+++ b/example_data/configs/outputs.yaml
@ -0,0 +1,9 @@
+format:
+  name: raw
+  include_class_scores: true
+  include_features: true
+  include_geometry: true
+
+transform:
+  detection_transforms: []
+  clip_transforms: []
--- a/example_data/configs/training.yaml
+++ b/example_data/configs/training.yaml
@ -0,0 +1,79 @@
+optimizer:
+  name: adam
+  learning_rate: 0.001
+
+scheduler:
+  name: cosine_annealing
+  t_max: 100
+
+labels:
+  sigma: 3
+
+trainer:
+  max_epochs: 10
+  check_val_every_n_epoch: 5
+
+train_loader:
+  batch_size: 8
+  shuffle: true
+
+  clipping_strategy:
+    name: random_subclip
+    duration: 0.256
+
+  augmentations:
+    enabled: true
+    audio:
+      - name: mix_audio
+        probability: 0.2
+        min_weight: 0.3
+        max_weight: 0.7
+      - name: add_echo
+        probability: 0.2
+        max_delay: 0.005
+        min_weight: 0.0
+        max_weight: 1.0
+    spectrogram:
+      - name: scale_volume
+        probability: 0.2
+        min_scaling: 0.0
+        max_scaling: 2.0
+      - name: warp
+        probability: 0.2
+        delta: 0.04
+      - name: mask_time
+        probability: 0.2
+        max_perc: 0.05
+        max_masks: 3
+      - name: mask_freq
+        probability: 0.2
+        max_perc: 0.10
+        max_masks: 3
+
+val_loader:
+  clipping_strategy:
+    name: whole_audio_padded
+    chunk_size: 0.256
+
+loss:
+  detection:
+    weight: 1.0
+    focal:
+      beta: 4
+      alpha: 2
+  classification:
+    weight: 2.0
+    focal:
+      beta: 4
+      alpha: 2
+  size:
+    weight: 0.1
+
+validation:
+  tasks:
+    - name: sound_event_detection
+      metrics:
+        - name: average_precision
+    - name: sound_event_classification
+      metrics:
+        - name: average_precision
--- a/example_data/dataset.yaml
+++ b/example_data/dataset.yaml
@ -0,0 +1,8 @@
+name: example dataset
+description: Only for demonstration purposes
+sources:
+  - format: batdetect2
+    name: Example Data
+    description: Examples included for testing batdetect2
+    annotations_dir: example_data/anns
+    audio_dir: example_data/audio
--- a/Show More
+++ b/Show More