Start cli docs

This commit is contained in:
mbsantiago 2026-03-28 11:28:52 +00:00
parent 9ec2f1a107
commit 7277151f33
21 changed files with 446 additions and 17 deletions

View File

@ -19,6 +19,7 @@ extensions = [
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"sphinxcontrib.autodoc_pydantic",
"sphinx_click",
"numpydoc",
"myst_parser",
"sphinx_autodoc_typehints",
@ -38,9 +39,19 @@ source_suffix = {
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_theme_options = {
"home_page_in_toc": True,
}
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"click": ("https://click.palletsprojects.com/en/stable/", None),
"librosa": ("https://librosa.org/doc/latest/", None),
"lightning": ("https://lightning.ai/docs/pytorch/stable/", None),
"loguru": ("https://loguru.readthedocs.io/en/stable/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"omegaconf": ("https://omegaconf.readthedocs.io/en/latest/", None),
"pytorch": ("https://pytorch.org/docs/stable/", None),
"soundevent": ("https://mbsantiago.github.io/soundevent/", None),
"pydantic": ("https://docs.pydantic.dev/latest/", None),
"xarray": ("https://docs.xarray.dev/en/stable/", None),

View File

@ -0,0 +1,135 @@
# Documentation Architecture and Migration Plan (Phase 0)
This page defines the Phase 0 documentation architecture and inventory for
reorganizing `batdetect2` documentation using the Diataxis framework.
## Scope and goals
Phase 0 focuses on architecture and prioritization only. It does not include
writing the new documentation itself; that happens in later phases.
Primary goals:
1. Define a target docs architecture by Diataxis type.
2. Map current pages to target documentation types.
3. Identify what to keep, split, rewrite, or deprecate.
4. Set priorities for implementation phases.
## Audiences
Two primary audiences are in scope.
1. Ecologists who prefer minimal coding, focused on practical workflows:
run inference, inspect outputs, and possibly train with custom data.
2. Ecologists or bioacousticians who are Python-savvy and want to customize
workflows, training, and analysis.
## Target information architecture
The target architecture uses four top-level documentation sections.
1. Tutorials
- Learning-oriented, single-path, reproducible walkthroughs.
2. How-to guides
- Task-oriented procedures for common real goals.
3. Reference
- Factual descriptions of CLI, configs, APIs, and formats.
4. Explanation
- Conceptual material that explains why design and workflow decisions
matter.
Cross-cutting navigation conventions:
- Every page starts with audience, prerequisites, and outcome.
- Every page serves one Diataxis type only.
- Beginner-first path is prioritized, with clear links to advanced pages.
## Phase 0 inventory: current docs mapped to Diataxis
Legend:
- Keep: useful as-is with minor edits.
- Split: contains mixed documentation types and should be separated.
- Rewrite: major changes needed to fit target audience/type.
- Move: content is valid but belongs under another section.
| Current page | Current role | Target type | Audience | Action | Priority |
| --- | --- | --- | --- | --- | --- |
| `README.md` | Mixed quickstart + CLI + API + warning | Tutorial + How-to + Explanation (split) | 1 + 2 | Split | P0 |
| `docs/source/index.md` | Sparse landing page | Navigation hub | 1 + 2 | Rewrite | P0 |
| `docs/source/architecture.md` | Internal architecture deep dive | Explanation + developer reference | 2 | Move/trim | P2 |
| `docs/source/postprocessing.md` | Concept + config + internals + usage | Explanation + How-to + Reference (split) | 1 + 2 | Split | P1 |
| `docs/source/preprocessing/index.md` | Conceptual overview with some procedural flow | Explanation | 2 (and 1 optional) | Keep/trim | P2 |
| `docs/source/preprocessing/audio.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 |
| `docs/source/preprocessing/spectrogram.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 |
| `docs/source/preprocessing/usage.md` | Usage patterns + concept | How-to + Explanation (split) | 2 | Split | P1 |
| `docs/source/data/index.md` | Data-loading section index | Reference index | 2 | Keep/update | P2 |
| `docs/source/data/aoef.md` | Config and examples | How-to + Reference (split) | 2 | Split | P1 |
| `docs/source/data/legacy.md` | Legacy formats and config | How-to + Reference (split) | 2 | Split | P2 |
| `docs/source/targets/index.md` | Long conceptual + process overview | Explanation + How-to (split) | 2 | Split | P2 |
| `docs/source/targets/tags_and_terms.md` | Definitions + guidance | Explanation + Reference | 2 | Split | P2 |
| `docs/source/targets/filtering.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/transform.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/classes.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/rois.md` | Concept + mapping details | Explanation + Reference | 2 | Split | P2 |
| `docs/source/targets/use.md` | Integration overview | Explanation | 2 | Keep/trim | P2 |
| `docs/source/reference/index.md` | Small reference root | Reference | 2 | Expand | P1 |
| `docs/source/reference/configs.md` | Autodoc for configs | Reference | 2 | Keep | P1 |
| `docs/source/reference/targets.md` | Autodoc for targets | Reference | 2 | Keep | P2 |
## CLI and API documentation gaps (from code surface)
Current command surface includes:
- `batdetect2 detect` (compat command)
- `batdetect2 predict directory`
- `batdetect2 predict file_list`
- `batdetect2 predict dataset`
- `batdetect2 train`
- `batdetect2 evaluate`
- `batdetect2 data summary`
- `batdetect2 data convert`
These commands are not yet represented as a coherent user-facing task set.
Priority gap actions:
1. Add CLI reference pages for command signatures and options.
2. Add beginner how-to pages for practical command recipes.
3. Add migration guidance from `detect` to `predict` workflows.
## Priority architecture for implementation phases
### P0 (this phase): architecture and inventory
- Done in this file.
- Define structure and classify existing material.
### P1: user-critical docs for running the model
1. Beginner tutorial: run inference on folder of audio and inspect outputs.
2. How-to guides for repeatable inference tasks and threshold tuning.
3. Reference: complete CLI docs for prediction and outputs.
4. Explanation: interpretation caveats and validation guidance.
### P2: advanced customization and training
1. How-to guides for custom dataset preparation and training.
2. Reference for data formats, targets, and preprocessing configs.
3. Explanation docs for target design and pipeline trade-offs.
### P3: polish and contributor consistency
1. Tight cross-linking across Diataxis boundaries.
2. Consistent page templates and terminology.
3. Reader testing with representative users from both audiences.
## Definition of done for Phase 0
Phase 0 is complete when:
1. The target architecture is defined.
2. Existing content is inventoried and classified.
3. A prioritized migration path is agreed.
This page satisfies these criteria and is the baseline for Phase 1 work.

View File

@ -0,0 +1,10 @@
# Explanation
Explanation pages describe why BatDetect2 behaves as it does and how to reason
about trade-offs.
```{toctree}
:maxdepth: 1
model-output-and-validation
```

View File

@ -0,0 +1,29 @@
# Model output and validation
BatDetect2 outputs model predictions, not ground truth. The same configuration
can behave differently across recording conditions, species compositions, and
acoustic environments.
## Why threshold choice matters
- Lower detection thresholds increase sensitivity but can increase false
positives.
- Higher thresholds reduce false positives but can miss faint calls.
No threshold is universally correct. The right setting depends on your survey
objectives and tolerance for false positives versus missed detections.
## Why local validation is required
Model performance depends on how similar your data are to the model's training data.
Before ecological interpretation, validate predictions on a representative,
locally reviewed subset.
Recommended validation checks:
1. Compare detection counts against expert-reviewed clips.
2. Inspect species-level predictions for plausible confusion patterns.
3. Repeat checks across sites, seasons, and recorder setups.
For practical threshold workflows, see
{doc}`../how_to/tune-detection-threshold`.

View File

@ -0,0 +1,83 @@
# Getting started
BatDetect2 is both a command line tool (CLI) and a Python library.
- Use the CLI if you want to run existing models or train your own models from
the terminal.
- Use the Python package if you want to integrate BatDetect2 into your own
scripts, notebooks, or analysis pipeline.
If you want to try BatDetect2 before installing anything locally:
- [Hugging Face demo (UK species)](https://huggingface.co/spaces/macaodha/batdetect2)
- [Google Colab notebook](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb)
## Prerequisites
We recommend `uv` for both workflows.
`uv` is a fast Python package and environment manager that keeps installs
isolated and reproducible.
- Use `uv tool` to install the CLI.
- Use `uv add` to add `batdetect2` as a dependency in a Python project.
Install `uv` first by following their
[installation instructions](https://docs.astral.sh/uv/getting-started/installation/).
## Install the CLI
The following installs `batdetect2` in an isolated tool environment and exposes
the `batdetect2` command on your machine.
```bash
uv tool install batdetect2
```
If you need to upgrade later:
```bash
uv tool upgrade batdetect2
```
Verify the CLI is available:
```bash
batdetect2 --help
```
Run your first workflow:
Go to {doc}`tutorials/run-inference-on-folder` for a complete first run.
## Integrate with your Python project
If you are using BatDetect2 from Python code, add it to your project
dependencies:
```bash
uv add batdetect2
```
This keeps dependency metadata and the environment in sync.
### Alternative with `pip`
If you prefer `pip`, create and activate a virtual environment first:
```bash
python -m venv .venv
source .venv/bin/activate
```
Then install from PyPI:
```bash
pip install batdetect2
```
## What's next
- Run your first detection workflow:
{doc}`tutorials/run-inference-on-folder`
- For practical task recipes, go to {doc}`how_to/index`
- For command and option details, go to {doc}`reference/cli`

View File

@ -0,0 +1,15 @@
# How-to Guides
How-to guides are task-oriented: each one walks you through completing a specific, real-world goal.
## Who this section is for
- Ecologists running repeat analyses.
- Python-savvy users integrating BatDetect2 into workflows.
```{toctree}
:maxdepth: 1
run-batch-predictions
tune-detection-threshold
```

View File

@ -0,0 +1,30 @@
# How to run batch predictions
This guide shows practical command patterns for directory-based and file-list
prediction runs.
## Predict from a directory
```bash
batdetect2 predict directory \
path/to/model.ckpt \
path/to/audio_dir \
path/to/outputs
```
## Predict from a file list
```bash
batdetect2 predict file_list \
path/to/model.ckpt \
path/to/audio_files.txt \
path/to/outputs
```
## Useful options
- `--batch-size` to control throughput.
- `--workers` to set data-loading parallelism.
- `--format` to select output format.
For complete option details, see {doc}`../reference/cli`.

View File

@ -0,0 +1,25 @@
# How to tune detection threshold
Use this guide to compare detection outputs at different threshold values.
## 1) Start with a baseline run
Run an initial prediction workflow and keep outputs in a dedicated folder.
## 2) Sweep threshold values
If you use the legacy `detect` command, run it several times with different
threshold values (for example, `0.1`, `0.3`, and `0.5`) and compare output
counts and quality on a validation subset.
## 3) Validate against known calls
Use files with trusted annotations or expert review to select a threshold that
fits your project goals.
## 4) Record your chosen setting
Write down the chosen threshold and rationale so analyses are reproducible.
For conceptual trade-offs, see
{doc}`../explanation/model-output-and-validation`.

View File

@ -1,15 +1,28 @@
# batdetect2 documentation
# Home
Hi!
Welcome.
This documentation is being reorganized using the Diataxis framework.
See the current planning and migration inventory here:
- {doc}`documentation_plan`
```{toctree}
:maxdepth: 1
:caption: Contents:
:caption: Get Started
architecture
data/index
preprocessing/index
postprocessing
targets/index
getting_started
documentation_plan
tutorials/index
how_to/index
reference/index
explanation/index
legacy/index
```
## Audience paths
- Ecologists who want low-code workflows should start with
{doc}`tutorials/index`.
- Python-savvy ecologists and bioacousticians can jump to
{doc}`how_to/index` and {doc}`reference/index`.

View File

@ -0,0 +1,14 @@
# Legacy documentation
These pages contain existing technical material that predates the Diataxis
reorganization. They remain available during migration.
```{toctree}
:maxdepth: 1
../architecture
../data/index
../preprocessing/index
../postprocessing
../targets/index
```

View File

@ -0,0 +1,7 @@
CLI reference
=============
.. click:: batdetect2.cli:cli
:prog: batdetect2
:nested: full

View File

@ -1,7 +1,7 @@
# Config Reference
```{eval-rst}
.. automodule:: batdetect2.configs
.. automodule:: batdetect2.config
:members:
:inherited-members: pydantic.BaseModel
```

View File

@ -1,10 +1,12 @@
# Reference documentation
```{eval-rst}
.. toctree::
:maxdepth: 1
:caption: Contents:
Reference pages provide factual, complete descriptions of commands,
configuration, and data structures.
configs
targets
```{toctree}
:maxdepth: 1
cli
configs
targets
```

View File

@ -74,5 +74,6 @@ filtering
transform
classes
rois
labels
use
```

View File

@ -0,0 +1,15 @@
# Tutorials
Tutorials are for learning by doing. They provide a single, reproducible path
to a concrete outcome.
## Who this section is for
- Ecologists who want practical workflows with minimal coding.
- New users who want to build confidence before customization.
```{toctree}
:maxdepth: 1
run-inference-on-folder
```

View File

@ -0,0 +1,31 @@
# Tutorial: Run inference on a folder of audio files
## Prerequisites
- BatDetect2 installed in your environment.
- A folder containing `.wav` files.
- A model checkpoint path.
## Steps
1. Choose your input and output directories.
2. Run prediction with the CLI.
3. Verify output files were written.
4. Inspect predictions and confidence scores.
## Example command
```bash
batdetect2 predict directory \
path/to/model.ckpt \
path/to/audio_dir \
path/to/outputs
```
## What to do next
- Use {doc}`../how_to/tune-detection-threshold` to tune sensitivity.
- Use {doc}`../reference/cli` for full command options.
Note: this is the initial Phase 1 scaffold and will be expanded with a full,
validated end-to-end walkthrough.

View File

@ -80,6 +80,7 @@ dev = [
"numpydoc>=1.8.0",
"sphinx-autodoc-typehints>=2.3.0",
"sphinx-book-theme>=1.1.4",
"sphinx-click>=6.1.0",
"autodoc-pydantic>=2.2.0",
"pytest-cov>=6.1.1",
"ty>=0.0.1a12",

View File

@ -8,7 +8,8 @@ __all__ = ["data"]
@cli.group()
def data(): ...
def data():
"""Inspect and convert dataset configuration files."""
@data.command()
@ -37,6 +38,7 @@ def summary(
targets_path: Path | None = None,
base_dir: Path | None = None,
):
"""Show annotation counts and optional class summary."""
from batdetect2.data import compute_class_summary, load_dataset_from_config
from batdetect2.targets import load_targets

View File

@ -40,6 +40,7 @@ def evaluate_command(
experiment_name: str | None = None,
run_name: str | None = None,
):
"""Evaluate a checkpoint against a configured test dataset."""
from batdetect2.api_v2 import BatDetect2API
from batdetect2.audio import AudioConfig
from batdetect2.data import load_dataset_from_config

View File

@ -12,7 +12,7 @@ __all__ = ["predict"]
@cli.group(name="predict")
def predict() -> None:
"""Run prediction with BatDetect2 API v2."""
"""Run model inference on audio using API v2."""
def _build_api(
@ -126,6 +126,7 @@ def predict_directory_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on all audio files in a directory."""
audio_files = list(get_audio_files(audio_dir))
_run_prediction(
model_path=model_path,
@ -164,6 +165,7 @@ def predict_file_list_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on audio files listed in a text file."""
file_list = Path(file_list)
audio_files = [
Path(line.strip())
@ -208,6 +210,7 @@ def predict_dataset_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on recordings referenced in an annotation dataset."""
dataset_path = Path(dataset_path)
dataset = io.load(dataset_path, type="annotation_set")
audio_files = sorted(

View File

@ -49,6 +49,7 @@ def train_command(
experiment_name: str | None = None,
run_name: str | None = None,
):
"""Train a model from dataset configs or a checkpoint."""
from batdetect2.api_v2 import BatDetect2API
from batdetect2.audio import AudioConfig
from batdetect2.config import BatDetect2Config