Start cli docs

This commit is contained in:
mbsantiago 2026-03-28 11:28:52 +00:00
parent 9ec2f1a107
commit 7277151f33
21 changed files with 446 additions and 17 deletions

View File

@ -19,6 +19,7 @@ extensions = [
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"sphinxcontrib.autodoc_pydantic",
"sphinx_click",
"numpydoc",
"myst_parser",
"sphinx_autodoc_typehints",
@ -38,9 +39,19 @@ source_suffix = {
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_theme_options = {
"home_page_in_toc": True,
}
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"click": ("https://click.palletsprojects.com/en/stable/", None),
"librosa": ("https://librosa.org/doc/latest/", None),
"lightning": ("https://lightning.ai/docs/pytorch/stable/", None),
"loguru": ("https://loguru.readthedocs.io/en/stable/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"omegaconf": ("https://omegaconf.readthedocs.io/en/latest/", None),
"pytorch": ("https://pytorch.org/docs/stable/", None),
"soundevent": ("https://mbsantiago.github.io/soundevent/", None),
"pydantic": ("https://docs.pydantic.dev/latest/", None),
"xarray": ("https://docs.xarray.dev/en/stable/", None),

View File

@ -0,0 +1,135 @@
# Documentation Architecture and Migration Plan (Phase 0)
This page defines the Phase 0 documentation architecture and inventory for
reorganizing `batdetect2` documentation using the Diataxis framework.
## Scope and goals
Phase 0 focuses on architecture and prioritization only. It does not include
writing the new documentation itself; that happens in later phases.
Primary goals:
1. Define a target docs architecture by Diataxis type.
2. Map current pages to target documentation types.
3. Identify what to keep, split, rewrite, or deprecate.
4. Set priorities for implementation phases.
## Audiences
Two primary audiences are in scope.
1. Ecologists who prefer minimal coding, focused on practical workflows:
run inference, inspect outputs, and possibly train with custom data.
2. Ecologists or bioacousticians who are Python-savvy and want to customize
workflows, training, and analysis.
## Target information architecture
The target architecture uses four top-level documentation sections.
1. Tutorials
- Learning-oriented, single-path, reproducible walkthroughs.
2. How-to guides
- Task-oriented procedures for common real goals.
3. Reference
- Factual descriptions of CLI, configs, APIs, and formats.
4. Explanation
- Conceptual material that explains why design and workflow decisions
matter.
Cross-cutting navigation conventions:
- Every page starts with audience, prerequisites, and outcome.
- Every page serves one Diataxis type only.
- Beginner-first path is prioritized, with clear links to advanced pages.
## Phase 0 inventory: current docs mapped to Diataxis
Legend:
- Keep: useful as-is with minor edits.
- Split: contains mixed documentation types and should be separated.
- Rewrite: major changes needed to fit target audience/type.
- Move: content is valid but belongs under another section.
| Current page | Current role | Target type | Audience | Action | Priority |
| --- | --- | --- | --- | --- | --- |
| `README.md` | Mixed quickstart + CLI + API + warning | Tutorial + How-to + Explanation (split) | 1 + 2 | Split | P0 |
| `docs/source/index.md` | Sparse landing page | Navigation hub | 1 + 2 | Rewrite | P0 |
| `docs/source/architecture.md` | Internal architecture deep dive | Explanation + developer reference | 2 | Move/trim | P2 |
| `docs/source/postprocessing.md` | Concept + config + internals + usage | Explanation + How-to + Reference (split) | 1 + 2 | Split | P1 |
| `docs/source/preprocessing/index.md` | Conceptual overview with some procedural flow | Explanation | 2 (and 1 optional) | Keep/trim | P2 |
| `docs/source/preprocessing/audio.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 |
| `docs/source/preprocessing/spectrogram.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 |
| `docs/source/preprocessing/usage.md` | Usage patterns + concept | How-to + Explanation (split) | 2 | Split | P1 |
| `docs/source/data/index.md` | Data-loading section index | Reference index | 2 | Keep/update | P2 |
| `docs/source/data/aoef.md` | Config and examples | How-to + Reference (split) | 2 | Split | P1 |
| `docs/source/data/legacy.md` | Legacy formats and config | How-to + Reference (split) | 2 | Split | P2 |
| `docs/source/targets/index.md` | Long conceptual + process overview | Explanation + How-to (split) | 2 | Split | P2 |
| `docs/source/targets/tags_and_terms.md` | Definitions + guidance | Explanation + Reference | 2 | Split | P2 |
| `docs/source/targets/filtering.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/transform.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/classes.md` | Procedure + config | How-to + Reference | 2 | Split | P2 |
| `docs/source/targets/rois.md` | Concept + mapping details | Explanation + Reference | 2 | Split | P2 |
| `docs/source/targets/use.md` | Integration overview | Explanation | 2 | Keep/trim | P2 |
| `docs/source/reference/index.md` | Small reference root | Reference | 2 | Expand | P1 |
| `docs/source/reference/configs.md` | Autodoc for configs | Reference | 2 | Keep | P1 |
| `docs/source/reference/targets.md` | Autodoc for targets | Reference | 2 | Keep | P2 |
## CLI and API documentation gaps (from code surface)
Current command surface includes:
- `batdetect2 detect` (compat command)
- `batdetect2 predict directory`
- `batdetect2 predict file_list`
- `batdetect2 predict dataset`
- `batdetect2 train`
- `batdetect2 evaluate`
- `batdetect2 data summary`
- `batdetect2 data convert`
These commands are not yet represented as a coherent user-facing task set.
Priority gap actions:
1. Add CLI reference pages for command signatures and options.
2. Add beginner how-to pages for practical command recipes.
3. Add migration guidance from `detect` to `predict` workflows.
## Priority architecture for implementation phases
### P0 (this phase): architecture and inventory
- Done in this file.
- Define structure and classify existing material.
### P1: user-critical docs for running the model
1. Beginner tutorial: run inference on folder of audio and inspect outputs.
2. How-to guides for repeatable inference tasks and threshold tuning.
3. Reference: complete CLI docs for prediction and outputs.
4. Explanation: interpretation caveats and validation guidance.
### P2: advanced customization and training
1. How-to guides for custom dataset preparation and training.
2. Reference for data formats, targets, and preprocessing configs.
3. Explanation docs for target design and pipeline trade-offs.
### P3: polish and contributor consistency
1. Tight cross-linking across Diataxis boundaries.
2. Consistent page templates and terminology.
3. Reader testing with representative users from both audiences.
## Definition of done for Phase 0
Phase 0 is complete when:
1. The target architecture is defined.
2. Existing content is inventoried and classified.
3. A prioritized migration path is agreed.
This page satisfies these criteria and is the baseline for Phase 1 work.

View File

@ -0,0 +1,10 @@
# Explanation
Explanation pages describe why BatDetect2 behaves as it does and how to reason
about trade-offs.
```{toctree}
:maxdepth: 1
model-output-and-validation
```

View File

@ -0,0 +1,29 @@
# Model output and validation
BatDetect2 outputs model predictions, not ground truth. The same configuration
can behave differently across recording conditions, species compositions, and
acoustic environments.
## Why threshold choice matters
- Lower detection thresholds increase sensitivity but can increase false
positives.
- Higher thresholds reduce false positives but can miss faint calls.
No threshold is universally correct. The right setting depends on your survey
objectives and tolerance for false positives versus missed detections.
## Why local validation is required
Model performance depends on how similar your data are to the model's training data.
Before ecological interpretation, validate predictions on a representative,
locally reviewed subset.
Recommended validation checks:
1. Compare detection counts against expert-reviewed clips.
2. Inspect species-level predictions for plausible confusion patterns.
3. Repeat checks across sites, seasons, and recorder setups.
For practical threshold workflows, see
{doc}`../how_to/tune-detection-threshold`.

View File

@ -0,0 +1,83 @@
# Getting started
BatDetect2 is both a command line tool (CLI) and a Python library.
- Use the CLI if you want to run existing models or train your own models from
the terminal.
- Use the Python package if you want to integrate BatDetect2 into your own
scripts, notebooks, or analysis pipeline.
If you want to try BatDetect2 before installing anything locally:
- [Hugging Face demo (UK species)](https://huggingface.co/spaces/macaodha/batdetect2)
- [Google Colab notebook](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb)
## Prerequisites
We recommend `uv` for both workflows.
`uv` is a fast Python package and environment manager that keeps installs
isolated and reproducible.
- Use `uv tool` to install the CLI.
- Use `uv add` to add `batdetect2` as a dependency in a Python project.
Install `uv` first by following their
[installation instructions](https://docs.astral.sh/uv/getting-started/installation/).
## Install the CLI
The following installs `batdetect2` in an isolated tool environment and exposes
the `batdetect2` command on your machine.
```bash
uv tool install batdetect2
```
If you need to upgrade later:
```bash
uv tool upgrade batdetect2
```
Verify the CLI is available:
```bash
batdetect2 --help
```
Run your first workflow:
Go to {doc}`tutorials/run-inference-on-folder` for a complete first run.
## Integrate with your Python project
If you are using BatDetect2 from Python code, add it to your project
dependencies:
```bash
uv add batdetect2
```
This keeps dependency metadata and the environment in sync.
### Alternative with `pip`
If you prefer `pip`, create and activate a virtual environment first:
```bash
python -m venv .venv
source .venv/bin/activate
```
Then install from PyPI:
```bash
pip install batdetect2
```
## What's next
- Run your first detection workflow:
{doc}`tutorials/run-inference-on-folder`
- For practical task recipes, go to {doc}`how_to/index`
- For command and option details, go to {doc}`reference/cli`

View File

@ -0,0 +1,15 @@
# How-to Guides
How-to guides are task-oriented: each one walks you through completing a specific, real-world goal.
## Who this section is for
- Ecologists running repeat analyses.
- Python-savvy users integrating BatDetect2 into workflows.
```{toctree}
:maxdepth: 1
run-batch-predictions
tune-detection-threshold
```

View File

@ -0,0 +1,30 @@
# How to run batch predictions
This guide shows practical command patterns for directory-based and file-list
prediction runs.
## Predict from a directory
```bash
batdetect2 predict directory \
path/to/model.ckpt \
path/to/audio_dir \
path/to/outputs
```
## Predict from a file list
```bash
batdetect2 predict file_list \
path/to/model.ckpt \
path/to/audio_files.txt \
path/to/outputs
```
## Useful options
- `--batch-size` to control throughput.
- `--workers` to set data-loading parallelism.
- `--format` to select output format.
For complete option details, see {doc}`../reference/cli`.

View File

@ -0,0 +1,25 @@
# How to tune detection threshold
Use this guide to compare detection outputs at different threshold values.
## 1) Start with a baseline run
Run an initial prediction workflow and keep outputs in a dedicated folder.
## 2) Sweep threshold values
If you use the legacy `detect` command, run it several times with different
threshold values (for example, `0.1`, `0.3`, and `0.5`) and compare output
counts and quality on a validation subset.
## 3) Validate against known calls
Use files with trusted annotations or expert review to select a threshold that
fits your project goals.
## 4) Record your chosen setting
Write down the chosen threshold and rationale so analyses are reproducible.
For conceptual trade-offs, see
{doc}`../explanation/model-output-and-validation`.

View File

@ -1,15 +1,28 @@
# batdetect2 documentation
# Home
Hi!
Welcome.
This documentation is being reorganized using the Diataxis framework.
See the current planning and migration inventory here:
- {doc}`documentation_plan`
```{toctree}
:maxdepth: 1
:caption: Contents:
:caption: Get Started
architecture
data/index
preprocessing/index
postprocessing
targets/index
getting_started
documentation_plan
tutorials/index
how_to/index
reference/index
explanation/index
legacy/index
```
## Audience paths
- Ecologists who want low-code workflows should start with
{doc}`tutorials/index`.
- Python-savvy ecologists and bioacousticians can jump to
{doc}`how_to/index` and {doc}`reference/index`.

View File

@ -0,0 +1,14 @@
# Legacy documentation
These pages contain existing technical material that predates the Diataxis
reorganization. They remain available during migration.
```{toctree}
:maxdepth: 1
../architecture
../data/index
../preprocessing/index
../postprocessing
../targets/index
```

View File

@ -0,0 +1,7 @@
CLI reference
=============
.. click:: batdetect2.cli:cli
:prog: batdetect2
:nested: full

View File

@ -1,7 +1,7 @@
# Config Reference
```{eval-rst}
.. automodule:: batdetect2.configs
.. automodule:: batdetect2.config
:members:
:inherited-members: pydantic.BaseModel
```

View File

@ -1,10 +1,12 @@
# Reference documentation
```{eval-rst}
.. toctree::
:maxdepth: 1
:caption: Contents:
Reference pages provide factual, complete descriptions of commands,
configuration, and data structures.
configs
targets
```{toctree}
:maxdepth: 1
cli
configs
targets
```

View File

@ -74,5 +74,6 @@ filtering
transform
classes
rois
labels
use
```

View File

@ -0,0 +1,15 @@
# Tutorials
Tutorials are for learning by doing. They provide a single, reproducible path
to a concrete outcome.
## Who this section is for
- Ecologists who want practical workflows with minimal coding.
- New users who want to build confidence before customization.
```{toctree}
:maxdepth: 1
run-inference-on-folder
```

View File

@ -0,0 +1,31 @@
# Tutorial: Run inference on a folder of audio files
## Prerequisites
- BatDetect2 installed in your environment.
- A folder containing `.wav` files.
- A model checkpoint path.
## Steps
1. Choose your input and output directories.
2. Run prediction with the CLI.
3. Verify output files were written.
4. Inspect predictions and confidence scores.
## Example command
```bash
batdetect2 predict directory \
path/to/model.ckpt \
path/to/audio_dir \
path/to/outputs
```
## What to do next
- Use {doc}`../how_to/tune-detection-threshold` to tune sensitivity.
- Use {doc}`../reference/cli` for full command options.
Note: this is the initial Phase 1 scaffold and will be expanded with a full,
validated end-to-end walkthrough.

View File

@ -80,6 +80,7 @@ dev = [
"numpydoc>=1.8.0",
"sphinx-autodoc-typehints>=2.3.0",
"sphinx-book-theme>=1.1.4",
"sphinx-click>=6.1.0",
"autodoc-pydantic>=2.2.0",
"pytest-cov>=6.1.1",
"ty>=0.0.1a12",

View File

@ -8,7 +8,8 @@ __all__ = ["data"]
@cli.group()
def data(): ...
def data():
"""Inspect and convert dataset configuration files."""
@data.command()
@ -37,6 +38,7 @@ def summary(
targets_path: Path | None = None,
base_dir: Path | None = None,
):
"""Show annotation counts and optional class summary."""
from batdetect2.data import compute_class_summary, load_dataset_from_config
from batdetect2.targets import load_targets

View File

@ -40,6 +40,7 @@ def evaluate_command(
experiment_name: str | None = None,
run_name: str | None = None,
):
"""Evaluate a checkpoint against a configured test dataset."""
from batdetect2.api_v2 import BatDetect2API
from batdetect2.audio import AudioConfig
from batdetect2.data import load_dataset_from_config

View File

@ -12,7 +12,7 @@ __all__ = ["predict"]
@cli.group(name="predict")
def predict() -> None:
"""Run prediction with BatDetect2 API v2."""
"""Run model inference on audio using API v2."""
def _build_api(
@ -126,6 +126,7 @@ def predict_directory_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on all audio files in a directory."""
audio_files = list(get_audio_files(audio_dir))
_run_prediction(
model_path=model_path,
@ -164,6 +165,7 @@ def predict_file_list_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on audio files listed in a text file."""
file_list = Path(file_list)
audio_files = [
Path(line.strip())
@ -208,6 +210,7 @@ def predict_dataset_command(
num_workers: int,
format_name: str | None,
) -> None:
"""Predict on recordings referenced in an annotation dataset."""
dataset_path = Path(dataset_path)
dataset = io.load(dataset_path, type="annotation_set")
audio_files = sorted(

View File

@ -49,6 +49,7 @@ def train_command(
experiment_name: str | None = None,
run_name: str | None = None,
):
"""Train a model from dataset configs or a checkpoint."""
from batdetect2.api_v2 import BatDetect2API
from batdetect2.audio import AudioConfig
from batdetect2.config import BatDetect2Config