From 7277151f33b207c2c3445866d6e1a3b5c7711f44 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Sat, 28 Mar 2026 11:28:52 +0000 Subject: [PATCH] Start cli docs --- docs/source/conf.py | 11 ++ docs/source/documentation_plan.md | 135 ++++++++++++++++++ docs/source/explanation/index.md | 10 ++ .../model-output-and-validation.md | 29 ++++ docs/source/getting_started.md | 83 +++++++++++ docs/source/how_to/index.md | 15 ++ docs/source/how_to/run-batch-predictions.md | 30 ++++ .../source/how_to/tune-detection-threshold.md | 25 ++++ docs/source/index.md | 29 ++-- docs/source/legacy/index.md | 14 ++ docs/source/reference/cli.rst | 7 + docs/source/reference/configs.md | 2 +- docs/source/reference/index.md | 14 +- docs/source/targets/index.md | 1 + docs/source/tutorials/index.md | 15 ++ .../tutorials/run-inference-on-folder.md | 31 ++++ pyproject.toml | 1 + src/batdetect2/cli/data.py | 4 +- src/batdetect2/cli/evaluate.py | 1 + src/batdetect2/cli/inference.py | 5 +- src/batdetect2/cli/train.py | 1 + 21 files changed, 446 insertions(+), 17 deletions(-) create mode 100644 docs/source/documentation_plan.md create mode 100644 docs/source/explanation/index.md create mode 100644 docs/source/explanation/model-output-and-validation.md create mode 100644 docs/source/getting_started.md create mode 100644 docs/source/how_to/index.md create mode 100644 docs/source/how_to/run-batch-predictions.md create mode 100644 docs/source/how_to/tune-detection-threshold.md create mode 100644 docs/source/legacy/index.md create mode 100644 docs/source/reference/cli.rst create mode 100644 docs/source/tutorials/index.md create mode 100644 docs/source/tutorials/run-inference-on-folder.md diff --git a/docs/source/conf.py b/docs/source/conf.py index 10dcebb..bb834ca 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,6 +19,7 @@ extensions = [ "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinxcontrib.autodoc_pydantic", + "sphinx_click", "numpydoc", "myst_parser", 
"sphinx_autodoc_typehints", @@ -38,9 +39,19 @@ source_suffix = { html_theme = "sphinx_book_theme" html_static_path = ["_static"] +html_theme_options = { + "home_page_in_toc": True, +} intersphinx_mapping = { "python": ("https://docs.python.org/3", None), + "click": ("https://click.palletsprojects.com/en/stable/", None), + "librosa": ("https://librosa.org/doc/latest/", None), + "lightning": ("https://lightning.ai/docs/pytorch/stable/", None), + "loguru": ("https://loguru.readthedocs.io/en/stable/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "omegaconf": ("https://omegaconf.readthedocs.io/en/latest/", None), + "pytorch": ("https://pytorch.org/docs/stable/", None), "soundevent": ("https://mbsantiago.github.io/soundevent/", None), "pydantic": ("https://docs.pydantic.dev/latest/", None), "xarray": ("https://docs.xarray.dev/en/stable/", None), diff --git a/docs/source/documentation_plan.md b/docs/source/documentation_plan.md new file mode 100644 index 0000000..2e7e98f --- /dev/null +++ b/docs/source/documentation_plan.md @@ -0,0 +1,135 @@ +# Documentation Architecture and Migration Plan (Phase 0) + +This page defines the Phase 0 documentation architecture and inventory for +reorganizing `batdetect2` documentation using the Diataxis framework. + +## Scope and goals + +Phase 0 focuses on architecture and prioritization only. It does not attempt +to write all new docs yet. + +Primary goals: + +1. Define a target docs architecture by Diataxis type. +2. Map current pages to target documentation types. +3. Identify what to keep, split, rewrite, or deprecate. +4. Set priorities for implementation phases. + +## Audiences + +Two primary audiences are in scope. + +1. Ecologists who prefer minimal coding, focused on practical workflows: + run inference, inspect outputs, and possibly train with custom data. +2. Ecologists or bioacousticians who are Python-savvy and want to customize + workflows, training, and analysis. 
+ +## Target information architecture + +The target architecture uses four top-level documentation sections. + +1. Tutorials + - Learning-oriented, single-path, reproducible walkthroughs. +2. How-to guides + - Task-oriented procedures for common real goals. +3. Reference + - Factual descriptions of CLI, configs, APIs, and formats. +4. Explanation + - Conceptual material that explains why design and workflow decisions + matter. + +Cross-cutting navigation conventions: + +- Every page starts with audience, prerequisites, and outcome. +- Every page serves one Diataxis type only. +- Beginner-first path is prioritized, with clear links to advanced pages. + +## Phase 0 inventory: current docs mapped to Diataxis + +Legend: + +- Keep: useful as-is with minor edits. +- Split: contains mixed documentation types and should be separated. +- Rewrite: major changes needed to fit target audience/type. +- Move: content is valid but belongs under another section. + +| Current page | Current role | Target type | Audience | Action | Priority | +| --- | --- | --- | --- | --- | --- | +| `README.md` | Mixed quickstart + CLI + API + warning | Tutorial + How-to + Explanation (split) | 1 + 2 | Split | P0 | +| `docs/source/index.md` | Sparse landing page | Navigation hub | 1 + 2 | Rewrite | P0 | +| `docs/source/architecture.md` | Internal architecture deep dive | Explanation + developer reference | 2 | Move/trim | P2 | +| `docs/source/postprocessing.md` | Concept + config + internals + usage | Explanation + How-to + Reference (split) | 1 + 2 | Split | P1 | +| `docs/source/preprocessing/index.md` | Conceptual overview with some procedural flow | Explanation | 2 (and 1 optional) | Keep/trim | P2 | +| `docs/source/preprocessing/audio.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 | +| `docs/source/preprocessing/spectrogram.md` | Detailed configuration and behavior | Reference + How-to fragments | 2 | Split | P2 | +| 
`docs/source/preprocessing/usage.md` | Usage patterns + concept | How-to + Explanation (split) | 2 | Split | P1 | +| `docs/source/data/index.md` | Data-loading section index | Reference index | 2 | Keep/update | P2 | +| `docs/source/data/aoef.md` | Config and examples | How-to + Reference (split) | 2 | Split | P1 | +| `docs/source/data/legacy.md` | Legacy formats and config | How-to + Reference (split) | 2 | Split | P2 | +| `docs/source/targets/index.md` | Long conceptual + process overview | Explanation + How-to (split) | 2 | Split | P2 | +| `docs/source/targets/tags_and_terms.md` | Definitions + guidance | Explanation + Reference | 2 | Split | P2 | +| `docs/source/targets/filtering.md` | Procedure + config | How-to + Reference | 2 | Split | P2 | +| `docs/source/targets/transform.md` | Procedure + config | How-to + Reference | 2 | Split | P2 | +| `docs/source/targets/classes.md` | Procedure + config | How-to + Reference | 2 | Split | P2 | +| `docs/source/targets/rois.md` | Concept + mapping details | Explanation + Reference | 2 | Split | P2 | +| `docs/source/targets/use.md` | Integration overview | Explanation | 2 | Keep/trim | P2 | +| `docs/source/reference/index.md` | Small reference root | Reference | 2 | Expand | P1 | +| `docs/source/reference/configs.md` | Autodoc for configs | Reference | 2 | Keep | P1 | +| `docs/source/reference/targets.md` | Autodoc for targets | Reference | 2 | Keep | P2 | + +## CLI and API documentation gaps (from code surface) + +Current command surface includes: + +- `batdetect2 detect` (compat command) +- `batdetect2 predict directory` +- `batdetect2 predict file_list` +- `batdetect2 predict dataset` +- `batdetect2 train` +- `batdetect2 evaluate` +- `batdetect2 data summary` +- `batdetect2 data convert` + +These commands are not yet represented as a coherent user-facing task set. + +Priority gap actions: + +1. Add CLI reference pages for command signatures and options. +2. Add beginner how-to pages for practical command recipes. +3. 
Add migration guidance from `detect` to `predict` workflows. + +## Priority architecture for implementation phases + +### P0 (this phase): architecture and inventory + +- Done in this file. +- Define structure and classify existing material. + +### P1: user-critical docs for running the model + +1. Beginner tutorial: run inference on a folder of audio and inspect outputs. +2. How-to guides for repeatable inference tasks and threshold tuning. +3. Reference: complete CLI docs for prediction and outputs. +4. Explanation: interpretation caveats and validation guidance. + +### P2: advanced customization and training + +1. How-to guides for custom dataset preparation and training. +2. Reference for data formats, targets, and preprocessing configs. +3. Explanation docs for target design and pipeline trade-offs. + +### P3: polish and contributor consistency + +1. Tight cross-linking across Diataxis boundaries. +2. Consistent page templates and terminology. +3. Reader testing with representative users from both audiences. + +## Definition of done for Phase 0 + +Phase 0 is complete when: + +1. The target architecture is defined. +2. Existing content is inventoried and classified. +3. A prioritized migration path is agreed. + +This page satisfies these criteria and is the baseline for Phase 1 work. diff --git a/docs/source/explanation/index.md b/docs/source/explanation/index.md new file mode 100644 index 0000000..9ad50ac --- /dev/null +++ b/docs/source/explanation/index.md @@ -0,0 +1,10 @@ +# Explanation + +Explanation pages describe why BatDetect2 behaves as it does and how to reason +about trade-offs. 
+ +```{toctree} +:maxdepth: 1 + +model-output-and-validation +``` diff --git a/docs/source/explanation/model-output-and-validation.md b/docs/source/explanation/model-output-and-validation.md new file mode 100644 index 0000000..fc39b38 --- /dev/null +++ b/docs/source/explanation/model-output-and-validation.md @@ -0,0 +1,29 @@ +# Model output and validation + +BatDetect2 outputs model predictions, not ground truth. The same configuration +can behave differently across recording conditions, species compositions, and +acoustic environments. + +## Why threshold choice matters + +- Lower detection thresholds increase sensitivity but can increase false + positives. +- Higher thresholds reduce false positives but can miss faint calls. + +No threshold is universally correct. The right setting depends on your survey +objectives and tolerance for false positives versus missed detections. + +## Why local validation is required + +Model performance depends on how similar your data are to training data. +Before ecological interpretation, validate predictions on a representative, +locally reviewed subset. + +Recommended validation checks: + +1. Compare detection counts against expert-reviewed clips. +2. Inspect species-level predictions for plausible confusion patterns. +3. Repeat checks across sites, seasons, and recorder setups. + +For practical threshold workflows, see +{doc}`../how_to/tune-detection-threshold`. diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md new file mode 100644 index 0000000..077a6c8 --- /dev/null +++ b/docs/source/getting_started.md @@ -0,0 +1,83 @@ +# Getting started + +BatDetect2 is both a command line tool (CLI) and a Python library. + +- Use the CLI if you want to run existing models or train your own models from + the terminal. +- Use the Python package if you want to integrate BatDetect2 into your own + scripts, notebooks, or analysis pipeline. 
+ +If you want to try BatDetect2 before installing anything locally: + +- [Hugging Face demo (UK species)](https://huggingface.co/spaces/macaodha/batdetect2) +- [Google Colab notebook](https://colab.research.google.com/github/macaodha/batdetect2/blob/master/batdetect2_notebook.ipynb) + +## Prerequisites + +We recommend `uv` for both workflows. +`uv` is a fast Python package and environment manager that keeps installs +isolated and reproducible. + +- Use `uv tool` to install the CLI. +- Use `uv add` to add `batdetect2` as a dependency in a Python project. + +Install `uv` first by following their +[installation instructions](https://docs.astral.sh/uv/getting-started/installation/). + +## Install the CLI + +The following installs `batdetect2` in an isolated tool environment and exposes +the `batdetect2` command on your machine. + +```bash +uv tool install batdetect2 +``` + +If you need to upgrade later: + +```bash +uv tool upgrade batdetect2 +``` + +Verify the CLI is available: + +```bash +batdetect2 --help +``` + +Run your first workflow: + +Go to {doc}`tutorials/run-inference-on-folder` for a complete first run. + +## Integrate with your Python project + +If you are using BatDetect2 from Python code, add it to your project +dependencies: + +```bash +uv add batdetect2 +``` + +This keeps dependency metadata and the environment in sync. 
+ +### Alternative with `pip` + +If you prefer `pip`, create and activate a virtual environment first: + +```bash +python -m venv .venv +source .venv/bin/activate +``` + +Then install from PyPI: + +```bash +pip install batdetect2 +``` + +## What's next + +- Run your first detection workflow: + {doc}`tutorials/run-inference-on-folder` +- For practical task recipes, go to {doc}`how_to/index` +- For command and option details, go to {doc}`reference/cli` diff --git a/docs/source/how_to/index.md b/docs/source/how_to/index.md new file mode 100644 index 0000000..5425e61 --- /dev/null +++ b/docs/source/how_to/index.md @@ -0,0 +1,15 @@ +# How-to Guides + +How-to guides help you complete specific tasks while working. + +## Who this section is for + +- Ecologists running repeat analyses. +- Python-savvy users integrating BatDetect2 into workflows. + +```{toctree} +:maxdepth: 1 + +run-batch-predictions +tune-detection-threshold +``` diff --git a/docs/source/how_to/run-batch-predictions.md b/docs/source/how_to/run-batch-predictions.md new file mode 100644 index 0000000..7b95080 --- /dev/null +++ b/docs/source/how_to/run-batch-predictions.md @@ -0,0 +1,30 @@ +# How to run batch predictions + +This guide shows practical command patterns for directory-based and file-list +prediction runs. + +## Predict from a directory + +```bash +batdetect2 predict directory \ + path/to/model.ckpt \ + path/to/audio_dir \ + path/to/outputs +``` + +## Predict from a file list + +```bash +batdetect2 predict file_list \ + path/to/model.ckpt \ + path/to/audio_files.txt \ + path/to/outputs +``` + +## Useful options + +- `--batch-size` to control throughput. +- `--workers` to set data-loading parallelism. +- `--format` to select output format. + +For complete option details, see {doc}`../reference/cli`. 
diff --git a/docs/source/how_to/tune-detection-threshold.md b/docs/source/how_to/tune-detection-threshold.md new file mode 100644 index 0000000..b668ec0 --- /dev/null +++ b/docs/source/how_to/tune-detection-threshold.md @@ -0,0 +1,25 @@ +# How to tune the detection threshold + +Use this guide to compare detection outputs at different threshold values. + +## 1) Start with a baseline run + +Run an initial prediction workflow and keep outputs in a dedicated folder. + +## 2) Sweep threshold values + +If you use the legacy `detect` command, run multiple thresholds (for example, +`0.1`, `0.3`, `0.5`) and compare output counts and quality on a validation +subset. + +## 3) Validate against known calls + +Use files with trusted annotations or expert review to select a threshold that +fits your project goals. + +## 4) Record your chosen setting + +Write down the chosen threshold and rationale so analyses are reproducible. + +For conceptual trade-offs, see +{doc}`../explanation/model-output-and-validation`. diff --git a/docs/source/index.md b/docs/source/index.md index 4f677a1..6c7991a 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,15 +1,28 @@ -# batdetect2 documentation +# Home -Hi! +Welcome. + +This documentation is being reorganized using the Diataxis framework. +See the current planning and migration inventory here: + +- {doc}`documentation_plan` ```{toctree} :maxdepth: 1 -:caption: Contents: +:caption: Get Started -architecture -data/index -preprocessing/index -postprocessing -targets/index +getting_started +documentation_plan +tutorials/index +how_to/index reference/index +explanation/index +legacy/index ``` + +## Audience paths + +- Ecologists who want low-code workflows should start with + {doc}`tutorials/index`. +- Python-savvy ecologists and bioacousticians can jump to + {doc}`how_to/index` and {doc}`reference/index`. 
diff --git a/docs/source/legacy/index.md b/docs/source/legacy/index.md new file mode 100644 index 0000000..c2be922 --- /dev/null +++ b/docs/source/legacy/index.md @@ -0,0 +1,14 @@ +# Legacy documentation + +These pages contain existing technical material that predates the Diataxis +reorganization. They remain available during migration. + +```{toctree} +:maxdepth: 1 + +../architecture +../data/index +../preprocessing/index +../postprocessing +../targets/index +``` diff --git a/docs/source/reference/cli.rst b/docs/source/reference/cli.rst new file mode 100644 index 0000000..3e60369 --- /dev/null +++ b/docs/source/reference/cli.rst @@ -0,0 +1,7 @@ +CLI reference +============= + +.. click:: batdetect2.cli:cli + :prog: batdetect2 + :nested: full + diff --git a/docs/source/reference/configs.md b/docs/source/reference/configs.md index 9971c99..476850d 100644 --- a/docs/source/reference/configs.md +++ b/docs/source/reference/configs.md @@ -1,7 +1,7 @@ # Config Reference ```{eval-rst} -.. automodule:: batdetect2.configs +.. automodule:: batdetect2.config :members: :inherited-members: pydantic.BaseModel ``` diff --git a/docs/source/reference/index.md b/docs/source/reference/index.md index 87d0647..9bd30f4 100644 --- a/docs/source/reference/index.md +++ b/docs/source/reference/index.md @@ -1,10 +1,12 @@ # Reference documentation -```{eval-rst} -.. toctree:: - :maxdepth: 1 - :caption: Contents: +Reference pages provide factual, complete descriptions of commands, +configuration, and data structures. 
- configs - targets +```{toctree} +:maxdepth: 1 + +cli +configs +targets ``` diff --git a/docs/source/targets/index.md b/docs/source/targets/index.md index cb5bcaa..71f6957 100644 --- a/docs/source/targets/index.md +++ b/docs/source/targets/index.md @@ -74,5 +74,6 @@ filtering transform classes rois +labels use ``` diff --git a/docs/source/tutorials/index.md b/docs/source/tutorials/index.md new file mode 100644 index 0000000..aaadf3d --- /dev/null +++ b/docs/source/tutorials/index.md @@ -0,0 +1,15 @@ +# Tutorials + +Tutorials are for learning by doing. They provide a single, reproducible path +to a concrete outcome. + +## Who this section is for + +- Ecologists who want practical workflows with minimal coding. +- New users who want to build confidence before customization. + +```{toctree} +:maxdepth: 1 + +run-inference-on-folder +``` diff --git a/docs/source/tutorials/run-inference-on-folder.md b/docs/source/tutorials/run-inference-on-folder.md new file mode 100644 index 0000000..838626c --- /dev/null +++ b/docs/source/tutorials/run-inference-on-folder.md @@ -0,0 +1,31 @@ +# Tutorial: Run inference on a folder of audio files + +## Prerequisites + +- BatDetect2 installed in your environment. +- A folder containing `.wav` files. +- A model checkpoint path. + +## Steps + +1. Choose your input and output directories. +2. Run prediction with the CLI. +3. Verify output files were written. +4. Inspect predictions and confidence scores. + +## Example command + +```bash +batdetect2 predict directory \ + path/to/model.ckpt \ + path/to/audio_dir \ + path/to/outputs +``` + +## What to do next + +- Use {doc}`../how_to/tune-detection-threshold` to tune sensitivity. +- Use {doc}`../reference/cli` for full command options. + +Note: this is the initial Phase 1 scaffold and will be expanded with a full, +validated end-to-end walkthrough. 
diff --git a/pyproject.toml b/pyproject.toml index 08066ed..84e10c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dev = [ "numpydoc>=1.8.0", "sphinx-autodoc-typehints>=2.3.0", "sphinx-book-theme>=1.1.4", + "sphinx-click>=6.1.0", "autodoc-pydantic>=2.2.0", "pytest-cov>=6.1.1", "ty>=0.0.1a12", diff --git a/src/batdetect2/cli/data.py b/src/batdetect2/cli/data.py index 51b7e4a..31dd7b3 100644 --- a/src/batdetect2/cli/data.py +++ b/src/batdetect2/cli/data.py @@ -8,7 +8,8 @@ __all__ = ["data"] @cli.group() -def data(): ... +def data(): + """Inspect and convert dataset configuration files.""" @data.command() @@ -37,6 +38,7 @@ def summary( targets_path: Path | None = None, base_dir: Path | None = None, ): + """Show annotation counts and optional class summary.""" from batdetect2.data import compute_class_summary, load_dataset_from_config from batdetect2.targets import load_targets diff --git a/src/batdetect2/cli/evaluate.py b/src/batdetect2/cli/evaluate.py index 6635d75..c1dbd1f 100644 --- a/src/batdetect2/cli/evaluate.py +++ b/src/batdetect2/cli/evaluate.py @@ -40,6 +40,7 @@ def evaluate_command( experiment_name: str | None = None, run_name: str | None = None, ): + """Evaluate a checkpoint against a configured test dataset.""" from batdetect2.api_v2 import BatDetect2API from batdetect2.audio import AudioConfig from batdetect2.data import load_dataset_from_config diff --git a/src/batdetect2/cli/inference.py b/src/batdetect2/cli/inference.py index 9ce28bb..26a770d 100644 --- a/src/batdetect2/cli/inference.py +++ b/src/batdetect2/cli/inference.py @@ -12,7 +12,7 @@ __all__ = ["predict"] @cli.group(name="predict") def predict() -> None: - """Run prediction with BatDetect2 API v2.""" + """Run model inference on audio using API v2.""" def _build_api( @@ -126,6 +126,7 @@ def predict_directory_command( num_workers: int, format_name: str | None, ) -> None: + """Predict on all audio files in a directory.""" audio_files = list(get_audio_files(audio_dir)) 
_run_prediction( model_path=model_path, @@ -164,6 +165,7 @@ def predict_file_list_command( num_workers: int, format_name: str | None, ) -> None: + """Predict on audio files listed in a text file.""" file_list = Path(file_list) audio_files = [ Path(line.strip()) @@ -208,6 +210,7 @@ def predict_dataset_command( num_workers: int, format_name: str | None, ) -> None: + """Predict on recordings referenced in an annotation dataset.""" dataset_path = Path(dataset_path) dataset = io.load(dataset_path, type="annotation_set") audio_files = sorted( diff --git a/src/batdetect2/cli/train.py b/src/batdetect2/cli/train.py index 543af20..0f630d7 100644 --- a/src/batdetect2/cli/train.py +++ b/src/batdetect2/cli/train.py @@ -49,6 +49,7 @@ def train_command( experiment_name: str | None = None, run_name: str | None = None, ): + """Train a model from dataset configs or a checkpoint.""" from batdetect2.api_v2 import BatDetect2API from batdetect2.audio import AudioConfig from batdetect2.config import BatDetect2Config