From ce6975770e64bd11a26077e89d9c2e4a943f71ff Mon Sep 17 00:00:00 2001
From: mbsantiago <santiago.mbal@gmail.com>
Date: Wed, 6 May 2026 17:22:18 +0100
Subject: [PATCH] ci: add GitHub workflows and release helpers

---
 .bumpversion.cfg                              |  4 +-
 .github/workflows/ci.yml                      | 79 +++++++++++++++++++
 .github/workflows/docs-pages.yml              | 69 ++++++++++++++++
 .github/workflows/publish-pypi.yml            | 70 ++++++++++++++++
 .github/workflows/python-package.yml          | 29 -------
 .github/workflows/python-publish.yml          | 30 -------
 .gitignore                                    |  1 +
 .../choose-and-configure-evaluation-tasks.md  | 26 +++---
 docs/source/reference/cli/index.md            |  4 +-
 .../tutorials/evaluate-on-a-test-set.md       | 30 ++++---
 justfile                                      | 26 ++++++
 src/batdetect2/train/logging.py               |  3 +-
 tests/test_cli/test_base.py                   |  2 +-
 tests/test_cli/test_evaluate.py               |  5 +-
 tests/test_models/test_detectors.py           |  3 +-
 15 files changed, 293 insertions(+), 88 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/docs-pages.yml
 create mode 100644 .github/workflows/publish-pypi.yml
 delete mode 100644 .github/workflows/python-package.yml
 delete mode 100644 .github/workflows/python-publish.yml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 5ce1035..e1e455d 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -3,6 +3,8 @@ current_version = 1.1.1
 commit = True
 tag = True
 
-[bumpversion:file:batdetect2/__init__.py]
+[bumpversion:file:src/batdetect2/__init__.py]
 
 [bumpversion:file:pyproject.toml]
+
+[bumpversion:file:docs/source/conf.py]
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..ed9c61d
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,79 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checks:
+    name: Checks
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: just install-dev
+
+      - name: Run formatting, lint, and type checks
+        run: just check
+
+  tests:
+    name: Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: just install-dev
+
+      - name: Run test suite
+        run: just test
diff --git a/.github/workflows/docs-pages.yml b/.github/workflows/docs-pages.yml
new file mode 100644
index 0000000..dfefb43
--- /dev/null
+++ b/.github/workflows/docs-pages.yml
@@ -0,0 +1,69 @@
+name: Docs Pages
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
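+# actions/configure-pages (build job) queries the Pages API, which needs pages: read.
+permissions: { contents: read, pages: read }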
+
+concurrency:
+  group: docs-pages
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Build Docs
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Configure GitHub Pages
+        uses: actions/configure-pages@v5
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: just install-dev
+
+      - name: Build docs
+        run: just check-docs
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: docs/build
+
+  deploy:
+    name: Deploy Docs
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
new file mode 100644
index 0000000..8dea627
--- /dev/null
+++ b/.github/workflows/publish-pypi.yml
@@ -0,0 +1,70 @@
+name: Publish PyPI
+
+on:
+  release:
+    types:
+      - published
+
+permissions:
+  contents: read
+
+concurrency:
+  group: publish-pypi
+  cancel-in-progress: false
+
+jobs:
+  build:
+    name: Build Distributions
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install just
+        uses: taiki-e/install-action@just
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies
+        run: just install-dev
+
+      - name: Build distributions
+        run: just build-dist
+
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+
+  publish:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+    environment:
+      name: pypi
+      url: https://pypi.org/p/batdetect2
+
+    steps:
+      - name: Download distributions
+        uses: actions/download-artifact@v5
+        with:
+          name: release-dists
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
deleted file mode 100644
index 5017ed4..0000000
--- a/.github/workflows/python-package.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Python package
-
-on:
-  push:
-    branches: ["main"]
-  pull_request:
-    branches: ["main"]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install uv
-        uses: astral-sh/setup-uv@v3
-        with:
-          enable-cache: true
-          cache-dependency-glob: "uv.lock"
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
-      - name: Install the project
-        run: uv sync --all-extras --dev
-      - name: Test with pytest
-        run: uv run pytest
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
deleted file mode 100644
index 2d1f98f..0000000
--- a/.github/workflows/python-publish.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: Upload Python Package
-
-on:
-  release:
-    types: [published]
-
-permissions:
-  contents: read
-
-jobs:
-  deploy:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v3
-        with:
-          python-version: "3.x"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-      - name: Build package
-        run: python -m build
-      - name: Publish package
-        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-        with:
-          user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 9cc849d..54d53bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ cover/
 
 # Sphinx documentation
 docs/_build/
+docs/build/
 
 # PyBuilder
 .pybuilder/
diff --git a/docs/source/how_to/choose-and-configure-evaluation-tasks.md b/docs/source/how_to/choose-and-configure-evaluation-tasks.md
index 17dd7af..17a818e 100644
--- a/docs/source/how_to/choose-and-configure-evaluation-tasks.md
+++ b/docs/source/how_to/choose-and-configure-evaluation-tasks.md
@@ -1,6 +1,7 @@
 # How to choose and configure evaluation tasks
 
-Use this guide when the default evaluation tasks do not match the question you want to answer.
+Use this guide when the default evaluation tasks do not match the question you
+want to answer.
 
 ## Know the default first
 
@@ -24,8 +25,10 @@ Common built-in task families include:
 Choose based on the question you care about.
 
 - Use sound-event tasks when you care about individual call events.
-- Use clip tasks when you care about clip-level presence or clip-level class evidence.
-- Use top-class detection when you want matching based on the highest-scoring class per detection.
+- Use clip tasks when you care about clip-level presence or clip-level class
+  evidence.
+- Use top-class detection when you want matching based on the highest-scoring
+  class per detection.
 
 ## Configure tasks in `EvaluationConfig`
 
@@ -45,22 +48,27 @@ Pass the config with:
 
 ```bash
 batdetect2 evaluate \
-  path/to/model.ckpt \
   path/to/test_dataset.yaml \
+  --model path/to/model.ckpt \
   --base-dir path/to/project_root \
   --evaluation-config path/to/evaluation.yaml
 ```
 
-Include `--base-dir` when the dataset config resolves recordings through relative paths.
+Include `--base-dir` when the dataset config resolves recordings through
+relative paths.
 
 ## Change one thing at a time
 
-When comparing models or settings, avoid changing task definitions, thresholds, matching behavior, and datasets all at once.
+When comparing models or settings, avoid changing task definitions, thresholds,
+matching behavior, and datasets all at once.
 
 Otherwise it becomes hard to explain why the metric changed.
 
 ## Related pages
 
-- Evaluation tutorial: {doc}`../tutorials/evaluate-on-a-test-set`
-- Evaluation config reference: {doc}`../reference/evaluation-config`
-- Evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
+- Evaluation tutorial:
+  {doc}`../tutorials/evaluate-on-a-test-set`
+- Evaluation config reference:
+  {doc}`../reference/evaluation-config`
+- Evaluation concepts:
+  {doc}`../explanation/evaluation-concepts-and-matching`
diff --git a/docs/source/reference/cli/index.md b/docs/source/reference/cli/index.md
index 1ca9064..5d25211 100644
--- a/docs/source/reference/cli/index.md
+++ b/docs/source/reference/cli/index.md
@@ -11,7 +11,7 @@ for the full option list.
 | `batdetect2 data` | Inspect and convert dataset configs | Depends on subcommand (`summary`, `convert`) |
 | `batdetect2 train` | Train or fine-tune models | `TRAIN_DATASET` |
 | `batdetect2 finetune` | Fine-tune a checkpoint on new targets | `TRAIN_DATASET` plus `--targets` |
-| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `MODEL_PATH`, `TEST_DATASET` |
+| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `TEST_DATASET` |
 | `batdetect2 detect` | Legacy compatibility workflow | `AUDIO_DIR`, `ANN_DIR`, `DETECTION_THRESHOLD` |
 
 ## Notes
@@ -20,6 +20,8 @@ for the full option list.
 - Paths with spaces should be wrapped in quotes.
 - Input audio is expected to be mono.
 - `process` uses the optional `--detection-threshold` override.
+- `evaluate` takes `TEST_DATASET` as a positional argument and uses `--model`
+  for the checkpoint override.
 - `finetune` defaults to the bundled `uk_same` checkpoint if `--model` is not
   provided.
 
diff --git a/docs/source/tutorials/evaluate-on-a-test-set.md b/docs/source/tutorials/evaluate-on-a-test-set.md
index a4bce6a..6f8b2ed 100644
--- a/docs/source/tutorials/evaluate-on-a-test-set.md
+++ b/docs/source/tutorials/evaluate-on-a-test-set.md
@@ -3,7 +3,8 @@
 This tutorial shows how to evaluate a trained checkpoint on a held-out dataset
 and inspect the output metrics.
 
-This tutorial is for advanced users who want to compare one trained model against a separate test dataset.
+This tutorial is for advanced users who want to compare one trained model
+against a separate test dataset.
 
 ## Before you start
 
@@ -32,22 +33,22 @@ Use a dataset that was not used for training or tuning.
 
 A held-out dataset is simply a separate dataset kept aside for evaluation.
 
-If you tune thresholds or configs on the same dataset that you report as final evaluation, the results will be optimistic.
+If you tune thresholds or configs on the same dataset that you report as final
+evaluation, the results will be optimistic.
 
 ## 2. Run evaluation
 
 ```bash
 batdetect2 evaluate \
-  path/to/model.ckpt \
   path/to/test_dataset.yaml \
+  --model path/to/model.ckpt \
   --base-dir path/to/project_root \
   --output-dir path/to/eval_outputs
 ```
 
-This command loads the checkpoint,
-runs prediction on the test dataset,
-applies the chosen evaluation tasks,
-and writes metrics and result files to the output directory.
+This command loads the checkpoint, runs prediction on the test dataset, applies
+the chosen evaluation tasks, and writes metrics and result files to the output
+directory.
 
 Use `--base-dir` whenever the dataset config contains relative paths.
 
@@ -73,7 +74,8 @@ Check:
 - which task the metric belongs to,
 - which thresholding or matching assumptions were used,
 - whether class-level behavior matches your use case,
-- whether the failures are concentrated in specific taxa, sites, or recording conditions.
+- whether the failures are concentrated in specific taxa, sites, or recording
+  conditions.
 
 ## 5. Record the evaluation setup
 
@@ -85,7 +87,11 @@ That matters for reproducibility and for later model comparisons.
 
 - Compare thresholds on representative files:
   {doc}`../how_to/tune-detection-threshold`
-- Configure evaluation tasks: {doc}`../how_to/choose-and-configure-evaluation-tasks`
-- Interpret evaluation artifacts: {doc}`../how_to/interpret-evaluation-outputs`
-- Learn the evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
-- Check full evaluate options: {doc}`../reference/cli/evaluate`
+- Configure evaluation tasks:
+  {doc}`../how_to/choose-and-configure-evaluation-tasks`
+- Interpret evaluation artifacts:
+  {doc}`../how_to/interpret-evaluation-outputs`
+- Learn the evaluation concepts:
+  {doc}`../explanation/evaluation-concepts-and-matching`
+- Check full evaluate options:
+  {doc}`../reference/cli/evaluate`
diff --git a/justfile b/justfile
index 0cb576e..8e889f1 100644
--- a/justfile
+++ b/justfile
@@ -17,6 +17,10 @@ help:
 install:
     uv sync
 
+# Install full development dependencies for CI and docs builds.
+install-dev:
+    uv sync --all-extras --dev
+
 # Testing & Coverage
 # Run tests using pytest.
 test:
@@ -50,6 +54,9 @@ coverage-serve: coverage-html
 docs:
     uv run sphinx-build -b html {{DOCS_SOURCE}} {{DOCS_BUILD}}
 
+# Check that documentation builds successfully.
+check-docs: docs
+
 # Serve documentation with live reload.
 docs-serve:
     uv run sphinx-autobuild {{DOCS_SOURCE}} {{DOCS_BUILD}} --watch {{SOURCE_DIR}} --open-browser
@@ -84,6 +91,25 @@ check-types:
 # Run all checks (format-check, lint, typecheck).
 check: check-format check-lint check-types
 
+# Run the standard CI validation sequence.
+ci: check test
+
+# Build source and wheel distributions.
+build-dist:
+    uv run --with build python -m build
+
+# Bump the patch version, commit, and tag.
+bump-patch:
+    uvx bump2version patch
+
+# Bump the minor version, commit, and tag.
+bump-minor:
+    uvx bump2version minor
+
+# Bump the major version, commit, and tag.
+bump-major:
+    uvx bump2version major
+
 # Cleaning tasks
 # Remove Python bytecode and cache.
 clean-pyc:
diff --git a/src/batdetect2/train/logging.py b/src/batdetect2/train/logging.py
index 7e6377f..0afad6c 100644
--- a/src/batdetect2/train/logging.py
+++ b/src/batdetect2/train/logging.py
@@ -1,10 +1,9 @@
 from __future__ import annotations
 
-from typing import Any
-
 from collections.abc import Sequence
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 
 import pandas as pd
 from lightning.pytorch.loggers import Logger
diff --git a/tests/test_cli/test_base.py b/tests/test_cli/test_base.py
index 95782d4..a96a600 100644
--- a/tests/test_cli/test_base.py
+++ b/tests/test_cli/test_base.py
@@ -11,7 +11,7 @@ def test_cli_base_help_lists_main_commands() -> None:
     result = CliRunner().invoke(cli, ["--help"])
 
     assert result.exit_code == 0
-    assert "predict" in result.output
+    assert "process" in result.output
     assert "train" in result.output
     assert "evaluate" in result.output
     assert "data" in result.output
diff --git a/tests/test_cli/test_evaluate.py b/tests/test_cli/test_evaluate.py
index 8eb282d..2cd1eb0 100644
--- a/tests/test_cli/test_evaluate.py
+++ b/tests/test_cli/test_evaluate.py
@@ -15,8 +15,8 @@ def test_cli_evaluate_help() -> None:
     result = CliRunner().invoke(cli, ["evaluate", "--help"])
 
     assert result.exit_code == 0
-    assert "MODEL_PATH" in result.output
     assert "TEST_DATASET" in result.output
+    assert "--model" in result.output
     assert "--evaluation-config" in result.output
 
 
@@ -32,8 +32,9 @@ def test_cli_evaluate_writes_metrics_for_small_dataset(
         cli,
         [
             "evaluate",
-            str(tiny_checkpoint_path),
             str(BASE_DIR / "example_data" / "dataset.yaml"),
+            "--model",
+            str(tiny_checkpoint_path),
             "--base-dir",
             str(BASE_DIR),
             "--workers",
diff --git a/tests/test_models/test_detectors.py b/tests/test_models/test_detectors.py
index 35d39a9..129d705 100644
--- a/tests/test_models/test_detectors.py
+++ b/tests/test_models/test_detectors.py
@@ -1,7 +1,8 @@
+from typing import cast
+
 import numpy as np
 import pytest
 import torch
-from typing import cast
 
 from batdetect2.models import UNetBackbone
 from batdetect2.models.backbones import UNetBackboneConfig