ci: add GitHub workflows and release helpers

2026-05-22 22:32:18 +02:00 · 2026-05-06 17:22:18 +01:00 · 2026-05-06 17:22:18 +01:00 · ce6975770e
commit ce6975770e
parent 69d8e2d228
15 changed files with 293 additions and 88 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@ -3,6 +3,8 @@ current_version = 1.1.1
 commit = True
 tag = True
-[bumpversion:file:batdetect2/__init__.py]
+[bumpversion:file:src/batdetect2/__init__.py]
 [bumpversion:file:pyproject.toml]
 [bumpversion:file:docs/source/conf.py]
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -0,0 +1,79 @@
 # Continuous integration: formatting/lint/type checks on Python 3.12, plus
 # the test suite on every Python version in the matrix below.
 name: CI
 on:
  pull_request:
  push:
    branches:
      - main
 # Least privilege for GITHUB_TOKEN: the steps below check out the repository
 # and run local `just` recipes, so read access to contents is sufficient.
 permissions:
  contents: read
 # One active run per workflow and ref; a newer run cancels the older one.
 concurrency:
  group: ci-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Static checks run once, on a single Python version.
  checks:
    name: Checks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          # Invalidate the uv cache whenever either dependency file changes.
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Run formatting, lint, and type checks
        run: just check
  # The test suite runs once per Python version in the matrix.
  tests:
    name: Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Let every matrix entry finish even if one of them fails.
      fail-fast: false
      matrix:
        # Versions are quoted so "3.10" is not parsed as the float 3.1.
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Run test suite
        run: just test
--- a/.github/workflows/docs-pages.yml
+++ b/.github/workflows/docs-pages.yml
@ -0,0 +1,69 @@
 # Build the documentation and deploy it to GitHub Pages on every push to
 # main, or on manual dispatch.
 name: Docs Pages
 on:
  push:
    branches:
      - main
  workflow_dispatch:
 # Workflow-wide default; the deploy job declares its own permissions below.
 permissions:
  contents: read
 # Allow only one deployment at a time. Runs queued behind the active one are
 # superseded by newer ones, but a deployment that is already in progress is
 # allowed to finish rather than being cancelled part-way through.
 concurrency:
  group: docs-pages
  cancel-in-progress: false
 jobs:
  build:
    name: Build Docs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Configure GitHub Pages
        uses: actions/configure-pages@v5
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          # Invalidate the uv cache whenever either dependency file changes.
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Build docs
        run: just check-docs
      # Package the built site so the deploy job can publish it.
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v4
        with:
          path: docs/build
  deploy:
    name: Deploy Docs
    needs: build
    runs-on: ubuntu-latest
    # Job-level permissions replace the workflow default for this job.
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      # Published site URL, taken from the output of the `deployment` step.
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@ -0,0 +1,70 @@
 # Build the distributions and upload them to PyPI whenever a GitHub release
 # is published.
 name: Publish PyPI
 on:
  release:
    types:
      - published
 # Workflow-wide default; the publish job declares its own permissions below.
 permissions:
  contents: read
 # Serialise publishes and never cancel one that is already running.
 concurrency:
  group: publish-pypi
  cancel-in-progress: false
 jobs:
  build:
    name: Build Distributions
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          # Invalidate the uv cache whenever either dependency file changes.
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Build distributions
        run: just build-dist
      - name: Upload distributions
        uses: actions/upload-artifact@v4
        with:
          name: release-dists
          path: dist/
          # Fail here if the build produced nothing, instead of only warning
          # and letting the publish job fail later on a missing artifact.
          if-no-files-found: error
  publish:
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
    # Job-level permissions replace the workflow default for this job. No API
    # token is passed to the publish step, so it relies on the OIDC identity
    # token (PyPI trusted publishing) that `id-token: write` makes available.
    permissions:
      id-token: write
    environment:
      name: pypi
      url: https://pypi.org/p/batdetect2
    steps:
      # Must use the same artifact name as the upload step in the build job.
      - name: Download distributions
        uses: actions/download-artifact@v5
        with:
          name: release-dists
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@ -1,29 +0,0 @@
 name: Python package
 on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
 jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"
      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Test with pytest
        run: uv run pytest
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@ -1,30 +0,0 @@
 name: Upload Python Package
 on:
  release:
    types: [published]
 permissions:
  contents: read
 jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@ -50,6 +50,7 @@ cover/
 # Sphinx documentation
 docs/_build/
 docs/build/
 # PyBuilder
 .pybuilder/
--- a/docs/source/how_to/choose-and-configure-evaluation-tasks.md
+++ b/docs/source/how_to/choose-and-configure-evaluation-tasks.md
@ -1,6 +1,7 @@
 # How to choose and configure evaluation tasks
-Use this guide when the default evaluation tasks do not match the question you want to answer.
+Use this guide when the default evaluation tasks do not match the question you
 want to answer.
 ## Know the default first
@ -24,8 +25,10 @@ Common built-in task families include:
 Choose based on the question you care about.
 - Use sound-event tasks when you care about individual call events.
- Use clip tasks when you care about clip-level presence or clip-level class evidence.
+- Use clip tasks when you care about clip-level presence or clip-level class
- Use top-class detection when you want matching based on the highest-scoring class per detection.
+  evidence.
 - Use top-class detection when you want matching based on the highest-scoring
  class per detection.
 ## Configure tasks in `EvaluationConfig`
@ -45,22 +48,27 @@ Pass the config with:
 ```bash
 batdetect2 evaluate \
  path/to/model.ckpt \
  path/to/test_dataset.yaml \
  --model path/to/model.ckpt \
  --base-dir path/to/project_root \
  --evaluation-config path/to/evaluation.yaml
 ```
-Include `--base-dir` when the dataset config resolves recordings through relative paths.
+Include `--base-dir` when the dataset config resolves recordings through
 relative paths.
 ## Change one thing at a time
-When comparing models or settings, avoid changing task definitions, thresholds, matching behavior, and datasets all at once.
+When comparing models or settings, avoid changing task definitions, thresholds,
 matching behavior, and datasets all at once.
 Otherwise it becomes hard to explain why the metric changed.
 ## Related pages
- Evaluation tutorial: {doc}`../tutorials/evaluate-on-a-test-set`
+- Evaluation tutorial:
- Evaluation config reference: {doc}`../reference/evaluation-config`
+  {doc}`../tutorials/evaluate-on-a-test-set`
- Evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
+- Evaluation config reference:
  {doc}`../reference/evaluation-config`
 - Evaluation concepts:
  {doc}`../explanation/evaluation-concepts-and-matching`
--- a/docs/source/reference/cli/index.md
+++ b/docs/source/reference/cli/index.md
@ -11,7 +11,7 @@ for the full option list.
 | `batdetect2 data` | Inspect and convert dataset configs | Depends on subcommand (`summary`, `convert`) |
 | `batdetect2 train` | Train or fine-tune models | `TRAIN_DATASET` |
 | `batdetect2 finetune` | Fine-tune a checkpoint on new targets | `TRAIN_DATASET` plus `--targets` |
-| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `MODEL_PATH`, `TEST_DATASET` |
+| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `TEST_DATASET` |
 | `batdetect2 detect` | Legacy compatibility workflow | `AUDIO_DIR`, `ANN_DIR`, `DETECTION_THRESHOLD` |
 ## Notes
@ -20,6 +20,8 @@ for the full option list.
 - Paths with spaces should be wrapped in quotes.
 - Input audio is expected to be mono.
 - `process` uses the optional `--detection-threshold` override.
 - `evaluate` takes `TEST_DATASET` as a positional argument and uses `--model`
  for the checkpoint override.
 - `finetune` defaults to the bundled `uk_same` checkpoint if `--model` is not
  provided.
--- a/docs/source/tutorials/evaluate-on-a-test-set.md
+++ b/docs/source/tutorials/evaluate-on-a-test-set.md
@ -3,7 +3,8 @@
 This tutorial shows how to evaluate a trained checkpoint on a held-out dataset
 and inspect the output metrics.
-This tutorial is for advanced users who want to compare one trained model against a separate test dataset.
+This tutorial is for advanced users who want to compare one trained model
 against a separate test dataset.
 ## Before you start
@ -32,22 +33,22 @@ Use a dataset that was not used for training or tuning.
 A held-out dataset is simply a separate dataset kept aside for evaluation.
-If you tune thresholds or configs on the same dataset that you report as final evaluation, the results will be optimistic.
+If you tune thresholds or configs on the same dataset that you report as final
 evaluation, the results will be optimistic.
 ## 2. Run evaluation
 ```bash
 batdetect2 evaluate \
  path/to/model.ckpt \
  path/to/test_dataset.yaml \
  --model path/to/model.ckpt \
  --base-dir path/to/project_root \
  --output-dir path/to/eval_outputs
 ```
-This command loads the checkpoint,
+This command loads the checkpoint, runs prediction on the test dataset, applies
-runs prediction on the test dataset,
+the chosen evaluation tasks, and writes metrics and result files to the output
-applies the chosen evaluation tasks,
+directory.
 and writes metrics and result files to the output directory.
 Use `--base-dir` whenever the dataset config contains relative paths.
@ -73,7 +74,8 @@ Check:
 - which task the metric belongs to,
 - which thresholding or matching assumptions were used,
 - whether class-level behavior matches your use case,
- whether the failures are concentrated in specific taxa, sites, or recording conditions.
+- whether the failures are concentrated in specific taxa, sites, or recording
  conditions.
 ## 5. Record the evaluation setup
@ -85,7 +87,11 @@ That matters for reproducibility and for later model comparisons.
 - Compare thresholds on representative files:
  {doc}`../how_to/tune-detection-threshold`
- Configure evaluation tasks: {doc}`../how_to/choose-and-configure-evaluation-tasks`
+- Configure evaluation tasks:
- Interpret evaluation artifacts: {doc}`../how_to/interpret-evaluation-outputs`
+  {doc}`../how_to/choose-and-configure-evaluation-tasks`
- Learn the evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching`
+- Interpret evaluation artifacts:
- Check full evaluate options: {doc}`../reference/cli/evaluate`
+  {doc}`../how_to/interpret-evaluation-outputs`
 - Learn the evaluation concepts:
  {doc}`../explanation/evaluation-concepts-and-matching`
 - Check full evaluate options:
  {doc}`../reference/cli/evaluate`
--- a/26
+++ b/26
@ -17,6 +17,10 @@ help:
 install:
    uv sync
 # Install full development dependencies for CI and docs builds.
 install-dev:
    uv sync --all-extras --dev
 # Testing & Coverage
 # Run tests using pytest.
 test:
@ -50,6 +54,9 @@ coverage-serve: coverage-html
 docs:
    uv run sphinx-build -b html {{DOCS_SOURCE}} {{DOCS_BUILD}}
 # Check that documentation builds successfully.
 check-docs: docs
 # Serve documentation with live reload.
 docs-serve:
    uv run sphinx-autobuild {{DOCS_SOURCE}} {{DOCS_BUILD}} --watch {{SOURCE_DIR}} --open-browser
@ -84,6 +91,25 @@ check-types:
 # Run all checks (format-check, lint, typecheck).
 check: check-format check-lint check-types
 # Run the standard CI validation sequence.
 ci: check test
 # Build source and wheel distributions.
 build-dist:
    uv run --with build python -m build
 # Bump the patch version, commit, and tag.
 bump-patch:
    uvx bump2version patch
 # Bump the minor version, commit, and tag.
 bump-minor:
    uvx bump2version minor
 # Bump the major version, commit, and tag.
 bump-major:
    uvx bump2version major
 # Cleaning tasks
 # Remove Python bytecode and cache.
 clean-pyc:
--- a/src/batdetect2/train/logging.py
+++ b/src/batdetect2/train/logging.py
@ -1,10 +1,9 @@
 from __future__ import annotations
 from typing import Any
 from collections.abc import Sequence
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 import pandas as pd
 from lightning.pytorch.loggers import Logger
--- a/tests/test_cli/test_base.py
+++ b/tests/test_cli/test_base.py
@ -11,7 +11,7 @@ def test_cli_base_help_lists_main_commands() -> None:
    result = CliRunner().invoke(cli, ["--help"])
    assert result.exit_code == 0
-    assert "predict" in result.output
+    assert "process" in result.output
    assert "train" in result.output
    assert "evaluate" in result.output
    assert "data" in result.output
--- a/tests/test_cli/test_evaluate.py
+++ b/tests/test_cli/test_evaluate.py
@ -15,8 +15,8 @@ def test_cli_evaluate_help() -> None:
    result = CliRunner().invoke(cli, ["evaluate", "--help"])
    assert result.exit_code == 0
    assert "MODEL_PATH" in result.output
    assert "TEST_DATASET" in result.output
    assert "--model" in result.output
    assert "--evaluation-config" in result.output
@ -32,8 +32,9 @@ def test_cli_evaluate_writes_metrics_for_small_dataset(
        cli,
        [
            "evaluate",
            str(tiny_checkpoint_path),
            str(BASE_DIR / "example_data" / "dataset.yaml"),
            "--model",
            str(tiny_checkpoint_path),
            "--base-dir",
            str(BASE_DIR),
            "--workers",
--- a/tests/test_models/test_detectors.py
+++ b/tests/test_models/test_detectors.py
@ -1,7 +1,8 @@
 from typing import cast
 import numpy as np
 import pytest
 import torch
 from typing import cast
 from batdetect2.models import UNetBackbone
 from batdetect2.models.backbones import UNetBackboneConfig