ci: add GitHub workflows and release helpers

This commit is contained in:
mbsantiago 2026-05-06 17:22:18 +01:00
parent 69d8e2d228
commit ce6975770e
15 changed files with 293 additions and 88 deletions

View File

@ -3,6 +3,8 @@ current_version = 1.1.1
commit = True commit = True
tag = True tag = True
[bumpversion:file:batdetect2/__init__.py] [bumpversion:file:src/batdetect2/__init__.py]
[bumpversion:file:pyproject.toml] [bumpversion:file:pyproject.toml]
[bumpversion:file:docs/source/conf.py]

79
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,79 @@
# Continuous integration: formatting, lint and type checks plus the test
# suite, run for every pull request and for pushes to main.
name: CI
on:
  pull_request:
  push:
    branches:
      - main
# Least privilege: no job here needs more than read access to the repository.
permissions:
  contents: read
# A newer run for the same workflow and ref supersedes the one in progress.
concurrency:
  group: ci-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  checks:
    name: Checks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          # The cache is invalidated whenever either dependency file changes.
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Run formatting, lint, and type checks
        run: just check
  tests:
    name: Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Let every Python version report its own result.
      fail-fast: false
      matrix:
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
    env:
      # Pin uv to the matrix interpreter so that uv's own interpreter
      # discovery cannot select a different Python for this job.
      UV_PYTHON: ${{ matrix.python-version }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Run test suite
        run: just test

69
.github/workflows/docs-pages.yml vendored Normal file
View File

@ -0,0 +1,69 @@
# Builds the documentation and publishes it to GitHub Pages on every push to
# main, or on manual dispatch.
name: Docs Pages
on:
  push:
    branches:
      - main
  workflow_dispatch:
# Default token scope; the deploy job widens it only as far as Pages requires.
permissions:
  contents: read
# Only one Pages run at a time. In-progress runs are NOT cancelled so that a
# deployment that has already started is allowed to complete.
concurrency:
  group: docs-pages
  cancel-in-progress: false
jobs:
  build:
    name: Build Docs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Configure GitHub Pages
        uses: actions/configure-pages@v5
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Build docs
        run: just check-docs
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v4
        with:
          path: docs/build
  deploy:
    name: Deploy Docs
    needs: build
    runs-on: ubuntu-latest
    # Job-level permissions replace the workflow default for this job.
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

70
.github/workflows/publish-pypi.yml vendored Normal file
View File

@ -0,0 +1,70 @@
# Release pipeline: build the distributions when a GitHub release is
# published, then hand them to a separate job that uploads them to PyPI.
name: Publish PyPI
on:
  release:
    types: [published]
permissions:
  contents: read
# Publishing runs are serialised and never cancelled part-way through.
concurrency:
  group: publish-pypi
  cancel-in-progress: false
jobs:
  # Stage 1: produce the distributions and store them as an artifact.
  build:
    name: Build Distributions
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install just
        uses: taiki-e/install-action@just
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          cache-dependency-glob: |
            pyproject.toml
            uv.lock
      - name: Install dependencies
        run: just install-dev
      - name: Build distributions
        run: just build-dist
      - name: Upload distributions
        uses: actions/upload-artifact@v4
        with:
          name: release-dists
          path: dist/
  # Stage 2: fetch the stored artifact and publish it. This job requests only
  # the id-token permission and passes no password to the publish action.
  publish:
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
    permissions:
      id-token: write
    environment:
      name: pypi
      url: https://pypi.org/p/batdetect2
    steps:
      - name: Download distributions
        uses: actions/download-artifact@v5
        with:
          name: release-dists
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

View File

@ -1,29 +0,0 @@
# Test workflow: runs pytest on each listed Python version for pushes and
# pull requests that target main.
name: Python package
on:
  push:
    branches:
      - "main"
  pull_request:
    branches:
      - "main"
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # Keep the remaining matrix entries running when one version fails.
      fail-fast: false
      matrix:
        python-version:
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"
      # The interpreter is provisioned by uv itself rather than setup-python.
      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Test with pytest
        run: uv run pytest

View File

@ -1,30 +0,0 @@
# Release workflow: builds the package with `python -m build` and uploads it
# to PyPI using an API token whenever a GitHub release is published.
name: Upload Python Package
on:
  release:
    types:
      - published
permissions:
  contents: read
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      # The publish action is pinned to a full commit SHA.
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}

1
.gitignore vendored
View File

@ -50,6 +50,7 @@ cover/
# Sphinx documentation # Sphinx documentation
docs/_build/ docs/_build/
docs/build/
# PyBuilder # PyBuilder
.pybuilder/ .pybuilder/

View File

@ -1,6 +1,7 @@
# How to choose and configure evaluation tasks
Use this guide when the default evaluation tasks do not match the question you
want to answer.
## Know the default first
@ -24,8 +25,10 @@ Common built-in task families include:
Choose based on the question you care about. Choose based on the question you care about.
- Use sound-event tasks when you care about individual call events. - Use sound-event tasks when you care about individual call events.
- Use clip tasks when you care about clip-level presence or clip-level class evidence. - Use clip tasks when you care about clip-level presence or clip-level class
- Use top-class detection when you want matching based on the highest-scoring class per detection. evidence.
- Use top-class detection when you want matching based on the highest-scoring
class per detection.
## Configure tasks in `EvaluationConfig` ## Configure tasks in `EvaluationConfig`
@ -45,22 +48,27 @@ Pass the config with:
```bash ```bash
batdetect2 evaluate \ batdetect2 evaluate \
path/to/model.ckpt \
path/to/test_dataset.yaml \ path/to/test_dataset.yaml \
--model path/to/model.ckpt \
--base-dir path/to/project_root \ --base-dir path/to/project_root \
--evaluation-config path/to/evaluation.yaml --evaluation-config path/to/evaluation.yaml
``` ```
Include `--base-dir` when the dataset config resolves recordings through
relative paths.
## Change one thing at a time
When comparing models or settings, avoid changing task definitions, thresholds,
matching behavior, and datasets all at once.
Otherwise it becomes hard to explain why the metric changed.
## Related pages ## Related pages
- Evaluation tutorial: {doc}`../tutorials/evaluate-on-a-test-set` - Evaluation tutorial:
- Evaluation config reference: {doc}`../reference/evaluation-config` {doc}`../tutorials/evaluate-on-a-test-set`
- Evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching` - Evaluation config reference:
{doc}`../reference/evaluation-config`
- Evaluation concepts:
{doc}`../explanation/evaluation-concepts-and-matching`

View File

@ -11,7 +11,7 @@ for the full option list.
| `batdetect2 data` | Inspect and convert dataset configs | Depends on subcommand (`summary`, `convert`) | | `batdetect2 data` | Inspect and convert dataset configs | Depends on subcommand (`summary`, `convert`) |
| `batdetect2 train` | Train or fine-tune models | `TRAIN_DATASET` | | `batdetect2 train` | Train or fine-tune models | `TRAIN_DATASET` |
| `batdetect2 finetune` | Fine-tune a checkpoint on new targets | `TRAIN_DATASET` plus `--targets` | | `batdetect2 finetune` | Fine-tune a checkpoint on new targets | `TRAIN_DATASET` plus `--targets` |
| `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `MODEL_PATH`, `TEST_DATASET` | | `batdetect2 evaluate` | Evaluate a checkpoint on a test dataset | `TEST_DATASET` |
| `batdetect2 detect` | Legacy compatibility workflow | `AUDIO_DIR`, `ANN_DIR`, `DETECTION_THRESHOLD` | | `batdetect2 detect` | Legacy compatibility workflow | `AUDIO_DIR`, `ANN_DIR`, `DETECTION_THRESHOLD` |
## Notes ## Notes
@ -20,6 +20,8 @@ for the full option list.
- Paths with spaces should be wrapped in quotes. - Paths with spaces should be wrapped in quotes.
- Input audio is expected to be mono. - Input audio is expected to be mono.
- `process` uses the optional `--detection-threshold` override. - `process` uses the optional `--detection-threshold` override.
- `evaluate` takes `TEST_DATASET` as a positional argument and uses `--model`
for the checkpoint override.
- `finetune` defaults to the bundled `uk_same` checkpoint if `--model` is not - `finetune` defaults to the bundled `uk_same` checkpoint if `--model` is not
provided. provided.

View File

@ -3,7 +3,8 @@
This tutorial shows how to evaluate a trained checkpoint on a held-out dataset
and inspect the output metrics.
This tutorial is for advanced users who want to compare one trained model
against a separate test dataset.
## Before you start
@ -32,22 +33,22 @@ Use a dataset that was not used for training or tuning.
A held-out dataset is simply a separate dataset kept aside for evaluation. A held-out dataset is simply a separate dataset kept aside for evaluation.
If you tune thresholds or configs on the same dataset that you report as final evaluation, the results will be optimistic. If you tune thresholds or configs on the same dataset that you report as final
evaluation, the results will be optimistic.
## 2. Run evaluation ## 2. Run evaluation
```bash ```bash
batdetect2 evaluate \ batdetect2 evaluate \
path/to/model.ckpt \
path/to/test_dataset.yaml \ path/to/test_dataset.yaml \
--model path/to/model.ckpt \
--base-dir path/to/project_root \ --base-dir path/to/project_root \
--output-dir path/to/eval_outputs --output-dir path/to/eval_outputs
``` ```
This command loads the checkpoint, This command loads the checkpoint, runs prediction on the test dataset, applies
runs prediction on the test dataset, the chosen evaluation tasks, and writes metrics and result files to the output
applies the chosen evaluation tasks, directory.
and writes metrics and result files to the output directory.
Use `--base-dir` whenever the dataset config contains relative paths. Use `--base-dir` whenever the dataset config contains relative paths.
@ -73,7 +74,8 @@ Check:
- which task the metric belongs to, - which task the metric belongs to,
- which thresholding or matching assumptions were used, - which thresholding or matching assumptions were used,
- whether class-level behavior matches your use case, - whether class-level behavior matches your use case,
- whether the failures are concentrated in specific taxa, sites, or recording conditions. - whether the failures are concentrated in specific taxa, sites, or recording
conditions.
## 5. Record the evaluation setup ## 5. Record the evaluation setup
@ -85,7 +87,11 @@ That matters for reproducibility and for later model comparisons.
- Compare thresholds on representative files: - Compare thresholds on representative files:
{doc}`../how_to/tune-detection-threshold` {doc}`../how_to/tune-detection-threshold`
- Configure evaluation tasks: {doc}`../how_to/choose-and-configure-evaluation-tasks` - Configure evaluation tasks:
- Interpret evaluation artifacts: {doc}`../how_to/interpret-evaluation-outputs` {doc}`../how_to/choose-and-configure-evaluation-tasks`
- Learn the evaluation concepts: {doc}`../explanation/evaluation-concepts-and-matching` - Interpret evaluation artifacts:
- Check full evaluate options: {doc}`../reference/cli/evaluate` {doc}`../how_to/interpret-evaluation-outputs`
- Learn the evaluation concepts:
{doc}`../explanation/evaluation-concepts-and-matching`
- Check full evaluate options:
{doc}`../reference/cli/evaluate`

View File

@ -17,6 +17,10 @@ help:
install: install:
uv sync uv sync
# Install full development dependencies for CI and docs builds.
install-dev:
uv sync --all-extras --dev
# Testing & Coverage # Testing & Coverage
# Run tests using pytest. # Run tests using pytest.
test: test:
@ -50,6 +54,9 @@ coverage-serve: coverage-html
docs: docs:
uv run sphinx-build -b html {{DOCS_SOURCE}} {{DOCS_BUILD}} uv run sphinx-build -b html {{DOCS_SOURCE}} {{DOCS_BUILD}}
# Check that documentation builds successfully.
check-docs: docs
# Serve documentation with live reload. # Serve documentation with live reload.
docs-serve: docs-serve:
uv run sphinx-autobuild {{DOCS_SOURCE}} {{DOCS_BUILD}} --watch {{SOURCE_DIR}} --open-browser uv run sphinx-autobuild {{DOCS_SOURCE}} {{DOCS_BUILD}} --watch {{SOURCE_DIR}} --open-browser
@ -84,6 +91,25 @@ check-types:
# Run all checks (format-check, lint, typecheck). # Run all checks (format-check, lint, typecheck).
check: check-format check-lint check-types check: check-format check-lint check-types
# Run the standard CI validation sequence.
ci: check test
# Build source and wheel distributions.
build-dist:
uv run --with build python -m build
# Bump the patch version, commit, and tag.
bump-patch:
uvx bump2version patch
# Bump the minor version, commit, and tag.
bump-minor:
uvx bump2version minor
# Bump the major version, commit, and tag.
bump-major:
uvx bump2version major
# Cleaning tasks # Cleaning tasks
# Remove Python bytecode and cache. # Remove Python bytecode and cache.
clean-pyc: clean-pyc:

View File

@ -1,10 +1,9 @@
from __future__ import annotations from __future__ import annotations
from typing import Any
from collections.abc import Sequence from collections.abc import Sequence
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any
import pandas as pd import pandas as pd
from lightning.pytorch.loggers import Logger from lightning.pytorch.loggers import Logger

View File

@ -11,7 +11,7 @@ def test_cli_base_help_lists_main_commands() -> None:
result = CliRunner().invoke(cli, ["--help"]) result = CliRunner().invoke(cli, ["--help"])
assert result.exit_code == 0 assert result.exit_code == 0
assert "predict" in result.output assert "process" in result.output
assert "train" in result.output assert "train" in result.output
assert "evaluate" in result.output assert "evaluate" in result.output
assert "data" in result.output assert "data" in result.output

View File

@ -15,8 +15,8 @@ def test_cli_evaluate_help() -> None:
result = CliRunner().invoke(cli, ["evaluate", "--help"]) result = CliRunner().invoke(cli, ["evaluate", "--help"])
assert result.exit_code == 0 assert result.exit_code == 0
assert "MODEL_PATH" in result.output
assert "TEST_DATASET" in result.output assert "TEST_DATASET" in result.output
assert "--model" in result.output
assert "--evaluation-config" in result.output assert "--evaluation-config" in result.output
@ -32,8 +32,9 @@ def test_cli_evaluate_writes_metrics_for_small_dataset(
cli, cli,
[ [
"evaluate", "evaluate",
str(tiny_checkpoint_path),
str(BASE_DIR / "example_data" / "dataset.yaml"), str(BASE_DIR / "example_data" / "dataset.yaml"),
"--model",
str(tiny_checkpoint_path),
"--base-dir", "--base-dir",
str(BASE_DIR), str(BASE_DIR),
"--workers", "--workers",

View File

@ -1,7 +1,8 @@
from typing import cast
import numpy as np import numpy as np
import pytest import pytest
import torch import torch
from typing import cast
from batdetect2.models import UNetBackbone from batdetect2.models import UNetBackbone
from batdetect2.models.backbones import UNetBackboneConfig from batdetect2.models.backbones import UNetBackboneConfig