From 5760b6e017081540f97ee6dd4648bb59c38715aa Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Thu, 7 May 2026 07:54:46 +0100 Subject: [PATCH] docs: expand training tutorial and config guidance --- docs/source/reference/configs.rst | 3 + docs/source/reference/index.md | 3 + docs/source/reference/logging-config.md | 46 ++++ docs/source/reference/model-config.md | 37 ++++ docs/source/reference/training-config.md | 50 +++++ docs/source/tutorials/index.md | 12 +- docs/source/tutorials/train-a-custom-model.md | 207 ++++++++++++++---- src/batdetect2/logging.py | 2 +- src/batdetect2/train/train.py | 4 +- 9 files changed, 314 insertions(+), 50 deletions(-) create mode 100644 docs/source/reference/logging-config.md create mode 100644 docs/source/reference/model-config.md create mode 100644 docs/source/reference/training-config.md diff --git a/docs/source/reference/configs.rst b/docs/source/reference/configs.rst index 90261d1..f4c9312 100644 --- a/docs/source/reference/configs.rst +++ b/docs/source/reference/configs.rst @@ -5,6 +5,9 @@ BatDetect2 uses separate config objects for different workflow surfaces. Use the dedicated reference pages for each config family: +- model config +- training config +- logging config - inference config - evaluation config - outputs config diff --git a/docs/source/reference/index.md b/docs/source/reference/index.md index cd09bc1..b6aaf2f 100644 --- a/docs/source/reference/index.md +++ b/docs/source/reference/index.md @@ -10,6 +10,9 @@ details, or Python API entries. cli/index api +model-config +training-config +logging-config inference-config evaluation-config outputs-config diff --git a/docs/source/reference/logging-config.md b/docs/source/reference/logging-config.md new file mode 100644 index 0000000..33b36dc --- /dev/null +++ b/docs/source/reference/logging-config.md @@ -0,0 +1,46 @@ +# Logging config reference + +`AppLoggingConfig` controls which logger backend BatDetect2 uses for training, +evaluation, and inference. 
+ +Defined in `batdetect2.logging`. + +## Top-level fields + +- `train` + - logger config for training runs. +- `evaluation` + - logger config for evaluation runs. +- `inference` + - logger config for inference runs. + +## Built-in logger backends + +Current built-in logger backends are: + +- `csv` +- `tensorboard` +- `mlflow` +- `dvclive` + +## Default behaviour + +By default: + +- training uses `csv`, +- evaluation uses `csv`, +- inference uses `csv`. + +With the CSV logger, training writes a `metrics.csv` file in the log folder. + +Example files live under `example_data/configs/`, including +`example_data/configs/logging.yaml`. + +## Related pages + +- Train command reference: + {doc}`cli/train` +- Evaluate command reference: + {doc}`cli/evaluate` +- Run inference on a folder: + {doc}`../tutorials/run-inference-on-folder` diff --git a/docs/source/reference/model-config.md b/docs/source/reference/model-config.md new file mode 100644 index 0000000..1fce1bb --- /dev/null +++ b/docs/source/reference/model-config.md @@ -0,0 +1,37 @@ +# Model config reference + +`ModelConfig` defines the model stack used for training or fresh model +construction. + +Defined in `batdetect2.models`. + +## Top-level fields + +- `samplerate` + - expected input sample rate. +- `architecture` + - backbone network settings. +- `preprocess` + - spectrogram preprocessing settings. +- `postprocess` + - decoding and output filtering settings. + +## What this config controls + +Use `ModelConfig` when you want to change things like: + +- the backbone architecture, +- the spectrogram settings used by the model, +- postprocessing settings stored with the model. + +Example files live under `example_data/configs/`, including +`example_data/configs/model.yaml`. 
+ +## Related pages + +- Preprocessing config: + {doc}`preprocessing-config` +- Postprocess config: + {doc}`postprocess-config` +- Train command reference: + {doc}`cli/train` diff --git a/docs/source/reference/training-config.md b/docs/source/reference/training-config.md new file mode 100644 index 0000000..8626446 --- /dev/null +++ b/docs/source/reference/training-config.md @@ -0,0 +1,50 @@ +# Training config reference + +`TrainingConfig` controls the training loop, optimisation, data loading, losses, +and validation tasks. + +Defined in `batdetect2.train.config`. + +## Top-level fields + +- `train_loader` + - training data loading and clipping settings. +- `val_loader` + - validation data loading and clipping settings. +- `optimizer` + - optimiser type and learning rate settings. +- `scheduler` + - learning-rate schedule settings. +- `loss` + - detection, classification, and size loss settings. +- `trainer` + - PyTorch Lightning trainer settings such as `max_epochs`. +- `labels` + - target label generation settings. +- `validation` + - evaluation tasks used during validation. +- `checkpoints` + - checkpoint saving settings. + +## What this config controls + +Use `TrainingConfig` when you want to change things like: + +- batch size, +- augmentation, +- optimiser and scheduler settings, +- number of epochs, +- validation frequency, +- checkpoint behaviour. + +Example files live under `example_data/configs/`, including +`example_data/configs/training.yaml`. + +## Related pages + +- Evaluation config: + {doc}`evaluation-config` +- Train command reference: + {doc}`cli/train` +- Fine-tune from a checkpoint: + {doc}`../how_to/fine-tune-from-a-checkpoint` diff --git a/docs/source/tutorials/index.md b/docs/source/tutorials/index.md index a183e45..6ad6582 100644 --- a/docs/source/tutorials/index.md +++ b/docs/source/tutorials/index.md @@ -1,12 +1,14 @@ # Tutorials -Tutorials are the default learning path. +Welcome to the `batdetect2` tutorials. 
-Each tutorial follows one recommended route from start to finish. +These tutorials walk you step by step through the most common use cases and +workflows. +They follow the simplest route and are a good place to start with `batdetect2`. -Use tutorials when you want the simplest route to a concrete outcome. - -Use {doc}`../how_to/index` when you need to customize a workflow. +Use {doc}`../how_to/index` for focused guides on specific tasks, or +{doc}`../explanation/index` if you want to understand the concepts in more +depth. ```{toctree} :maxdepth: 1 diff --git a/docs/source/tutorials/train-a-custom-model.md b/docs/source/tutorials/train-a-custom-model.md index 3a1ff82..c1f2a34 100644 --- a/docs/source/tutorials/train-a-custom-model.md +++ b/docs/source/tutorials/train-a-custom-model.md @@ -1,85 +1,208 @@ -# Tutorial: Train a custom model +# Train a custom model -This tutorial walks through a first custom training run using your own -annotations. +This tutorial walks through a first custom training run using your own annotations. -This tutorial is for advanced users who already have dataset files and want to train a model on their own annotated data. +Use it when you already have labelled recordings and want to train a model for your own data. ## Before you start +You need: + - BatDetect2 installed. -- A training dataset config file. -- (Optional) A validation dataset config file. -- A targets config file if you are not using the default target setup. -- A model config file if you are not training from the built-in defaults. +- labelled recordings and annotations. ```{note} -This is not the first page to start with if you only want to run the existing model on recordings. +This is not the first page to start with if you only want to run the existing +model on recordings. Use {doc}`run-inference-on-folder` for that. 
``` -## Outcome +## Optional: use the repository example files + +If you want to follow the steps with the same files shown here, clone the repository and move into it: + +```bash +git clone https://github.com/macaodha/batdetect2.git +cd batdetect2 +``` + +## What you will do By the end of this tutorial you will have: +- created a dataset config, +- defined a targets config, - started a training run, -- written checkpoints and logs, -- understood the minimum settings involved, -- identified the next pages for fine-tuning and evaluation. +- checked the checkpoint and log outputs, +- identified the next pages for evaluation and customisation. -## 1. Gather the minimum required inputs +## 1. Create a dataset config -At minimum, a custom training run needs: +The dataset config explicitly declares what data you want to use for training. +It is a YAML file. +If YAML is new to you, see [Learn YAML in Y Minutes](https://learnxinyminutes.com/yaml/). -- a training dataset config, -- optional validation dataset config, -- either a model config for a fresh run or a checkpoint for continued training, -- optional settings files for targets, audio, training, evaluation, inference, outputs, and logging. +In the dataset config, you list one or more data sources. +Each source tells `batdetect2` where the audio recordings live and where the matching annotations are stored. -The most important point is that the dataset file, target definitions, and preprocessing choices need to agree with each other. +BatDetect2 can read annotations from different source formats. +In this example, we use the example data in the `batdetect2` format. -## 2. 
Run a first training command +Use `example_data/dataset.yaml` as a reference: -Use a command like this for a fresh run: +```yaml +name: example dataset +description: Only for demonstration purposes +sources: + - format: batdetect2 + name: Example Data + description: Examples included for testing batdetect2 + annotations_dir: example_data/anns + audio_dir: example_data/audio +``` + +For your own project, the main thing to change is the file paths. +If you have several collections of recordings, you can add more than one source to the same dataset config. +That lets you describe the full training data you want to use in one place. + +If you need more detail on dataset source formats, see {doc}`../reference/data-sources`. + +## 2. Define a targets config + +The targets config tells BatDetect2 how to turn your annotations into training targets. + +It defines two main things: + +- what should count as a detection, +- which classes the model should learn to predict. + +In practice, this means the targets config maps the labels in your annotations to the detection and classification outputs used during training. + +Use `example_data/targets.yaml` as a reference: + +```yaml +detection_target: + name: bat + match_if: + name: all_of + conditions: + - name: has_tag + tag: { key: event, value: Echolocation } + - name: not + condition: + name: has_tag + tag: { key: class, value: Unknown } + assign_tags: + - key: class + value: Bat + +classification_targets: + - name: myomys + tags: + - key: class + value: Myotis mystacinus + - name: pippip + tags: + - key: class + value: Pipistrellus pipistrellus +``` + +For your own project, update the matching rules and class definitions so they fit your labels. 
+ +In this example: + +- `detection_target` says that echolocation calls should be treated as detections, unless they are tagged with the class `Unknown`, +- `classification_targets` define the classes the model should predict. + +It is worth taking a bit of time over this file, because your targets config decides what the model is actually being asked to learn. + +If you need help with that, see {doc}`../how_to/configure-target-definitions` and {doc}`../reference/targets-config-workflow`. + +## 3. Run a first training command + +For a first run, keep the command simple: ```bash batdetect2 train \ path/to/train_dataset.yaml \ --val-dataset path/to/val_dataset.yaml \ - --targets path/to/targets.yaml \ - --model-config path/to/model.yaml \ - --training-config path/to/training.yaml + --targets path/to/targets.yaml ``` -Use `--model` instead of `--model-config` when you want to continue from an existing checkpoint. +If you are using the repository example files, run: -## 3. Check that outputs are being written +```bash +batdetect2 train \ + example_data/dataset.yaml \ + --val-dataset example_data/dataset.yaml \ + --targets example_data/targets.yaml +``` -After the command starts, verify that: +This example uses the same dataset for both training and validation, only to keep things simple. +For real training runs, you usually want separate training and validation datasets. -- the run initializes without configuration errors, -- checkpoints are written to the checkpoint directory, -- logs are written to the log directory or configured logger backend, -- the training and validation datasets load as expected. +Both commands use the built-in default model and training settings. +If you want to change the model architecture later, see {doc}`../reference/model-config`. +If you want to change optimiser settings, batch size, epochs, or checkpoint behaviour, see {doc}`../reference/training-config`. -## 4. Run a sanity inference pass after training +## 4. 
Check the training outputs -Do not wait until full evaluation to confirm that the trained checkpoint behaves sensibly. +After the run starts, `batdetect2` should write checkpoints and logs. -Take a small reviewed subset of recordings and run a quick prediction pass with the new checkpoint. +By default, training logs are written with the CSV logger. +That means you should see a log folder with a `metrics.csv` file. -That catches setup mismatches early, especially around targets and preprocessing. +A typical layout looks like this: -## 5. Evaluate on held-out data +```text +outputs/ + checkpoints/ + epoch=19-step=20.ckpt + logs/ + version_0/ + metrics.csv + hparams.yaml + training_artifacts/ + train_dataset.yaml + val_dataset.yaml + targets.yaml + train_class_summary.csv + val_class_summary.csv +``` -Once the checkpoint looks sensible on a small sanity subset, run the formal evaluation workflow on a held-out test set. +The checkpoint is the trained model you can use later for inference, evaluation, or sharing with someone else. -That is where you should compare models, thresholds, and task-level performance metrics. +The files in `training_artifacts/` record which datasets and targets were used for the run. +The `hparams.yaml` file records the full training setup, including the configs used for the model, training, and other parts of the run. -## What to do next +The `metrics.csv` file stores one row per validation epoch. +It includes training losses as well as validation losses and metrics such as: + +```csv +classification/mean_average_precision,detection/average_precision,epoch,total_loss/val +0.10041624307632446,0.3697187900543213,0,4070.3515625 +0.11328697204589844,0.346899151802063,1,3941.6455078125 +0.1388484090566635,0.36171725392341614,2,3776.323974609375 +``` + +You may also see class-specific metrics in extra columns. + +The more detailed metrics are computed from the validation set. 
+If you do not provide `--val-dataset`, those validation metrics will not appear. + +Other logger backends are also supported, including TensorBoard, MLflow, and DVCLive. +See {doc}`../reference/logging-config` if you want to change that. + +## Use the trained model + +You can now use the trained checkpoint in BatDetect2, or share it with someone else to use in their own runs. +If you want to load it for inference or evaluation, see {doc}`../how_to/choose-a-model`. + +## Common next steps - Evaluate the trained checkpoint: {doc}`evaluate-on-a-test-set` - Fine-tune from a checkpoint: {doc}`../how_to/fine-tune-from-a-checkpoint` -- Configure targets: {doc}`../how_to/configure-target-definitions` -- Configure preprocessing: {doc}`../how_to/configure-audio-preprocessing` +- Configure targets in more detail: {doc}`../how_to/configure-target-definitions` +- Configure audio preprocessing: {doc}`../how_to/configure-audio-preprocessing` +- Configure spectrogram preprocessing: {doc}`../how_to/configure-spectrogram-preprocessing` - Check full train options: {doc}`../reference/cli/train` diff --git a/src/batdetect2/logging.py b/src/batdetect2/logging.py index 6376ae7..5837407 100644 --- a/src/batdetect2/logging.py +++ b/src/batdetect2/logging.py @@ -104,7 +104,7 @@ LoggerConfig = Annotated[ class AppLoggingConfig(BaseConfig): - train: LoggerConfig = Field(default_factory=TensorBoardLoggerConfig) + train: LoggerConfig = Field(default_factory=CSVLoggerConfig) evaluation: LoggerConfig = Field(default_factory=CSVLoggerConfig) inference: LoggerConfig = Field(default_factory=CSVLoggerConfig) diff --git a/src/batdetect2/train/train.py b/src/batdetect2/train/train.py index c1632d7..f59138b 100644 --- a/src/batdetect2/train/train.py +++ b/src/batdetect2/train/train.py @@ -10,9 +10,9 @@ from soundevent import data from batdetect2.audio import AudioConfig, AudioLoader, build_audio_loader from batdetect2.evaluate import EvaluatorProtocol, build_evaluator from batdetect2.logging import ( + 
CSVLoggerConfig, LoggerConfig, LoggingCallback, - TensorBoardLoggerConfig, build_logger, ) from batdetect2.models import ModelConfig, build_model @@ -165,7 +165,7 @@ def run_train( ) train_logger = build_logger( - logger_config or TensorBoardLoggerConfig(), + logger_config or CSVLoggerConfig(), log_dir=log_dir, experiment_name=experiment_name, run_name=run_name,