From 5760b6e017081540f97ee6dd4648bb59c38715aa Mon Sep 17 00:00:00 2001
From: mbsantiago <santiago.mbal@gmail.com>
Date: Thu, 7 May 2026 07:54:46 +0100
Subject: [PATCH] docs: expand training tutorial and config guidance

---
 docs/source/reference/configs.rst             |   3 +
 docs/source/reference/index.md                |   3 +
 docs/source/reference/logging-config.md       |  46 ++++
 docs/source/reference/model-config.md         |  37 ++++
 docs/source/reference/training-config.md      |  50 +++++
 docs/source/tutorials/index.md                |  12 +-
 docs/source/tutorials/train-a-custom-model.md | 207 ++++++++++++++----
 src/batdetect2/logging.py                     |   2 +-
 src/batdetect2/train/train.py                 |   4 +-
 9 files changed, 314 insertions(+), 50 deletions(-)
 create mode 100644 docs/source/reference/logging-config.md
 create mode 100644 docs/source/reference/model-config.md
 create mode 100644 docs/source/reference/training-config.md

diff --git a/docs/source/reference/configs.rst b/docs/source/reference/configs.rst
index 90261d1..f4c9312 100644
--- a/docs/source/reference/configs.rst
+++ b/docs/source/reference/configs.rst
@@ -5,6 +5,9 @@ BatDetect2 uses separate config objects for different workflow surfaces.
 
 Use the dedicated reference pages for each config family:
 
+- model config
+- training config
+- logging config
 - inference config
 - evaluation config
 - outputs config
diff --git a/docs/source/reference/index.md b/docs/source/reference/index.md
index cd09bc1..b6aaf2f 100644
--- a/docs/source/reference/index.md
+++ b/docs/source/reference/index.md
@@ -10,6 +10,9 @@ details, or Python API entries.
 
 cli/index
 api
+model-config
+training-config
+logging-config
 inference-config
 evaluation-config
 outputs-config
diff --git a/docs/source/reference/logging-config.md b/docs/source/reference/logging-config.md
new file mode 100644
index 0000000..33b36dc
--- /dev/null
+++ b/docs/source/reference/logging-config.md
@@ -0,0 +1,46 @@
+# Logging config reference
+
+`AppLoggingConfig` controls which logger backend BatDetect2 uses for training,
+evaluation, and inference.
+
+Defined in `batdetect2.logging`.
+
+## Top-level fields
+
+- `train`
+  - logger config for training runs.
+- `evaluation`
+  - logger config for evaluation runs.
+- `inference`
+  - logger config for inference runs.
+
+## Built-in logger backends
+
+Current built-in logger backends are:
+
+- `csv`
+- `tensorboard`
+- `mlflow`
+- `dvclive`
+
+## Default behaviour
+
+By default:
+
+- training uses `csv`,
+- evaluation uses `csv`,
+- inference uses `csv`.
+
+With the CSV logger, training writes a `metrics.csv` file in the log folder.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/logging.yaml`.
+
+## Related pages
+
+- Train command reference:
+  {doc}`cli/train`
+- Evaluate command reference:
+  {doc}`cli/evaluate`
+- Run inference on a folder:
+  {doc}`../tutorials/run-inference-on-folder`
diff --git a/docs/source/reference/model-config.md b/docs/source/reference/model-config.md
new file mode 100644
index 0000000..1fce1bb
--- /dev/null
+++ b/docs/source/reference/model-config.md
@@ -0,0 +1,37 @@
+# Model config reference
+
+`ModelConfig` defines the model stack used for training or fresh model
+construction.
+
+Defined in `batdetect2.models`.
+
+## Top-level fields
+
+- `samplerate`
+  - expected input sample rate.
+- `architecture`
+  - backbone network settings.
+- `preprocess`
+  - spectrogram preprocessing settings.
+- `postprocess`
+  - decoding and output filtering settings.
+
+## What this config controls
+
+Use `ModelConfig` when you want to change things like:
+
+- the backbone architecture,
+- the spectrogram settings used by the model,
+- postprocessing settings stored with the model.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/model.yaml`.
+
+## Related pages
+
+- Preprocessing config:
+  {doc}`preprocessing-config`
+- Postprocess config:
+  {doc}`postprocess-config`
+- Train command reference:
+  {doc}`cli/train`
diff --git a/docs/source/reference/training-config.md b/docs/source/reference/training-config.md
new file mode 100644
index 0000000..8626446
--- /dev/null
+++ b/docs/source/reference/training-config.md
@@ -0,0 +1,50 @@
+# Training config reference
+
+`TrainingConfig` controls the training loop, optimisation, data loading, losses,
+and validation tasks.
+
+Defined in `batdetect2.train.config`.
+
+## Top-level fields
+
+- `train_loader`
+  - training data loading and clipping settings.
+- `val_loader`
+  - validation data loading and clipping settings.
+- `optimizer`
+  - optimiser type and learning rate settings.
+- `scheduler`
+  - learning-rate schedule settings.
+- `loss`
+  - detection, classification, and size loss settings.
+- `trainer`
+  - PyTorch Lightning trainer settings such as `max_epochs`.
+- `labels`
+  - target label generation settings.
+- `validation`
+  - evaluation tasks used during validation.
+- `checkpoints`
+  - checkpoint saving settings.
+
+## What this config controls
+
+Use `TrainingConfig` when you want to change things like:
+
+- batch size,
+- augmentation,
+- optimiser and scheduler settings,
+- number of epochs,
+- validation frequency,
+- checkpoint behaviour.
+
+Example files live under `example_data/configs/`, including
+`example_data/configs/training.yaml`.
+
+## Related pages
+
+- Evaluation config:
+  {doc}`evaluation-config`
+- Train command reference:
+  {doc}`cli/train`
+- Fine-tune from a checkpoint:
+  {doc}`../how_to/fine-tune-from-a-checkpoint`
diff --git a/docs/source/tutorials/index.md b/docs/source/tutorials/index.md
index a183e45..6ad6582 100644
--- a/docs/source/tutorials/index.md
+++ b/docs/source/tutorials/index.md
@@ -1,12 +1,14 @@
 # Tutorials
 
-Tutorials are the default learning path.
+Welcome to the `batdetect2` tutorials.
 
-Each tutorial follows one recommended route from start to finish.
+These tutorials walk you step by step through the most common use cases and
+workflows.
+They follow the simplest route and are a good place to start with `batdetect2`.
 
-Use tutorials when you want the simplest route to a concrete outcome.
-
-Use {doc}`../how_to/index` when you need to customize a workflow.
+Use {doc}`../how_to/index` for focused guides on specific tasks, or
+{doc}`../explanation/index` if you want to understand the concepts in more
+depth.
 
 ```{toctree}
 :maxdepth: 1
diff --git a/docs/source/tutorials/train-a-custom-model.md b/docs/source/tutorials/train-a-custom-model.md
index 3a1ff82..c1f2a34 100644
--- a/docs/source/tutorials/train-a-custom-model.md
+++ b/docs/source/tutorials/train-a-custom-model.md
@@ -1,85 +1,208 @@
-# Tutorial: Train a custom model
+# Train a custom model
 
-This tutorial walks through a first custom training run using your own
-annotations.
+This tutorial walks through a first custom training run using your own annotations.
 
-This tutorial is for advanced users who already have dataset files and want to train a model on their own annotated data.
+Use it when you already have labelled recordings and want to train a model for your own data.
 
 ## Before you start
 
+You need:
+
 - BatDetect2 installed.
-- A training dataset config file.
-- (Optional) A validation dataset config file.
-- A targets config file if you are not using the default target setup.
-- A model config file if you are not training from the built-in defaults.
+- labelled recordings and annotations.
 
 ```{note}
-This is not the first page to start with if you only want to run the existing model on recordings.
+This is not the first page to start with if you only want to run the existing
+model on recordings.
 Use {doc}`run-inference-on-folder` for that.
 ```
 
-## Outcome
+## Optional: use the repository example files
+
+If you want to follow the steps with the same files shown here, clone the repository and move into it:
+
+```bash
+git clone https://github.com/macaodha/batdetect2.git
+cd batdetect2
+```
+
+## What you will do
 
 By the end of this tutorial you will have:
 
+- created a dataset config,
+- defined a targets config,
 - started a training run,
-- written checkpoints and logs,
-- understood the minimum settings involved,
-- identified the next pages for fine-tuning and evaluation.
+- checked the checkpoint and log outputs,
+- identified the next pages for evaluation and customisation.
 
-## 1. Gather the minimum required inputs
+## 1. Create a dataset config
 
-At minimum, a custom training run needs:
+The dataset config explicitly declares what data you want to use for training.
+It is a YAML file.
+If YAML is new to you, see [Learn YAML in Y Minutes](https://learnxinyminutes.com/yaml/).
 
-- a training dataset config,
-- optional validation dataset config,
-- either a model config for a fresh run or a checkpoint for continued training,
-- optional settings files for targets, audio, training, evaluation, inference, outputs, and logging.
+In the dataset config, you list one or more data sources.
+Each source tells `batdetect2` where the audio recordings live and where the matching annotations are stored.
 
-The most important point is that the dataset file, target definitions, and preprocessing choices need to agree with each other.
+BatDetect2 can read annotations from different source formats.
+In this example, we use the example data in the `batdetect2` format.
 
-## 2. Run a first training command
+Use `example_data/dataset.yaml` as a reference:
 
-Use a command like this for a fresh run:
+```yaml
+name: example dataset
+description: Only for demonstration purposes
+sources:
+  - format: batdetect2
+    name: Example Data
+    description: Examples included for testing batdetect2
+    annotations_dir: example_data/anns
+    audio_dir: example_data/audio
+```
+
+For your own project, the main thing to change is the file paths.
+If you have several collections of recordings, you can add more than one source to the same dataset config.
+That lets you describe the full training data you want to use in one place.
+
+If you need more detail on dataset source formats, see {doc}`../reference/data-sources`.
+
+## 2. Define a targets config
+
+The targets config tells BatDetect2 how to turn your annotations into training targets.
+
+It defines two main things:
+
+- what should count as a detection,
+- which classes the model should learn to predict.
+
+In practice, this means the targets config maps the labels in your annotations to the detection and classification outputs used during training.
+
+Use `example_data/targets.yaml` as a reference:
+
+```yaml
+detection_target:
+  name: bat
+  match_if:
+    name: all_of
+    conditions:
+      - name: has_tag
+        tag: { key: event, value: Echolocation }
+      - name: not
+        condition:
+          name: has_tag
+          tag: { key: class, value: Unknown }
+  assign_tags:
+    - key: class
+      value: Bat
+
+classification_targets:
+  - name: myomys
+    tags:
+      - key: class
+        value: Myotis mystacinus
+  - name: pippip
+    tags:
+      - key: class
+        value: Pipistrellus pipistrellus
+```
+
+For your own project, update the matching rules and class definitions so they fit your labels.
+
+In this example:
+
+- `detection_target` says that echolocation calls not tagged with class `Unknown` should be treated as detections,
+- `classification_targets` define the classes the model should predict.
+
+It is worth taking a bit of time over this file, because your targets config decides what the model is actually being asked to learn.
+
+If you need help with that, see {doc}`../how_to/configure-target-definitions` and {doc}`../reference/targets-config-workflow`.
+
+## 3. Run a first training command
+
+For a first run, keep the command simple:
 
 ```bash
 batdetect2 train \
   path/to/train_dataset.yaml \
   --val-dataset path/to/val_dataset.yaml \
-  --targets path/to/targets.yaml \
-  --model-config path/to/model.yaml \
-  --training-config path/to/training.yaml
+  --targets path/to/targets.yaml
 ```
 
-Use `--model` instead of `--model-config` when you want to continue from an existing checkpoint.
+If you are using the repository example files, run:
 
-## 3. Check that outputs are being written
+```bash
+batdetect2 train \
+  example_data/dataset.yaml \
+  --val-dataset example_data/dataset.yaml \
+  --targets example_data/targets.yaml
+```
 
-After the command starts, verify that:
+This uses the same dataset for training and validation only to keep the example simple.
+For real training runs, you usually want separate training and validation datasets.
 
-- the run initializes without configuration errors,
-- checkpoints are written to the checkpoint directory,
-- logs are written to the log directory or configured logger backend,
-- the training and validation datasets load as expected.
+Both commands above use the built-in default model and training settings, because no model or training config is passed.
+If you want to change the model architecture later, see {doc}`../reference/model-config`.
+If you want to change optimiser settings, batch size, epochs, or checkpoint behaviour, see {doc}`../reference/training-config`.
 
-## 4. Run a sanity inference pass after training
+## 4. Check the training outputs
 
-Do not wait until full evaluation to confirm that the trained checkpoint behaves sensibly.
+After the run starts, `batdetect2` should write checkpoints and logs.
 
-Take a small reviewed subset of recordings and run a quick prediction pass with the new checkpoint.
+By default, training logs are written with the CSV logger.
+That means you should see a log folder with a `metrics.csv` file.
 
-That catches setup mismatches early, especially around targets and preprocessing.
+A typical layout looks like this:
 
-## 5. Evaluate on held-out data
+```text
+outputs/
+  checkpoints/
+    epoch=19-step=20.ckpt
+  logs/
+    version_0/
+      metrics.csv
+      hparams.yaml
+    training_artifacts/
+      train_dataset.yaml
+      val_dataset.yaml
+      targets.yaml
+      train_class_summary.csv
+      val_class_summary.csv
+```
 
-Once the checkpoint looks sensible on a small sanity subset, run the formal evaluation workflow on a held-out test set.
+The checkpoint is the trained model you can use later for inference, evaluation, or sharing with someone else.
 
-That is where you should compare models, thresholds, and task-level performance metrics.
+The files in `training_artifacts/` record which datasets and targets were used for the run.
+The `hparams.yaml` file records the full training setup, including the configs used for the model, training, and other parts of the run.
 
-## What to do next
+The `metrics.csv` file stores one row per validation epoch.
+It includes training losses as well as validation losses and metrics. For example, a subset of the validation columns looks like this:
+
+```csv
+classification/mean_average_precision,detection/average_precision,epoch,total_loss/val
+0.10041624307632446,0.3697187900543213,0,4070.3515625
+0.11328697204589844,0.346899151802063,1,3941.6455078125
+0.1388484090566635,0.36171725392341614,2,3776.323974609375
+```
+
+You may also see class-specific metrics in extra columns.
+
+The more detailed metrics are computed from the validation set.
+If you do not provide `--val-dataset`, those validation metrics will not appear.
+
+Other logger backends are also supported, including TensorBoard, MLflow, and DVCLive.
+See {doc}`../reference/logging-config` if you want to change that.
+
+## Use the trained model
+
+You can now use the trained checkpoint in BatDetect2, or share it with someone else to use in their own runs.
+If you want to load it for inference or evaluation, see {doc}`../how_to/choose-a-model`.
+
+## Common next steps
 
 - Evaluate the trained checkpoint: {doc}`evaluate-on-a-test-set`
 - Fine-tune from a checkpoint: {doc}`../how_to/fine-tune-from-a-checkpoint`
-- Configure targets: {doc}`../how_to/configure-target-definitions`
-- Configure preprocessing: {doc}`../how_to/configure-audio-preprocessing`
+- Configure targets in more detail: {doc}`../how_to/configure-target-definitions`
+- Configure audio preprocessing: {doc}`../how_to/configure-audio-preprocessing`
+- Configure spectrogram preprocessing: {doc}`../how_to/configure-spectrogram-preprocessing`
 - Check full train options: {doc}`../reference/cli/train`
diff --git a/src/batdetect2/logging.py b/src/batdetect2/logging.py
index 6376ae7..5837407 100644
--- a/src/batdetect2/logging.py
+++ b/src/batdetect2/logging.py
@@ -104,7 +104,7 @@ LoggerConfig = Annotated[
 
 
 class AppLoggingConfig(BaseConfig):
-    train: LoggerConfig = Field(default_factory=TensorBoardLoggerConfig)
+    train: LoggerConfig = Field(default_factory=CSVLoggerConfig)
     evaluation: LoggerConfig = Field(default_factory=CSVLoggerConfig)
     inference: LoggerConfig = Field(default_factory=CSVLoggerConfig)
 
diff --git a/src/batdetect2/train/train.py b/src/batdetect2/train/train.py
index c1632d7..f59138b 100644
--- a/src/batdetect2/train/train.py
+++ b/src/batdetect2/train/train.py
@@ -10,9 +10,9 @@ from soundevent import data
 from batdetect2.audio import AudioConfig, AudioLoader, build_audio_loader
 from batdetect2.evaluate import EvaluatorProtocol, build_evaluator
 from batdetect2.logging import (
+    CSVLoggerConfig,
     LoggerConfig,
     LoggingCallback,
-    TensorBoardLoggerConfig,
     build_logger,
 )
 from batdetect2.models import ModelConfig, build_model
@@ -165,7 +165,7 @@ def run_train(
     )
 
     train_logger = build_logger(
-        logger_config or TensorBoardLoggerConfig(),
+        logger_config or CSVLoggerConfig(),
         log_dir=log_dir,
         experiment_name=experiment_name,
         run_name=run_name,