Remove rogue print

Fix validation is_in_subclip after encoder changes
Add load dataset config function
2026-01-11 17:29:34 +01:00 · 2025-06-24 06:27:41 -06:00 · 2025-06-24 06:26:08 -06:00 · 2025-06-24 12:40:29 +01:00 · 2025-06-23 19:08:55 +01:00 · 2025-06-23 18:53:15 +01:00
110 changed files with 1844 additions and 869 deletions
--- a/.gitignore
+++ b/.gitignore
@ -107,7 +107,7 @@ experiments/*
 # DO Include
 !batdetect2_notebook.ipynb
-!batdetect2/models/checkpoints/*.pth.tar
+!src/batdetect2/models/checkpoints/*.pth.tar
 !tests/data/*.wav
 !notebooks/*.ipynb
 !tests/data/**/*.wav
--- a/4
+++ b/4
@ -1,7 +1,7 @@
 # Variables
-SOURCE_DIR = batdetect2
+SOURCE_DIR = src
 TESTS_DIR = tests
-PYTHON_DIRS = batdetect2 tests
+PYTHON_DIRS = src tests
 DOCS_SOURCE = docs/source
 DOCS_BUILD = docs/build
 HTML_COVERAGE_DIR = htmlcov
--- a/batdetect2/targets/rois.py
+++ b/batdetect2/targets/rois.py
@ -1,511 +0,0 @@
 """Handles mapping between geometric ROIs and target representations.
 This module defines the interface and provides implementation for converting
 a sound event's Region of Interest (ROI), typically represented by a
 `soundevent.data.Geometry` object like a `BoundingBox`, into a format
 suitable for use as a machine learning target. This usually involves:
 1.  Extracting a single reference point (time, frequency) from the geometry.
 2.  Calculating relevant size dimensions (e.g., duration/width,
    bandwidth/height) and applying scaling factors.
 It also provides the inverse operation: recovering an approximate geometric ROI
 (like a `BoundingBox`) from a predicted reference point and predicted size
 dimensions.
 This logic is encapsulated within components adhering to the `ROITargetMapper`
 protocol. Configuration for this mapping (e.g., which reference point to use,
 scaling factors) is managed by the `ROIConfig`. This module separates the
 *geometric* aspect of target definition from the *semantic* classification
 handled in `batdetect2.targets.classes`.
 """
 from typing import List, Literal, Optional, Protocol, Tuple
 import numpy as np
 from soundevent import data
 from batdetect2.configs import BaseConfig, load_config
 Positions = Literal[
    "bottom-left",
    "bottom-right",
    "top-left",
    "top-right",
    "center-left",
    "center-right",
    "top-center",
    "bottom-center",
    "center",
    "centroid",
    "point_on_surface",
 ]
 __all__ = [
    "ROITargetMapper",
    "ROIConfig",
    "BBoxEncoder",
    "build_roi_mapper",
    "load_roi_mapper",
    "DEFAULT_POSITION",
    "SIZE_WIDTH",
    "SIZE_HEIGHT",
    "SIZE_ORDER",
    "DEFAULT_TIME_SCALE",
    "DEFAULT_FREQUENCY_SCALE",
 ]
 SIZE_WIDTH = "width"
 """Standard name for the width/time dimension component ('width')."""
 SIZE_HEIGHT = "height"
 """Standard name for the height/frequency dimension component ('height')."""
 SIZE_ORDER = (SIZE_WIDTH, SIZE_HEIGHT)
 """Standard order of dimensions for size arrays ([width, height])."""
 DEFAULT_TIME_SCALE = 1000.0
 """Default scaling factor for time duration."""
 DEFAULT_FREQUENCY_SCALE = 1 / 859.375
 """Default scaling factor for frequency bandwidth."""
 DEFAULT_POSITION = "bottom-left"
 """Default reference position within the geometry ('bottom-left' corner)."""
 class ROITargetMapper(Protocol):
    """Protocol defining the interface for ROI-to-target mapping.
    Specifies the methods required for converting a geometric region of interest
    (`soundevent.data.Geometry`) into a target representation (reference point
    and scaled dimensions) and for recovering an approximate ROI from that
    representation.
    Attributes
    ----------
    dimension_names : List[str]
        A list containing the names of the dimensions returned by
        `get_roi_size` and expected by `recover_roi`
        (e.g., ['width', 'height']).
    """
    dimension_names: List[str]
    def get_roi_position(
        self,
        geom: data.Geometry,
        position: Optional[Positions] = None,
    ) -> tuple[float, float]:
        """Extract the reference position from a geometry.
        Parameters
        ----------
        geom : soundevent.data.Geometry
            The input geometry (e.g., BoundingBox, Polygon).
        position : Positions, optional
            Overrides the default `position` configured for the mapper.
            If provided, this position will be used instead of the mapper's
            internal default.
        Returns
        -------
        Tuple[float, float]
            The calculated reference position as (time, frequency) coordinates,
            based on the implementing class's configuration (e.g., "center",
            "bottom-left").
        Raises
        ------
        ValueError
            If the position cannot be calculated for the given geometry type
            or configured reference point.
        """
        ...
    def get_roi_size(self, geom: data.Geometry) -> np.ndarray:
        """Calculate the scaled target dimensions from a geometry.
        Computes the relevant size measures.
        Parameters
        ----------
        geom : soundevent.data.Geometry
            The input geometry.
        Returns
        -------
        np.ndarray
            A NumPy array containing the scaled dimensions corresponding to
            `dimension_names`. For bounding boxes, typically contains
            `[scaled_width, scaled_height]`.
        Raises
        ------
        TypeError, ValueError
            If the size cannot be computed for the given geometry type.
        """
        ...
    def recover_roi(
        self,
        pos: tuple[float, float],
        dims: np.ndarray,
        position: Optional[Positions] = None,
    ) -> data.Geometry:
        """Recover an approximate ROI from a position and target dimensions.
        Performs the inverse mapping: takes a reference position and the
        predicted dimensions and reconstructs a geometric representation.
        Parameters
        ----------
        pos : Tuple[float, float]
            The reference position (time, frequency).
        dims : np.ndarray
            NumPy array containing the dimensions, matching the order
            specified by `dimension_names`.
        position : Positions, optional
            Overrides the default `position` configured for the mapper.
            If provided, this position will be used instead of the mapper's
            internal default when reconstructing the roi geometry.
        Returns
        -------
        soundevent.data.Geometry
            The reconstructed geometry.
        Raises
        ------
        ValueError
            If the number of provided dimensions `dims` does not match
            `dimension_names` or if reconstruction fails.
        """
        ...
 class ROIConfig(BaseConfig):
    """Configuration for mapping Regions of Interest (ROIs).
    Defines parameters controlling how geometric ROIs are converted into
    target representations (reference points and scaled sizes).
    Attributes
    ----------
    position : Positions, default="bottom-left"
        Specifies the reference point within the geometry (e.g., bounding box)
        to use as the target location (e.g., "center", "bottom-left").
        See `soundevent.geometry.operations.Positions`.
    time_scale : float, default=1000.0
        Scaling factor applied to the time duration (width) of the ROI
        when calculating the target size representation. Must match model
        expectations.
    frequency_scale : float, default=1/859.375
        Scaling factor applied to the frequency bandwidth (height) of the ROI
        when calculating the target size representation. Must match model
        expectations.
    """
    position: Positions = DEFAULT_POSITION
    time_scale: float = DEFAULT_TIME_SCALE
    frequency_scale: float = DEFAULT_FREQUENCY_SCALE
 class BBoxEncoder(ROITargetMapper):
    """Concrete implementation of `ROITargetMapper` focused on Bounding Boxes.
    This class implements the ROI mapping protocol primarily for
    `soundevent.data.BoundingBox` geometry. It extracts reference points,
    calculates scaled width/height, and recovers bounding boxes based on
    configured position and scaling factors.
    Attributes
    ----------
    dimension_names : List[str]
        Specifies the output dimension names as ['width', 'height'].
    position : Positions
        The configured reference point type (e.g., "center", "bottom-left").
    time_scale : float
        The configured scaling factor for the time dimension (width).
    frequency_scale : float
        The configured scaling factor for the frequency dimension (height).
    """
    dimension_names = [SIZE_WIDTH, SIZE_HEIGHT]
    def __init__(
        self,
        position: Positions = DEFAULT_POSITION,
        time_scale: float = DEFAULT_TIME_SCALE,
        frequency_scale: float = DEFAULT_FREQUENCY_SCALE,
    ):
        """Initialize the BBoxEncoder.
        Parameters
        ----------
        position : Positions, default="bottom-left"
            Reference point type within the bounding box.
        time_scale : float, default=1000.0
            Scaling factor for time duration (width).
        frequency_scale : float, default=1/859.375
            Scaling factor for frequency bandwidth (height).
        """
        self.position: Positions = position
        self.time_scale = time_scale
        self.frequency_scale = frequency_scale
    def get_roi_position(
        self,
        geom: data.Geometry,
        position: Optional[Positions] = None,
    ) -> Tuple[float, float]:
        """Extract the configured reference position from the geometry.
        Uses `soundevent.geometry.get_geometry_point`.
        Parameters
        ----------
        geom : soundevent.data.Geometry
            Input geometry (e.g., BoundingBox).
        position : Positions, optional
            Overrides the default `position` configured for the encoder.
            If provided, this position will be used instead of `self.position`.
        Returns
        -------
        Tuple[float, float]
            Reference position (time, frequency).
        """
        from soundevent import geometry
        position = position or self.position
        return geometry.get_geometry_point(geom, position=position)
    def get_roi_size(self, geom: data.Geometry) -> np.ndarray:
        """Calculate the scaled [width, height] from the geometry's bounds.
        Computes the bounding box, extracts duration and bandwidth, and applies
        the configured `time_scale` and `frequency_scale`.
        Parameters
        ----------
        geom : soundevent.data.Geometry
            Input geometry.
        Returns
        -------
        np.ndarray
            A 1D NumPy array: `[scaled_width, scaled_height]`.
        """
        from soundevent import geometry
        start_time, low_freq, end_time, high_freq = geometry.compute_bounds(
            geom
        )
        return np.array(
            [
                (end_time - start_time) * self.time_scale,
                (high_freq - low_freq) * self.frequency_scale,
            ]
        )
    def recover_roi(
        self,
        pos: tuple[float, float],
        dims: np.ndarray,
        position: Optional[Positions] = None,
    ) -> data.Geometry:
        """Recover a BoundingBox from a position and scaled dimensions.
        Un-scales the input dimensions using the configured factors and
        reconstructs a `soundevent.data.BoundingBox` centered or anchored at
        the given reference `pos` according to the configured `position` type.
        Parameters
        ----------
        pos : Tuple[float, float]
            Reference position (time, frequency).
        dims : np.ndarray
            NumPy array containing the *scaled* dimensions, expected order is
            [scaled_width, scaled_height].
        position : Positions, optional
            Overrides the default `position` configured for the encoder.
            If provided, this position will be used instead of `self.position`
            when reconstructing the bounding box.
        Returns
        -------
        soundevent.data.BoundingBox
            The reconstructed bounding box.
        Raises
        ------
        ValueError
            If `dims` does not have the expected shape (length 2).
        """
        position = position or self.position
        if dims.ndim != 1 or dims.shape[0] != 2:
            raise ValueError(
                "Dimension array does not have the expected shape. "
                f"({dims.shape = }) != ([2])"
            )
        width, height = dims
        return _build_bounding_box(
            pos,
            duration=float(width) / self.time_scale,
            bandwidth=float(height) / self.frequency_scale,
            position=self.position,
        )
 def build_roi_mapper(config: ROIConfig) -> ROITargetMapper:
    """Factory function to create an ROITargetMapper from configuration.
    Currently creates a `BBoxEncoder` instance based on the provided
    `ROIConfig`.
    Parameters
    ----------
    config : ROIConfig
        Configuration object specifying ROI mapping parameters.
    Returns
    -------
    ROITargetMapper
        An initialized `BBoxEncoder` instance configured with the settings
        from `config`.
    """
    return BBoxEncoder(
        position=config.position,
        time_scale=config.time_scale,
        frequency_scale=config.frequency_scale,
    )
 def load_roi_mapper(
    path: data.PathLike, field: Optional[str] = None
 ) -> ROITargetMapper:
    """Load ROI mapping configuration from a file and build the mapper.
    Convenience function that loads an `ROIConfig` from the specified file
    (and optional field) and then uses `build_roi_mapper` to create the
    corresponding `ROITargetMapper` instance.
    Parameters
    ----------
    path : PathLike
        Path to the configuration file (e.g., YAML).
    field : str, optional
        Dot-separated path to a nested section within the file containing the
        ROI configuration. If None, the entire file content is used.
    Returns
    -------
    ROITargetMapper
        An initialized ROI mapper instance based on the configuration file.
    Raises
    ------
    FileNotFoundError, yaml.YAMLError, pydantic.ValidationError, KeyError,
    TypeError
        If the configuration file cannot be found, parsed, validated, or if
        the specified `field` is invalid.
    """
    config = load_config(path=path, schema=ROIConfig, field=field)
    return build_roi_mapper(config)
 VALID_POSITIONS = [
    "bottom-left",
    "bottom-right",
    "top-left",
    "top-right",
    "center-left",
    "center-right",
    "top-center",
    "bottom-center",
    "center",
    "centroid",
    "point_on_surface",
 ]
 def _build_bounding_box(
    pos: tuple[float, float],
    duration: float,
    bandwidth: float,
    position: Positions = DEFAULT_POSITION,
 ) -> data.BoundingBox:
    """Construct a BoundingBox from a reference point, size, and position type.
    Internal helper for `BBoxEncoder.recover_roi`. Calculates the box
    coordinates [start_time, low_freq, end_time, high_freq] based on where
    the input `pos` (time, freq) is located relative to the box (e.g.,
    center, corner).
    Parameters
    ----------
    pos : Tuple[float, float]
        Reference position (time, frequency).
    duration : float
        The required *unscaled* duration (width) of the bounding box.
    bandwidth : float
        The required *unscaled* frequency bandwidth (height) of the bounding
        box.
    position : Positions, default="bottom-left"
        Specifies which part of the bounding box the input `pos` corresponds to.
    Returns
    -------
    data.BoundingBox
        The constructed bounding box object.
    Raises
    ------
    ValueError
        If `position` is not a recognized value or format.
    """
    time, freq = map(float, pos)
    duration = max(0, duration)
    bandwidth = max(0, bandwidth)
    if position in ["center", "centroid", "point_on_surface"]:
        return data.BoundingBox(
            coordinates=[
                max(time - duration / 2, 0),
                max(freq - bandwidth / 2, 0),
                max(time + duration / 2, 0),
                max(freq + bandwidth / 2, 0),
            ]
        )
    if position not in VALID_POSITIONS:
        raise ValueError(
            f"Invalid position: {position}. "
            f"Valid options are: {VALID_POSITIONS}"
        )
    y, x = position.split("-")
    start_time = {
        "left": time,
        "center": time - duration / 2,
        "right": time - duration,
    }[x]
    low_freq = {
        "bottom": freq,
        "center": freq - bandwidth / 2,
        "top": freq - bandwidth,
    }[y]
    return data.BoundingBox(
        coordinates=[
            max(0, start_time),
            max(0, low_freq),
            max(0, start_time + duration),
            max(0, low_freq + bandwidth),
        ]
    )
--- a/notebooks/signal_generation.py
+++ b/notebooks/signal_generation.py
@ -0,0 +1,19 @@
 import marimo
 __generated_with = "0.13.15"
 app = marimo.App(width="medium")
@app.cell
 def _():
    from batdetect2.preprocess import build_preprocessor
    return
@app.cell
 def _():
    return
 if __name__ == "__main__":
    app.run()
--- a/pyproject.toml
+++ b/pyproject.toml
@ -65,8 +65,12 @@ build-backend = "hatchling.build"
 [project.scripts]
 batdetect2 = "batdetect2.cli:cli"
-[tool.uv]
+[dependency-groups]
-dev-dependencies = [
+jupyter = ["ipywidgets>=8.1.5", "jupyter>=1.1.1"]
 marimo = [
    "marimo>=0.12.2",
 ]
 dev = [
  "debugpy>=1.8.8",
  "hypothesis>=6.118.7",
  "pytest>=7.2.2",
@ -98,25 +102,16 @@ select = ["E4", "E7", "E9", "F", "B", "Q", "I", "NPY201"]
 convention = "numpy"
 [tool.pyright]
-include = ["batdetect2", "tests"]
+include = ["src", "tests"]
 pythonVersion = "3.9"
 pythonPlatform = "All"
 exclude = [
-  "batdetect2/detector/",
+  "src/batdetect2/detector/",
-  "batdetect2/finetune",
+  "src/batdetect2/finetune",
-  "batdetect2/utils",
+  "src/batdetect2/utils",
-  "batdetect2/plotting",
+  "src/batdetect2/plotting",
-  "batdetect2/plot",
+  "src/batdetect2/plot",
-  "batdetect2/api",
+  "src/batdetect2/api",
-  "batdetect2/evaluate/legacy",
+  "src/batdetect2/evaluate/legacy",
-  "batdetect2/train/legacy",
+  "src/batdetect2/train/legacy",
 ]
 [dependency-groups]
 jupyter = [
    "ipywidgets>=8.1.5",
    "jupyter>=1.1.1",
 ]
 marimo = [
    "marimo>=0.12.2",
 ]
--- a/src/batdetect2/init.py
+++ b/src/batdetect2/init.py
--- a/src/batdetect2/api.py
+++ b/src/batdetect2/api.py
--- a/src/batdetect2/cli/init.py
+++ b/src/batdetect2/cli/init.py
--- a/src/batdetect2/cli/ascii.py
+++ b/src/batdetect2/cli/ascii.py
--- a/src/batdetect2/cli/base.py
+++ b/src/batdetect2/cli/base.py
--- a/src/batdetect2/cli/compat.py
+++ b/src/batdetect2/cli/compat.py
--- a/src/batdetect2/cli/data.py
+++ b/src/batdetect2/cli/data.py
--- a/src/batdetect2/cli/preprocess.py
+++ b/src/batdetect2/cli/preprocess.py
--- a/src/batdetect2/cli/train.py
+++ b/src/batdetect2/cli/train.py
@ -189,8 +189,7 @@ def train_command(
            config=postprocess_config_loaded,
        )
        logger.debug(
-            "Loaded postprocessor from file {path}",
+            "Loaded postprocessor from file {path}", path=postprocess_config
            path=train_config,
        )
    except IOError:
        logger.debug(
--- a/src/batdetect2/compat/init.py
+++ b/src/batdetect2/compat/init.py
--- a/src/batdetect2/compat/data.py
+++ b/src/batdetect2/compat/data.py
--- a/src/batdetect2/compat/params.py
+++ b/src/batdetect2/compat/params.py
--- a/src/batdetect2/configs.py
+++ b/src/batdetect2/configs.py
@ -157,4 +157,4 @@ def load_config(
    if field:
        config = get_object_field(config, field)
-    return schema.model_validate(config)
+    return schema.model_validate(config or {})
--- a/src/batdetect2/data/init.py
+++ b/src/batdetect2/data/init.py
@ -8,6 +8,7 @@ from batdetect2.data.annotations import (
 from batdetect2.data.datasets import (
    DatasetConfig,
    load_dataset,
    load_dataset_config,
    load_dataset_from_config,
 )
@ -19,5 +20,6 @@ __all__ = [
    "DatasetConfig",
    "load_annotated_dataset",
    "load_dataset",
    "load_dataset_config",
    "load_dataset_from_config",
 ]
--- a/src/batdetect2/data/annotations/init.py
+++ b/src/batdetect2/data/annotations/init.py
--- a/src/batdetect2/data/annotations/aoef.py
+++ b/src/batdetect2/data/annotations/aoef.py
--- a/src/batdetect2/data/annotations/batdetect2.py
+++ b/src/batdetect2/data/annotations/batdetect2.py
--- a/src/batdetect2/data/annotations/legacy.py
+++ b/src/batdetect2/data/annotations/legacy.py
--- a/src/batdetect2/data/annotations/types.py
+++ b/src/batdetect2/data/annotations/types.py
--- a/src/batdetect2/data/datasets.py
+++ b/src/batdetect2/data/datasets.py
@ -161,6 +161,11 @@ def insert_source_tag(
    )
 # TODO: add documentation
 def load_dataset_config(path: data.PathLike, field: Optional[str] = None):
    return load_config(path=path, schema=DatasetConfig, field=field)
 def load_dataset_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
--- a/src/batdetect2/data/iterators.py
+++ b/src/batdetect2/data/iterators.py
@ -72,7 +72,7 @@ def iterate_over_sound_events(
                    sound_event_annotation
                )
-            class_name = targets.encode(sound_event_annotation)
+            class_name = targets.encode_class(sound_event_annotation)
            if class_name is None and exclude_generic:
                continue
--- a/src/batdetect2/detector/init.py
+++ b/src/batdetect2/detector/init.py
--- a/src/batdetect2/detector/compute_features.py
+++ b/src/batdetect2/detector/compute_features.py
--- a/src/batdetect2/detector/model_helpers.py
+++ b/src/batdetect2/detector/model_helpers.py
--- a/src/batdetect2/detector/models.py
+++ b/src/batdetect2/detector/models.py
--- a/src/batdetect2/detector/parameters.py
+++ b/src/batdetect2/detector/parameters.py
--- a/src/batdetect2/detector/post_process.py
+++ b/src/batdetect2/detector/post_process.py
--- a/src/batdetect2/evaluate/init.py
+++ b/src/batdetect2/evaluate/init.py
--- a/src/batdetect2/evaluate/evaluate.py
+++ b/src/batdetect2/evaluate/evaluate.py
@ -1,4 +1,3 @@
 import numpy as np
 from sklearn.metrics import auc, roc_curve
--- a/src/batdetect2/evaluate/legacy/evaluate_models.py
+++ b/src/batdetect2/evaluate/legacy/evaluate_models.py
--- a/src/batdetect2/evaluate/legacy/readme.md
+++ b/src/batdetect2/evaluate/legacy/readme.md
--- a/src/batdetect2/evaluate/match.py
+++ b/src/batdetect2/evaluate/match.py
@ -40,7 +40,7 @@ def match_sound_events_and_raw_predictions(
        gt_uuid = target.uuid if target is not None else None
        gt_det = target is not None
-        gt_class = targets.encode(target) if target is not None else None
+        gt_class = targets.encode_class(target) if target is not None else None
        pred_score = float(prediction.detection_score) if prediction else 0
--- a/src/batdetect2/evaluate/metrics.py
+++ b/src/batdetect2/evaluate/metrics.py
--- a/src/batdetect2/evaluate/types.py
+++ b/src/batdetect2/evaluate/types.py
--- a/src/batdetect2/finetune/init.py
+++ b/src/batdetect2/finetune/init.py
--- a/src/batdetect2/finetune/finetune_model.py
+++ b/src/batdetect2/finetune/finetune_model.py
--- a/src/batdetect2/finetune/prep_data_finetune.py
+++ b/src/batdetect2/finetune/prep_data_finetune.py
--- a/src/batdetect2/finetune/readme.md
+++ b/src/batdetect2/finetune/readme.md
--- a/src/batdetect2/models/init.py
+++ b/src/batdetect2/models/init.py
--- a/src/batdetect2/models/backbones.py
+++ b/src/batdetect2/models/backbones.py
--- a/src/batdetect2/models/blocks.py
+++ b/src/batdetect2/models/blocks.py
--- a/src/batdetect2/models/bottleneck.py
+++ b/src/batdetect2/models/bottleneck.py
--- a/src/batdetect2/models/checkpoints/Net2DFast_UK_same.pth.tar
+++ b/src/batdetect2/models/checkpoints/Net2DFast_UK_same.pth.tar
--- a/src/batdetect2/models/checkpoints/readme.md
+++ b/src/batdetect2/models/checkpoints/readme.md
--- a/src/batdetect2/models/decoder.py
+++ b/src/batdetect2/models/decoder.py
--- a/src/batdetect2/models/detectors.py
+++ b/src/batdetect2/models/detectors.py
--- a/src/batdetect2/models/encoder.py
+++ b/src/batdetect2/models/encoder.py
--- a/src/batdetect2/models/heads.py
+++ b/src/batdetect2/models/heads.py
--- a/src/batdetect2/models/types.py
+++ b/src/batdetect2/models/types.py
--- a/src/batdetect2/plot.py
+++ b/src/batdetect2/plot.py
--- a/src/batdetect2/plotting/init.py
+++ b/src/batdetect2/plotting/init.py
--- a/src/batdetect2/plotting/common.py
+++ b/src/batdetect2/plotting/common.py
--- a/src/batdetect2/plotting/heatmaps.py
+++ b/src/batdetect2/plotting/heatmaps.py
--- a/src/batdetect2/postprocess/init.py
+++ b/src/batdetect2/postprocess/init.py
@ -7,8 +7,8 @@ containing detected sound events with associated class tags and geometry.
 The pipeline involves several configurable steps, implemented in submodules:
 1.  Non-Maximum Suppression (`.nms`): Isolates distinct detection peaks.
-2.  Coordinate Remapping (`.remapping`): Adds real-world time/frequency
+2.  Coordinate Remapping (`.remapping`): Adds time/frequency coordinates to raw
-    coordinates to raw model output arrays.
+    model output arrays.
 3.  Detection Extraction (`.detection`): Identifies candidate detection points
    (location and score) based on thresholds and score ranking (top-k).
 4.  Data Extraction (`.extraction`): Gathers associated model outputs (size,
@ -526,7 +526,7 @@ class Postprocessor(PostprocessorProtocol):
        return [
            convert_xr_dataset_to_raw_prediction(
                dataset,
-                self.targets.recover_roi,
+                self.targets.decode_roi,
            )
            for dataset in detection_datasets
        ]
@ -558,7 +558,7 @@ class Postprocessor(PostprocessorProtocol):
            convert_raw_predictions_to_clip_prediction(
                prediction,
                clip,
-                sound_event_decoder=self.targets.decode,
+                sound_event_decoder=self.targets.decode_class,
                generic_class_tags=self.targets.generic_class_tags,
                classification_threshold=self.config.classification_threshold,
            )
--- a/src/batdetect2/postprocess/decoding.py
+++ b/src/batdetect2/postprocess/decoding.py
@ -4,8 +4,7 @@ This module handles the final stages of the BatDetect2 postprocessing pipeline.
 It takes the structured detection data extracted by the `extraction` module
 (typically an `xarray.Dataset` containing scores, positions, predicted sizes,
 class probabilities, and features for each detection point) and converts it
-into meaningful, standardized prediction objects based on the `soundevent` data
+into standardized prediction objects based on the `soundevent` data model.
 model.
 The process involves:
 1.  Converting the `xarray.Dataset` into a list of intermediate `RawPrediction`
@ -33,7 +32,7 @@ import numpy as np
 import xarray as xr
 from soundevent import data
-from batdetect2.postprocess.types import GeometryBuilder, RawPrediction
+from batdetect2.postprocess.types import GeometryDecoder, RawPrediction
 from batdetect2.targets.classes import SoundEventDecoder
 from batdetect2.utils.arrays import iterate_over_array
@ -55,7 +54,7 @@ decoding.
 def convert_xr_dataset_to_raw_prediction(
    detection_dataset: xr.Dataset,
-    geometry_builder: GeometryBuilder,
+    geometry_decoder: GeometryDecoder,
 ) -> List[RawPrediction]:
    """Convert an xarray.Dataset of detections to RawPrediction objects.
@ -72,7 +71,7 @@ def convert_xr_dataset_to_raw_prediction(
        output by `extract_detection_xr_dataset`. Expected variables include
        'scores' (with time/freq coords), 'dimensions', 'classes', 'features'.
        Must have a 'detection' dimension.
-    geometry_builder : GeometryBuilder
+    geometry_decoder : GeometryDecoder
        A function that takes a position tuple `(time, freq)` and a NumPy array
        of dimensions, and returns the corresponding reconstructed
        `soundevent.data.Geometry`.
@ -96,14 +95,20 @@ def convert_xr_dataset_to_raw_prediction(
    for det_num in range(detection_dataset.sizes["detection"]):
        det_info = detection_dataset.sel(detection=det_num)
-        geom = geometry_builder(
+        # TODO: Maybe clean this up
        highest_scoring_class = det_info.coords["category"][
            det_info["classes"].argmax()
        ].item()
        geom = geometry_decoder(
            (det_info.time, det_info.frequency),
            det_info.dimensions,
            class_name=highest_scoring_class,
        )
        detections.append(
            RawPrediction(
-                detection_score=det_info.score,
+                detection_score=det_info.scores,
                geometry=geom,
                class_scores=det_info.classes,
                features=det_info.features,
--- a/src/batdetect2/postprocess/detection.py
+++ b/src/batdetect2/postprocess/detection.py
@ -1,6 +1,6 @@
 """Extracts candidate detection points from a model output heatmap.
-This module implements a specific step within the BatDetect2 postprocessing
+This module implements Step 3 within the BatDetect2 postprocessing
 pipeline. Its primary function is to identify potential sound event locations
 by finding peaks (local maxima or high-scoring points) in the detection heatmap
 produced by the neural network (usually after Non-Maximum Suppression and
--- a/src/batdetect2/postprocess/extraction.py
+++ b/src/batdetect2/postprocess/extraction.py
@ -1,9 +1,9 @@
 """Extracts associated data for detected points from model output arrays.
-This module implements a key step (Step 4) in the BatDetect2 postprocessing
+This module implements a Step 4 in the BatDetect2 postprocessing pipeline.
-pipeline. After candidate detection points (time, frequency, score) have been
+After candidate detection points (time, frequency, score) have been identified,
-identified, this module extracts the corresponding values from other raw model
+this module extracts the corresponding values from other raw model output
-output arrays, such as:
+arrays, such as:
 - Predicted bounding box sizes (width, height).
 - Class probability scores for each defined target class.
--- a/src/batdetect2/postprocess/nms.py
+++ b/src/batdetect2/postprocess/nms.py
--- a/src/batdetect2/postprocess/remapping.py
+++ b/src/batdetect2/postprocess/remapping.py
--- a/src/batdetect2/postprocess/types.py
+++ b/src/batdetect2/postprocess/types.py
@ -11,30 +11,37 @@ modularity and consistent interaction between different parts of the BatDetect2
 system that deal with model predictions.
 """
-from typing import Callable, List, NamedTuple, Protocol
+from typing import List, NamedTuple, Optional, Protocol
 import numpy as np
 import xarray as xr
 from soundevent import data
 from batdetect2.models.types import ModelOutput
 from batdetect2.targets.types import Position, Size
 __all__ = [
    "RawPrediction",
    "PostprocessorProtocol",
-    "GeometryBuilder",
+    "GeometryDecoder",
 ]
-GeometryBuilder = Callable[[tuple[float, float], np.ndarray], data.Geometry]
+# TODO: update the docstring
-"""Type alias for a function that recovers geometry from position and size.
+class GeometryDecoder(Protocol):
    """Type alias for a function that recovers geometry from position and size.
-This callable takes:
+    This callable takes:
-1.  A position tuple `(time, frequency)`.
+    1.  A position tuple `(time, frequency)`.
-2.  A NumPy array of size dimensions (e.g., `[width, height]`).
+    2.  A NumPy array of size dimensions (e.g., `[width, height]`).
-It should return the reconstructed `soundevent.data.Geometry` (typically a
+    3.  Optionally a class name of the highest scoring class. This is to accomodate
-`BoundingBox`).
+        different ways of decoding geometry that depend on the predicted class.
-"""
+    It should return the reconstructed `soundevent.data.Geometry` (typically a
    `BoundingBox`).
    """
    def __call__(
        self, position: Position, size: Size, class_name: Optional[str] = None
    ) -> data.Geometry: ...
 class RawPrediction(NamedTuple):
--- a/src/batdetect2/preprocess/init.py
+++ b/src/batdetect2/preprocess/init.py
--- a/src/batdetect2/preprocess/audio.py
+++ b/src/batdetect2/preprocess/audio.py
--- a/src/batdetect2/preprocess/spectrogram.py
+++ b/src/batdetect2/preprocess/spectrogram.py
--- a/src/batdetect2/preprocess/types.py
+++ b/src/batdetect2/preprocess/types.py
--- a/src/batdetect2/targets/init.py
+++ b/src/batdetect2/targets/init.py
@ -23,7 +23,7 @@ object is via the `build_targets` or `load_targets` functions.
 from typing import List, Optional
-import numpy as np
+from loguru import logger
 from pydantic import Field
 from soundevent import data
@ -50,7 +50,8 @@ from batdetect2.targets.filtering import (
    load_filter_from_config,
 )
 from batdetect2.targets.rois import (
-    ROIConfig,
+    AnchorBBoxMapperConfig,
    ROIMapperConfig,
    ROITargetMapper,
    build_roi_mapper,
 )
@ -59,11 +60,11 @@ from batdetect2.targets.terms import (
    TermInfo,
    TermRegistry,
    call_type,
    default_term_registry,
    get_tag_from_info,
    get_term_from_key,
    individual,
    register_term,
    term_registry,
 )
 from batdetect2.targets.transform import (
    DerivationRegistry,
@ -73,13 +74,13 @@ from batdetect2.targets.transform import (
    SoundEventTransformation,
    TransformConfig,
    build_transformation_from_config,
-    derivation_registry,
+    default_derivation_registry,
    get_derivation,
    load_transformation_config,
    load_transformation_from_config,
    register_derivation,
 )
-from batdetect2.targets.types import TargetProtocol
+from batdetect2.targets.types import Position, Size, TargetProtocol
 __all__ = [
    "ClassesConfig",
@ -88,7 +89,7 @@ __all__ = [
    "FilterConfig",
    "FilterRule",
    "MapValueRule",
-    "ROIConfig",
+    "AnchorBBoxMapperConfig",
    "ROITargetMapper",
    "ReplaceRule",
    "SoundEventDecoder",
@ -156,12 +157,12 @@ class TargetConfig(BaseConfig):
        omitted, default ROI mapping settings are used.
    """
-    filtering: Optional[FilterConfig] = None
+    filtering: FilterConfig = Field(default_factory=FilterConfig)
-    transforms: Optional[TransformConfig] = None
+    transforms: TransformConfig = Field(default_factory=TransformConfig)
    classes: ClassesConfig = Field(
        default_factory=lambda: DEFAULT_CLASSES_CONFIG
    )
-    roi: Optional[ROIConfig] = None
+    roi: ROIMapperConfig = Field(default_factory=AnchorBBoxMapperConfig)
 def load_target_config(
@ -240,6 +241,7 @@ class Targets(TargetProtocol):
        generic_class_tags: List[data.Tag],
        filter_fn: Optional[SoundEventFilter] = None,
        transform_fn: Optional[SoundEventTransformation] = None,
        roi_mapper_overrides: Optional[dict[str, ROITargetMapper]] = None,
    ):
        """Initialize the Targets object.
@ -272,6 +274,16 @@ class Targets(TargetProtocol):
        self._encode_fn = encode_fn
        self._decode_fn = decode_fn
        self._transform_fn = transform_fn
        self._roi_mapper_overrides = roi_mapper_overrides or {}
        for class_name in self._roi_mapper_overrides:
            if class_name not in self.class_names:
                # TODO: improve this warning
                logger.warning(
                    "The ROI mapper overrides contains a class ({class_name}) "
                    "not present in the class names.",
                    class_name=class_name,
                )
    def filter(self, sound_event: data.SoundEventAnnotation) -> bool:
        """Apply the configured filter to a sound event annotation.
@ -291,7 +303,9 @@ class Targets(TargetProtocol):
            return True
        return self._filter_fn(sound_event)
-    def encode(self, sound_event: data.SoundEventAnnotation) -> Optional[str]:
+    def encode_class(
        self, sound_event: data.SoundEventAnnotation
    ) -> Optional[str]:
        """Encode a sound event annotation to its target class name.
        Applies the configured class definition rules (including priority)
@ -312,7 +326,7 @@ class Targets(TargetProtocol):
        """
        return self._encode_fn(sound_event)
-    def decode(self, class_label: str) -> List[data.Tag]:
+    def decode_class(self, class_label: str) -> List[data.Tag]:
        """Decode a predicted class name back into representative tags.
        Uses the configured mapping (based on `TargetClass.output_tags` or
@ -352,9 +366,9 @@ class Targets(TargetProtocol):
            return self._transform_fn(sound_event)
        return sound_event
-    def get_position(
+    def encode_roi(
        self, sound_event: data.SoundEventAnnotation
-    ) -> tuple[float, float]:
+    ) -> tuple[Position, Size]:
        """Extract the target reference position from the annotation's roi.
        Delegates to the internal ROI mapper's `get_roi_position` method.
@ -374,50 +388,20 @@ class Targets(TargetProtocol):
        ValueError
            If the annotation lacks geometry.
        """
-        geom = sound_event.sound_event.geometry
+        class_name = self.encode_class(sound_event)
-        if geom is None:
+        if class_name in self._roi_mapper_overrides:
-            raise ValueError(
+            return self._roi_mapper_overrides[class_name].encode(
-                "Sound event has no geometry, cannot get its position."
+                sound_event.sound_event
            )
-        return self._roi_mapper.get_roi_position(geom)
+        return self._roi_mapper.encode(sound_event.sound_event)
-    def get_size(self, sound_event: data.SoundEventAnnotation) -> np.ndarray:
+    def decode_roi(
        """Calculate the target size dimensions from the annotation's geometry.
        Delegates to the internal ROI mapper's `get_roi_size` method, which
        applies configured scaling factors.
        Parameters
        ----------
        sound_event : data.SoundEventAnnotation
            The annotation containing the geometry (ROI).
        Returns
        -------
        np.ndarray
            NumPy array containing the size dimensions, matching the
            order in `self.dimension_names` (e.g., `[width, height]`).
        Raises
        ------
        ValueError
            If the annotation lacks geometry.
        """
        geom = sound_event.sound_event.geometry
        if geom is None:
            raise ValueError(
                "Sound event has no geometry, cannot get its size."
            )
        return self._roi_mapper.get_roi_size(geom)
    def recover_roi(
        self,
-        pos: tuple[float, float],
+        position: Position,
-        dims: np.ndarray,
+        size: Size,
        class_name: Optional[str] = None,
    ) -> data.Geometry:
        """Recover an approximate geometric ROI from a position and dimensions.
@ -438,7 +422,13 @@ class Targets(TargetProtocol):
        data.Geometry
            The reconstructed geometry (typically `BoundingBox`).
        """
-        return self._roi_mapper.recover_roi(pos, dims)
+        if class_name in self._roi_mapper_overrides:
            return self._roi_mapper_overrides[class_name].decode(
                position,
                size,
            )
        return self._roi_mapper.decode(position, size)
 DEFAULT_CLASSES = [
@ -493,10 +483,12 @@ DEFAULT_CLASSES = [
    TargetClass(
        tags=[TagInfo(value="Nyctalus leisleri")],
        name="nyclei",
        roi=AnchorBBoxMapperConfig(anchor="top-left"),
    ),
    TargetClass(
        tags=[TagInfo(value="Rhinolophus ferrumequinum")],
        name="rhifer",
        roi=AnchorBBoxMapperConfig(anchor="top-left"),
    ),
    TargetClass(
        tags=[TagInfo(value="Plecotus auritus")],
@ -537,13 +529,14 @@ DEFAULT_TARGET_CONFIG: TargetConfig = TargetConfig(
        ]
    ),
    classes=DEFAULT_CLASSES_CONFIG,
    roi=AnchorBBoxMapperConfig(),
 )
 def build_targets(
    config: Optional[TargetConfig] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: DerivationRegistry = default_derivation_registry,
 ) -> Targets:
    """Build a Targets object from a loaded TargetConfig.
@ -606,12 +599,17 @@ def build_targets(
        if config.transforms
        else None
    )
-    roi_mapper = build_roi_mapper(config.roi or ROIConfig())
+    roi_mapper = build_roi_mapper(config.roi)
    class_names = get_class_names_from_config(config.classes)
    generic_class_tags = build_generic_class_tags(
        config.classes,
        term_registry=term_registry,
    )
    roi_overrides = {
        class_config.name: build_roi_mapper(class_config.roi)
        for class_config in config.classes.classes
        if class_config.roi is not None
    }
    return Targets(
        filter_fn=filter_fn,
@ -621,14 +619,15 @@ def build_targets(
        roi_mapper=roi_mapper,
        generic_class_tags=generic_class_tags,
        transform_fn=transform_fn,
        roi_mapper_overrides=roi_overrides,
    )
 def load_targets(
    config_path: data.PathLike,
    field: Optional[str] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: DerivationRegistry = default_derivation_registry,
 ) -> Targets:
    """Load a Targets object directly from a configuration file.
--- a/src/batdetect2/targets/classes.py
+++ b/src/batdetect2/targets/classes.py
@ -6,29 +6,27 @@ from pydantic import Field, field_validator
 from soundevent import data
 from batdetect2.configs import BaseConfig, load_config
 from batdetect2.targets.rois import ROIMapperConfig
 from batdetect2.targets.terms import (
    GENERIC_CLASS_KEY,
    TagInfo,
    TermRegistry,
    default_term_registry,
    get_tag_from_info,
    term_registry,
 )
 __all__ = [
    "SoundEventEncoder",
    "SoundEventDecoder",
    "TargetClass",
    "ClassesConfig",
    "load_classes_config",
    "load_encoder_from_config",
    "load_decoder_from_config",
    "build_sound_event_encoder",
    "build_sound_event_decoder",
    "build_generic_class_tags",
    "get_class_names_from_config",
    "DEFAULT_SPECIES_LIST",
    "build_generic_class_tags",
    "build_sound_event_decoder",
    "build_sound_event_encoder",
    "get_class_names_from_config",
    "load_classes_config",
    "load_decoder_from_config",
    "load_encoder_from_config",
 ]
 SoundEventEncoder = Callable[[data.SoundEventAnnotation], Optional[str]]
 """Type alias for a sound event class encoder function.
@ -113,6 +111,7 @@ class TargetClass(BaseConfig):
    tags: List[TagInfo] = Field(min_length=1)
    match_type: Literal["all", "any"] = Field(default="all")
    output_tags: Optional[List[TagInfo]] = None
    roi: Optional[ROIMapperConfig] = None
 def _get_default_classes() -> List[TargetClass]:
@ -235,7 +234,7 @@ class ClassesConfig(BaseConfig):
        return v
-def _is_target_class(
+def is_target_class(
    sound_event_annotation: data.SoundEventAnnotation,
    tags: Set[data.Tag],
    match_all: bool = True,
@ -316,7 +315,7 @@ def _encode_with_multiple_classifiers(
 def build_sound_event_encoder(
    config: ClassesConfig,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> SoundEventEncoder:
    """Build a sound event encoder function from the classes configuration.
@ -350,7 +349,7 @@ def build_sound_event_encoder(
        (
            class_info.name,
            partial(
-                _is_target_class,
+                is_target_class,
                tags={
                    get_tag_from_info(tag_info, term_registry=term_registry)
                    for tag_info in class_info.tags
@ -410,7 +409,7 @@ def _decode_class(
 def build_sound_event_decoder(
    config: ClassesConfig,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
    raise_on_unmapped: bool = False,
 ) -> SoundEventDecoder:
    """Build a sound event decoder function from the classes configuration.
@ -465,7 +464,7 @@ def build_sound_event_decoder(
 def build_generic_class_tags(
    config: ClassesConfig,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> List[data.Tag]:
    """Extract and build the list of tags for the generic class from config.
@ -530,7 +529,7 @@ def load_classes_config(
 def load_encoder_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> SoundEventEncoder:
    """Load a class encoder function directly from a configuration file.
@ -571,7 +570,7 @@ def load_encoder_from_config(
 def load_decoder_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
    raise_on_unmapped: bool = False,
 ) -> SoundEventDecoder:
    """Load a class decoder function directly from a configuration file.
--- a/src/batdetect2/targets/filtering.py
+++ b/src/batdetect2/targets/filtering.py
@ -10,7 +10,7 @@ from batdetect2.targets.terms import (
    TagInfo,
    TermRegistry,
    get_tag_from_info,
-    term_registry,
+    default_term_registry,
 )
 __all__ = [
@ -156,7 +156,7 @@ def equal_tags(
 def build_filter_from_rule(
    rule: FilterRule,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> SoundEventFilter:
    """Creates a callable filter function from a single FilterRule.
@ -243,7 +243,7 @@ class FilterConfig(BaseConfig):
 def build_sound_event_filter(
    config: FilterConfig,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> SoundEventFilter:
    """Builds a merged filter function from a FilterConfig object.
@ -291,7 +291,7 @@ def load_filter_config(
 def load_filter_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> SoundEventFilter:
    """Loads filter configuration from a file and builds the filter function.
--- a/src/batdetect2/targets/rois.py
+++ b/src/batdetect2/targets/rois.py
@ -0,0 +1,684 @@
 """Handles mapping between geometric ROIs and target representations.
 This module defines a standardized interface (`ROITargetMapper`) for converting
 a sound event's Region of Interest (ROI) into a target representation suitable
 for machine learning models, and for decoding model outputs back into geometric
 ROIs.
 The core operations are:
 1.  **Encoding**: A `soundevent.data.SoundEvent` is mapped to a reference
    `Position` (time, frequency) and a `Size` array. The method for
    determining the position and size varies by the mapper implementation
    (e.g., using a bounding box anchor or the point of peak energy).
 2.  **Decoding**: A `Position` and `Size` array are mapped back to an
    approximate `soundevent.data.Geometry` (typically a `BoundingBox`).
 This logic is encapsulated within specific mapper classes. Configuration for
 each mapper (e.g., anchor point, scaling factors) is managed by a corresponding
 Pydantic config object. The `ROIMapperConfig` type allows for flexibly
 selecting and configuring the desired mapper. This module separates the
 *geometric* aspect of target definition from *semantic* classification.
 """
 from typing import Annotated, List, Literal, Optional, Protocol, Tuple, Union
 import numpy as np
 from pydantic import Field
 from soundevent import data
 from batdetect2.configs import BaseConfig
 from batdetect2.preprocess import PreprocessingConfig, build_preprocessor
 from batdetect2.preprocess.types import PreprocessorProtocol
 from batdetect2.targets.types import Position, Size
 __all__ = [
    "Anchor",
    "AnchorBBoxMapper",
    "AnchorBBoxMapperConfig",
    "DEFAULT_ANCHOR",
    "DEFAULT_FREQUENCY_SCALE",
    "DEFAULT_TIME_SCALE",
    "PeakEnergyBBoxMapper",
    "PeakEnergyBBoxMapperConfig",
    "ROIMapperConfig",
    "ROITargetMapper",
    "SIZE_HEIGHT",
    "SIZE_ORDER",
    "SIZE_WIDTH",
    "build_roi_mapper",
 ]
 Anchor = Literal[
    "bottom-left",
    "bottom-right",
    "top-left",
    "top-right",
    "center-left",
    "center-right",
    "top-center",
    "bottom-center",
    "center",
    "centroid",
    "point_on_surface",
 ]
 SIZE_WIDTH = "width"
 """Standard name for the width/time dimension component ('width')."""
 SIZE_HEIGHT = "height"
 """Standard name for the height/frequency dimension component ('height')."""
 SIZE_ORDER = (SIZE_WIDTH, SIZE_HEIGHT)
 """Standard order of dimensions for size arrays ([width, height])."""
 DEFAULT_TIME_SCALE = 1000.0
 """Default scaling factor for time duration."""
 DEFAULT_FREQUENCY_SCALE = 1 / 859.375
 """Default scaling factor for frequency bandwidth."""
 DEFAULT_ANCHOR = "bottom-left"
 """Default reference position within the geometry ('bottom-left' corner)."""
 class ROITargetMapper(Protocol):
    """Protocol defining the interface for ROI-to-target mapping.
    Specifies the `encode` and `decode` methods required for converting a
    `soundevent.data.SoundEvent` into a target representation (a reference
    position and a size vector) and for recovering an approximate ROI from that
    representation.
    Attributes
    ----------
    dimension_names : List[str]
        A list containing the names of the dimensions in the `Size` array
        returned by `encode` and expected by `decode`.
    """
    dimension_names: List[str]
    def encode(self, sound_event: data.SoundEvent) -> tuple[Position, Size]:
        """Encode a SoundEvent's geometry into a position and size.
        Parameters
        ----------
        sound_event : data.SoundEvent
            The input sound event, which must have a geometry attribute.
        Returns
        -------
        Tuple[Position, Size]
            A tuple containing:
            - The reference position as (time, frequency) coordinates.
            - A NumPy array with the calculated size dimensions.
        Raises
        ------
        ValueError
            If the sound event does not have a geometry.
        """
        ...
    def decode(self, position: Position, size: Size) -> data.Geometry:
        """Decode a position and size back into a geometric ROI.
        Performs the inverse mapping: takes a reference position and size
        dimensions and reconstructs a geometric representation.
        Parameters
        ----------
        position : Position
            The reference position (time, frequency).
        size : Size
            NumPy array containing the size dimensions, matching the order
            and meaning specified by `dimension_names`.
        Returns
        -------
        soundevent.data.Geometry
            The reconstructed geometry, typically a `BoundingBox`.
        Raises
        ------
        ValueError
            If the `size` array has an unexpected shape or if reconstruction
            fails.
        """
        ...
 class AnchorBBoxMapperConfig(BaseConfig):
    """Configuration for `AnchorBBoxMapper`.
    Defines parameters for converting ROIs into targets using a fixed anchor
    point on the bounding box.
    Attributes
    ----------
    name : Literal["anchor_bbox"]
        The unique identifier for this mapper type.
    anchor : Anchor
        Specifies the anchor point within the bounding box to use as the
        target's reference position (e.g., "center", "bottom-left").
    time_scale : float
        Scaling factor applied to the time duration (width) of the ROI.
    frequency_scale : float
        Scaling factor applied to the frequency bandwidth (height) of the ROI.
    """
    name: Literal["anchor_bbox"] = "anchor_bbox"
    anchor: Anchor = DEFAULT_ANCHOR
    time_scale: float = DEFAULT_TIME_SCALE
    frequency_scale: float = DEFAULT_FREQUENCY_SCALE
 class AnchorBBoxMapper(ROITargetMapper):
    """Maps ROIs using a bounding box anchor point and width/height.
    This class implements the `ROITargetMapper` protocol for `BoundingBox`
    geometries.
    **Encoding**: The `position` is a fixed anchor point on the bounding box
    (e.g., "bottom-left"). The `size` is a 2-element array containing the
    scaled width and height of the box.
    **Decoding**: Reconstructs a `BoundingBox` from an anchor point and
    scaled width/height.
    Attributes
    ----------
    dimension_names : List[str]
        The output dimension names: `['width', 'height']`.
    anchor : Anchor
        The configured anchor point type (e.g., "center", "bottom-left").
    time_scale : float
        The scaling factor for the time dimension (width).
    frequency_scale : float
        The scaling factor for the frequency dimension (height).
    """
    dimension_names = [SIZE_WIDTH, SIZE_HEIGHT]
    def __init__(
        self,
        anchor: Anchor = DEFAULT_ANCHOR,
        time_scale: float = DEFAULT_TIME_SCALE,
        frequency_scale: float = DEFAULT_FREQUENCY_SCALE,
    ):
        """Initialize the BBoxEncoder.
        Parameters
        ----------
        anchor : Anchor
            Reference point type within the bounding box.
        time_scale : float
            Scaling factor for time duration (width).
        frequency_scale : float
            Scaling factor for frequency bandwidth (height).
        """
        self.anchor: Anchor = anchor
        self.time_scale = time_scale
        self.frequency_scale = frequency_scale
    def encode(self, sound_event: data.SoundEvent) -> Tuple[Position, Size]:
        """Encode a SoundEvent into an anchor position and scaled box size.
        The position is determined by the configured anchor on the sound
        event's bounding box. The size is the scaled width and height.
        Parameters
        ----------
        sound_event : data.SoundEvent
            The input sound event with a geometry.
        Returns
        -------
        Tuple[Position, Size]
            A tuple of (anchor_position, [scaled_width, scaled_height]).
        """
        from soundevent import geometry
        geom = sound_event.geometry
        if geom is None:
            raise ValueError(
                "Cannot encode the geometry of a sound event without geometry."
                f" Sound event: {sound_event}"
            )
        position = geometry.get_geometry_point(geom, position=self.anchor)
        start_time, low_freq, end_time, high_freq = geometry.compute_bounds(
            geom
        )
        size = np.array(
            [
                (end_time - start_time) * self.time_scale,
                (high_freq - low_freq) * self.frequency_scale,
            ]
        )
        return position, size
    def decode(
        self,
        position: Position,
        size: Size,
    ) -> data.Geometry:
        """Recover a BoundingBox from an anchor position and scaled size.
        Un-scales the input dimensions and reconstructs a
        `soundevent.data.BoundingBox` relative to the given anchor position.
        Parameters
        ----------
        position : Position
            Reference anchor position (time, frequency).
        size : Size
            NumPy array containing the scaled [width, height].
        Returns
        -------
        data.BoundingBox
            The reconstructed bounding box.
        Raises
        ------
        ValueError
            If `size` does not have the expected shape (length 2).
        """
        if size.ndim != 1 or size.shape[0] != 2:
            raise ValueError(
                "Dimension array does not have the expected shape. "
                f"({size.shape = }) != ([2])"
            )
        width, height = size
        return _build_bounding_box(
            position,
            duration=float(width) / self.time_scale,
            bandwidth=float(height) / self.frequency_scale,
            anchor=self.anchor,
        )
 class PeakEnergyBBoxMapperConfig(BaseConfig):
    """Configuration for `PeakEnergyBBoxMapper`.
    Attributes
    ----------
    name : Literal["peak_energy_bbox"]
        The unique identifier for this mapper type.
    preprocessing : PreprocessingConfig
        Configuration for the spectrogram preprocessor needed to find the
        peak energy.
    loading_buffer : float
        Seconds to add to each side of the ROI when loading audio to ensure
        the peak is captured accurately, avoiding boundary effects.
    time_scale : float
        Scaling factor applied to the time dimensions.
    frequency_scale : float
        Scaling factor applied to the frequency dimensions.
    """
    name: Literal["peak_energy_bbox"] = "peak_energy_bbox"
    preprocessing: PreprocessingConfig = Field(
        default_factory=PreprocessingConfig
    )
    loading_buffer: float = 0.01
    time_scale: float = DEFAULT_TIME_SCALE
    frequency_scale: float = DEFAULT_FREQUENCY_SCALE
 class PeakEnergyBBoxMapper(ROITargetMapper):
    """Maps ROIs using the peak energy point and distances to edges.
    This class implements the `ROITargetMapper` protocol.
    **Encoding**: The `position` is the (time, frequency) coordinate of the
    point with the highest energy within the sound event's bounding box. The
    `size` is a 4-element array representing the scaled distances from this
    peak energy point to the left, bottom, right, and top edges of the box.
    **Decoding**: Reconstructs a `BoundingBox` by adding/subtracting the
    un-scaled distances from the peak energy point.
    Attributes
    ----------
    dimension_names : List[str]
        The output dimension names: `['left', 'bottom', 'right', 'top']`.
    preprocessor : PreprocessorProtocol
        The spectrogram preprocessor instance.
    time_scale : float
        The scaling factor for time-based distances.
    frequency_scale : float
        The scaling factor for frequency-based distances.
    loading_buffer : float
        The buffer used for loading audio around the ROI.
    """
    dimension_names = ["left", "bottom", "right", "top"]
    def __init__(
        self,
        preprocessor: PreprocessorProtocol,
        time_scale: float = DEFAULT_TIME_SCALE,
        frequency_scale: float = DEFAULT_FREQUENCY_SCALE,
        loading_buffer: float = 0.01,
    ):
        """Initialize the PeakEnergyBBoxMapper.
        Parameters
        ----------
        preprocessor : PreprocessorProtocol
            An initialized preprocessor for generating spectrograms.
        time_scale : float
            Scaling factor for time dimensions (left, right distances).
        frequency_scale : float
            Scaling factor for frequency dimensions (bottom, top distances).
        loading_buffer : float
            Buffer in seconds to add when loading audio clips.
        """
        self.preprocessor = preprocessor
        self.time_scale = time_scale
        self.frequency_scale = frequency_scale
        self.loading_buffer = loading_buffer
    def encode(
        self,
        sound_event: data.SoundEvent,
    ) -> tuple[Position, Size]:
        """Encode a SoundEvent into a peak energy position and edge distances.
        Finds the peak energy coordinates within the event's bounding box
        and calculates the scaled distances from this point to the box edges.
        Parameters
        ----------
        sound_event : data.SoundEvent
            The input sound event with a geometry and associated recording.
        Returns
        -------
        Tuple[Position, Size]
            A tuple of (peak_position, [l, b, r, t] distances).
        """
        from soundevent import geometry
        geom = sound_event.geometry
        if geom is None:
            raise ValueError(
                "Cannot encode the geometry of a sound event without geometry."
                f" Sound event: {sound_event}"
            )
        start_time, low_freq, end_time, high_freq = geometry.compute_bounds(
            geom
        )
        time, freq = get_peak_energy_coordinates(
            recording=sound_event.recording,
            preprocessor=self.preprocessor,
            start_time=start_time,
            end_time=end_time,
            low_freq=low_freq,
            high_freq=high_freq,
            loading_buffer=self.loading_buffer,
        )
        size = np.array(
            [
                (time - start_time) * self.time_scale,
                (freq - low_freq) * self.frequency_scale,
                (end_time - time) * self.time_scale,
                (high_freq - freq) * self.frequency_scale,
            ]
        )
        return (time, freq), size
    def decode(self, position: Position, size: Size) -> data.Geometry:
        """Recover a BoundingBox from a peak position and edge distances.
        Parameters
        ----------
        position : Position
            The reference peak energy position (time, frequency).
        size : Size
            NumPy array with scaled distances [left, bottom, right, top].
        Returns
        -------
        data.BoundingBox
            The reconstructed bounding box.
        """
        time, freq = position
        left, bottom, right, top = size
        return data.BoundingBox(
            coordinates=[
                time - max(0, float(left)) / self.time_scale,
                freq - max(0, float(bottom)) / self.frequency_scale,
                time + max(0, float(right)) / self.time_scale,
                freq + max(0, float(top)) / self.frequency_scale,
            ]
        )
 ROIMapperConfig = Annotated[
    Union[AnchorBBoxMapperConfig, PeakEnergyBBoxMapperConfig],
    Field(discriminator="name"),
 ]
 """A discriminated union of all supported ROI mapper configurations.
 This type allows for selecting and configuring different `ROITargetMapper`
 implementations by using the `name` field as a discriminator.
 """
 def build_roi_mapper(
    config: Optional[ROIMapperConfig] = None,
 ) -> ROITargetMapper:
    """Factory function to create an ROITargetMapper from a config object.
    Parameters
    ----------
    config : ROIMapperConfig
        A configuration object specifying the mapper type and its parameters.
    Returns
    -------
    ROITargetMapper
        An initialized ROI mapper instance.
    Raises
    ------
    NotImplementedError
        If the `name` in the config does not correspond to a known mapper.
    """
    config = config or AnchorBBoxMapperConfig()
    if config.name == "anchor_bbox":
        return AnchorBBoxMapper(
            anchor=config.anchor,
            time_scale=config.time_scale,
            frequency_scale=config.frequency_scale,
        )
    if config.name == "peak_energy_bbox":
        preprocessor = build_preprocessor(config.preprocessing)
        return PeakEnergyBBoxMapper(
            preprocessor=preprocessor,
            time_scale=config.time_scale,
            frequency_scale=config.frequency_scale,
            loading_buffer=config.loading_buffer,
        )
    raise NotImplementedError(
        f"No ROI mapper of name '{config.name}' is implemented"
    )
 VALID_ANCHORS = [
    "bottom-left",
    "bottom-right",
    "top-left",
    "top-right",
    "center-left",
    "center-right",
    "top-center",
    "bottom-center",
    "center",
    "centroid",
    "point_on_surface",
 ]
 def _build_bounding_box(
    pos: tuple[float, float],
    duration: float,
    bandwidth: float,
    anchor: Anchor = DEFAULT_ANCHOR,
 ) -> data.BoundingBox:
    """Construct a BoundingBox from a reference point, size, and position type.
    Internal helper for `BBoxEncoder.recover_roi`. Calculates the box
    coordinates [start_time, low_freq, end_time, high_freq] based on where
    the input `pos` (time, freq) is located relative to the box (e.g.,
    center, corner).
    Parameters
    ----------
    pos : Tuple[float, float]
        Reference position (time, frequency).
    duration : float
        The required *unscaled* duration (width) of the bounding box.
    bandwidth : float
        The required *unscaled* frequency bandwidth (height) of the bounding
        box.
    anchor : Anchor
        Specifies which part of the bounding box the input `pos` corresponds to.
    Returns
    -------
    data.BoundingBox
        The constructed bounding box object.
    Raises
    ------
    ValueError
        If `anchor` is not a recognized value or format.
    """
    time, freq = map(float, pos)
    duration = max(0, duration)
    bandwidth = max(0, bandwidth)
    if anchor in ["center", "centroid", "point_on_surface"]:
        return data.BoundingBox(
            coordinates=[
                max(time - duration / 2, 0),
                max(freq - bandwidth / 2, 0),
                max(time + duration / 2, 0),
                max(freq + bandwidth / 2, 0),
            ]
        )
    if anchor not in VALID_ANCHORS:
        raise ValueError(
            f"Invalid anchor: {anchor}. Valid options are: {VALID_ANCHORS}"
        )
    y, x = anchor.split("-")
    start_time = {
        "left": time,
        "center": time - duration / 2,
        "right": time - duration,
    }[x]
    low_freq = {
        "bottom": freq,
        "center": freq - bandwidth / 2,
        "top": freq - bandwidth,
    }[y]
    return data.BoundingBox(
        coordinates=[
            max(0, start_time),
            max(0, low_freq),
            max(0, start_time + duration),
            max(0, low_freq + bandwidth),
        ]
    )
 def get_peak_energy_coordinates(
    recording: data.Recording,
    preprocessor: PreprocessorProtocol,
    start_time: float = 0,
    end_time: Optional[float] = None,
    low_freq: float = 0,
    high_freq: Optional[float] = None,
    loading_buffer: float = 0.05,
 ) -> Position:
    """Find the coordinates of the highest energy point in a spectrogram.
    Generates a spectrogram for a specified time-frequency region of a
    recording and returns the (time, frequency) coordinates of the pixel with
    the maximum value.
    Parameters
    ----------
    recording : data.Recording
        The recording to analyze.
    preprocessor : PreprocessorProtocol
        The processor to convert audio to a spectrogram.
    start_time : float, default=0
        The start time of the region of interest.
    end_time : float, optional
        The end time of the region of interest. Defaults to recording duration.
    low_freq : float, default=0
        The low frequency of the region of interest.
    high_freq : float, optional
        The high frequency of the region of interest. Defaults to Nyquist.
    loading_buffer : float, default=0.05
        Buffer in seconds to add around the time range when loading the clip
        to mitigate border effects from transformations like STFT.
    Returns
    -------
    Position
        A (time, frequency) tuple for the peak energy location.
    """
    if end_time is None:
        end_time = recording.duration
    end_time = min(end_time, recording.duration)
    if high_freq is None:
        high_freq = recording.samplerate / 2
    clip_start = max(0, start_time - loading_buffer)
    clip_end = min(recording.duration, end_time + loading_buffer)
    clip = data.Clip(
        recording=recording,
        start_time=clip_start,
        end_time=clip_end,
    )
    spec = preprocessor.preprocess_clip(clip)
    low_freq = max(low_freq, preprocessor.min_freq)
    high_freq = min(high_freq, preprocessor.max_freq)
    selection = spec.sel(
        time=slice(start_time, end_time),
        frequency=slice(low_freq, high_freq),
    )
    index = selection.argmax(dim=["time", "frequency"])
    point = selection.isel(index)  # type: ignore
    peak_time: float = point.time.item()
    peak_freq: float = point.frequency.item()
    return peak_time, peak_freq
--- a/src/batdetect2/targets/terms.py
+++ b/src/batdetect2/targets/terms.py
@ -230,11 +230,12 @@ class TermRegistry(Mapping[str, data.Term]):
        del self._terms[key]
-term_registry = TermRegistry(
+default_term_registry = TermRegistry(
    terms=dict(
        [
            *getmembers(terms, lambda x: isinstance(x, data.Term)),
            ("event", call_type),
            ("species", terms.scientific_name),
            ("individual", individual),
            ("data_source", data_source),
            (GENERIC_CLASS_KEY, generic_class),
@ -252,7 +253,7 @@ is explicitly passed.
 def get_term_from_key(
    key: str,
-    term_registry: TermRegistry = term_registry,
+    term_registry: Optional[TermRegistry] = None,
 ) -> data.Term:
    """Convenience function to retrieve a term by key from a registry.
@ -277,10 +278,13 @@ def get_term_from_key(
    KeyError
        If the key is not found in the specified registry.
    """
    term_registry = term_registry or default_term_registry
    return term_registry.get_term(key)
-def get_term_keys(term_registry: TermRegistry = term_registry) -> List[str]:
+def get_term_keys(
    term_registry: TermRegistry = default_term_registry,
 ) -> List[str]:
    """Convenience function to get all registered keys from a registry.
    Uses the global default registry unless a specific `term_registry`
@ -299,7 +303,9 @@ def get_term_keys(term_registry: TermRegistry = term_registry) -> List[str]:
    return term_registry.get_keys()
-def get_terms(term_registry: TermRegistry = term_registry) -> List[data.Term]:
+def get_terms(
    term_registry: TermRegistry = default_term_registry,
 ) -> List[data.Term]:
    """Convenience function to get all registered terms from a registry.
    Uses the global default registry unless a specific `term_registry`
@ -342,7 +348,7 @@ class TagInfo(BaseModel):
 def get_tag_from_info(
    tag_info: TagInfo,
-    term_registry: TermRegistry = term_registry,
+    term_registry: Optional[TermRegistry] = None,
 ) -> data.Tag:
    """Creates a soundevent.data.Tag object from TagInfo data.
@ -368,6 +374,7 @@ def get_tag_from_info(
        If the term key specified in `tag_info.key` is not found
        in the registry.
    """
    term_registry = term_registry or default_term_registry
    term = get_term_from_key(tag_info.key, term_registry=term_registry)
    return data.Tag(term=term, value=tag_info.value)
@ -439,7 +446,7 @@ class TermConfig(BaseModel):
 def load_terms_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
-    term_registry: TermRegistry = term_registry,
+    term_registry: TermRegistry = default_term_registry,
 ) -> Dict[str, data.Term]:
    """Loads term definitions from a configuration file and registers them.
@ -490,6 +497,6 @@ def load_terms_from_config(
 def register_term(
-    key: str, term: data.Term, registry: TermRegistry = term_registry
+    key: str, term: data.Term, registry: TermRegistry = default_term_registry
 ) -> None:
    registry.add_term(key, term)
--- a/src/batdetect2/targets/transform.py
+++ b/src/batdetect2/targets/transform.py
@ -21,9 +21,6 @@ from batdetect2.targets.terms import (
    get_tag_from_info,
    get_term_from_key,
 )
 from batdetect2.targets.terms import (
    term_registry as default_term_registry,
 )
 __all__ = [
    "DerivationRegistry",
@ -34,7 +31,7 @@ __all__ = [
    "TransformConfig",
    "build_transform_from_rule",
    "build_transformation_from_config",
-    "derivation_registry",
+    "default_derivation_registry",
    "get_derivation",
    "load_transformation_config",
    "load_transformation_from_config",
@ -398,7 +395,7 @@ class DerivationRegistry(Mapping[str, Derivation]):
        return list(self._derivations.values())
-derivation_registry = DerivationRegistry()
+default_derivation_registry = DerivationRegistry()
 """Global instance of the DerivationRegistry.
 Register custom derivation functions here to make them available by key
@ -409,7 +406,7 @@ in `DeriveTagRule` configuration.
 def get_derivation(
    key: str,
    import_derivation: bool = False,
-    registry: DerivationRegistry = derivation_registry,
+    registry: Optional[DerivationRegistry] = None,
 ):
    """Retrieve a derivation function by key, optionally importing it.
@ -443,6 +440,8 @@ def get_derivation(
    AttributeError
        If dynamic import fails because the function name isn't in the module.
    """
    registry = registry or default_derivation_registry
    if not import_derivation or key in registry:
        return registry.get_derivation(key)
@ -458,10 +457,16 @@ def get_derivation(
        ) from err
 TranformationRule = Annotated[
    Union[ReplaceRule, MapValueRule, DeriveTagRule],
    Field(discriminator="rule_type"),
 ]
 def build_transform_from_rule(
-    rule: Union[ReplaceRule, MapValueRule, DeriveTagRule],
+    rule: TranformationRule,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: Optional[DerivationRegistry] = None,
-    term_registry: TermRegistry = default_term_registry,
+    term_registry: Optional[TermRegistry] = None,
 ) -> SoundEventTransformation:
    """Build a specific SoundEventTransformation function from a rule config.
@ -559,8 +564,8 @@ def build_transform_from_rule(
 def build_transformation_from_config(
    config: TransformConfig,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: Optional[DerivationRegistry] = None,
-    term_registry: TermRegistry = default_term_registry,
+    term_registry: Optional[TermRegistry] = None,
 ) -> SoundEventTransformation:
    """Build a composite transformation function from a TransformConfig.
@ -581,6 +586,7 @@ def build_transformation_from_config(
    SoundEventTransformation
        A single function that applies all configured transformations in order.
    """
    transforms = [
        build_transform_from_rule(
            rule,
@ -590,14 +596,16 @@ def build_transformation_from_config(
        for rule in config.rules
    ]
-    def transformation(
+    return partial(apply_sequence_of_transforms, transforms=transforms)
        sound_event_annotation: data.SoundEventAnnotation,
    ) -> data.SoundEventAnnotation:
        for transform in transforms:
            sound_event_annotation = transform(sound_event_annotation)
        return sound_event_annotation
-    return transformation
+
 def apply_sequence_of_transforms(
    sound_event_annotation: data.SoundEventAnnotation,
    transforms: list[SoundEventTransformation],
 ) -> data.SoundEventAnnotation:
    for transform in transforms:
        sound_event_annotation = transform(sound_event_annotation)
    return sound_event_annotation
 def load_transformation_config(
@ -631,8 +639,8 @@ def load_transformation_config(
 def load_transformation_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: Optional[DerivationRegistry] = None,
-    term_registry: TermRegistry = default_term_registry,
+    term_registry: Optional[TermRegistry] = None,
 ) -> SoundEventTransformation:
    """Load transformation config from a file and build the final function.
@ -677,7 +685,7 @@ def load_transformation_from_config(
 def register_derivation(
    key: str,
    derivation: Derivation,
-    derivation_registry: DerivationRegistry = derivation_registry,
+    derivation_registry: Optional[DerivationRegistry] = None,
 ) -> None:
    """Register a new derivation function in the global registry.
@ -696,4 +704,5 @@ def register_derivation(
    KeyError
        If a derivation function with the same key is already registered.
    """
    derivation_registry = derivation_registry or default_derivation_registry
    derivation_registry.register(key, derivation)
--- a/src/batdetect2/targets/types.py
+++ b/src/batdetect2/targets/types.py
@ -19,8 +19,16 @@ from soundevent import data
 __all__ = [
    "TargetProtocol",
    "Position",
    "Size",
 ]
 Position = tuple[float, float]
 """A tuple representing (time, frequency) coordinates."""
 Size = np.ndarray
 """A NumPy array representing the size dimensions of a target."""
 class TargetProtocol(Protocol):
    """Protocol defining the interface for the target definition pipeline.
@ -102,7 +110,7 @@ class TargetProtocol(Protocol):
        """
        ...
-    def encode(
+    def encode_class(
        self,
        sound_event: data.SoundEventAnnotation,
    ) -> Optional[str]:
@ -123,7 +131,7 @@ class TargetProtocol(Protocol):
        """
        ...
-    def decode(self, class_label: str) -> List[data.Tag]:
+    def decode_class(self, class_label: str) -> List[data.Tag]:
        """Decode a predicted class name back into representative tags.
        Parameters
@ -147,9 +155,9 @@ class TargetProtocol(Protocol):
        """
        ...
-    def get_position(
+    def encode_roi(
        self, sound_event: data.SoundEventAnnotation
-    ) -> tuple[float, float]:
+    ) -> tuple[Position, Size]:
        """Extract the target reference position from the annotation's geometry.
        Calculates the `(time, frequency)` coordinate representing the primary
@ -173,36 +181,12 @@ class TargetProtocol(Protocol):
        """
        ...
-    def get_size(self, sound_event: data.SoundEventAnnotation) -> np.ndarray:
+    # TODO: Update docstrings
-        """Calculate the target size dimensions from the annotation's geometry.
+    def decode_roi(
-
+        self,
-        Computes the relevant physical size (e.g., duration/width,
+        position: Position,
-        bandwidth/height from a bounding box) to produce
+        size: Size,
-        the numerical target values expected by the model.
+        class_name: Optional[str] = None,
        Parameters
        ----------
        sound_event : data.SoundEventAnnotation
            The annotation containing the geometry (ROI) to process.
        Returns
        -------
        np.ndarray
            A NumPy array containing the size dimensions, matching the
            order specified by the `dimension_names` attribute (e.g.,
            `[width, height]`).
        Raises
        ------
        ValueError
            If the annotation lacks geometry or if the size cannot be computed.
        TypeError
            If geometry type is unsupported.
        """
        ...
    def recover_roi(
        self, pos: tuple[float, float], dims: np.ndarray
    ) -> data.Geometry:
        """Recover the ROI geometry from a position and dimensions.
@ -217,6 +201,8 @@ class TargetProtocol(Protocol):
        dims : np.ndarray
            The NumPy array containing the dimensions (e.g., predicted
            by the model), corresponding to the order in `dimension_names`.
        class_name: str
            class
        Returns
        -------
--- a/src/batdetect2/train/init.py
+++ b/src/batdetect2/train/init.py
--- a/src/batdetect2/train/augmentations.py
+++ b/src/batdetect2/train/augmentations.py
--- a/src/batdetect2/train/callbacks.py
+++ b/src/batdetect2/train/callbacks.py
@ -97,7 +97,7 @@ def _is_in_subclip(
    start_time: float,
    end_time: float,
 ) -> bool:
-    time, _ = targets.get_position(sound_event_annotation)
+    (time, _), _ = targets.encode_roi(sound_event_annotation)
    return start_time <= time <= end_time
--- a/src/batdetect2/train/clips.py
+++ b/src/batdetect2/train/clips.py
--- a/src/batdetect2/train/config.py
+++ b/src/batdetect2/train/config.py
--- a/src/batdetect2/train/dataset.py
+++ b/src/batdetect2/train/dataset.py
--- a/src/batdetect2/train/labels.py
+++ b/src/batdetect2/train/labels.py
@ -138,7 +138,7 @@ def generate_clip_label(
    logger.debug(
        "Will generate heatmaps for clip annotation {uuid} with {num} annotated sound events",
        uuid=clip_annotation.uuid,
-        num=len(clip_annotation.sound_events)
+        num=len(clip_annotation.sound_events),
    )
    sound_events = []
@ -260,7 +260,7 @@ def generate_heatmaps(
            continue
        # Get the position of the sound event
-        time, frequency = targets.get_position(sound_event_annotation)
+        (time, frequency), size = targets.encode_roi(sound_event_annotation)
        # Set 1.0 at the position of the sound event in the detection heatmap
        try:
@ -280,8 +280,6 @@ def generate_heatmaps(
            )
            continue
        size = targets.get_size(sound_event_annotation)
        size_heatmap = arrays.set_value_at_pos(
            size_heatmap,
            size,
@ -291,7 +289,7 @@ def generate_heatmaps(
        # Get the class name of the sound event
        try:
-            class_name = targets.encode(sound_event_annotation)
+            class_name = targets.encode_class(sound_event_annotation)
        except ValueError as e:
            logger.warning(
                "Skipping annotation %s: Unexpected error while encoding "
--- a/src/batdetect2/train/legacy/readme.md
+++ b/src/batdetect2/train/legacy/readme.md
--- a/src/batdetect2/train/legacy/train.py
+++ b/src/batdetect2/train/legacy/train.py
--- a/src/batdetect2/train/legacy/train_model.py
+++ b/src/batdetect2/train/legacy/train_model.py
--- a/src/batdetect2/train/legacy/train_split.py
+++ b/src/batdetect2/train/legacy/train_split.py
--- a/src/batdetect2/train/legacy/train_utils.py
+++ b/src/batdetect2/train/legacy/train_utils.py
--- a/src/batdetect2/train/lightning.py
+++ b/src/batdetect2/train/lightning.py
--- a/src/batdetect2/train/losses.py
+++ b/src/batdetect2/train/losses.py
--- a/src/batdetect2/train/preprocess.py
+++ b/src/batdetect2/train/preprocess.py
--- a/src/batdetect2/train/train.py
+++ b/src/batdetect2/train/train.py
--- a/src/batdetect2/train/types.py
+++ b/src/batdetect2/train/types.py
--- a/src/batdetect2/types.py
+++ b/src/batdetect2/types.py
--- a/src/batdetect2/utils/init.py
+++ b/src/batdetect2/utils/init.py
--- a/src/batdetect2/utils/arrays.py
+++ b/src/batdetect2/utils/arrays.py
--- a/src/batdetect2/utils/audio_utils.py
+++ b/src/batdetect2/utils/audio_utils.py
--- a/src/batdetect2/utils/detector_utils.py
+++ b/src/batdetect2/utils/detector_utils.py
--- a/src/batdetect2/utils/plot_utils.py
+++ b/src/batdetect2/utils/plot_utils.py
--- a/src/batdetect2/utils/tensors.py
+++ b/src/batdetect2/utils/tensors.py
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
mbsantiago	a462beaeb8	Remove rogue print	2025-06-24 06:27:41 -06:00
mbsantiago	daab8ff0d7	Fix validation is_in_subclip after encoder changes	2025-06-24 06:26:08 -06:00
mbsantiago	235f0e27da	Add load dataset config function	2025-06-24 12:40:29 +01:00
mbsantiago	b5b4229990	Fix testing issues	2025-06-23 19:08:55 +01:00
mbsantiago	8253b5bdc4	Update makefile	2025-06-23 18:53:15 +01:00
mbsantiago	c7ea361cf4	Implement changes needed to make roi encode/decode class dependent	2025-06-23 18:52:36 +01:00
mbsantiago	3407e1b5f0	Add other roi tests	2025-06-21 23:51:07 +01:00
mbsantiago	0a0d6f7162	Use standard dependency-groups instead of tool.uv section	2025-06-21 23:01:54 +01:00
mbsantiago	ad0f0bcb24	Add tests for peak energy function	2025-06-21 23:01:08 +01:00
mbsantiago	3103630c26	Update pyproject to use src layout	2025-06-21 13:49:06 +01:00
mbsantiago	960558be8b	move to src layout	2025-06-21 13:48:40 +01:00
mbsantiago	e352dc40bd	Fixed Target object after changes to roi	2025-06-21 13:47:04 +01:00
mbsantiago	c559bcc682	Changed ROIMapper protocol to only have encoder/decoder methods	2025-06-21 11:44:15 +01:00
`@ -1,4 +1,3 @@`

	`import numpy as np`	`import numpy as np`
	`from sklearn.metrics import auc, roc_curve`	`from sklearn.metrics import auc, roc_curve`