Add iterators module

2025-06-29 14:41:58 +02:00 · 2025-06-19 00:28:24 +01:00 · 2025-06-19 00:28:24 +01:00 · 84a13c65a7
commit 84a13c65a7
parent ebc89af4c6
1 changed files with 79 additions and 0 deletions
--- a/batdetect2/data/iterators.py
+++ b/batdetect2/data/iterators.py
@ -0,0 +1,79 @@
 from collections.abc import Generator
 from typing import Optional, Tuple
 from soundevent import data
 from batdetect2.data.datasets import Dataset
 from batdetect2.targets.types import TargetProtocol
 def iterate_over_sound_events(
    dataset: Dataset,
    targets: TargetProtocol,
    apply_filter: bool = True,
    apply_transform: bool = True,
    exclude_generic: bool = True,
 ) -> Generator[Tuple[Optional[str], data.SoundEventAnnotation], None, None]:
    """Lazily yield ``(class_name, annotation)`` pairs for a dataset.

    Every sound event annotation of every clip annotation in ``dataset`` is
    visited in order. Each one is optionally filtered, optionally
    transformed, and then encoded into a class name through ``targets``.

    Parameters
    ----------
    dataset : Dataset
        Iterable of clip annotations. Each item must expose a
        ``sound_events`` attribute holding its sound event annotations.
    targets : TargetProtocol
        Object supplying the ``filter``, ``transform`` and ``encode``
        callables applied to each sound event annotation.
    apply_filter : bool, optional
        When True (the default), an annotation is dropped if
        ``targets.filter(annotation)`` is falsy.
    apply_transform : bool, optional
        When True (the default), the annotation is replaced by the result
        of ``targets.transform(annotation)`` before it is encoded and
        yielded.
    exclude_generic : bool, optional
        When True (the default), annotations for which ``targets.encode``
        returns ``None`` are dropped instead of being yielded with a
        ``None`` class name.

    Yields
    ------
    Tuple[Optional[str], data.SoundEventAnnotation]
        The value returned by ``targets.encode`` (``None`` is only possible
        when ``exclude_generic`` is False) together with the annotation
        that was encoded, i.e. the transformed one when ``apply_transform``
        is True.

    Notes
    -----
    Per annotation, the steps run in a fixed order: filter, then transform,
    then encode. The filter therefore always sees the untransformed
    annotation, while ``targets.encode`` sees the transformed one.
    ``targets.encode`` is called for every annotation that survives the
    filter, regardless of ``exclude_generic``.
    """
    # Flatten the clip -> sound event nesting into one lazy stream so the
    # per-annotation pipeline below reads as a single flat loop.
    flattened = (
        event
        for clip in dataset
        for event in clip.sound_events
    )

    for annotation in flattened:
        # Filtering is decided on the raw (untransformed) annotation.
        if apply_filter and not targets.filter(annotation):
            continue

        if apply_transform:
            annotation = targets.transform(annotation)

        label = targets.encode(annotation)

        # Generic (un-encodable) events only pass through when the caller
        # explicitly opted in with exclude_generic=False.
        if label is not None or not exclude_generic:
            yield label, annotation