Add iterators module

This commit is contained in:
mbsantiago 2025-06-19 00:28:24 +01:00
parent ebc89af4c6
commit 84a13c65a7

View File

@ -0,0 +1,79 @@
from collections.abc import Generator
from typing import Optional, Tuple
from soundevent import data
from batdetect2.data.datasets import Dataset
from batdetect2.targets.types import TargetProtocol
def iterate_over_sound_events(
    dataset: Dataset,
    targets: TargetProtocol,
    apply_filter: bool = True,
    apply_transform: bool = True,
    exclude_generic: bool = True,
) -> Generator[Tuple[Optional[str], data.SoundEventAnnotation], None, None]:
    """Yield (class name, annotation) pairs for every sound event in a dataset.

    Walks each clip annotation in `dataset` and processes its sound events
    through the `targets` pipeline: optional filtering, optional
    transformation, then class-name encoding. Events can be dropped either
    by the filter or because they encode to no specific class.

    Parameters
    ----------
    dataset : Dataset
        Dataset whose clip annotations hold the sound event annotations to
        iterate over.
    targets : TargetProtocol
        Target definition providing `filter()`, `transform()`, and
        `encode()` for sound events.
    apply_filter : bool, optional
        When True (default), skip any event for which `targets.filter()`
        returns False.
    apply_transform : bool, optional
        When True (default), replace each event with
        `targets.transform(event)` before encoding and yielding.
    exclude_generic : bool, optional
        When True (default), skip events whose encoded class name is None
        (i.e. events that cannot be mapped to a specific target class).

    Yields
    ------
    Tuple[Optional[str], data.SoundEventAnnotation]
        The encoded class name (or None when the event is generic and
        `exclude_generic` is False) together with the — possibly
        transformed — sound event annotation.

    Notes
    -----
    Per-event processing order: filter → transform → encode. An event
    skipped by the filter is never transformed or encoded.
    """
    for clip in dataset:
        for annotation in clip.sound_events:
            # Guard clause: drop events rejected by the target filter.
            if apply_filter and not targets.filter(annotation):
                continue

            if apply_transform:
                annotation = targets.transform(annotation)

            encoded = targets.encode(annotation)

            # Generic (unclassifiable) events encode to None; drop them
            # unless the caller asked to keep them.
            if exclude_generic and encoded is None:
                continue

            yield encoded, annotation