From 84a13c65a7f69a554d461ea4d3438bfa67cf757c Mon Sep 17 00:00:00 2001
From: mbsantiago <santiago.mbal@gmail.com>
Date: Thu, 19 Jun 2025 00:28:24 +0100
Subject: [PATCH] Add iterators module

---
 batdetect2/data/iterators.py | 79 ++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 batdetect2/data/iterators.py

diff --git a/batdetect2/data/iterators.py b/batdetect2/data/iterators.py
new file mode 100644
index 0000000..4a3e11d
--- /dev/null
+++ b/batdetect2/data/iterators.py
@@ -0,0 +1,79 @@
+from collections.abc import Generator
+from typing import Optional, Tuple
+
+from soundevent import data
+
+from batdetect2.data.datasets import Dataset
+from batdetect2.targets.types import TargetProtocol
+
+
+def iterate_over_sound_events(
+    dataset: Dataset,
+    targets: TargetProtocol,
+    apply_filter: bool = True,
+    apply_transform: bool = True,
+    exclude_generic: bool = True,
+) -> Generator[Tuple[Optional[str], data.SoundEventAnnotation], None, None]:
+    """Iterate over sound events in a dataset, applying filtering and
+    transformations.
+
+    This generator function processes sound event annotations from a given
+    dataset, allowing for optional filtering, transformation, and exclusion of
+    unclassifiable (generic) events based on the provided target definitions.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        The dataset containing clip annotations, each of which may contain
+        multiple sound event annotations.
+    targets : TargetProtocol
+        An object implementing the `TargetProtocol`, which provides methods
+        for filtering, transforming, and encoding sound events.
+    apply_filter : bool, optional
+        If True, sound events will be filtered using `targets.filter()`.
+        Only events for which `targets.filter()` returns True will be yielded.
+        Defaults to True.
+    apply_transform : bool, optional
+        If True, sound events will be transformed using `targets.transform()`
+        before being yielded. Defaults to True.
+    exclude_generic : bool, optional
+        If True, sound events that result in a `None` class name after
+        `targets.encode()` will be excluded. This is typically used to
+        filter out events that cannot be mapped to a specific target class.
+        Defaults to True.
+
+    Yields
+    ------
+    Tuple[Optional[str], data.SoundEventAnnotation]
+        A tuple containing:
+        - The encoded class name (str) for the sound event, or None if it
+          cannot be encoded to a specific class.
+        - The sound event annotation itself, after passing all specified
+          filtering and transformation steps.
+
+    Notes
+    -----
+    The processing order for each sound event is:
+    1. Filtering (if `apply_filter` is True). Events failing the filter are
+       skipped.
+    2. Transformation (if `apply_transform` is True).
+    3. Encoding to determine class name and check for genericity (if
+       `exclude_generic` is True). Events with a `None` class name are skipped
+        if `exclude_generic` is True.
+    """
+    for clip_annotation in dataset:
+        for sound_event_annotation in clip_annotation.sound_events:
+            if apply_filter:
+                if not targets.filter(sound_event_annotation):
+                    continue
+
+            if apply_transform:
+                sound_event_annotation = targets.transform(
+                    sound_event_annotation
+                )
+
+            class_name = targets.encode(sound_event_annotation)
+            if class_name is None and exclude_generic:
+                continue
+
+            yield class_name, sound_event_annotation