diff --git a/src/batdetect2/data/annotations/legacy.py b/src/batdetect2/data/annotations/legacy.py
index 0b443bb..4dfd28c 100644
--- a/src/batdetect2/data/annotations/legacy.py
+++ b/src/batdetect2/data/annotations/legacy.py
@@ -89,18 +89,9 @@ def annotation_to_sound_event(
         uuid=uuid.uuid5(NAMESPACE, f"{sound_event.uuid}_annotation"),
         sound_event=sound_event,
         tags=[
-            data.Tag(
-                key=label_key,  # type: ignore
-                value=annotation.label,
-            ),
-            data.Tag(
-                key=event_key,  # type: ignore
-                value=annotation.event,
-            ),
-            data.Tag(
-                key=individual_key,  # type: ignore
-                value=str(annotation.individual),
-            ),
+            data.Tag(key=label_key, value=annotation.label),
+            data.Tag(key=event_key, value=annotation.event),
+            data.Tag(key=individual_key, value=str(annotation.individual)),
         ],
     )
 
@@ -121,12 +112,7 @@ def file_annotation_to_clip(
     recording = data.Recording.from_file(
         full_path,
         time_expansion=file_annotation.time_exp,
-        tags=[
-            data.Tag(
-                key=label_key,  # type: ignore
-                value=file_annotation.label,
-            )
-        ],
+        tags=[data.Tag(key=label_key, value=file_annotation.label)],
     )
 
     return data.Clip(
@@ -153,12 +139,7 @@ def file_annotation_to_clip_annotation(
         uuid=uuid.uuid5(NAMESPACE, f"{file_annotation.id}_clip_annotation"),
         clip=clip,
         notes=notes,
-        tags=[
-            data.Tag(
-                key=label_key,  # type: ignore
-                value=file_annotation.label,
-            )
-        ],
+        tags=[data.Tag(key=label_key, value=file_annotation.label)],
         sound_events=[
             annotation_to_sound_event(
                 annotation,
diff --git a/src/batdetect2/evaluate/match.py b/src/batdetect2/evaluate/match.py
index df3bc60..cca9946 100644
--- a/src/batdetect2/evaluate/match.py
+++ b/src/batdetect2/evaluate/match.py
@@ -57,6 +57,7 @@ class MatchConfig(BaseConfig):
     affinity_threshold: float = 0.0
     time_buffer: float = 0.005
     frequency_buffer: float = 1_000
+    ignore_start_end: float = 0.01
 
 
 def _to_bbox(geometry: data.Geometry) -> data.BoundingBox:
@@ -273,6 +274,17 @@ def greedy_match(
         yield None, target_idx, 0
 
 
+def _is_in_bounds(
+    geometry: data.Geometry,
+    clip: data.Clip,
+    buffer: float,
+) -> bool:
+    start_time = compute_bounds(geometry)[0]
+    return (start_time >= clip.start_time + buffer) and (
+        start_time <= clip.end_time - buffer
+    )
+
+
 def match_sound_events_and_raw_predictions(
     clip_annotation: data.ClipAnnotation,
     raw_predictions: List[RawPrediction],
@@ -286,14 +298,29 @@ def match_sound_events_and_raw_predictions(
         for sound_event_annotation in clip_annotation.sound_events
         if targets.filter(sound_event_annotation)
         and sound_event_annotation.sound_event.geometry is not None
+        and _is_in_bounds(
+            sound_event_annotation.sound_event.geometry,
+            clip=clip_annotation.clip,
+            buffer=config.ignore_start_end,
+        )
     ]
 
-    target_geometries: List[data.Geometry] = [  # type: ignore
+    target_geometries: List[data.Geometry] = [
         sound_event_annotation.sound_event.geometry
         for sound_event_annotation in target_sound_events
         if sound_event_annotation.sound_event.geometry is not None
     ]
 
+    raw_predictions = [
+        raw_prediction
+        for raw_prediction in raw_predictions
+        if _is_in_bounds(
+            raw_prediction.geometry,
+            clip=clip_annotation.clip,
+            buffer=config.ignore_start_end,
+        )
+    ]
+
     predicted_geometries = [
         raw_prediction.geometry for raw_prediction in raw_predictions
     ]
diff --git a/src/batdetect2/plotting/common.py b/src/batdetect2/plotting/common.py
index de54b76..ff47802 100644
--- a/src/batdetect2/plotting/common.py
+++ b/src/batdetect2/plotting/common.py
@@ -32,9 +32,12 @@ def plot_spectrogram(
     max_freq: Optional[float] = None,
     ax: Optional[axes.Axes] = None,
     figsize: Optional[Tuple[int, int]] = None,
+    add_colorbar: bool = False,
+    colorbar_kwargs: Optional[dict] = None,
+    vmin: Optional[float] = None,
+    vmax: Optional[float] = None,
     cmap="gray",
 ) -> axes.Axes:
-
     if isinstance(spec, torch.Tensor):
         spec = spec.numpy()
 
@@ -54,10 +57,16 @@ def plot_spectrogram(
     if max_freq is None:
         max_freq = spec.shape[-2]
 
-    ax.pcolormesh(
+    mappable = ax.pcolormesh(
         np.linspace(start_time, end_time, spec.shape[-1] + 1, endpoint=True),
         np.linspace(min_freq, max_freq, spec.shape[-2] + 1, endpoint=True),
         spec,
         cmap=cmap,
+        vmin=vmin,
+        vmax=vmax,
     )
+
+    if add_colorbar:
+        plt.colorbar(mappable, ax=ax, **(colorbar_kwargs or {}))
+
     return ax
diff --git a/src/batdetect2/targets/__init__.py b/src/batdetect2/targets/__init__.py
index 1da4163..19e586b 100644
--- a/src/batdetect2/targets/__init__.py
+++ b/src/batdetect2/targets/__init__.py
@@ -28,12 +28,17 @@ from batdetect2.targets.rois import (
     ROITargetMapper,
     build_roi_mapper,
 )
-from batdetect2.targets.terms import call_type, individual
+from batdetect2.targets.terms import (
+    call_type,
+    data_source,
+    generic_class,
+    individual,
+)
 from batdetect2.typing.targets import Position, Size, TargetProtocol
 
 __all__ = [
-    "DEFAULT_TARGET_CONFIG",
     "AnchorBBoxMapperConfig",
+    "DEFAULT_TARGET_CONFIG",
     "ROITargetMapper",
     "SoundEventDecoder",
     "SoundEventEncoder",
@@ -44,6 +49,8 @@ __all__ = [
     "build_sound_event_decoder",
     "build_sound_event_encoder",
     "call_type",
+    "data_source",
+    "generic_class",
     "get_class_names_from_config",
     "individual",
     "load_target_config",
diff --git a/src/batdetect2/targets/terms.py b/src/batdetect2/targets/terms.py
index 7904b53..cf58750 100644
--- a/src/batdetect2/targets/terms.py
+++ b/src/batdetect2/targets/terms.py
@@ -6,6 +6,7 @@ __all__ = [
     "call_type",
     "individual",
     "data_source",
+    "generic_class",
 ]
 
 # The default key used to reference the 'generic_class' term.
diff --git a/src/batdetect2/typing/postprocess.py b/src/batdetect2/typing/postprocess.py
index f75ca81..0654853 100644
--- a/src/batdetect2/typing/postprocess.py
+++ b/src/batdetect2/typing/postprocess.py
@@ -47,29 +47,7 @@ class GeometryDecoder(Protocol):
 
 
 class RawPrediction(NamedTuple):
-    """Intermediate representation of a single detected sound event.
-
-    Holds extracted information about a detection after initial processing
-    (like peak finding, coordinate remapping, geometry recovery) but before
-    final class decoding and conversion into a `SoundEventPrediction`. This
-    can be useful for evaluation or simpler data handling formats.
-
-    Attributes
-    ----------
-    geometry: data.Geometry
-        The recovered estimated geometry of the detected sound event.
-        Usually a bounding box.
-    detection_score : float
-        The confidence score associated with this detection, typically from
-        the detection heatmap peak.
-    class_scores : xr.DataArray
-        An xarray DataArray containing the predicted probabilities or scores
-        for each target class at the detection location. Indexed by a
-        'category' coordinate containing class names.
-    features : xr.DataArray
-        An xarray DataArray containing extracted feature vectors at the
-        detection location. Indexed by a 'feature' coordinate.
-    """
+    """Intermediate representation of a single detected sound event."""
 
     geometry: data.Geometry
     detection_score: float
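
A minimal usage sketch of the options introduced above: the new plot_spectrogram keyword arguments (add_colorbar, colorbar_kwargs, vmin, vmax) and the new MatchConfig.ignore_start_end field. The spectrogram array, the sample values, and passing the spectrogram as the first positional argument are assumptions for illustration; only the parameter names and import paths come from the changed files.

import numpy as np

from batdetect2.evaluate.match import MatchConfig
from batdetect2.plotting.common import plot_spectrogram

# Hypothetical spectrogram: 128 frequency bins x 512 time frames.
spec = np.random.rand(128, 512)

# New plotting options: fixed colour scale plus an attached colorbar.
ax = plot_spectrogram(
    spec,
    add_colorbar=True,
    colorbar_kwargs={"label": "amplitude"},
    vmin=0.0,
    vmax=1.0,
    cmap="gray",
)
ax.figure.savefig("spectrogram.png")

# New matching option: annotations and predictions whose start time falls
# within 10 ms of either clip edge are excluded from matching.
config = MatchConfig(ignore_start_end=0.01)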