# batdetect2/tests/test_targets/test_rois.py

import numpy as np
import pytest
import soundfile as sf
from soundevent import data

from batdetect2.preprocess import PreprocessingConfig, build_preprocessor
from batdetect2.targets.rois import (
    DEFAULT_ANCHOR,
    DEFAULT_FREQUENCY_SCALE,
    DEFAULT_TIME_SCALE,
    SIZE_HEIGHT,
    SIZE_WIDTH,
    AnchorBBoxMapper,
    BBoxAnchorMapperConfig,
    PeakEnergyBBoxMapper,
    PeakEnergyBBoxMapperConfig,
    _build_bounding_box,
    build_roi_mapper,
    get_peak_energy_coordinates,
)


@pytest.fixture
def sample_bbox() -> data.BoundingBox:
    """Provide the reference box used by most tests in this module.

    The box spans 10.0 to 20.0 on the time axis and 100.0 to 200.0 on the
    frequency axis.
    """
    coordinates = [10.0, 100.0, 20.0, 200.0]
    return data.BoundingBox(coordinates=coordinates)


@pytest.fixture
def sample_recording(create_recording) -> data.Recording:
    """Provide a recording built with duration=30 and samplerate=4_000."""
    recording = create_recording(duration=30, samplerate=4_000)
    return recording


@pytest.fixture
def sample_sound_event(sample_bbox, sample_recording) -> data.SoundEvent:
    """Provide a sound event tying the sample box to the sample recording."""
    event = data.SoundEvent(
        recording=sample_recording,
        geometry=sample_bbox,
    )
    return event


@pytest.fixture
def zero_bbox() -> data.BoundingBox:
    """Provide a degenerate box whose start and end corners coincide."""
    corner_time, corner_freq = 15.0, 150.0
    return data.BoundingBox(
        coordinates=[corner_time, corner_freq, corner_time, corner_freq]
    )


@pytest.fixture
def zero_sound_event(zero_bbox, sample_recording) -> data.SoundEvent:
    """Provide a sound event whose geometry is the zero-sized box."""
    event = data.SoundEvent(
        recording=sample_recording,
        geometry=zero_bbox,
    )
    return event


@pytest.fixture
def default_mapper() -> AnchorBBoxMapper:
    """Provide an AnchorBBoxMapper constructed without any arguments."""
    mapper = AnchorBBoxMapper()
    return mapper


@pytest.fixture
def custom_encoder() -> AnchorBBoxMapper:
    """Provide an AnchorBBoxMapper with a center anchor and custom scales."""
    mapper = AnchorBBoxMapper(
        frequency_scale=10.0,
        time_scale=1.0,
        anchor="center",
    )
    return mapper


@pytest.fixture
def custom_mapper() -> AnchorBBoxMapper:
    """Provide a center-anchored AnchorBBoxMapper with non-default scales.

    The time scale is 1.0 and the frequency scale is 10.0.
    """
    return AnchorBBoxMapper(
        anchor="center",
        time_scale=1.0,
        frequency_scale=10.0,
    )


def test_bbox_encoder_init_defaults(default_mapper):
    """A mapper built without arguments should expose the module defaults."""
    expected_attributes = {
        "anchor": DEFAULT_ANCHOR,
        "time_scale": DEFAULT_TIME_SCALE,
        "frequency_scale": DEFAULT_FREQUENCY_SCALE,
        "dimension_names": [SIZE_WIDTH, SIZE_HEIGHT],
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(default_mapper, attribute) == expected


def test_bbox_encoder_init_custom(custom_encoder):
    """A mapper built with explicit arguments should store them unchanged."""
    mapper = custom_encoder

    # Constructor arguments are kept as given.
    assert mapper.anchor == "center"
    assert mapper.time_scale == 1.0
    assert mapper.frequency_scale == 10.0

    # The size dimensions are the same regardless of the settings.
    assert mapper.dimension_names == [SIZE_WIDTH, SIZE_HEIGHT]


# Expected anchor positions, as (anchor name, (time, frequency)) pairs, for
# the ``sample_bbox`` fixture whose coordinates are [10.0, 100.0, 20.0, 200.0].
# The anchor names are also reused to parametrize the encode/decode roundtrip
# test further down in this module.
POSITION_TEST_CASES = [
    ("bottom-left", (10.0, 100.0)),
    ("bottom-right", (20.0, 100.0)),
    ("top-left", (10.0, 200.0)),
    ("top-right", (20.0, 200.0)),
    ("center-left", (10.0, 150.0)),
    ("center-right", (20.0, 150.0)),
    ("top-center", (15.0, 200.0)),
    ("bottom-center", (15.0, 100.0)),
    # The three anchors below all expect the midpoint of the box.
    ("center", (15.0, 150.0)),
    ("centroid", (15.0, 150.0)),
    ("point_on_surface", (15.0, 150.0)),
]


@pytest.mark.parametrize("anchor, expected_pos", POSITION_TEST_CASES)
def test_anchor_bbox_mapper_encode_position(
    sample_sound_event, anchor, expected_pos
):
    """Each anchor choice should yield its matching point of the box."""
    mapper = AnchorBBoxMapper(anchor=anchor)
    position, _size = mapper.encode(sample_sound_event)
    assert position == pytest.approx(expected_pos)


def test_anchor_bbox_mapper_encode_defaults(
    sample_sound_event, default_mapper
):
    """Default encoding should give the box corner and the scaled extent."""
    position, size = default_mapper.encode(sample_sound_event)

    # Position: the bottom-left corner of the sample box.
    assert position == pytest.approx((10.0, 100.0))

    # Size: the box extent (10.0 by 100.0) multiplied by the default scales.
    scaled_width = 10.0 * DEFAULT_TIME_SCALE
    scaled_height = 100.0 * DEFAULT_FREQUENCY_SCALE
    np.testing.assert_allclose(size, np.array([scaled_width, scaled_height]))
    assert size.shape == (2,)


def test_anchor_bbox_mapper_encode_custom(sample_sound_event, custom_mapper):
    """Custom encoding should give the box center and the rescaled extent."""
    position, size = custom_mapper.encode(sample_sound_event)

    # Position: the center of the sample box.
    assert position == pytest.approx((15.0, 150.0))

    # Size: the box extent scaled by the custom factors (1.0 and 10.0).
    scaled_extent = np.array([10.0 * 1.0, 100.0 * 10.0])
    np.testing.assert_allclose(size, scaled_extent)
    assert size.shape == (2,)


def test_anchor_bbox_mapper_encode_zero_box(zero_sound_event, default_mapper):
    """A zero-sized box should encode to its single point and a zero size."""
    position, size = default_mapper.encode(zero_sound_event)

    assert position == pytest.approx((15.0, 150.0))
    np.testing.assert_allclose(size, np.array([0.0, 0.0]))


# Expected box coordinates, as (anchor name, coordinates) pairs, when
# ``_build_bounding_box`` is given the reference position (50.0, 500.0), a
# duration of 10.0 and a bandwidth of 100.0 (the inputs used by
# ``test_build_bounding_box``).
BUILD_BOX_TEST_CASES = [
    ("bottom-left", [50.0, 500.0, 60.0, 600.0]),
    ("bottom-right", [40.0, 500.0, 50.0, 600.0]),
    ("top-left", [50.0, 400.0, 60.0, 500.0]),
    ("top-right", [40.0, 400.0, 50.0, 500.0]),
    ("center-left", [50.0, 450.0, 60.0, 550.0]),
    ("center-right", [40.0, 450.0, 50.0, 550.0]),
    ("top-center", [45.0, 400.0, 55.0, 500.0]),
    ("bottom-center", [45.0, 500.0, 55.0, 600.0]),
    # The three anchors below all expect a box centered on the reference.
    ("center", [45.0, 450.0, 55.0, 550.0]),
    ("centroid", [45.0, 450.0, 55.0, 550.0]),
    ("point_on_surface", [45.0, 450.0, 55.0, 550.0]),
]


@pytest.mark.parametrize(
    "position_type, expected_coords", BUILD_BOX_TEST_CASES
)
def test_build_bounding_box(position_type, expected_coords):
    """_build_bounding_box should place the box relative to each anchor."""
    reference_point = (50.0, 500.0)
    box = _build_bounding_box(
        reference_point,
        10.0,  # duration
        100.0,  # bandwidth
        anchor=position_type,
    )

    assert isinstance(box, data.BoundingBox)
    np.testing.assert_allclose(box.coordinates, expected_coords)


def test_build_bounding_box_invalid_anchor():
    """_build_bounding_box should reject an anchor name it does not know."""
    origin = (0, 0)
    with pytest.raises(ValueError, match="Invalid anchor"):
        _build_bounding_box(origin, 1, 1, anchor="invalid-spot")  # type: ignore


@pytest.mark.parametrize(
    "anchor", [case[0] for case in POSITION_TEST_CASES]
)
def test_anchor_bbox_mapper_encode_decode_roundtrip(
    sample_sound_event, sample_bbox, anchor
):
    """Decoding an encoded box should give back the original coordinates."""
    mapper = AnchorBBoxMapper(anchor=anchor)
    decoded = mapper.decode(*mapper.encode(sample_sound_event))

    assert isinstance(decoded, data.BoundingBox)
    np.testing.assert_allclose(
        decoded.coordinates,
        sample_bbox.coordinates,
        atol=1e-6,
    )


def test_anchor_bbox_mapper_roundtrip_custom_scale(
    sample_sound_event, sample_bbox, custom_mapper
):
    """The roundtrip should also hold when non-default scales are used."""
    encoded_position, encoded_size = custom_mapper.encode(sample_sound_event)
    decoded = custom_mapper.decode(encoded_position, encoded_size)

    assert isinstance(decoded, data.BoundingBox)
    np.testing.assert_allclose(
        decoded.coordinates,
        sample_bbox.coordinates,
        atol=1e-6,
    )


def test_anchor_bbox_mapper_roundtrip_zero_box(
    zero_sound_event, zero_bbox, default_mapper
):
    """The roundtrip should reproduce a box with zero width and height."""
    encoded_position, encoded_size = default_mapper.encode(zero_sound_event)
    decoded = default_mapper.decode(encoded_position, encoded_size)

    np.testing.assert_allclose(
        decoded.coordinates,
        zero_bbox.coordinates,
        atol=1e-6,
    )


def test_anchor_bbox_mapper_decode_invalid_size_shape(default_mapper):
    """decode should raise ValueError for size arrays of the wrong shape."""
    anchor_point = (10, 100)
    malformed_sizes = (
        np.array([1.0]),  # too few entries
        np.array([1.0, 2.0, 3.0]),  # too many entries
        np.array([[1.0], [2.0]]),  # two entries, but two-dimensional
    )

    for malformed in malformed_sizes:
        with pytest.raises(
            ValueError, match="does not have the expected shape"
        ):
            default_mapper.decode(anchor_point, malformed)


def test_build_roi_mapper():
    """build_roi_mapper should return a mapper that mirrors its config."""
    config = BBoxAnchorMapperConfig(
        anchor="top-right",
        time_scale=2.0,
        frequency_scale=20.0,
    )

    built = build_roi_mapper(config)

    assert isinstance(built, AnchorBBoxMapper)
    for field in ("anchor", "time_scale", "frequency_scale"):
        assert getattr(built, field) == getattr(config, field)


def test_get_peak_energy_coordinates(generate_whistle):
    """The peak search should land on a lone whistle inside the region."""
    expected_time, expected_freq = 0.5, 40_000

    # Synthesize a WAV file holding one short whistle at the expected spot.
    wav_path = generate_whistle(
        time=expected_time,
        frequency=expected_freq,
        duration=1.0,
        samplerate=256_000,
        whistle_duration=0.01,
    )

    # Search a region that contains the whistle, using a preprocessor built
    # from the default configuration.
    found_time, found_freq = get_peak_energy_coordinates(
        recording=data.Recording.from_file(path=wav_path),
        preprocessor=build_preprocessor(),
        start_time=0.2,
        end_time=0.7,
        low_freq=20_000,
        high_freq=60_000,
        loading_buffer=0.05,
    )

    # The located peak must match the whistle within the given tolerances.
    assert found_time == pytest.approx(expected_time, abs=0.01)
    assert found_freq == pytest.approx(expected_freq, abs=1000)


def test_get_peak_energy_coordinates_with_two_whistles(generate_whistle):
    """The peak search should stay inside the region of interest.

    A loud whistle and a quiet whistle are mixed into one recording. The
    search region covers only the quiet one, so the reported peak must be
    the quiet whistle and not the louder one outside the region.
    """
    samplerate = 256_000
    shared_kwargs = {
        "duration": 1.0,
        "samplerate": samplerate,
        "whistle_duration": 0.01,
    }

    # (time, frequency, amplitude) of the loud and the quiet whistle.
    loud_time, loud_freq, loud_gain = 0.2, 30_000, 1.0
    quiet_time, quiet_freq, quiet_gain = 0.8, 50_000, 0.1

    # One WAV file per whistle.
    loud_path = generate_whistle(
        time=loud_time, frequency=loud_freq, **shared_kwargs
    )
    quiet_path = generate_whistle(
        time=quiet_time, frequency=quiet_freq, **shared_kwargs
    )

    # Read both signals back and mix them with their respective amplitudes.
    loud_audio, _ = sf.read(loud_path)
    quiet_audio, _ = sf.read(quiet_path)
    blended = loud_audio * loud_gain + quiet_audio * quiet_gain

    # Store the mix next to the generated files and load it as a recording.
    blended_path = loud_path.parent / "mixed_whistles.wav"
    sf.write(str(blended_path), blended, samplerate)
    recording = data.Recording.from_file(path=blended_path)

    # Search a region that contains only the quiet whistle.
    found_time, found_freq = get_peak_energy_coordinates(
        recording=recording,
        preprocessor=build_preprocessor(),
        start_time=0.7,
        end_time=0.9,
        low_freq=45_000,
        high_freq=55_000,
        loading_buffer=0.05,
    )

    # The peak must be the quiet whistle, within the given tolerances.
    assert found_time == pytest.approx(quiet_time, abs=0.01)
    assert found_freq == pytest.approx(quiet_freq, abs=1000)


def test_get_peak_energy_coordinates_silent_region(create_recording):
    """The peak search on a silent recording should stay within bounds.

    There is no real peak to find, so the exact result may depend on how
    ties are resolved. The test therefore only checks that the reported
    coordinates fall inside the search region.
    """
    # Silent recording to search in.
    recording = create_recording(duration=2.0, samplerate=44_100)

    # Bounds of the region of interest.
    time_bounds = (0.5, 1.5)
    freq_bounds = (10_000, 20_000)

    found_time, found_freq = get_peak_energy_coordinates(
        recording=recording,
        preprocessor=build_preprocessor(),
        start_time=time_bounds[0],
        end_time=time_bounds[1],
        low_freq=freq_bounds[0],
        high_freq=freq_bounds[1],
        loading_buffer=0.05,
    )

    # Only containment is asserted; see the docstring for the reason.
    assert time_bounds[0] <= found_time <= time_bounds[1]
    assert freq_bounds[0] <= found_freq <= freq_bounds[1]


def test_peak_energy_bbox_mapper_encode(generate_whistle):
    """
    Exercise the nominal case of PeakEnergyBBoxMapper.encode.

    A whistle is placed at a known spot inside a bounding box. The encoded
    position must match that spot, and the encoded size must hold the four
    scaled distances from the position to the box edges.
    """
    time_scale, freq_scale = 100.0, 0.1

    # Box edges and the whistle location inside the box.
    box_left, box_bottom, box_right, box_top = 1.0, 10000, 2.0, 30000
    whistle_time, whistle_freq = 1.6, 25000

    # Recording with a whistle at the chosen location, wrapped in a sound
    # event that carries the box as its geometry.
    wav_path = generate_whistle(
        time=whistle_time,
        frequency=whistle_freq,
        duration=3.0,
        samplerate=256_000,
    )
    sound_event = data.SoundEvent(
        geometry=data.BoundingBox(
            coordinates=[box_left, box_bottom, box_right, box_top]
        ),
        recording=data.Recording.from_file(path=wav_path),
    )

    # Preprocessor configured with pcen set to None and spectral mean
    # subtraction switched off.
    preprocessing = PreprocessingConfig.model_validate(
        {
            "spectrogram": {
                "pcen": None,
                "spectral_mean_substraction": False,
            }
        }
    )
    mapper = PeakEnergyBBoxMapper(
        preprocessor=build_preprocessor(preprocessing),
        time_scale=time_scale,
        frequency_scale=freq_scale,
    )

    position, size = mapper.encode(sound_event)

    # The position must coincide with the whistle, within tolerance.
    assert position[0] == pytest.approx(whistle_time, abs=0.01)
    assert position[1] == pytest.approx(whistle_freq, abs=1000)

    # The size holds the scaled distances to the left, bottom, right and top
    # edges, measured from the position that was actually identified.
    found_time, found_freq = position
    expected_size = np.array(
        [
            (found_time - box_left) * time_scale,
            (found_freq - box_bottom) * freq_scale,
            (box_right - found_time) * time_scale,
            (box_top - found_freq) * freq_scale,
        ]
    )

    assert size.shape == (4,)
    np.testing.assert_allclose(size, expected_size, rtol=1e-5)


def test_peak_energy_bbox_mapper_decode():
    """
    Check that PeakEnergyBBoxMapper.decode rebuilds the expected BoundingBox.
    """
    # Given a peak position and the scaled distances to the four box edges.
    time_scale, freq_scale = 100.0, 0.1
    anchor_time, anchor_freq = 1.5, 15000
    scaled_size = np.array([50.0, 500.0, 50.0, 500.0])

    mapper = PeakEnergyBBoxMapper(
        preprocessor=build_preprocessor(),
        time_scale=time_scale,
        frequency_scale=freq_scale,
    )

    # When
    decoded = mapper.decode((anchor_time, anchor_freq), scaled_size)

    # Then each edge sits at the peak position offset by the unscaled
    # distance: left and bottom are subtracted, right and top are added.
    left, bottom, right, top = scaled_size
    expected_coordinates = [
        anchor_time - left / time_scale,
        anchor_freq - bottom / freq_scale,
        anchor_time + right / time_scale,
        anchor_freq + top / freq_scale,
    ]

    assert isinstance(decoded, data.BoundingBox)
    np.testing.assert_allclose(decoded.coordinates, expected_coordinates)


def test_peak_energy_bbox_mapper_encode_decode_roundtrip(generate_whistle):
    """
    Check that PeakEnergyBBoxMapper decode(encode(event)) gives back the
    bounding box of the original sound event.
    """
    # Given a box and a whistle located inside it.
    source_box = data.BoundingBox(coordinates=[1.0, 10000, 2.0, 30000])
    wav_path = generate_whistle(
        time=1.6,
        frequency=25000,
        duration=3.0,
        samplerate=256_000,
    )
    sound_event = data.SoundEvent(
        geometry=source_box,
        recording=data.Recording.from_file(path=wav_path),
    )

    # The mapper uses a preprocessor with pcen set to None and spectral
    # mean subtraction switched off.
    preprocessing = PreprocessingConfig.model_validate(
        {
            "spectrogram": {
                "pcen": None,
                "spectral_mean_substraction": False,
            }
        }
    )
    mapper = PeakEnergyBBoxMapper(
        preprocessor=build_preprocessor(preprocessing)
    )

    # When the event is encoded and the result decoded straight away.
    decoded = mapper.decode(*mapper.encode(sound_event))

    # Then the decoded coordinates match the source box within tolerance.
    np.testing.assert_allclose(
        decoded.coordinates,
        source_box.coordinates,
        rtol=1e-5,
    )


def test_build_roi_mapper_for_anchor_bbox():
    """The factory should build an AnchorBBoxMapper from its config class."""
    # Given
    settings = {
        "anchor": "center",
        "time_scale": 123.0,
        "frequency_scale": 456.0,
    }

    # When
    built = build_roi_mapper(BBoxAnchorMapperConfig(**settings))

    # Then
    assert isinstance(built, AnchorBBoxMapper)
    for field, expected in settings.items():
        assert getattr(built, field) == expected


def test_build_roi_mapper_for_peak_energy_bbox():
    """The factory should build a PeakEnergyBBoxMapper from its config."""
    # Given a mapper config that carries its own preprocessing settings.
    spectrogram_settings = {
        "pcen": None,
        "spectral_mean_substraction": True,
        "scale": "dB",
    }
    config = PeakEnergyBBoxMapperConfig(
        loading_buffer=0.99,
        time_scale=789.0,
        frequency_scale=123.0,
        preprocessing=PreprocessingConfig.model_validate(
            {"spectrogram": spectrogram_settings}
        ),
    )

    # When
    built = build_roi_mapper(config)

    # Then
    assert isinstance(built, PeakEnergyBBoxMapper)
    expected_attributes = {
        "loading_buffer": 0.99,
        "time_scale": 789.0,
        "frequency_scale": 123.0,
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(built, attribute) == expected


def test_build_roi_mapper_raises_error_for_unknown_name():
    """
    Check that the factory raises NotImplementedError, with an informative
    message, for a config whose mapper name it does not recognize.
    """

    # Given a stand-in config that only carries an unknown name.
    class DummyConfig:
        name = "non_existent_mapper"

    unknown_config = DummyConfig()

    # Then the error is raised and its message names the offending mapper.
    with pytest.raises(
        NotImplementedError,
        match="No ROI mapper of name 'non_existent_mapper'",
    ):
        build_roi_mapper(unknown_config)  # type: ignore