mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 22:51:58 +02:00
404 lines
13 KiB
Python
404 lines
13 KiB
Python
import numpy as np
|
|
import pytest
|
|
import soundfile as sf
|
|
from soundevent import data
|
|
|
|
from batdetect2.preprocess import build_preprocessor
|
|
from batdetect2.targets.rois import (
|
|
DEFAULT_ANCHOR,
|
|
DEFAULT_FREQUENCY_SCALE,
|
|
DEFAULT_TIME_SCALE,
|
|
SIZE_HEIGHT,
|
|
SIZE_WIDTH,
|
|
AnchorBBoxMapper,
|
|
BBoxAnchorMapperConfig,
|
|
_build_bounding_box,
|
|
build_roi_mapper,
|
|
get_peak_energy_coordinates,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_bbox() -> data.BoundingBox:
|
|
"""A standard bounding box for testing."""
|
|
return data.BoundingBox(coordinates=[10.0, 100.0, 20.0, 200.0])
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_recording(create_recording) -> data.Recording:
|
|
return create_recording(duration=30, samplerate=4_000)
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_sound_event(sample_bbox, sample_recording) -> data.SoundEvent:
|
|
return data.SoundEvent(geometry=sample_bbox, recording=sample_recording)
|
|
|
|
|
|
@pytest.fixture
|
|
def zero_bbox() -> data.BoundingBox:
|
|
"""A bounding box with zero duration and bandwidth."""
|
|
return data.BoundingBox(coordinates=[15.0, 150.0, 15.0, 150.0])
|
|
|
|
|
|
@pytest.fixture
|
|
def zero_sound_event(zero_bbox, sample_recording) -> data.SoundEvent:
|
|
"""A sample sound event with a zero-sized bounding box."""
|
|
return data.SoundEvent(geometry=zero_bbox, recording=sample_recording)
|
|
|
|
|
|
@pytest.fixture
|
|
def default_mapper() -> AnchorBBoxMapper:
|
|
"""A BBoxEncoder with default settings."""
|
|
return AnchorBBoxMapper()
|
|
|
|
|
|
@pytest.fixture
|
|
def custom_encoder() -> AnchorBBoxMapper:
|
|
"""A BBoxEncoder with custom settings."""
|
|
return AnchorBBoxMapper(
|
|
anchor="center", time_scale=1.0, frequency_scale=10.0
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def custom_mapper() -> AnchorBBoxMapper:
|
|
"""An AnchorBBoxMapper with custom settings."""
|
|
return AnchorBBoxMapper(
|
|
anchor="center", time_scale=1.0, frequency_scale=10.0
|
|
)
|
|
|
|
|
|
def test_bbox_encoder_init_defaults(default_mapper):
|
|
"""Test BBoxEncoder initialization with default arguments."""
|
|
assert default_mapper.anchor == DEFAULT_ANCHOR
|
|
assert default_mapper.time_scale == DEFAULT_TIME_SCALE
|
|
assert default_mapper.frequency_scale == DEFAULT_FREQUENCY_SCALE
|
|
assert default_mapper.dimension_names == [SIZE_WIDTH, SIZE_HEIGHT]
|
|
|
|
|
|
def test_bbox_encoder_init_custom(custom_encoder):
|
|
"""Test BBoxEncoder initialization with custom arguments."""
|
|
assert custom_encoder.anchor == "center"
|
|
assert custom_encoder.time_scale == 1.0
|
|
assert custom_encoder.frequency_scale == 10.0
|
|
assert custom_encoder.dimension_names == [SIZE_WIDTH, SIZE_HEIGHT]
|
|
|
|
|
|
POSITION_TEST_CASES = [
|
|
("bottom-left", (10.0, 100.0)),
|
|
("bottom-right", (20.0, 100.0)),
|
|
("top-left", (10.0, 200.0)),
|
|
("top-right", (20.0, 200.0)),
|
|
("center-left", (10.0, 150.0)),
|
|
("center-right", (20.0, 150.0)),
|
|
("top-center", (15.0, 200.0)),
|
|
("bottom-center", (15.0, 100.0)),
|
|
("center", (15.0, 150.0)),
|
|
("centroid", (15.0, 150.0)),
|
|
("point_on_surface", (15.0, 150.0)),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("anchor, expected_pos", POSITION_TEST_CASES)
|
|
def test_anchor_bbox_mapper_encode_position(
|
|
sample_sound_event, anchor, expected_pos
|
|
):
|
|
"""Test encode returns the correct position for various anchors."""
|
|
encoder = AnchorBBoxMapper(anchor=anchor)
|
|
actual_pos, _ = encoder.encode(sample_sound_event)
|
|
assert actual_pos == pytest.approx(expected_pos)
|
|
|
|
|
|
def test_anchor_bbox_mapper_encode_defaults(
|
|
sample_sound_event, default_mapper
|
|
):
|
|
"""Test encode with default settings returns correct position and size."""
|
|
expected_pos = (10.0, 100.0) # bottom-left
|
|
expected_size = np.array(
|
|
[
|
|
10.0 * DEFAULT_TIME_SCALE,
|
|
100.0 * DEFAULT_FREQUENCY_SCALE,
|
|
]
|
|
)
|
|
actual_pos, actual_size = default_mapper.encode(sample_sound_event)
|
|
assert actual_pos == pytest.approx(expected_pos)
|
|
np.testing.assert_allclose(actual_size, expected_size)
|
|
assert actual_size.shape == (2,)
|
|
|
|
|
|
def test_anchor_bbox_mapper_encode_custom(sample_sound_event, custom_mapper):
|
|
"""Test encode with custom settings returns correct position and size."""
|
|
expected_pos = (15.0, 150.0) # center
|
|
expected_size = np.array([10.0 * 1.0, 100.0 * 10.0])
|
|
|
|
actual_pos, actual_size = custom_mapper.encode(sample_sound_event)
|
|
assert actual_pos == pytest.approx(expected_pos)
|
|
np.testing.assert_allclose(actual_size, expected_size)
|
|
assert actual_size.shape == (2,)
|
|
|
|
|
|
def test_anchor_bbox_mapper_encode_zero_box(zero_sound_event, default_mapper):
|
|
"""Test encode for a zero-sized box."""
|
|
expected_pos = (15.0, 150.0)
|
|
expected_size = np.array([0.0, 0.0])
|
|
actual_pos, actual_size = default_mapper.encode(zero_sound_event)
|
|
assert actual_pos == pytest.approx(expected_pos)
|
|
np.testing.assert_allclose(actual_size, expected_size)
|
|
|
|
|
|
BUILD_BOX_TEST_CASES = [
|
|
("bottom-left", [50.0, 500.0, 60.0, 600.0]),
|
|
("bottom-right", [40.0, 500.0, 50.0, 600.0]),
|
|
("top-left", [50.0, 400.0, 60.0, 500.0]),
|
|
("top-right", [40.0, 400.0, 50.0, 500.0]),
|
|
("center-left", [50.0, 450.0, 60.0, 550.0]),
|
|
("center-right", [40.0, 450.0, 50.0, 550.0]),
|
|
("top-center", [45.0, 400.0, 55.0, 500.0]),
|
|
("bottom-center", [45.0, 500.0, 55.0, 600.0]),
|
|
("center", [45.0, 450.0, 55.0, 550.0]),
|
|
("centroid", [45.0, 450.0, 55.0, 550.0]),
|
|
("point_on_surface", [45.0, 450.0, 55.0, 550.0]),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"position_type, expected_coords", BUILD_BOX_TEST_CASES
|
|
)
|
|
def test_build_bounding_box(position_type, expected_coords):
|
|
"""Test _build_bounding_box for various position types."""
|
|
ref_pos = (50.0, 500.0)
|
|
duration = 10.0
|
|
bandwidth = 100.0
|
|
bbox = _build_bounding_box(
|
|
ref_pos, duration, bandwidth, anchor=position_type
|
|
)
|
|
assert isinstance(bbox, data.BoundingBox)
|
|
np.testing.assert_allclose(bbox.coordinates, expected_coords)
|
|
|
|
|
|
def test_build_bounding_box_invalid_anchor():
|
|
"""Test _build_bounding_box raises error for invalid position."""
|
|
with pytest.raises(ValueError, match="Invalid anchor"):
|
|
_build_bounding_box(
|
|
(0, 0),
|
|
1,
|
|
1,
|
|
anchor="invalid-spot", # type: ignore
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"anchor", [anchor for anchor, _ in POSITION_TEST_CASES]
|
|
)
|
|
def test_anchor_bbox_mapper_encode_decode_roundtrip(
|
|
sample_sound_event, sample_bbox, anchor
|
|
):
|
|
"""Test encode-decode roundtrip reconstructs the original bbox."""
|
|
mapper = AnchorBBoxMapper(anchor=anchor)
|
|
position, size = mapper.encode(sample_sound_event)
|
|
recovered_bbox = mapper.decode(position, size)
|
|
|
|
assert isinstance(recovered_bbox, data.BoundingBox)
|
|
np.testing.assert_allclose(
|
|
recovered_bbox.coordinates, sample_bbox.coordinates, atol=1e-6
|
|
)
|
|
|
|
|
|
def test_anchor_bbox_mapper_roundtrip_custom_scale(
|
|
sample_sound_event, sample_bbox, custom_mapper
|
|
):
|
|
"""Test encode-decode roundtrip with custom scaling factors."""
|
|
position, size = custom_mapper.encode(sample_sound_event)
|
|
recovered_bbox = custom_mapper.decode(position, size)
|
|
|
|
assert isinstance(recovered_bbox, data.BoundingBox)
|
|
np.testing.assert_allclose(
|
|
recovered_bbox.coordinates, sample_bbox.coordinates, atol=1e-6
|
|
)
|
|
|
|
|
|
def test_anchor_bbox_mapper_roundtrip_zero_box(
|
|
zero_sound_event, zero_bbox, default_mapper
|
|
):
|
|
"""Test encode-decode roundtrip for a zero-sized box."""
|
|
position, size = default_mapper.encode(zero_sound_event)
|
|
recovered_bbox = default_mapper.decode(position, size)
|
|
np.testing.assert_allclose(
|
|
recovered_bbox.coordinates, zero_bbox.coordinates, atol=1e-6
|
|
)
|
|
|
|
|
|
def test_anchor_bbox_mapper_decode_invalid_size_shape(default_mapper):
|
|
"""Test decode raises ValueError for incorrect size shape."""
|
|
ref_pos = (10, 100)
|
|
with pytest.raises(ValueError, match="does not have the expected shape"):
|
|
default_mapper.decode(ref_pos, np.array([1.0]))
|
|
with pytest.raises(ValueError, match="does not have the expected shape"):
|
|
default_mapper.decode(ref_pos, np.array([1.0, 2.0, 3.0]))
|
|
with pytest.raises(ValueError, match="does not have the expected shape"):
|
|
default_mapper.decode(ref_pos, np.array([[1.0], [2.0]]))
|
|
|
|
|
|
def test_build_roi_mapper():
|
|
"""Test build_roi_mapper creates a configured BBoxEncoder."""
|
|
config = BBoxAnchorMapperConfig(
|
|
anchor="top-right", time_scale=2.0, frequency_scale=20.0
|
|
)
|
|
mapper = build_roi_mapper(config)
|
|
|
|
assert isinstance(mapper, AnchorBBoxMapper)
|
|
assert mapper.anchor == config.anchor
|
|
assert mapper.time_scale == config.time_scale
|
|
assert mapper.frequency_scale == config.frequency_scale
|
|
|
|
|
|
def test_get_peak_energy_coordinates(generate_whistle):
|
|
whistle_time = 0.5
|
|
whistle_frequency = 40_000
|
|
duration = 1.0
|
|
samplerate = 256_000
|
|
|
|
# Generate a WAV file with a whistle
|
|
whistle_path = generate_whistle(
|
|
time=whistle_time,
|
|
frequency=whistle_frequency,
|
|
duration=duration,
|
|
samplerate=samplerate,
|
|
whistle_duration=0.01,
|
|
)
|
|
|
|
# Create a recording object from the generated WAV
|
|
recording = data.Recording.from_file(path=whistle_path)
|
|
|
|
# Build a preprocessor (default config should be fine for this test)
|
|
preprocessor = build_preprocessor()
|
|
|
|
# Define a region of interest that contains the whistle
|
|
start_time = 0.2
|
|
end_time = 0.7
|
|
low_freq = 20_000
|
|
high_freq = 60_000
|
|
|
|
# Get the peak energy coordinates
|
|
peak_time, peak_freq = get_peak_energy_coordinates(
|
|
recording=recording,
|
|
preprocessor=preprocessor,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
low_freq=low_freq,
|
|
high_freq=high_freq,
|
|
loading_buffer=0.05,
|
|
)
|
|
|
|
# Assert that the peak coordinates are close to the expected values
|
|
assert peak_time == pytest.approx(whistle_time, abs=0.01)
|
|
assert peak_freq == pytest.approx(whistle_frequency, abs=1000)
|
|
|
|
|
|
def test_get_peak_energy_coordinates_with_two_whistles(generate_whistle):
|
|
# Parameters for the first (stronger) whistle
|
|
strong_whistle_time = 0.2
|
|
strong_whistle_frequency = 30_000
|
|
strong_whistle_amplitude = 1.0 # Full amplitude
|
|
|
|
# Parameters for the second (weaker) whistle
|
|
weak_whistle_time = 0.8
|
|
weak_whistle_frequency = 50_000
|
|
weak_whistle_amplitude = 0.1 # Weaker amplitude
|
|
|
|
# Recording parameters
|
|
duration = 1.0
|
|
samplerate = 256_000
|
|
|
|
# Generate WAV files for each whistle
|
|
strong_whistle_path = generate_whistle(
|
|
time=strong_whistle_time,
|
|
frequency=strong_whistle_frequency,
|
|
duration=duration,
|
|
samplerate=samplerate,
|
|
whistle_duration=0.01,
|
|
)
|
|
weak_whistle_path = generate_whistle(
|
|
time=weak_whistle_time,
|
|
frequency=weak_whistle_frequency,
|
|
duration=duration,
|
|
samplerate=samplerate,
|
|
whistle_duration=0.01,
|
|
)
|
|
|
|
# Load audio data
|
|
strong_audio, _ = sf.read(strong_whistle_path)
|
|
weak_audio, _ = sf.read(weak_whistle_path)
|
|
|
|
# Mix the audio files
|
|
mixed_audio = (
|
|
strong_audio * strong_whistle_amplitude
|
|
+ weak_audio * weak_whistle_amplitude
|
|
)
|
|
mixed_audio_path = strong_whistle_path.parent / "mixed_whistles.wav"
|
|
sf.write(str(mixed_audio_path), mixed_audio, samplerate)
|
|
|
|
# Create a recording object from the mixed WAV
|
|
recording = data.Recording.from_file(path=mixed_audio_path)
|
|
|
|
# Build a preprocessor
|
|
preprocessor = build_preprocessor()
|
|
|
|
# Define a region of interest that contains only the weaker whistle
|
|
start_time = 0.7
|
|
end_time = 0.9
|
|
low_freq = 45_000
|
|
high_freq = 55_000
|
|
|
|
# Get the peak energy coordinates within the bounding box
|
|
peak_time, peak_freq = get_peak_energy_coordinates(
|
|
recording=recording,
|
|
preprocessor=preprocessor,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
low_freq=low_freq,
|
|
high_freq=high_freq,
|
|
loading_buffer=0.05,
|
|
)
|
|
|
|
# Assert that the peak coordinates are close to the weaker whistle's values
|
|
assert peak_time == pytest.approx(weak_whistle_time, abs=0.01)
|
|
assert peak_freq == pytest.approx(weak_whistle_frequency, abs=1000)
|
|
|
|
|
|
def test_get_peak_energy_coordinates_silent_region(create_recording):
|
|
# Parameters for a silent recording
|
|
duration = 2.0 # seconds
|
|
samplerate = 44_100 # Hz
|
|
|
|
# Create a silent recording
|
|
recording = create_recording(duration=duration, samplerate=samplerate)
|
|
|
|
# Build a preprocessor
|
|
preprocessor = build_preprocessor()
|
|
|
|
# Define a region of interest within the silent recording
|
|
start_time = 0.5
|
|
end_time = 1.5
|
|
low_freq = 10_000
|
|
high_freq = 20_000
|
|
|
|
# Get the peak energy coordinates from the silent region
|
|
peak_time, peak_freq = get_peak_energy_coordinates(
|
|
recording=recording,
|
|
preprocessor=preprocessor,
|
|
start_time=start_time,
|
|
end_time=end_time,
|
|
low_freq=low_freq,
|
|
high_freq=high_freq,
|
|
loading_buffer=0.05,
|
|
)
|
|
|
|
# Assert that the peak coordinates are within the defined ROI bounds
|
|
assert start_time <= peak_time <= end_time
|
|
assert low_freq <= peak_freq <= high_freq
|
|
|
|
# Since there's no actual peak, the exact values might vary depending on
|
|
# argmax behavior with all-zero or very low, uniform energy. We just need
|
|
# to ensure they are within the search bounds.
|