Checked that returned features is a 2d numpy array

2026-01-10 17:19:34 +01:00 · 2023-04-07 14:46:38 -06:00 · 2023-04-07 14:46:38 -06:00 · 0a22f1798e
commit 0a22f1798e
parent 29074c689e
3 changed files with 57 additions and 30 deletions
--- a/batdetect2/api.py
+++ b/batdetect2/api.py
@ -115,6 +115,7 @@ from batdetect2.types import (
    DetectionModel,
    ModelOutput,
    ProcessingConfiguration,
    RunResults,
    SpectrogramParameters,
 )
 from batdetect2.utils.detector_utils import list_audio_files, load_model
@ -134,6 +135,7 @@ __all__ = [
    "process_audio",
    "process_file",
    "process_spectrogram",
    "print_summary",
 ]
@ -150,11 +152,11 @@ def get_config(**kwargs) -> ProcessingConfiguration:
    Can be used to override default parameters by passing keyword arguments.
    """
-    return {**DEFAULT_PROCESSING_CONFIGURATIONS, **kwargs}  # type: ignore
+    return {**DEFAULT_PROCESSING_CONFIGURATIONS, **PARAMS, **kwargs}  # type: ignore
 # Default processing configuration
-CONFIG = get_config(**PARAMS)
+CONFIG = get_config()
 def load_audio(
@ -270,7 +272,7 @@ def process_spectrogram(
    samp_rate: int = TARGET_SAMPLERATE_HZ,
    model: DetectionModel = MODEL,
    config: Optional[ProcessingConfiguration] = None,
-) -> Tuple[List[Annotation], List[np.ndarray]]:
+) -> Tuple[List[Annotation], np.ndarray]:
    """Process spectrogram with model.
    Parameters
@ -289,7 +291,11 @@ def process_spectrogram(
    Returns
    -------
-    DetectionResult
+    detections : List[Annotation]
        List of detections.
    features: np.ndarray
        An array of features. The array has shape (n_detections, n_features)
        where each row is a feature vector for a detection.
    """
    if config is None:
        config = CONFIG
@ -308,7 +314,7 @@ def process_audio(
    model: DetectionModel = MODEL,
    config: Optional[ProcessingConfiguration] = None,
    device: torch.device = DEVICE,
-) -> Tuple[List[Annotation], List[np.ndarray], torch.Tensor]:
+) -> Tuple[List[Annotation], np.ndarray, torch.Tensor]:
    """Process audio array with model.
    Parameters
@ -329,10 +335,9 @@ def process_audio(
    -------
    annotations : List[Annotation]
        List of predicted annotations.
-
+    features: np.ndarray
-    features: List[np.ndarray]
+        An array of features. The array has shape (n_detections, n_features)
-        List of extracted features for each annotation.
+        where each row is a feature vector for a detection.
    spec : torch.Tensor
        Spectrogram of the audio used for prediction.
    """
@ -395,3 +400,26 @@ model: DetectionModel = MODEL
 config: ProcessingConfiguration = CONFIG
 """Default processing configuration."""
 def print_summary(results: RunResults) -> None:
    """Print a tab-separated summary of the detections in a run result.
    Writes the result id, the number of detected calls, a header row and
    then one row per annotation to standard output.
    Parameters
    ----------
    results : RunResults
        Run results. Only ``results["pred_dict"]`` is read: its ``"id"``
        entry and its ``"annotation"`` list.
    """
    print("Results for " + results["pred_dict"]["id"])
    print("{} calls detected\n".format(len(results["pred_dict"]["annotation"])))
    # Header for the per-annotation rows printed below.
    print("time\tprob\tlfreq\tspecies_name")
    # One row per annotation, with fields in the same order as the header.
    for ann in results["pred_dict"]["annotation"]:
        print(
            "{}\t{}\t{}\t{}".format(
                ann["start_time"],
                ann["class_prob"],
                ann["low_freq"],
                ann["class"],
            )
        )
--- a/batdetect2/utils/detector_utils.py
+++ b/batdetect2/utils/detector_utils.py
@ -295,7 +295,6 @@ def save_results_to_file(results, op_path: str) -> None:
        op_path (str): Output path.
    """
    # make directory if it does not exist
    if not os.path.isdir(os.path.dirname(op_path)):
        os.makedirs(os.path.dirname(op_path))
@ -474,7 +473,7 @@ def _process_spectrogram(
    samplerate: int,
    model: DetectionModel,
    config: ProcessingConfiguration,
-) -> Tuple[PredictionResults, List[np.ndarray]]:
+) -> Tuple[PredictionResults, np.ndarray]:
    # evaluate model
    with torch.no_grad():
        outputs = model(spec)
@ -504,7 +503,7 @@ def _process_spectrogram(
    ):
        pred_nms["class_probs"] = class_probs[:-1, :]
-    return pred_nms, features
+    return pred_nms, np.concatenate(features, axis=0)
 def postprocess_model_outputs(
@ -550,7 +549,7 @@ def process_spectrogram(
    samplerate: int,
    model: DetectionModel,
    config: ProcessingConfiguration,
-) -> Tuple[List[Annotation], List[np.ndarray]]:
+) -> Tuple[List[Annotation], np.ndarray]:
    """Process a spectrogram with detection model.
    Will run non-maximum suppression on the output of the model.
@ -569,10 +568,11 @@ def process_spectrogram(
    Returns
    -------
-    annotations : List[Annotation]
+    detections: List[Annotation]
-        List of annotations predicted by the model.
+        List of detections predicted by the model.
-    features : List[np.ndarray]
+    features : np.ndarray
-        List of CNN features associated with each annotation.
+        An array of CNN features associated with each annotation.
        The array is of shape (num_detections, num_features).
        Is empty if `config["cnn_features"]` is False.
    """
    pred_nms, features = _process_spectrogram(
@ -582,12 +582,12 @@ def process_spectrogram(
        config,
    )
-    annotations = get_annotations_from_preds(
+    detections = get_annotations_from_preds(
        pred_nms,
        config["class_names"],
    )
-    return annotations, features
+    return detections, features
 def _process_audio_array(
@ -596,7 +596,7 @@ def _process_audio_array(
    model: DetectionModel,
    config: ProcessingConfiguration,
    device: torch.device,
-) -> Tuple[PredictionResults, List[np.ndarray], torch.Tensor]:
+) -> Tuple[PredictionResults, np.ndarray, torch.Tensor]:
    # load audio file and compute spectrogram
    _, spec, _ = compute_spectrogram(
        audio,
@ -634,7 +634,7 @@ def process_audio_array(
    model: DetectionModel,
    config: ProcessingConfiguration,
    device: torch.device,
-) -> Tuple[List[Annotation], List[np.ndarray], torch.Tensor]:
+) -> Tuple[List[Annotation], np.ndarray, torch.Tensor]:
    """Process a single audio array with detection model.
    Parameters
@ -656,10 +656,9 @@ def process_audio_array(
    -------
    annotations : List[Annotation]
        List of annotations predicted by the model.
-
+    features : np.ndarray
-    features : List[np.ndarray]
+        Array of CNN features associated with each annotation.
-        List of CNN features associated with each annotation.
+        The array is of shape (num_detections, num_features).
    spec : torch.Tensor
        Spectrogram of the audio used as input.
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -81,7 +81,7 @@ def test_get_default_config():
    assert config["denoise_spec_avg"] is True
    assert config["max_scale_spec"] is False
    assert config["scale_raw_audio"] is False
-    assert len(config["class_names"]) == 0
+    assert len(config["class_names"]) == 17
    assert config["detection_threshold"] == 0.01
    assert config["time_expansion"] == 1
    assert config["top_n"] == 3
@ -193,8 +193,8 @@ def test_process_spectrogram_with_default_model():
    assert "high_freq" in sample_pred
    assert features is not None
-    assert isinstance(features, list)
+    assert isinstance(features, np.ndarray)
-    assert len(features) == 1
+    assert len(features) == len(predictions)
 def test_process_audio_with_default_model():
@ -216,8 +216,8 @@ def test_process_audio_with_default_model():
    assert "high_freq" in sample_pred
    assert features is not None
-    assert isinstance(features, list)
+    assert isinstance(features, np.ndarray)
-    assert len(features) == 1
+    assert len(features) == len(predictions)
    assert spec is not None
    assert isinstance(spec, torch.Tensor)