mirror of https://github.com/macaodha/batdetect2.git (synced 2025-06-29 22:51:58 +02:00)
Added documentation to the API module
This commit is contained in:
parent cb650c265b
commit b0c3dbe403
@@ -1,3 +1,101 @@
"""Python API for bat_detect.

This module provides a Python API for bat_detect. It can be used to
process audio files or spectrograms with the default model or a custom
model.

Example
-------
You can use the default model to process audio files. To process a single
file, use the `process_file` function.

>>> import bat_detect.api as api
>>> # Process audio file
>>> results = api.process_file("audio_file.wav")

To process multiple files, use the `list_audio_files` function to get a list
of audio files in a directory. Then use the `process_file` function to
process each file.

>>> import bat_detect.api as api
>>> # Get list of audio files
>>> audio_files = api.list_audio_files("audio_directory")
>>> # Process audio files
>>> results = [api.process_file(f) for f in audio_files]

If the recording is longer than 3 seconds, the `process_file` function will
slice it into 3 second chunks and process each chunk separately. The results
are combined into a dictionary with the following keys (an example follows
the list):

- `pred_dict`: All the predictions from the model in the format
  expected by the annotation tool.
- `cnn_feats`: Optional. A list of `numpy` arrays containing the CNN features
  for each detection. The CNN features are the output of the CNN before
  the final classification layer. You can use these features to train
  your own classifier, or to do other processing on the detections.
  They are in the same order as the detections in
  `results['pred_dict']['annotation']`. Will only be returned if the
  `cnn_feats` parameter in the config is set to `True`.
- `spec_slices`: Optional. A list of `numpy` arrays containing the spectrogram
  for each of the processed chunks. Will only be returned if the
  `spec_slices` parameter in the config is set to `True`.
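
For example, enabling these optional outputs might look like the sketch
below. It assumes that `process_file` accepts a `config` argument and that a
helper such as `api.get_config` exists for building a configuration with the
`cnn_feats` and `spec_slices` flags; check the configuration handling in the
code for the exact names.

>>> import bat_detect.api as api
>>> # Hypothetical config with CNN features and spectrogram slices enabled
>>> config = api.get_config(cnn_feats=True, spec_slices=True)
>>> results = api.process_file("audio_file.wav", config=config)
>>> features = results["cnn_feats"]
>>> slices = results["spec_slices"]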

Alternatively, you can use the `process_audio` function to process an audio
array directly, or `process_spectrogram` to process spectrograms. This
allows you to do other preprocessing steps before running the model for
predictions.

>>> import bat_detect.api as api
>>> # Load audio
>>> audio = api.load_audio("audio_file.wav")
>>> # Process the audio array
>>> detections, features, spec = api.process_audio(audio)
>>> # Or compute and process the spectrogram
>>> spec = api.generate_spectrogram(audio)
>>> detections, features = api.process_spectrogram(spec)

Here `detections` is the list of detected calls, `features` is the list of
CNN features for each detection, and `spec` is the spectrogram of the
processed audio. Each detection is a dictionary similar to the
following:

    {
        'start_time': 0.0,
        'end_time': 0.1,
        'low_freq': 10000,
        'high_freq': 20000,
        'class': 'Myotis myotis',
        'class_prob': 0.9,
        'det_prob': 0.9,
        'individual': 0,
        'event': 'Echolocation'
    }
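
Assuming `features` is ordered like `detections` (as for the `cnn_feats`
output described above), the two can be paired directly. A minimal sketch
using only the keys shown in the example detection:

>>> # Pair each detection with its CNN features and keep confident ones
>>> confident = [
...     (det, feat)
...     for det, feat in zip(detections, features)
...     if det['det_prob'] >= 0.5
... ]
>>> # Collect the classes among the confident detections
>>> classes = {det['class'] for det, _ in confident}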

If you wish to interact directly with the model, you can use the `model`
attribute to get the default model.

>>> import bat_detect.api as api
>>> # Get the default model
>>> model = api.model
>>> # Process the spectrogram
>>> outputs = model(spec)

However, you will need to do the postprocessing yourself. The
model outputs are a collection of raw tensors. The `postprocess`
function can be used to convert the model outputs into a list of
detections and a list of CNN features.

>>> import bat_detect.api as api
>>> # Get the default model
>>> model = api.model
>>> # Process the spectrogram
>>> outputs = model(spec)
>>> # Postprocess the outputs
>>> detections, features = api.postprocess(outputs)

If you wish to use a custom model or change the default parameters, please
consult the API documentation in the code.

"""
import warnings
from typing import List, Optional, Tuple
@@ -81,7 +179,7 @@ def load_audio(
        Target sample rate, by default 256000
    scale : bool, optional
        Scale audio to [-1, 1], by default False
    max_duration : Optional[float], optional
    max_duration : float, optional
        Maximum duration of audio in seconds, by default None

    Returns
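
A usage sketch for the parameters documented in this hunk; the file path
argument follows the earlier examples, while treating `scale` and
`max_duration` as keyword arguments of `load_audio` is an assumption based
on the parameter list above.

>>> import bat_detect.api as api
>>> # Load at most the first 3 seconds of audio, scaled to [-1, 1]
>>> audio = api.load_audio("audio_file.wav", scale=True, max_duration=3.0)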
@@ -115,7 +213,7 @@ def generate_spectrogram(
        Sample rate. Defaults to 256000 which is the target sample rate of
        the default model. Only change if you loaded the audio with a
        different sample rate.
    config : Optional[SpectrogramParameters], optional
    config : SpectrogramParameters, optional
        Spectrogram parameters, by default None (uses default parameters).

    Returns