"""Manages the vocabulary (Terms and Tags) for defining training targets.

This module provides the necessary tools to declare, register, and manage the
set of `soundevent.data.Term` objects used throughout the `batdetect2.targets`
sub-package. It establishes a consistent vocabulary for filtering,
transforming, and classifying sound events based on their annotations (Tags).

The core component is the `TermRegistry`, which maps unique string keys
(aliases) to specific `Term` definitions. This allows users to refer to complex
terms using simple, consistent keys in configuration files and code.

Terms can be pre-defined, loaded from the `soundevent.terms` library, defined
programmatically, or loaded from external configuration files (e.g., YAML).
"""

from collections.abc import Mapping
from inspect import getmembers
from typing import Dict, List, Optional

from pydantic import BaseModel, Field
from soundevent import data, terms

from batdetect2.configs import load_config

__all__ = [
    "GENERIC_CLASS_KEY",
    "TagInfo",
    "TermConfig",
    "TermInfo",
    "TermRegistry",
    "call_type",
    "data_source",
    "generic_class",
    "get_tag_from_info",
    "get_term_from_key",
    "get_term_keys",
    "get_terms",
    "individual",
    "load_terms_from_config",
    "register_term",
    "term_registry",
]

# The default key used to reference the 'generic_class' term.
# Often used implicitly when defining classification targets.
GENERIC_CLASS_KEY = "class"

# Term used for tags identifying where a piece of data came from.
data_source = data.Term(
    name="soundevent:data_source",
    label="Data Source",
    definition=(
        "A unique identifier for the source of the data, typically "
        "representing the project, site, or deployment context."
    ),
)

call_type = data.Term(
    name="soundevent:call_type",
    label="Call Type",
    definition=(
        "A broad categorization of animal vocalizations based on their "
        "intended function or purpose (e.g., social, distress, mating, "
        "territorial, echolocation)."
    ),
)
"""Term representing the broad functional category of a vocalization."""

individual = data.Term(
    name="soundevent:individual",
    label="Individual",
    definition=(
        "An id for an individual animal. In the context of bioacoustic "
        "annotation, this term is used to label vocalizations that are "
        "attributed to a specific individual."
    ),
)
"""Term used for tags identifying a specific individual animal."""

generic_class = data.Term(
    name="soundevent:class",
    label="Class",
    definition=(
        "A generic term representing the name of a class within a "
        "classification model. Its specific meaning is determined by "
        "the model's application."
    ),
)
"""Generic term representing a classification model's output class label."""


class TermRegistry(Mapping[str, data.Term]):
    """Manages a registry mapping unique keys to Term definitions.

    This class acts as the central repository for the vocabulary of terms
    used within the target definition process. It allows registering terms
    with simple string keys and retrieving them consistently.

    The registry implements the read-only `Mapping` protocol (`registry[key]`,
    `len(registry)`, iteration over keys, `key in registry`); mutation goes
    through `add_term`, `add_custom_term` and `remove_key`.
    """

    def __init__(self, terms: Optional[Dict[str, data.Term]] = None):
        """Initializes the TermRegistry.

        Parameters
        ----------
        terms : dict[str, soundevent.data.Term], optional
            An optional dictionary of initial key-to-Term mappings to
            populate the registry with. The mapping is copied, so later
            changes to the registry do not affect the caller's dictionary
            (and vice versa). Defaults to an empty registry.
        """
        # Copy instead of aliasing the argument: the registry owns its
        # storage regardless of whether the input was empty or not.
        self._terms: Dict[str, data.Term] = dict(terms) if terms else {}

    def __getitem__(self, key: str) -> data.Term:
        return self._terms[key]

    def __len__(self) -> int:
        return len(self._terms)

    def __iter__(self):
        return iter(self._terms)

    def add_term(self, key: str, term: data.Term) -> None:
        """Adds a Term object to the registry with the specified key.

        Parameters
        ----------
        key : str
            The unique string key to associate with the term.
        term : soundevent.data.Term
            The soundevent.data.Term object to register.

        Raises
        ------
        KeyError
            If a term with the provided key already exists in the
            registry.
        """
        if key in self._terms:
            raise KeyError("A term with the provided key already exists.")

        self._terms[key] = term

    def get_term(self, key: str) -> data.Term:
        """Retrieves a registered term by its unique key.

        Parameters
        ----------
        key : str
            The unique string key of the term to retrieve.

        Returns
        -------
        soundevent.data.Term
            The corresponding soundevent.data.Term object.

        Raises
        ------
        KeyError
            If no term with the specified key is found. The message lists
            the keys that are currently available.
        """
        try:
            return self._terms[key]
        except KeyError as err:
            raise KeyError(
                "No term found for key "
                f"'{key}'. Ensure it is registered or loaded. "
                f"Available keys: {', '.join(self.get_keys())}"
            ) from err

    def add_custom_term(
        self,
        key: str,
        name: Optional[str] = None,
        uri: Optional[str] = None,
        label: Optional[str] = None,
        definition: Optional[str] = None,
    ) -> data.Term:
        """Creates a new Term from attributes and adds it to the registry.

        This is useful for defining terms directly in code or when loading
        from configuration files where only attributes are provided.

        If optional fields (`name`, `label`, `definition`) are not provided
        (or are empty), defaults are used: `key` for name/label and
        "Unknown" for definition.

        Parameters
        ----------
        key : str
            The unique string key for the new term.
        name : str, optional
            The name for the new term (defaults to `key`).
        uri : str, optional
            The URI for the new term (optional).
        label : str, optional
            The display label for the new term (defaults to `key`).
        definition : str, optional
            The definition for the new term (defaults to "Unknown").

        Returns
        -------
        soundevent.data.Term
            The newly created and registered soundevent.data.Term object.

        Raises
        ------
        KeyError
            If a term with the provided key already exists.
        """
        term = data.Term(
            name=name or key,
            label=label or key,
            uri=uri,
            definition=definition or "Unknown",
        )
        self.add_term(key, term)
        return term

    def get_keys(self) -> List[str]:
        """Returns a list of all keys currently registered.

        Returns
        -------
        list[str]
            A list of strings representing the keys of all registered
            terms.
        """
        return list(self._terms.keys())

    def get_terms(self) -> List[data.Term]:
        """Returns a list of all registered terms.

        Returns
        -------
        list[soundevent.data.Term]
            A list containing all registered Term objects.
        """
        return list(self._terms.values())

    def remove_key(self, key: str) -> None:
        """Removes the term registered under the given key.

        Parameters
        ----------
        key : str
            The key of the term to remove from the registry.

        Raises
        ------
        KeyError
            If no term is registered under `key`.
        """
        del self._terms[key]


term_registry = TermRegistry(
    terms={
        # Every `Term` instance exposed as a member of `soundevent.terms`,
        # keyed by its member name. The entries below are applied afterwards
        # and therefore take precedence on a key collision.
        **dict(getmembers(terms, lambda x: isinstance(x, data.Term))),
        "event": call_type,
        "individual": individual,
        "data_source": data_source,
        GENERIC_CLASS_KEY: generic_class,
    }
)
"""The default, globally accessible TermRegistry instance.

It is pre-populated with standard terms from `soundevent.terms` and common
terms defined in this module (`call_type` under the key "event",
`individual`, `data_source` and `generic_class`). Functions in this module
use this registry by default unless another instance is explicitly passed.
"""


def get_term_from_key(
    key: str,
    term_registry: TermRegistry = term_registry,
) -> data.Term:
    """Convenience function to retrieve a term by key from a registry.

    Uses the global default registry unless a specific `term_registry`
    instance is provided.

    Parameters
    ----------
    key : str
        The unique key of the term to retrieve.
    term_registry : TermRegistry, optional
        The TermRegistry instance to search in. Defaults to the global
        `term_registry`.

    Returns
    -------
    soundevent.data.Term
        The corresponding soundevent.data.Term object.

    Raises
    ------
    KeyError
        If the key is not found in the specified registry.
    """
    return term_registry.get_term(key)


def get_term_keys(term_registry: TermRegistry = term_registry) -> List[str]:
    """Convenience function to get all registered keys from a registry.

    Uses the global default registry unless a specific `term_registry`
    instance is provided.

    Parameters
    ----------
    term_registry : TermRegistry, optional
        The TermRegistry instance to query. Defaults to the global
        `term_registry`.

    Returns
    -------
    list[str]
        A list of strings representing the keys of all registered terms.
    """
    return term_registry.get_keys()


def get_terms(term_registry: TermRegistry = term_registry) -> List[data.Term]:
    """Convenience function to get all registered terms from a registry.

    Uses the global default registry unless a specific `term_registry`
    instance is provided.

    Parameters
    ----------
    term_registry : TermRegistry, optional
        The TermRegistry instance to query. Defaults to the global
        `term_registry`.

    Returns
    -------
    list[soundevent.data.Term]
        A list containing all registered Term objects.
    """
    return term_registry.get_terms()


class TagInfo(BaseModel):
    """Represents information needed to define a specific Tag.

    This model is typically used in configuration files (e.g., YAML) to
    specify tags used for filtering, target class definition, or associating
    tags with output classes. It links a tag value to a term definition
    via the term's registry key.

    Attributes
    ----------
    value : str
        The value of the tag (e.g., "Myotis myotis", "Echolocation").
    key : str, default="class"
        The key (alias) of the term associated with this tag, as
        registered in the TermRegistry. Defaults to "class", implying
        it represents a classification target label by default.
    """

    value: str
    key: str = GENERIC_CLASS_KEY


def get_tag_from_info(
    tag_info: TagInfo,
    term_registry: TermRegistry = term_registry,
) -> data.Tag:
    """Creates a soundevent.data.Tag object from TagInfo data.

    Looks up the term using the key in the provided `tag_info` from the
    specified registry and constructs a Tag object.

    Parameters
    ----------
    tag_info : TagInfo
        The TagInfo object containing the value and term key.
    term_registry : TermRegistry, optional
        The TermRegistry instance to use for term lookup. Defaults to the
        global `term_registry`.

    Returns
    -------
    soundevent.data.Tag
        A soundevent.data.Tag object corresponding to the input info.

    Raises
    ------
    KeyError
        If the term key specified in `tag_info.key` is not found in the
        registry.
    """
    term = get_term_from_key(tag_info.key, term_registry=term_registry)
    return data.Tag(term=term, value=tag_info.value)


class TermInfo(BaseModel):
    """Represents the definition of a Term within a configuration file.

    This model allows users to define custom terms directly in configuration
    files (e.g., YAML) which can then be loaded into the TermRegistry.
    It mirrors the parameters of `TermRegistry.add_custom_term`.

    Attributes
    ----------
    key : str
        The unique key (alias) that will be used to register and reference
        this term.
    label : str, optional
        The optional display label for the term. Defaults to `key`
        if not provided during registration.
    name : str, optional
        The optional formal name for the term. Defaults to `key`
        if not provided during registration.
    uri : str, optional
        The optional URI identifying the term (e.g., from a standard
        vocabulary).
    definition : str, optional
        The optional textual definition of the term. Defaults to "Unknown"
        if not provided during registration.
    """

    key: str
    label: Optional[str] = None
    name: Optional[str] = None
    uri: Optional[str] = None
    definition: Optional[str] = None


class TermConfig(BaseModel):
    """Pydantic schema for loading a list of term definitions from config.

    This model typically corresponds to a section in a configuration file
    (e.g., YAML) containing a list of term definitions to be registered.

    Attributes
    ----------
    terms : list[TermInfo]
        A list of TermInfo objects, each defining a term to be registered.
        Defaults to an empty list.

    Examples
    --------
    Example YAML structure:

    ```yaml
    terms:
      - key: species
        uri: dwc:scientificName
        label: Scientific Name
      - key: my_custom_term
        name: My Custom Term
        definition: Describes a specific project attribute.
      # ... more TermInfo definitions
    ```
    """

    terms: List[TermInfo] = Field(default_factory=list)


def load_terms_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
    term_registry: TermRegistry = term_registry,
) -> Dict[str, data.Term]:
    """Loads term definitions from a configuration file and registers them.

    Parses a configuration file (e.g., YAML) using the TermConfig schema,
    extracts the list of TermInfo definitions, and adds each one as a
    custom term to the specified TermRegistry instance.

    All keys are checked before anything is registered, so a key conflict
    leaves the registry untouched rather than partially updated.

    Parameters
    ----------
    path : data.PathLike
        The path to the configuration file.
    field : str, optional
        Optional key indicating a specific section within the config file
        where the 'terms' list is located. If None, expects the list
        directly at the top level or within a structure matching
        TermConfig schema.
    term_registry : TermRegistry, optional
        The TermRegistry instance to add the loaded terms to. Defaults to
        the global `term_registry`.

    Returns
    -------
    dict[str, soundevent.data.Term]
        A dictionary mapping the keys of the newly added terms to their
        corresponding Term objects.

    Raises
    ------
    FileNotFoundError
        If the config file path does not exist.
    pydantic.ValidationError
        If the config file structure does not match the TermConfig schema.
    KeyError
        If a term key loaded from the config conflicts with a key already
        present in the registry, or appears more than once in the config.
    """
    # Named `config` (not `data`) so the imported `soundevent.data` module
    # is not shadowed inside this function.
    config = load_config(path, schema=TermConfig, field=field)

    # Validate every key up front: registering one by one would leave the
    # earlier terms in the registry if a later key turned out to conflict.
    pending_keys = set()
    for info in config.terms:
        if info.key in term_registry or info.key in pending_keys:
            raise KeyError("A term with the provided key already exists.")
        pending_keys.add(info.key)

    return {
        info.key: term_registry.add_custom_term(
            info.key,
            name=info.name,
            uri=info.uri,
            label=info.label,
            definition=info.definition,
        )
        for info in config.terms
    }


def register_term(
    key: str, term: data.Term, registry: TermRegistry = term_registry
) -> None:
    """Registers an existing Term object under a key in a registry.

    Thin convenience wrapper around `TermRegistry.add_term`.

    Parameters
    ----------
    key : str
        The unique string key to associate with the term.
    term : soundevent.data.Term
        The soundevent.data.Term object to register.
    registry : TermRegistry, optional
        The TermRegistry instance to add the term to. Defaults to the
        global `term_registry`.

    Raises
    ------
    KeyError
        If a term with the provided key already exists in the registry.
    """
    registry.add_term(key, term)