From ff00da9a9aaa9c67354fac6227aa3114dcd91d77 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Thu, 3 Apr 2025 16:48:50 +0100 Subject: [PATCH] Removed old data module --- batdetect2/compat/data.py | 86 +------------------------- batdetect2/compat/params.py | 2 +- batdetect2/data.py | 119 ------------------------------------ 3 files changed, 3 insertions(+), 204 deletions(-) delete mode 100644 batdetect2/data.py diff --git a/batdetect2/compat/data.py b/batdetect2/compat/data.py index b05179f..141b7a3 100644 --- a/batdetect2/compat/data.py +++ b/batdetect2/compat/data.py @@ -1,6 +1,5 @@ """Compatibility functions between old and new data structures.""" -import json import os import uuid from pathlib import Path @@ -18,7 +17,8 @@ PathLike = Union[Path, str, os.PathLike] __all__ = [ "convert_to_annotation_group", - "load_annotation_project_from_dir", + "load_file_annotation", + "annotation_to_sound_event", ] SPECIES_TAG_KEY = "species" @@ -304,85 +304,3 @@ def list_file_annotations(path: PathLike) -> List[Path]: """List all annotations in a directory.""" path = Path(path) return [file for file in path.glob("*.json")] - - -def load_annotation_project_from_file( - path: PathLike, - name: Optional[str] = None, - audio_dir: Optional[PathLike] = None, -) -> data.AnnotationProject: - old_annotations = json.loads(Path(path).read_text()) - - annotations = [] - tasks = [] - - for ann in old_annotations: - try: - ann = FileAnnotation.model_validate(ann) - except ValueError: - continue - - try: - clip = file_annotation_to_clip(ann, audio_dir=audio_dir) - except FileNotFoundError: - continue - - annotations.append(file_annotation_to_clip_annotation(ann, clip)) - tasks.append(file_annotation_to_annotation_task(ann, clip)) - - return data.AnnotationProject( - name=name or str(path), - clip_annotations=annotations, - tasks=tasks, - ) - - -def load_annotation_project_from_dir( - path: PathLike, - name: Optional[str] = None, - audio_dir: Optional[PathLike] = None, -) -> data.AnnotationProject: - """Convert annotations to annotation project.""" - audio_dir = audio_dir or Path.cwd() - - paths = list_file_annotations(path) - - if name is None: - name = str(path) - - annotations = [] - tasks = [] - - for p in paths: - try: - file_annotation = load_file_annotation(p) - except FileNotFoundError: - continue - - try: - clip = file_annotation_to_clip( - file_annotation, - audio_dir=audio_dir, - ) - except FileNotFoundError: - continue - - annotations.append( - file_annotation_to_clip_annotation( - file_annotation, - clip, - ) - ) - - tasks.append( - file_annotation_to_annotation_task( - file_annotation, - clip, - ) - ) - - return data.AnnotationProject( - name=name, - clip_annotations=annotations, - tasks=tasks, - ) diff --git a/batdetect2/compat/params.py b/batdetect2/compat/params.py index 0910d09..d19710f 100644 --- a/batdetect2/compat/params.py +++ b/batdetect2/compat/params.py @@ -1,7 +1,6 @@ from batdetect2.preprocess import ( AmplitudeScaleConfig, AudioConfig, - STFTConfig, FrequencyConfig, LogScaleConfig, PcenScaleConfig, @@ -10,6 +9,7 @@ from batdetect2.preprocess import ( Scales, SpecSizeConfig, SpectrogramConfig, + STFTConfig, ) from batdetect2.preprocess.spectrogram import get_spectrogram_resolution from batdetect2.terms import TagInfo diff --git a/batdetect2/data.py b/batdetect2/data.py deleted file mode 100644 index 8d63602..0000000 --- a/batdetect2/data.py +++ /dev/null @@ -1,119 +0,0 @@ -from pathlib import Path -from typing import List, Literal, Optional, Tuple, Union - -from pydantic import Field -from soundevent import data, io - -from batdetect2.compat.data import ( - load_annotation_project_from_dir, - load_annotation_project_from_file, -) -from batdetect2.configs import BaseConfig, load_config - -__all__ = [ - "load_datasets_from_config", -] - - -class BatDetect2AnnotationFiles(BaseConfig): - format: Literal["batdetect2"] = "batdetect2" - path: Path - - -class BatDetect2AnnotationFile(BaseConfig): - format: Literal["batdetect2_file"] = "batdetect2_file" - path: Path - - -class AOEFAnnotationFile(BaseConfig): - format: Literal["aoef"] = "aoef" - path: Path - - -AnnotationFormats = Union[ - BatDetect2AnnotationFiles, - BatDetect2AnnotationFile, - AOEFAnnotationFile, -] - - -class DatasetInfo(BaseConfig): - name: str - audio_dir: Path - annotations: AnnotationFormats = Field(discriminator="format") - - -class DatasetsConfig(BaseConfig): - train: List[DatasetInfo] = Field(default_factory=list) - test: List[DatasetInfo] = Field(default_factory=list) - - -def load_dataset( - info: DatasetInfo, - audio_dir: Optional[Path] = None, - base_dir: Optional[Path] = None, -) -> data.AnnotationProject: - audio_dir = ( - info.audio_dir if base_dir is None else base_dir / info.audio_dir - ) - - path = ( - info.annotations.path - if base_dir is None - else base_dir / info.annotations.path - ) - - if info.annotations.format == "batdetect2": - return load_annotation_project_from_dir( - path, - name=info.name, - audio_dir=audio_dir, - ) - - if info.annotations.format == "batdetect2_file": - return load_annotation_project_from_file( - path, - name=info.name, - audio_dir=audio_dir, - ) - - if info.annotations.format == "aoef": - return io.load( # type: ignore - info.annotations.path, - audio_dir=audio_dir, - ) - - raise NotImplementedError( - f"Unknown annotation format: {info.annotations.name}" - ) - - -def load_datasets( - config: DatasetsConfig, - base_dir: Optional[Path] = None, -) -> Tuple[List[data.ClipAnnotation], List[data.ClipAnnotation]]: - test_annotations = [] - train_annotations = [] - - for dataset in config.train: - project = load_dataset(dataset, base_dir=base_dir) - train_annotations.extend(project.clip_annotations) - - for dataset in config.test: - project = load_dataset(dataset, base_dir=base_dir) - test_annotations.extend(project.clip_annotations) - - return train_annotations, test_annotations - - -def load_datasets_from_config( - path: data.PathLike, - field: Optional[str] = None, - base_dir: Optional[Path] = None, -): - config = load_config( - path=path, - schema=DatasetsConfig, - field=field, - ) - return load_datasets(config, base_dir=base_dir)