Removed old data module

This commit is contained in:
mbsantiago 2025-04-03 16:48:50 +01:00
parent 22cf47ed39
commit ff00da9a9a
3 changed files with 3 additions and 204 deletions

View File

@ -1,6 +1,5 @@
"""Compatibility functions between old and new data structures.""" """Compatibility functions between old and new data structures."""
import json
import os import os
import uuid import uuid
from pathlib import Path from pathlib import Path
@ -18,7 +17,8 @@ PathLike = Union[Path, str, os.PathLike]
__all__ = [ __all__ = [
"convert_to_annotation_group", "convert_to_annotation_group",
"load_annotation_project_from_dir", "load_file_annotation",
"annotation_to_sound_event",
] ]
SPECIES_TAG_KEY = "species" SPECIES_TAG_KEY = "species"
@ -304,85 +304,3 @@ def list_file_annotations(path: PathLike) -> List[Path]:
"""List all annotations in a directory.""" """List all annotations in a directory."""
path = Path(path) path = Path(path)
return [file for file in path.glob("*.json")] return [file for file in path.glob("*.json")]
def load_annotation_project_from_file(
    path: PathLike,
    name: Optional[str] = None,
    audio_dir: Optional[PathLike] = None,
) -> data.AnnotationProject:
    """Build an annotation project from one JSON file of old annotations.

    The file content is parsed as JSON and iterated entry by entry. An entry
    is skipped when it fails ``FileAnnotation`` validation (``ValueError``)
    or when building its clip raises ``FileNotFoundError``.

    Parameters
    ----------
    path : PathLike
        JSON file holding the old-format annotations.
    name : str, optional
        Project name. Falls back to ``str(path)`` when missing or empty.
    audio_dir : PathLike, optional
        Forwarded unchanged to ``file_annotation_to_clip``.

    Returns
    -------
    data.AnnotationProject
        Project holding one clip annotation and one task per kept entry.
    """
    raw_entries = json.loads(Path(path).read_text())

    clip_annotations = []
    annotation_tasks = []
    for raw_entry in raw_entries:
        # Entries that do not match the old schema are dropped.
        try:
            record = FileAnnotation.model_validate(raw_entry)
        except ValueError:
            continue

        # Entries whose clip cannot be built are dropped as well.
        try:
            record_clip = file_annotation_to_clip(record, audio_dir=audio_dir)
        except FileNotFoundError:
            continue

        clip_annotations.append(
            file_annotation_to_clip_annotation(record, record_clip)
        )
        annotation_tasks.append(
            file_annotation_to_annotation_task(record, record_clip)
        )

    return data.AnnotationProject(
        name=name or str(path),
        clip_annotations=clip_annotations,
        tasks=annotation_tasks,
    )
def load_annotation_project_from_dir(
    path: PathLike,
    name: Optional[str] = None,
    audio_dir: Optional[PathLike] = None,
) -> data.AnnotationProject:
    """Build an annotation project from a directory of annotation files.

    Every file returned by ``list_file_annotations(path)`` is loaded and
    converted. A file is skipped when loading it, or building its clip,
    raises ``FileNotFoundError``.

    Parameters
    ----------
    path : PathLike
        Directory containing the annotation files.
    name : str, optional
        Project name. Defaults to ``str(path)`` when ``None``.
    audio_dir : PathLike, optional
        Passed to ``file_annotation_to_clip``. Defaults to the current
        working directory when not given.

    Returns
    -------
    data.AnnotationProject
        Project holding one clip annotation and one task per kept file.
    """
    resolved_audio_dir = audio_dir or Path.cwd()
    annotation_files = list_file_annotations(path)
    project_name = str(path) if name is None else name

    clip_annotations = []
    annotation_tasks = []
    for annotation_file in annotation_files:
        try:
            record = load_file_annotation(annotation_file)
        except FileNotFoundError:
            continue

        try:
            record_clip = file_annotation_to_clip(
                record,
                audio_dir=resolved_audio_dir,
            )
        except FileNotFoundError:
            continue

        clip_annotations.append(
            file_annotation_to_clip_annotation(record, record_clip)
        )
        annotation_tasks.append(
            file_annotation_to_annotation_task(record, record_clip)
        )

    return data.AnnotationProject(
        name=project_name,
        clip_annotations=clip_annotations,
        tasks=annotation_tasks,
    )

View File

@ -1,7 +1,6 @@
from batdetect2.preprocess import ( from batdetect2.preprocess import (
AmplitudeScaleConfig, AmplitudeScaleConfig,
AudioConfig, AudioConfig,
STFTConfig,
FrequencyConfig, FrequencyConfig,
LogScaleConfig, LogScaleConfig,
PcenScaleConfig, PcenScaleConfig,
@ -10,6 +9,7 @@ from batdetect2.preprocess import (
Scales, Scales,
SpecSizeConfig, SpecSizeConfig,
SpectrogramConfig, SpectrogramConfig,
STFTConfig,
) )
from batdetect2.preprocess.spectrogram import get_spectrogram_resolution from batdetect2.preprocess.spectrogram import get_spectrogram_resolution
from batdetect2.terms import TagInfo from batdetect2.terms import TagInfo

View File

@ -1,119 +0,0 @@
from pathlib import Path
from typing import List, Literal, Optional, Tuple, Union
from pydantic import Field
from soundevent import data, io
from batdetect2.compat.data import (
load_annotation_project_from_dir,
load_annotation_project_from_file,
)
from batdetect2.configs import BaseConfig, load_config
__all__ = [
"load_datasets_from_config",
]
class BatDetect2AnnotationFiles(BaseConfig):
    """Annotation source tagged with the ``"batdetect2"`` format.

    ``load_dataset`` hands ``path`` to ``load_annotation_project_from_dir``,
    so it is expected to be a directory of annotation files.
    """

    # Tag that selects this variant among the annotation formats.
    format: Literal["batdetect2"] = "batdetect2"

    # Location of the annotations.
    path: Path
class BatDetect2AnnotationFile(BaseConfig):
    """Annotation source tagged with the ``"batdetect2_file"`` format.

    ``load_dataset`` hands ``path`` to ``load_annotation_project_from_file``,
    so it is expected to be a single annotation file.
    """

    # Tag that selects this variant among the annotation formats.
    format: Literal["batdetect2_file"] = "batdetect2_file"

    # Location of the annotations.
    path: Path
class AOEFAnnotationFile(BaseConfig):
    """Annotation source tagged with the ``"aoef"`` format.

    ``load_dataset`` reads this variant through ``soundevent.io.load``.
    """

    # Tag that selects this variant among the annotation formats.
    format: Literal["aoef"] = "aoef"

    # Location of the annotations.
    path: Path
# Every supported annotation source config. Each member carries a distinct
# ``format`` literal, which is what tells the variants apart.
AnnotationFormats = Union[
    BatDetect2AnnotationFiles,
    BatDetect2AnnotationFile,
    AOEFAnnotationFile,
]
class DatasetInfo(BaseConfig):
    """Description of one dataset: a name, its audio and its annotations."""

    # Name given to the dataset.
    name: str

    # Directory holding the dataset's audio files.
    audio_dir: Path

    # Annotation source; the concrete variant is picked by its ``format``.
    annotations: AnnotationFormats = Field(discriminator="format")
class DatasetsConfig(BaseConfig):
    """Train and test dataset listings; both default to an empty list."""

    # Datasets whose clip annotations go into the training split.
    train: List[DatasetInfo] = Field(default_factory=list)

    # Datasets whose clip annotations go into the test split.
    test: List[DatasetInfo] = Field(default_factory=list)
def load_dataset(
    info: DatasetInfo,
    audio_dir: Optional[Path] = None,
    base_dir: Optional[Path] = None,
) -> data.AnnotationProject:
    """Load the annotations described by a single dataset entry.

    Parameters
    ----------
    info : DatasetInfo
        Dataset description: name, audio directory and annotation source.
    audio_dir : Path, optional
        Explicit audio directory, used as given. When omitted,
        ``info.audio_dir`` is used, joined onto ``base_dir`` if one is set.
    base_dir : Path, optional
        Directory that ``info.audio_dir`` and ``info.annotations.path`` are
        joined onto. When omitted both are used as given.

    Returns
    -------
    data.AnnotationProject
        The loaded project. For the ``"aoef"`` format this is whatever
        ``soundevent.io.load`` returns for the file.

    Raises
    ------
    NotImplementedError
        If ``info.annotations.format`` is not one of the handled formats.
    """
    # Honour an explicitly supplied audio directory; previously the argument
    # was overwritten unconditionally and therefore ignored.
    if audio_dir is None:
        audio_dir = (
            info.audio_dir if base_dir is None else base_dir / info.audio_dir
        )

    path = (
        info.annotations.path
        if base_dir is None
        else base_dir / info.annotations.path
    )

    annotation_format = info.annotations.format

    if annotation_format == "batdetect2":
        return load_annotation_project_from_dir(
            path,
            name=info.name,
            audio_dir=audio_dir,
        )

    if annotation_format == "batdetect2_file":
        return load_annotation_project_from_file(
            path,
            name=info.name,
            audio_dir=audio_dir,
        )

    if annotation_format == "aoef":
        # Use the resolved path so ``base_dir`` applies to this format too.
        return io.load(  # type: ignore
            path,
            audio_dir=audio_dir,
        )

    # Annotation configs expose ``format`` (not ``name``); report that.
    raise NotImplementedError(
        f"Unknown annotation format: {annotation_format}"
    )
def load_datasets(
    config: DatasetsConfig,
    base_dir: Optional[Path] = None,
) -> Tuple[List[data.ClipAnnotation], List[data.ClipAnnotation]]:
    """Load every configured dataset and pool its clip annotations by split.

    Parameters
    ----------
    config : DatasetsConfig
        Listing of the train and test datasets.
    base_dir : Path, optional
        Forwarded to ``load_dataset`` for each dataset.

    Returns
    -------
    tuple of two lists
        ``(train_annotations, test_annotations)``, each holding the clip
        annotations of the corresponding datasets in configuration order.
    """

    def _gather(datasets):
        # Concatenate the clip annotations of each dataset, in order.
        gathered = []
        for dataset_info in datasets:
            loaded = load_dataset(dataset_info, base_dir=base_dir)
            gathered.extend(loaded.clip_annotations)
        return gathered

    # Train datasets are loaded first, then test datasets.
    train_annotations = _gather(config.train)
    test_annotations = _gather(config.test)
    return train_annotations, test_annotations
def load_datasets_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
    base_dir: Optional[Path] = None,
):
    """Read a ``DatasetsConfig`` from a config file and load its datasets.

    Parameters
    ----------
    path : data.PathLike
        Config file handed to ``load_config``.
    field : str, optional
        Forwarded to ``load_config`` unchanged.
    base_dir : Path, optional
        Forwarded to ``load_datasets``.

    Returns
    -------
    The ``(train, test)`` result of ``load_datasets`` for the parsed config.
    """
    return load_datasets(
        load_config(
            path=path,
            schema=DatasetsConfig,
            field=field,
        ),
        base_dir=base_dir,
    )