Removed old data module

This commit is contained in:
mbsantiago 2025-04-03 16:48:50 +01:00
parent 22cf47ed39
commit ff00da9a9a
3 changed files with 3 additions and 204 deletions

View File

@ -1,6 +1,5 @@
"""Compatibility functions between old and new data structures."""
import json
import os
import uuid
from pathlib import Path
@ -18,7 +17,8 @@ PathLike = Union[Path, str, os.PathLike]
__all__ = [
"convert_to_annotation_group",
"load_annotation_project_from_dir",
"load_file_annotation",
"annotation_to_sound_event",
]
SPECIES_TAG_KEY = "species"
@ -304,85 +304,3 @@ def list_file_annotations(path: PathLike) -> List[Path]:
"""List all annotations in a directory."""
path = Path(path)
return [file for file in path.glob("*.json")]
def load_annotation_project_from_file(
    path: PathLike,
    name: Optional[str] = None,
    audio_dir: Optional[PathLike] = None,
) -> data.AnnotationProject:
    """Build an annotation project from a single JSON file of old annotations.

    The file is parsed as JSON and iterated entry by entry. Each entry is
    validated as a ``FileAnnotation`` and converted into a clip, a clip
    annotation and an annotation task.

    Parameters
    ----------
    path
        Location of the JSON file holding the old-format annotations.
    name
        Name given to the resulting project. When falsy, ``str(path)`` is
        used instead.
    audio_dir
        Forwarded unchanged to ``file_annotation_to_clip``.

    Returns
    -------
    data.AnnotationProject
        Project containing one clip annotation and one task per entry that
        could be converted.

    Notes
    -----
    Entries are skipped silently (best effort) when validation raises
    ``ValueError`` or when building the clip raises ``FileNotFoundError``.
    """
    raw_entries = json.loads(Path(path).read_text())

    clip_annotations = []
    annotation_tasks = []

    for raw_entry in raw_entries:
        # Entries that do not match the FileAnnotation schema are dropped.
        try:
            record = FileAnnotation.model_validate(raw_entry)
        except ValueError:
            continue

        # Entries whose clip cannot be built (file not found) are dropped.
        try:
            clip = file_annotation_to_clip(record, audio_dir=audio_dir)
        except FileNotFoundError:
            continue

        clip_annotations.append(
            file_annotation_to_clip_annotation(record, clip)
        )
        annotation_tasks.append(
            file_annotation_to_annotation_task(record, clip)
        )

    project_name = name if name else str(path)
    return data.AnnotationProject(
        name=project_name,
        clip_annotations=clip_annotations,
        tasks=annotation_tasks,
    )
def load_annotation_project_from_dir(
    path: PathLike,
    name: Optional[str] = None,
    audio_dir: Optional[PathLike] = None,
) -> data.AnnotationProject:
    """Build an annotation project from a directory of annotation files.

    Every path returned by ``list_file_annotations(path)`` is loaded with
    ``load_file_annotation`` and converted into a clip, a clip annotation
    and an annotation task.

    Parameters
    ----------
    path
        Directory that is searched for annotation files.
    name
        Name given to the resulting project. Defaults to ``str(path)`` when
        ``None``.
    audio_dir
        Forwarded to ``file_annotation_to_clip``. When falsy, the current
        working directory is used.

    Returns
    -------
    data.AnnotationProject
        Project containing one clip annotation and one task per file that
        could be converted.

    Notes
    -----
    A file is skipped silently (best effort) when either loading it or
    building its clip raises ``FileNotFoundError``.
    """
    if not audio_dir:
        audio_dir = Path.cwd()

    annotation_paths = list_file_annotations(path)
    project_name = str(path) if name is None else name

    clip_annotations = []
    annotation_tasks = []

    for annotation_path in annotation_paths:
        try:
            record = load_file_annotation(annotation_path)
        except FileNotFoundError:
            continue

        try:
            clip = file_annotation_to_clip(record, audio_dir=audio_dir)
        except FileNotFoundError:
            continue

        clip_annotations.append(
            file_annotation_to_clip_annotation(record, clip)
        )
        annotation_tasks.append(
            file_annotation_to_annotation_task(record, clip)
        )

    return data.AnnotationProject(
        name=project_name,
        clip_annotations=clip_annotations,
        tasks=annotation_tasks,
    )

View File

@ -1,7 +1,6 @@
from batdetect2.preprocess import (
AmplitudeScaleConfig,
AudioConfig,
STFTConfig,
FrequencyConfig,
LogScaleConfig,
PcenScaleConfig,
@ -10,6 +9,7 @@ from batdetect2.preprocess import (
Scales,
SpecSizeConfig,
SpectrogramConfig,
STFTConfig,
)
from batdetect2.preprocess.spectrogram import get_spectrogram_resolution
from batdetect2.terms import TagInfo

View File

@ -1,119 +0,0 @@
from pathlib import Path
from typing import List, Literal, Optional, Tuple, Union
from pydantic import Field
from soundevent import data, io
from batdetect2.compat.data import (
load_annotation_project_from_dir,
load_annotation_project_from_file,
)
from batdetect2.configs import BaseConfig, load_config
# Names exported by `from <module> import *`; only the config-driven loader
# is listed as public.
__all__ = [
"load_datasets_from_config",
]
# Annotation source tagged with format "batdetect2".
# `load_dataset` hands `path` to `load_annotation_project_from_dir`, i.e. it
# is treated as a directory of annotation files.
class BatDetect2AnnotationFiles(BaseConfig):
# Discriminator value identifying this annotation source.
format: Literal["batdetect2"] = "batdetect2"
# Location of the annotations (joined onto `base_dir` by `load_dataset`).
path: Path
# Annotation source tagged with format "batdetect2_file".
# `load_dataset` hands `path` to `load_annotation_project_from_file`, i.e. it
# is treated as a single annotation file.
class BatDetect2AnnotationFile(BaseConfig):
# Discriminator value identifying this annotation source.
format: Literal["batdetect2_file"] = "batdetect2_file"
# Location of the annotation file (joined onto `base_dir` by `load_dataset`).
path: Path
# Annotation source tagged with format "aoef".
# `load_dataset` loads this source through `soundevent.io.load`.
class AOEFAnnotationFile(BaseConfig):
# Discriminator value identifying this annotation source.
format: Literal["aoef"] = "aoef"
# Location of the annotation file.
path: Path
# Union of every supported annotation source config. `DatasetInfo` uses it
# with `Field(discriminator="format")`, so the `format` literal of each
# member selects which model is used.
AnnotationFormats = Union[
BatDetect2AnnotationFiles,
BatDetect2AnnotationFile,
AOEFAnnotationFile,
]
# Description of one dataset: a name, its audio directory and where its
# annotations come from.
class DatasetInfo(BaseConfig):
# Passed as the project name for the "batdetect2" and "batdetect2_file" formats.
name: str
# Audio directory; `load_dataset` joins it onto `base_dir` when one is given.
audio_dir: Path
# Annotation source; the concrete model is chosen by its `format` field.
annotations: AnnotationFormats = Field(discriminator="format")
# Top-level datasets configuration: two lists of dataset descriptions, both
# defaulting to an empty list.
class DatasetsConfig(BaseConfig):
# Datasets whose clip annotations form the first list returned by `load_datasets`.
train: List[DatasetInfo] = Field(default_factory=list)
# Datasets whose clip annotations form the second list returned by `load_datasets`.
test: List[DatasetInfo] = Field(default_factory=list)
def load_dataset(
    info: DatasetInfo,
    audio_dir: Optional[Path] = None,
    base_dir: Optional[Path] = None,
) -> data.AnnotationProject:
    """Load the annotation project described by a dataset entry.

    Parameters
    ----------
    info
        Dataset description providing the project name, the audio directory
        and the annotation source (``format`` and ``path``).
    audio_dir
        Explicit audio directory. When given it is used as-is and takes
        precedence over ``info.audio_dir``. Previously this argument was
        silently overwritten and had no effect.
    base_dir
        Optional directory that ``info.audio_dir`` and
        ``info.annotations.path`` are joined onto. When ``None`` both are
        used unchanged.

    Returns
    -------
    data.AnnotationProject
        The object returned by the loader matching
        ``info.annotations.format``.

    Raises
    ------
    NotImplementedError
        If ``info.annotations.format`` is not one of ``"batdetect2"``,
        ``"batdetect2_file"`` or ``"aoef"``.
    """
    # Honour an explicitly supplied audio directory; otherwise derive it
    # from the dataset description.
    if audio_dir is None:
        audio_dir = (
            info.audio_dir if base_dir is None else base_dir / info.audio_dir
        )

    annotations = info.annotations
    path = (
        annotations.path if base_dir is None else base_dir / annotations.path
    )

    if annotations.format == "batdetect2":
        return load_annotation_project_from_dir(
            path,
            name=info.name,
            audio_dir=audio_dir,
        )

    if annotations.format == "batdetect2_file":
        return load_annotation_project_from_file(
            path,
            name=info.name,
            audio_dir=audio_dir,
        )

    if annotations.format == "aoef":
        # Use the resolved path so that base_dir applies to this format as
        # it does to the other two.
        return io.load(  # type: ignore
            path,
            audio_dir=audio_dir,
        )

    # The format models define `format`, not `name`; reading `name` here
    # raised AttributeError instead of the intended error.
    raise NotImplementedError(
        f"Unknown annotation format: {annotations.format}"
    )
def load_datasets(
    config: DatasetsConfig,
    base_dir: Optional[Path] = None,
) -> Tuple[List[data.ClipAnnotation], List[data.ClipAnnotation]]:
    """Load and flatten the clip annotations of all configured datasets.

    Parameters
    ----------
    config
        Configuration listing the ``train`` and ``test`` datasets.
    base_dir
        Forwarded to ``load_dataset`` for every dataset.

    Returns
    -------
    tuple of two lists
        ``(train_annotations, test_annotations)``: the clip annotations of
        all train datasets followed by those of all test datasets, each
        concatenated in configuration order.
    """

    def _collect(infos: List[DatasetInfo]) -> List[data.ClipAnnotation]:
        # Concatenate the clip annotations of each dataset, in order.
        return [
            clip_annotation
            for dataset_info in infos
            for clip_annotation in load_dataset(
                dataset_info,
                base_dir=base_dir,
            ).clip_annotations
        ]

    # Train datasets are loaded before test datasets, as in the config.
    train_annotations = _collect(config.train)
    test_annotations = _collect(config.test)
    return train_annotations, test_annotations
def load_datasets_from_config(
    path: data.PathLike,
    field: Optional[str] = None,
    base_dir: Optional[Path] = None,
):
    """Read a datasets configuration and load the datasets it lists.

    Parameters
    ----------
    path
        Location of the configuration, passed to ``load_config``.
    field
        Passed to ``load_config`` unchanged.
    base_dir
        Passed to ``load_datasets`` unchanged.

    Returns
    -------
    The value returned by ``load_datasets`` for the parsed configuration.
    """
    datasets_config = load_config(path=path, schema=DatasetsConfig, field=field)
    return load_datasets(datasets_config, base_dir=base_dir)