mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 22:51:58 +02:00
Starting to add logging to preprocess
This commit is contained in:
parent
7dd35d6e3e
commit
8a6ed3dec7
@ -2,6 +2,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
from loguru import logger
|
||||
|
||||
from batdetect2.cli.base import cli
|
||||
from batdetect2.data import load_dataset_from_config
|
||||
@ -123,7 +124,7 @@ def train(): ...
|
||||
def preprocess(
|
||||
dataset_config: Path,
|
||||
output: Path,
|
||||
target_config: Path,
|
||||
target_config: Optional[Path] = None,
|
||||
base_dir: Optional[Path] = None,
|
||||
preprocess_config: Optional[Path] = None,
|
||||
label_config: Optional[Path] = None,
|
||||
@ -134,8 +135,13 @@ def preprocess(
|
||||
label_config_field: Optional[str] = None,
|
||||
dataset_field: Optional[str] = None,
|
||||
):
|
||||
logger.info("Starting preprocessing.")
|
||||
|
||||
output = Path(output)
|
||||
logger.info("Will save outputs to {output}", output=output)
|
||||
|
||||
base_dir = base_dir or Path.cwd()
|
||||
logger.debug("Current working directory: {base_dir}", base_dir=base_dir)
|
||||
|
||||
preprocess = (
|
||||
load_preprocessing_config(
|
||||
@ -146,10 +152,14 @@ def preprocess(
|
||||
else None
|
||||
)
|
||||
|
||||
target = load_target_config(
|
||||
target = (
|
||||
load_target_config(
|
||||
target_config,
|
||||
field=target_config_field,
|
||||
)
|
||||
if target_config
|
||||
else None
|
||||
)
|
||||
|
||||
label = (
|
||||
load_label_config(
|
||||
@ -166,13 +176,20 @@ def preprocess(
|
||||
base_dir=base_dir,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Loaded {num_examples} annotated clips from the configured dataset",
|
||||
num_examples=len(dataset),
|
||||
)
|
||||
|
||||
targets = build_targets(config=target)
|
||||
preprocessor = build_preprocessor(config=preprocess)
|
||||
labeller = build_clip_labeler(targets, config=label)
|
||||
|
||||
if not output.exists():
|
||||
logger.debug("Creating directory {directory}", directory=output)
|
||||
output.mkdir(parents=True)
|
||||
|
||||
logger.info("Will start preprocessing")
|
||||
preprocess_annotations(
|
||||
dataset,
|
||||
output_dir=output,
|
||||
|
@ -29,6 +29,7 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import Field, ValidationError
|
||||
from soundevent import data
|
||||
|
||||
@ -177,6 +178,11 @@ def load_batdetect2_files_annotated_dataset(
|
||||
path = base_dir / path
|
||||
|
||||
paths = list_file_annotations(path)
|
||||
logger.debug(
|
||||
"Found {num_files} files in the annotations directory {path}",
|
||||
num_files=len(paths),
|
||||
path=path,
|
||||
)
|
||||
|
||||
annotations = []
|
||||
|
||||
@ -184,6 +190,7 @@ def load_batdetect2_files_annotated_dataset(
|
||||
try:
|
||||
file_annotation = load_file_annotation(p)
|
||||
except (FileNotFoundError, ValidationError):
|
||||
logger.warning("Could not load annotations in file {path}", path=p)
|
||||
continue
|
||||
|
||||
if (
|
||||
@ -191,6 +198,10 @@ def load_batdetect2_files_annotated_dataset(
|
||||
and dataset.filter.only_annotated
|
||||
and not file_annotation.annotated
|
||||
):
|
||||
logger.debug(
|
||||
"Annotation in file {path} omited: not annotated",
|
||||
path=p,
|
||||
)
|
||||
continue
|
||||
|
||||
if (
|
||||
@ -198,6 +209,10 @@ def load_batdetect2_files_annotated_dataset(
|
||||
and dataset.filter.exclude_issues
|
||||
and file_annotation.issues
|
||||
):
|
||||
logger.debug(
|
||||
"Annotation in file {path} omited: has issues",
|
||||
path=p,
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
@ -205,7 +220,12 @@ def load_batdetect2_files_annotated_dataset(
|
||||
file_annotation,
|
||||
audio_dir=audio_dir,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
except FileNotFoundError as err:
|
||||
logger.warning(
|
||||
"Did not find the audio related to the annotation file {path}. Error: {err}",
|
||||
path=p,
|
||||
err=err,
|
||||
)
|
||||
continue
|
||||
|
||||
annotations.append(
|
||||
|
@ -21,6 +21,7 @@ The core components are:
|
||||
from pathlib import Path
|
||||
from typing import Annotated, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import Field
|
||||
from soundevent import data, io
|
||||
|
||||
@ -115,6 +116,11 @@ def load_dataset(
|
||||
clip_annotations = []
|
||||
for source in dataset.sources:
|
||||
annotated_source = load_annotated_dataset(source, base_dir=base_dir)
|
||||
logger.debug(
|
||||
"Loaded {num_examples} from dataset source '{source_name}'",
|
||||
num_examples=len(annotated_source.clip_annotations),
|
||||
source_name=source.name,
|
||||
)
|
||||
clip_annotations.extend(
|
||||
insert_source_tag(clip_annotation, source)
|
||||
for clip_annotation in annotated_source.clip_annotations
|
||||
|
Loading…
Reference in New Issue
Block a user