From 8a6ed3dec7e39bdfc7c1c32659f0dfb18f91149a Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Thu, 24 Apr 2025 00:20:30 +0100 Subject: [PATCH] Starting to add logging to preprocess --- batdetect2/cli/train.py | 25 +++++++++++++++++++---- batdetect2/data/annotations/batdetect2.py | 22 +++++++++++++++++++- batdetect2/data/datasets.py | 6 ++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/batdetect2/cli/train.py b/batdetect2/cli/train.py index a55ea0d..2c7ebd3 100644 --- a/batdetect2/cli/train.py +++ b/batdetect2/cli/train.py @@ -2,6 +2,7 @@ from pathlib import Path from typing import Optional import click +from loguru import logger from batdetect2.cli.base import cli from batdetect2.data import load_dataset_from_config @@ -123,7 +124,7 @@ def train(): ... def preprocess( dataset_config: Path, output: Path, - target_config: Path, + target_config: Optional[Path] = None, base_dir: Optional[Path] = None, preprocess_config: Optional[Path] = None, label_config: Optional[Path] = None, @@ -134,8 +135,13 @@ def preprocess( label_config_field: Optional[str] = None, dataset_field: Optional[str] = None, ): + logger.info("Starting preprocessing.") + output = Path(output) + logger.info("Will save outputs to {output}", output=output) + base_dir = base_dir or Path.cwd() + logger.debug("Current working directory: {base_dir}", base_dir=base_dir) preprocess = ( load_preprocessing_config( @@ -146,9 +152,13 @@ def preprocess( else None ) - target = load_target_config( - target_config, - field=target_config_field, + target = ( + load_target_config( + target_config, + field=target_config_field, + ) + if target_config + else None ) label = ( @@ -166,13 +176,20 @@ def preprocess( base_dir=base_dir, ) + logger.info( + "Loaded {num_examples} annotated clips from the configured dataset", + num_examples=len(dataset), + ) + targets = build_targets(config=target) preprocessor = build_preprocessor(config=preprocess) labeller = build_clip_labeler(targets, config=label) if not output.exists(): + logger.debug("Creating directory {directory}", directory=output) output.mkdir(parents=True) + logger.info("Will start preprocessing") preprocess_annotations( dataset, output_dir=output, diff --git a/batdetect2/data/annotations/batdetect2.py b/batdetect2/data/annotations/batdetect2.py index 055d84b..fe5697a 100644 --- a/batdetect2/data/annotations/batdetect2.py +++ b/batdetect2/data/annotations/batdetect2.py @@ -29,6 +29,7 @@ import os from pathlib import Path from typing import Literal, Optional, Union +from loguru import logger from pydantic import Field, ValidationError from soundevent import data @@ -177,6 +178,11 @@ def load_batdetect2_files_annotated_dataset( path = base_dir / path paths = list_file_annotations(path) + logger.debug( + "Found {num_files} files in the annotations directory {path}", + num_files=len(paths), + path=path, + ) annotations = [] @@ -184,6 +190,7 @@ def load_batdetect2_files_annotated_dataset( try: file_annotation = load_file_annotation(p) except (FileNotFoundError, ValidationError): + logger.warning("Could not load annotations in file {path}", path=p) continue if ( @@ -191,6 +198,10 @@ def load_batdetect2_files_annotated_dataset( and dataset.filter.only_annotated and not file_annotation.annotated ): + logger.debug( + "Annotation in file {path} omited: not annotated", + path=p, + ) continue if ( @@ -198,6 +209,10 @@ def load_batdetect2_files_annotated_dataset( and dataset.filter.exclude_issues and file_annotation.issues ): + logger.debug( + "Annotation in file {path} omited: has issues", + path=p, + ) continue try: @@ -205,7 +220,12 @@ def load_batdetect2_files_annotated_dataset( file_annotation, audio_dir=audio_dir, ) - except FileNotFoundError: + except FileNotFoundError as err: + logger.warning( + "Did not find the audio related to the annotation file {path}. Error: {err}", + path=p, + err=err, + ) continue annotations.append( diff --git a/batdetect2/data/datasets.py b/batdetect2/data/datasets.py index f0e3278..b960305 100644 --- a/batdetect2/data/datasets.py +++ b/batdetect2/data/datasets.py @@ -21,6 +21,7 @@ The core components are: from pathlib import Path from typing import Annotated, List, Optional +from loguru import logger from pydantic import Field from soundevent import data, io @@ -115,6 +116,11 @@ def load_dataset( clip_annotations = [] for source in dataset.sources: annotated_source = load_annotated_dataset(source, base_dir=base_dir) + logger.debug( + "Loaded {num_examples} from dataset source '{source_name}'", + num_examples=len(annotated_source.clip_annotations), + source_name=source.name, + ) clip_annotations.extend( insert_source_tag(clip_annotation, source) for clip_annotation in annotated_source.clip_annotations