diff --git a/Makefile b/Makefile index 539d992..cabe4e1 100644 --- a/Makefile +++ b/Makefile @@ -98,6 +98,8 @@ example-preprocess: --base-dir . \ --dataset-field datasets.train \ --config config.yaml \ + --force \ + -vv \ config.yaml example_data/preprocessed example-train: diff --git a/src/batdetect2/cli/preprocess.py b/src/batdetect2/cli/preprocess.py index 15a7681..0ab537c 100644 --- a/src/batdetect2/cli/preprocess.py +++ b/src/batdetect2/cli/preprocess.py @@ -1,7 +1,9 @@ +import sys from pathlib import Path from typing import Optional import click +import yaml from loguru import logger from batdetect2.cli.base import cli @@ -83,6 +85,12 @@ __all__ = ["preprocess"] "the program will use all available cores." ), ) +@click.option( + "-v", + "--verbose", + count=True, + help="Increase verbosity. -v for INFO, -vv for DEBUG.", +) def preprocess( dataset_config: Path, output: Path, @@ -92,7 +100,17 @@ def preprocess( force: bool = False, num_workers: Optional[int] = None, dataset_field: Optional[str] = None, + verbose: int = 0, ): + logger.remove() + if verbose == 0: + log_level = "WARNING" + elif verbose == 1: + log_level = "INFO" + else: + log_level = "DEBUG" + logger.add(sys.stderr, level=log_level) + logger.info("Starting preprocessing.") output = Path(output) @@ -101,11 +119,20 @@ def preprocess( base_dir = base_dir or Path.cwd() logger.debug("Current working directory: {base_dir}", base_dir=base_dir) + if config: + logger.info( + "Loading preprocessing config from: {config}", config=config + ) + conf = ( load_train_preprocessing_config(config, field=config_field) if config is not None else TrainPreprocessConfig() ) + logger.debug( + "Preprocessing config:\n{conf}", + conf=yaml.dump(conf.model_dump()), + ) dataset = load_dataset_from_config( dataset_config, diff --git a/src/batdetect2/configs.py b/src/batdetect2/configs.py index 43764f5..e4908bf 100644 --- a/src/batdetect2/configs.py +++ b/src/batdetect2/configs.py @@ -38,7 +38,7 @@ class BaseConfig(BaseModel): Pydantic model configuration dictionary. Set to forbid extra fields. """ - model_config = ConfigDict(extra="allow") + model_config = ConfigDict(extra="ignore") T = TypeVar("T", bound=BaseModel) diff --git a/src/batdetect2/train/preprocess.py b/src/batdetect2/train/preprocess.py index 11c9972..a6f4f92 100644 --- a/src/batdetect2/train/preprocess.py +++ b/src/batdetect2/train/preprocess.py @@ -256,8 +256,17 @@ def preprocess_annotations( output_dir = Path(output_dir) if not output_dir.is_dir(): + logger.info( + "Creating output directory: {output_dir}", output_dir=output_dir + ) output_dir.mkdir(parents=True) + logger.info( + "Starting preprocessing of {num_annotations} annotations with {max_workers} workers.", + num_annotations=len(clip_annotations), + max_workers=max_workers or "all available", + ) + with Pool(max_workers) as pool: list( tqdm( @@ -273,8 +282,10 @@ def preprocess_annotations( clip_annotations, ), total=len(clip_annotations), + desc="Preprocessing annotations", ) ) + logger.info("Finished preprocessing.") def preprocess_single_annotation( @@ -313,11 +324,15 @@ def preprocess_single_annotation( path = output_dir / filename if path.is_file() and not replace: + logger.debug("Skipping existing file: {path}", path=path) return if path.is_file() and replace: + logger.debug("Removing existing file: {path}", path=path) path.unlink() + logger.debug("Processing annotation {uuid}", uuid=clip_annotation.uuid) + try: sample = generate_train_example( clip_annotation, @@ -326,8 +341,9 @@ def preprocess_single_annotation( ) except Exception as error: logger.error( - "Failed to process annotation: {uuid}. Error {error}", + "Failed to process annotation {uuid} to {path}. Error: {error}", uuid=clip_annotation.uuid, + path=path, error=error, ) return