Improve logging of train preprocessing
parent 1384c549f7
commit 22f7d46f46
Makefile | 2 ++
@@ -98,6 +98,8 @@ example-preprocess:
 	--base-dir . \
 	--dataset-field datasets.train \
 	--config config.yaml \
+	--force \
+	-vv \
 	config.yaml example_data/preprocessed
 
 example-train:
@@ -1,7 +1,9 @@
+import sys
 from pathlib import Path
 from typing import Optional
 
 import click
+import yaml
 from loguru import logger
 
 from batdetect2.cli.base import cli
@@ -83,6 +85,12 @@ __all__ = ["preprocess"]
         "the program will use all available cores."
     ),
 )
+@click.option(
+    "-v",
+    "--verbose",
+    count=True,
+    help="Increase verbosity. -v for INFO, -vv for DEBUG.",
+)
 def preprocess(
     dataset_config: Path,
     output: Path,
@@ -92,7 +100,17 @@ def preprocess(
     force: bool = False,
     num_workers: Optional[int] = None,
     dataset_field: Optional[str] = None,
+    verbose: int = 0,
 ):
+    logger.remove()
+    if verbose == 0:
+        log_level = "WARNING"
+    elif verbose == 1:
+        log_level = "INFO"
+    else:
+        log_level = "DEBUG"
+    logger.add(sys.stderr, level=log_level)
+
     logger.info("Starting preprocessing.")
 
     output = Path(output)
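The -v/-vv handling above is the standard count-flag pattern: remove loguru's default sink, then re-add one at the level implied by how many times the flag was passed. A minimal standalone sketch, assuming click and loguru are installed (the demo command and its messages are illustrative, not part of batdetect2):

import sys

import click
from loguru import logger


@click.command()
@click.option("-v", "--verbose", count=True, help="-v for INFO, -vv for DEBUG.")
def demo(verbose: int):
    logger.remove()  # drop loguru's default stderr sink (which shows DEBUG and up)
    level = {0: "WARNING", 1: "INFO"}.get(verbose, "DEBUG")
    logger.add(sys.stderr, level=level)  # re-add a sink at the chosen level
    logger.debug("only shown with -vv or more")
    logger.info("shown with -v")
    logger.warning("always shown")


if __name__ == "__main__":
    demo()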
@@ -101,11 +119,20 @@ def preprocess(
     base_dir = base_dir or Path.cwd()
     logger.debug("Current working directory: {base_dir}", base_dir=base_dir)
 
+    if config:
+        logger.info(
+            "Loading preprocessing config from: {config}", config=config
+        )
+
     conf = (
         load_train_preprocessing_config(config, field=config_field)
         if config is not None
         else TrainPreprocessConfig()
     )
+    logger.debug(
+        "Preprocessing config:\n{conf}",
+        conf=yaml.dump(conf.model_dump()),
+    )
 
     dataset = load_dataset_from_config(
         dataset_config,
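Dumping the resolved config with yaml.dump(conf.model_dump()), as above, turns a Pydantic model into readable YAML for the debug log. A minimal sketch, assuming pydantic v2 and PyYAML (DemoConfig and its fields are illustrative):

import yaml
from pydantic import BaseModel


class DemoConfig(BaseModel):
    samplerate: int = 256000
    duration: float = 1.0


conf = DemoConfig()
# model_dump() gives a plain dict; yaml.dump renders it as YAML
# (keys sorted alphabetically by default).
print(yaml.dump(conf.model_dump()))
# duration: 1.0
# samplerate: 256000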
@@ -38,7 +38,7 @@ class BaseConfig(BaseModel):
     Pydantic model configuration dictionary. Set to forbid extra fields.
     """
 
-    model_config = ConfigDict(extra="allow")
+    model_config = ConfigDict(extra="ignore")
 
 
 T = TypeVar("T", bound=BaseModel)
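For context on the extra="allow" to extra="ignore" switch: with "ignore", unknown keys are silently dropped during validation, instead of being kept on the instance ("allow") or raising an error ("forbid"). A minimal sketch, assuming pydantic v2 (Example is an illustrative model, not from batdetect2):

from pydantic import BaseModel, ConfigDict


class Example(BaseModel):
    model_config = ConfigDict(extra="ignore")

    name: str


# The unknown key is dropped rather than stored or rejected.
obj = Example.model_validate({"name": "bat", "unknown": 1})
print(obj.model_dump())  # {'name': 'bat'}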
@@ -256,8 +256,17 @@ def preprocess_annotations(
     output_dir = Path(output_dir)
 
     if not output_dir.is_dir():
+        logger.info(
+            "Creating output directory: {output_dir}", output_dir=output_dir
+        )
         output_dir.mkdir(parents=True)
 
+    logger.info(
+        "Starting preprocessing of {num_annotations} annotations with {max_workers} workers.",
+        num_annotations=len(clip_annotations),
+        max_workers=max_workers or "all available",
+    )
+
     with Pool(max_workers) as pool:
         list(
             tqdm(
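The Pool + tqdm combination above is the usual trick for a per-item progress bar over a multiprocessing pool: pool.imap yields results lazily, tqdm ticks as each one arrives, and list() drains the iterator. A minimal sketch, assuming tqdm is installed (square is an illustrative worker function):

from multiprocessing import Pool

from tqdm import tqdm


def square(x: int) -> int:
    return x * x


if __name__ == "__main__":
    items = list(range(100))
    with Pool(4) as pool:
        # total= sizes the bar up front, since imap's iterator has no len().
        results = list(
            tqdm(
                pool.imap(square, items),
                total=len(items),
                desc="Squaring",
            )
        )
    print(len(results))  # 100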
@@ -273,8 +282,10 @@ def preprocess_annotations(
                 clip_annotations,
             ),
             total=len(clip_annotations),
+            desc="Preprocessing annotations",
         )
     )
+    logger.info("Finished preprocessing.")
 
 
 def preprocess_single_annotation(
@@ -313,11 +324,15 @@ def preprocess_single_annotation(
     path = output_dir / filename
 
     if path.is_file() and not replace:
+        logger.debug("Skipping existing file: {path}", path=path)
         return
 
     if path.is_file() and replace:
+        logger.debug("Removing existing file: {path}", path=path)
         path.unlink()
 
+    logger.debug("Processing annotation {uuid}", uuid=clip_annotation.uuid)
+
     try:
         sample = generate_train_example(
             clip_annotation,
@@ -326,8 +341,9 @@ def preprocess_single_annotation(
         )
     except Exception as error:
         logger.error(
-            "Failed to process annotation: {uuid}. Error {error}",
+            "Failed to process annotation {uuid} to {path}. Error: {error}",
             uuid=clip_annotation.uuid,
+            path=path,
             error=error,
         )
         return
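All the log calls in this commit use loguru's {}-style keyword interpolation rather than f-strings, so placeholder names stay explicit at the call site. A minimal sketch of that pattern (the values are illustrative):

from loguru import logger

path = "/tmp/example.npz"
uuid = "1234-abcd"

# loguru fills the {} placeholders from the keyword arguments.
logger.error(
    "Failed to process annotation {uuid} to {path}. Error: {error}",
    uuid=uuid,
    path=path,
    error=ValueError("bad clip"),
)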