mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 22:51:58 +02:00
Starting to add logging to preprocess
This commit is contained in:
parent
7dd35d6e3e
commit
8a6ed3dec7
@ -2,6 +2,7 @@ from pathlib import Path
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
from batdetect2.cli.base import cli
|
from batdetect2.cli.base import cli
|
||||||
from batdetect2.data import load_dataset_from_config
|
from batdetect2.data import load_dataset_from_config
|
||||||
@ -123,7 +124,7 @@ def train(): ...
|
|||||||
def preprocess(
|
def preprocess(
|
||||||
dataset_config: Path,
|
dataset_config: Path,
|
||||||
output: Path,
|
output: Path,
|
||||||
target_config: Path,
|
target_config: Optional[Path] = None,
|
||||||
base_dir: Optional[Path] = None,
|
base_dir: Optional[Path] = None,
|
||||||
preprocess_config: Optional[Path] = None,
|
preprocess_config: Optional[Path] = None,
|
||||||
label_config: Optional[Path] = None,
|
label_config: Optional[Path] = None,
|
||||||
@ -134,8 +135,13 @@ def preprocess(
|
|||||||
label_config_field: Optional[str] = None,
|
label_config_field: Optional[str] = None,
|
||||||
dataset_field: Optional[str] = None,
|
dataset_field: Optional[str] = None,
|
||||||
):
|
):
|
||||||
|
logger.info("Starting preprocessing.")
|
||||||
|
|
||||||
output = Path(output)
|
output = Path(output)
|
||||||
|
logger.info("Will save outputs to {output}", output=output)
|
||||||
|
|
||||||
base_dir = base_dir or Path.cwd()
|
base_dir = base_dir or Path.cwd()
|
||||||
|
logger.debug("Current working directory: {base_dir}", base_dir=base_dir)
|
||||||
|
|
||||||
preprocess = (
|
preprocess = (
|
||||||
load_preprocessing_config(
|
load_preprocessing_config(
|
||||||
@ -146,9 +152,13 @@ def preprocess(
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
target = load_target_config(
|
target = (
|
||||||
target_config,
|
load_target_config(
|
||||||
field=target_config_field,
|
target_config,
|
||||||
|
field=target_config_field,
|
||||||
|
)
|
||||||
|
if target_config
|
||||||
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
label = (
|
label = (
|
||||||
@ -166,13 +176,20 @@ def preprocess(
|
|||||||
base_dir=base_dir,
|
base_dir=base_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Loaded {num_examples} annotated clips from the configured dataset",
|
||||||
|
num_examples=len(dataset),
|
||||||
|
)
|
||||||
|
|
||||||
targets = build_targets(config=target)
|
targets = build_targets(config=target)
|
||||||
preprocessor = build_preprocessor(config=preprocess)
|
preprocessor = build_preprocessor(config=preprocess)
|
||||||
labeller = build_clip_labeler(targets, config=label)
|
labeller = build_clip_labeler(targets, config=label)
|
||||||
|
|
||||||
if not output.exists():
|
if not output.exists():
|
||||||
|
logger.debug("Creating directory {directory}", directory=output)
|
||||||
output.mkdir(parents=True)
|
output.mkdir(parents=True)
|
||||||
|
|
||||||
|
logger.info("Will start preprocessing")
|
||||||
preprocess_annotations(
|
preprocess_annotations(
|
||||||
dataset,
|
dataset,
|
||||||
output_dir=output,
|
output_dir=output,
|
||||||
|
@ -29,6 +29,7 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal, Optional, Union
|
from typing import Literal, Optional, Union
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
from pydantic import Field, ValidationError
|
from pydantic import Field, ValidationError
|
||||||
from soundevent import data
|
from soundevent import data
|
||||||
|
|
||||||
@ -177,6 +178,11 @@ def load_batdetect2_files_annotated_dataset(
|
|||||||
path = base_dir / path
|
path = base_dir / path
|
||||||
|
|
||||||
paths = list_file_annotations(path)
|
paths = list_file_annotations(path)
|
||||||
|
logger.debug(
|
||||||
|
"Found {num_files} files in the annotations directory {path}",
|
||||||
|
num_files=len(paths),
|
||||||
|
path=path,
|
||||||
|
)
|
||||||
|
|
||||||
annotations = []
|
annotations = []
|
||||||
|
|
||||||
@ -184,6 +190,7 @@ def load_batdetect2_files_annotated_dataset(
|
|||||||
try:
|
try:
|
||||||
file_annotation = load_file_annotation(p)
|
file_annotation = load_file_annotation(p)
|
||||||
except (FileNotFoundError, ValidationError):
|
except (FileNotFoundError, ValidationError):
|
||||||
|
logger.warning("Could not load annotations in file {path}", path=p)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if (
|
if (
|
||||||
@ -191,6 +198,10 @@ def load_batdetect2_files_annotated_dataset(
|
|||||||
and dataset.filter.only_annotated
|
and dataset.filter.only_annotated
|
||||||
and not file_annotation.annotated
|
and not file_annotation.annotated
|
||||||
):
|
):
|
||||||
|
logger.debug(
|
||||||
|
"Annotation in file {path} omited: not annotated",
|
||||||
|
path=p,
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if (
|
if (
|
||||||
@ -198,6 +209,10 @@ def load_batdetect2_files_annotated_dataset(
|
|||||||
and dataset.filter.exclude_issues
|
and dataset.filter.exclude_issues
|
||||||
and file_annotation.issues
|
and file_annotation.issues
|
||||||
):
|
):
|
||||||
|
logger.debug(
|
||||||
|
"Annotation in file {path} omited: has issues",
|
||||||
|
path=p,
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -205,7 +220,12 @@ def load_batdetect2_files_annotated_dataset(
|
|||||||
file_annotation,
|
file_annotation,
|
||||||
audio_dir=audio_dir,
|
audio_dir=audio_dir,
|
||||||
)
|
)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError as err:
|
||||||
|
logger.warning(
|
||||||
|
"Did not find the audio related to the annotation file {path}. Error: {err}",
|
||||||
|
path=p,
|
||||||
|
err=err,
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
annotations.append(
|
annotations.append(
|
||||||
|
@ -21,6 +21,7 @@ The core components are:
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated, List, Optional
|
from typing import Annotated, List, Optional
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from soundevent import data, io
|
from soundevent import data, io
|
||||||
|
|
||||||
@ -115,6 +116,11 @@ def load_dataset(
|
|||||||
clip_annotations = []
|
clip_annotations = []
|
||||||
for source in dataset.sources:
|
for source in dataset.sources:
|
||||||
annotated_source = load_annotated_dataset(source, base_dir=base_dir)
|
annotated_source = load_annotated_dataset(source, base_dir=base_dir)
|
||||||
|
logger.debug(
|
||||||
|
"Loaded {num_examples} from dataset source '{source_name}'",
|
||||||
|
num_examples=len(annotated_source.clip_annotations),
|
||||||
|
source_name=source.name,
|
||||||
|
)
|
||||||
clip_annotations.extend(
|
clip_annotations.extend(
|
||||||
insert_source_tag(clip_annotation, source)
|
insert_source_tag(clip_annotation, source)
|
||||||
for clip_annotation in annotated_source.clip_annotations
|
for clip_annotation in annotated_source.clip_annotations
|
||||||
|
Loading…
Reference in New Issue
Block a user