# Source: batdetect2/example_data/config.yaml
# Snapshot: 2025-09-28 16:22:21 +01:00 (187 lines, 3.7 KiB, YAML)

# Audio input settings.
# NOTE(review): this copy had lost its indentation; the nesting below is
# reconstructed from key order — confirm against the consumer's schema.
audio:
  # Presumably the working sample rate in Hz — TODO confirm units.
  samplerate: 256000
  resample:
    enabled: true
    method: "poly"
# Spectrogram preprocessing settings.
# NOTE(review): this copy had lost its indentation; the nesting below is
# reconstructed from key order — confirm against the consumer's schema.
preprocess:
  stft:
    # Presumably seconds (window length) and a 0-1 overlap fraction —
    # TODO confirm units.
    window_duration: 0.002
    window_overlap: 0.75
    window_fn: hann
  frequencies:
    max_freq: 120000
    min_freq: 10000
  size:
    # Same value as `model.input_height` in this file; presumably the two
    # must agree — verify before changing either.
    height: 128
    resize_factor: 0.5
  # Each list item is one transform, selected by `name`; the remaining keys
  # of an item are that transform's parameters.
  spectrogram_transforms:
    - name: pcen
      time_constant: 0.1
      gain: 0.98
      bias: 2
      power: 0.5
    # NOTE(review): "substraction" is spelled this way in the original; it is
    # an identifier read by the consumer, so it is deliberately left as-is.
    - name: spectral_mean_substraction
# Detection post-processing settings.
# NOTE(review): this copy had lost its indentation; all three keys are
# assumed to be direct children of `postprocess` — confirm against the
# consumer's schema.
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  top_k_per_sec: 200
# Model architecture: encoder -> bottleneck -> decoder.
# NOTE(review): this copy had lost its indentation; the nesting below
# (in particular which layers belong to each LayerGroup) is reconstructed
# from key order — confirm against the consumer's schema.
model:
  input_height: 128
  in_channels: 1
  out_channels: 32
  encoder:
    layers:
      - name: FreqCoordConvDown
        out_channels: 32
      - name: FreqCoordConvDown
        out_channels: 64
      # A LayerGroup carries its own nested `layers` list.
      - name: LayerGroup
        layers:
          - name: FreqCoordConvDown
            out_channels: 128
          - name: ConvBlock
            out_channels: 256
  bottleneck:
    # Equals the out_channels of the last encoder layer above (256).
    channels: 256
    layers:
      - name: SelfAttention
        attention_channels: 256
  decoder:
    layers:
      - name: FreqCoordConvUp
        out_channels: 64
      - name: FreqCoordConvUp
        out_channels: 32
      - name: LayerGroup
        layers:
          - name: FreqCoordConvUp
            out_channels: 32
          # Final layer emits 32 channels, the same value as
          # `model.out_channels` above.
          - name: ConvBlock
            out_channels: 32
# Training settings: optimizer, label generation, trainer loop, data
# loaders (with augmentations), loss weights, logging and in-training
# validation.
# NOTE(review): this copy had lost its indentation; the nesting below is
# reconstructed from key order. In particular, `logger` and `validation`
# are assumed to belong to `train` rather than the top level — confirm
# against the consumer's schema.
train:
  optimizer:
    learning_rate: 0.001
    t_max: 100

  labels:
    sigma: 3

  trainer:
    max_epochs: 10
    check_val_every_n_epoch: 5

  train_loader:
    batch_size: 8
    num_workers: 2
    shuffle: true
    clipping_strategy:
      name: random_subclip
      # Same value as `val_loader.clipping_strategy.chunk_size` below.
      duration: 0.256
    augmentations:
      enabled: true
      # Each list item is one augmentation, selected by `name`; the other
      # keys of an item are that augmentation's parameters.
      audio:
        - name: mix_audio
          probability: 0.2
          min_weight: 0.3
          max_weight: 0.7
        - name: add_echo
          probability: 0.2
          max_delay: 0.005
          min_weight: 0.0
          max_weight: 1.0
      spectrogram:
        - name: scale_volume
          probability: 0.2
          min_scaling: 0.0
          max_scaling: 2.0
        - name: warp
          probability: 0.2
          delta: 0.04
        - name: mask_time
          probability: 0.2
          max_perc: 0.05
          max_masks: 3
        - name: mask_freq
          probability: 0.2
          max_perc: 0.10
          max_masks: 3

  val_loader:
    num_workers: 2
    clipping_strategy:
      name: whole_audio_padded
      chunk_size: 0.256

  # Per-term loss weights; detection and classification additionally carry
  # focal-loss parameters.
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1

  logger:
    name: csv

  validation:
    tasks:
      - name: sound_event_detection
        metrics:
          - name: average_precision
      - name: sound_event_classification
        metrics:
          - name: average_precision
# Evaluation settings: a list of tasks, each with the metrics to compute
# and the plots to produce.
# NOTE(review): this copy had lost its indentation; the assignment of each
# `metrics` / `plots` list to the task preceding it is reconstructed from
# key order — confirm against the consumer's schema.
evaluation:
  tasks:
    - name: sound_event_detection
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: score_distribution
        - name: example_detection

    - name: sound_event_classification
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve

    - name: top_class_detection
      metrics:
        - name: average_precision
      plots:
        - name: pr_curve
        - name: confusion_matrix
        - name: example_classification

    - name: clip_detection
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: roc_curve
        - name: score_distribution

    - name: clip_classification
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: roc_curve