batdetect2/example_data/config.yaml
2025-09-09 15:31:40 +01:00

171 lines
3.2 KiB
YAML

# Target definitions: one detection target, four named classification
# targets, and the ROI settings.
targets:
  detection_target:
    name: bat
    # all_of over two conditions: has the tag event=Echolocation, and does
    # not have the tag class=Unknown.
    match_if:
      name: all_of
      conditions:
        - name: has_tag
          tag:
            key: event
            value: Echolocation
        - name: not
          condition:
            name: has_tag
            tag:
              key: class
              value: Unknown
    assign_tags:
      - key: class
        value: Bat

  # Each entry pairs a short target name with the class tag it carries.
  classification_targets:
    - name: myomys
      tags:
        - key: class
          value: Myotis mystacinus
    - name: pippip
      tags:
        - key: class
          value: Pipistrellus pipistrellus
    - name: eptser
      tags:
        - key: class
          value: Eptesicus serotinus
    - name: rhifer
      tags:
        - key: class
          value: Rhinolophus ferrumequinum

  roi:
    name: anchor_bbox
    anchor: top-left
# Preprocessing settings: audio loading/resampling, then spectrogram
# computation and the transforms applied to it.
preprocess:
  audio:
    samplerate: 256000
    resample:
      # Canonical lowercase boolean, consistent with the other flags in
      # this file.
      enabled: true
      method: poly

  spectrogram:
    stft:
      window_duration: 0.002
      window_overlap: 0.75
      window_fn: hann
    frequencies:
      max_freq: 120000
      min_freq: 10000
    size:
      height: 128
      resize_factor: 0.5
    # Transforms are listed as a sequence: pcen first, then spectral mean
    # subtraction.
    transforms:
      - name: pcen
        time_constant: 0.1
        gain: 0.98
        bias: 2
        power: 0.5
      # NOTE(review): "substraction" is presumably the exact name the
      # consumer looks up - confirm before correcting the spelling.
      - name: spectral_mean_substraction
# Postprocessing settings: NMS kernel size, detection score threshold and
# the top-k-per-second limit.
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  top_k_per_sec: 200
# Model architecture: input/output sizes plus the encoder, bottleneck and
# decoder layer stacks.
model:
  # NOTE(review): same value as preprocess.spectrogram.size.height (128);
  # presumably the two must agree - confirm.
  input_height: 128
  in_channels: 1
  out_channels: 32

  # Encoder: FreqCoordConvDown (32), FreqCoordConvDown (64), then a
  # LayerGroup of FreqCoordConvDown (128) and ConvBlock (256).
  encoder:
    layers:
      - name: FreqCoordConvDown
        out_channels: 32
      - name: FreqCoordConvDown
        out_channels: 64
      - name: LayerGroup
        layers:
          - name: FreqCoordConvDown
            out_channels: 128
          - name: ConvBlock
            out_channels: 256

  # Bottleneck: 256 channels with a single SelfAttention layer.
  bottleneck:
    channels: 256
    layers:
      - name: SelfAttention
        attention_channels: 256

  # Decoder: FreqCoordConvUp (64), FreqCoordConvUp (32), then a LayerGroup
  # of FreqCoordConvUp (32) and ConvBlock (32).
  decoder:
    layers:
      - name: FreqCoordConvUp
        out_channels: 64
      - name: FreqCoordConvUp
        out_channels: 32
      - name: LayerGroup
        layers:
          - name: FreqCoordConvUp
            out_channels: 32
          - name: ConvBlock
            out_channels: 32
# Training settings: learning rate, labels, trainer, dataloaders, loss
# weights, logger and augmentations.
train:
  learning_rate: 0.001
  t_max: 100

  labels:
    sigma: 3

  trainer:
    max_epochs: 5

  dataloaders:
    train:
      batch_size: 8
      num_workers: 2
      # Canonical lowercase boolean, consistent with the other flags in
      # this file.
      shuffle: true
    val:
      batch_size: 1
      num_workers: 2

  # Per-term loss weights; detection and classification share the same
  # focal parameters (beta 4, alpha 2).
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1

  logger:
    name: mlflow
    # NOTE(review): hard-coded private-network address, presumably an
    # MLflow tracking server - point this at your own server.
    # Quoted so the colons in the URL can never be read as YAML syntax.
    tracking_uri: 'http://10.20.20.211:9000'
    log_model: true

  # Augmentations are split into an audio list and a spectrogram list; each
  # entry has its own probability.
  augmentations:
    enabled: true
    audio:
      - name: mix_audio
        probability: 0.2
        min_weight: 0.3
        max_weight: 0.7
      - name: add_echo
        probability: 0.2
        max_delay: 0.005
        min_weight: 0.0
        max_weight: 1.0
    spectrogram:
      - name: scale_volume
        probability: 0.2
        min_scaling: 0.0
        max_scaling: 2.0
      - name: warp
        probability: 0.2
        delta: 0.04
      - name: mask_time
        probability: 0.2
        max_perc: 0.05
        max_masks: 3
      - name: mask_freq
        probability: 0.2
        max_perc: 0.10
        max_masks: 3