# batdetect2/example_data/config.yaml
# 2025-06-26 13:29:13 -06:00
#
# 148 lines
# 3.0 KiB
# YAML

# Target definitions: four named classes (each identified by one tag value),
# a generic class tag (class = Bat), and two tag-based filtering rules.
# NOTE(review): the nesting below was rebuilt from a copy that had lost its
# indentation; confirm the hierarchy against the consumer's schema.
targets:
  classes:
    classes:
      - name: myomys
        tags:
          - value: Myotis mystacinus
      - name: pippip
        tags:
          - value: Pipistrellus pipistrellus
      - name: eptser
        tags:
          - value: Eptesicus serotinus
      - name: rhifer
        tags:
          - value: Rhinolophus ferrumequinum
    generic_class:
      - key: class
        value: Bat
  filtering:
    rules:
      # Presumably keeps only items tagged event = Echolocation -- TODO confirm.
      - match_type: all
        tags:
          - key: event
            value: Echolocation
      # Presumably drops items tagged class = Unknown -- TODO confirm.
      - match_type: exclude
        tags:
          - key: class
            value: Unknown
# Preprocessing settings, split into audio-level options (resampling, scale,
# center, duration) and spectrogram-level options (STFT window, frequency
# range, PCEN parameters, scale, output size, normalisation switches).
# NOTE(review): the nesting below was rebuilt from a copy that had lost its
# indentation; in particular the placement of the two `scale` keys and of
# `size` should be confirmed against the consumer's schema.
preprocess:
  audio:
    resample:
      samplerate: 256000
      method: "poly"
    scale: false
    center: true
    duration: null
  spectrogram:
    stft:
      window_duration: 0.002
      window_overlap: 0.75
      window_fn: hann
    frequencies:
      max_freq: 120000
      min_freq: 10000
    pcen:
      time_constant: 0.4
      gain: 0.98
      bias: 2
      power: 0.5
    scale: "amplitude"
    size:
      height: 128
      resize_factor: 0.5
    # Key spelling ("substraction") is kept verbatim: it is read by the
    # consumer and must match its field name.
    spectral_mean_substraction: true
    peak_normalize: false
# Postprocessing settings: NMS kernel size, detection threshold, frequency
# bounds and a per-second cap on detections.
# NOTE(review): nesting rebuilt from a copy that had lost its indentation;
# units of min_freq/max_freq are presumably Hz -- TODO confirm.
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  min_freq: 10000
  max_freq: 120000
  top_k_per_sec: 200
# Label settings: a single `sigma` value.
# NOTE(review): nesting rebuilt from a copy that had lost its indentation;
# confirm that `labels` is a top-level section in the consumer's schema.
labels:
  sigma: 3
# Model architecture: input/output dimensions, an encoder layer list, a
# bottleneck, and a decoder layer list. `LayerGroup` entries carry their own
# nested `layers` list.
# NOTE(review): the nesting below was rebuilt from a copy that had lost its
# indentation; the membership of each LayerGroup (two blocks each) should be
# confirmed against the consumer's schema.
model:
  input_height: 128
  in_channels: 1
  out_channels: 32
  encoder:
    layers:
      - block_type: FreqCoordConvDown
        out_channels: 32
      - block_type: FreqCoordConvDown
        out_channels: 64
      - block_type: LayerGroup
        layers:
          - block_type: FreqCoordConvDown
            out_channels: 128
          - block_type: ConvBlock
            out_channels: 256
  bottleneck:
    channels: 256
    self_attention: true
  decoder:
    layers:
      - block_type: FreqCoordConvUp
        out_channels: 64
      - block_type: FreqCoordConvUp
        out_channels: 32
      - block_type: LayerGroup
        layers:
          - block_type: FreqCoordConvUp
            out_channels: 32
          - block_type: ConvBlock
            out_channels: 32
# Training settings: batch size, weighted loss terms (detection,
# classification, size), optimizer parameters, logger selection, and the
# list of augmentation steps, each with its own probability.
# NOTE(review): the nesting below was rebuilt from a copy that had lost its
# indentation; confirm the hierarchy against the consumer's schema.
train:
  batch_size: 8
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1
  optimizer:
    learning_rate: 0.001
    t_max: 100
  logger:
    logger_type: dvclive
  augmentations:
    steps:
      - augmentation_type: mix_audio
        probability: 0.2
        min_weight: 0.3
        max_weight: 0.7
      - augmentation_type: add_echo
        probability: 0.2
        max_delay: 0.005
        min_weight: 0.0
        max_weight: 1.0
      - augmentation_type: scale_volume
        probability: 0.2
        min_scaling: 0.0
        max_scaling: 2.0
      - augmentation_type: warp
        probability: 0.2
        delta: 0.04
      - augmentation_type: mask_time
        probability: 0.2
        max_perc: 0.05
        max_masks: 3
      - augmentation_type: mask_freq
        probability: 0.2
        max_perc: 0.10
        max_masks: 3