---
# Mirror of https://github.com/macaodha/batdetect2.git
# (synced 2025-06-29 22:51:58 +02:00)
#
# NOTE(review): this file was recovered from a rendered web view that had
# stripped all indentation and interleaved stray "|" lines. Keys and values
# are unchanged; the nesting below was reconstructed from key order and
# list-item markers. Verify the hierarchy against the consuming schema.

# Class definitions and annotation filtering rules.
targets:
  classes:
    classes:
      - name: myomys
        tags:
          - value: Myotis mystacinus
      - name: pippip
        tags:
          - value: Pipistrellus pipistrellus
      - name: eptser
        tags:
          - value: Eptesicus serotinus
      - name: rhifer
        tags:
          - value: Rhinolophus ferrumequinum
    generic_class:
      - key: class
        value: Bat

  filtering:
    rules:
      - match_type: all
        tags:
          - key: event
            value: Echolocation
      - match_type: exclude
        tags:
          - key: class
            value: Unknown

# Audio loading and spectrogram computation settings.
preprocess:
  audio:
    resample:
      samplerate: 256000
      method: "poly"
    scale: false
    center: true
    duration: null

  spectrogram:
    stft:
      window_duration: 0.002
      window_overlap: 0.75
      window_fn: hann
    frequencies:
      max_freq: 120000
      min_freq: 10000
    pcen:
      time_constant: 0.4
      gain: 0.98
      bias: 2
      power: 0.5
    scale: "amplitude"
    size:
      height: 128
      resize_factor: 0.5
    # Key spelling ("substraction") kept exactly as found; presumably it must
    # match the name the consumer expects -- confirm before renaming.
    spectral_mean_substraction: true
    peak_normalize: false

# Detection post-processing settings.
# min_freq / max_freq repeat the values under preprocess.spectrogram.frequencies.
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  min_freq: 10000
  max_freq: 120000
  top_k_per_sec: 200

labels:
  sigma: 3

# Network architecture: encoder, bottleneck and decoder layer stacks.
model:
  input_height: 128
  in_channels: 1
  out_channels: 32
  encoder:
    layers:
      - block_type: FreqCoordConvDown
        out_channels: 32
      - block_type: FreqCoordConvDown
        out_channels: 64
      - block_type: LayerGroup
        layers:
          - block_type: FreqCoordConvDown
            out_channels: 128
          - block_type: ConvBlock
            out_channels: 256
  bottleneck:
    channels: 256
    self_attention: true
  decoder:
    layers:
      - block_type: FreqCoordConvUp
        out_channels: 64
      - block_type: FreqCoordConvUp
        out_channels: 32
      - block_type: LayerGroup
        layers:
          - block_type: FreqCoordConvUp
            out_channels: 32
          - block_type: ConvBlock
            out_channels: 32

# Optimisation, loss weighting, logging and augmentation settings.
train:
  batch_size: 8
  learning_rate: 0.001
  t_max: 100
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1
  logger:
    logger_type: dvclive
  augmentations:
    steps:
      - augmentation_type: mix_audio
        probability: 0.2
        min_weight: 0.3
        max_weight: 0.7
      - augmentation_type: add_echo
        probability: 0.2
        max_delay: 0.005
        min_weight: 0.0
        max_weight: 1.0
      - augmentation_type: scale_volume
        probability: 0.2
        min_scaling: 0.0
        max_scaling: 2.0
      - augmentation_type: warp
        probability: 0.2
        delta: 0.04
      - augmentation_type: mask_time
        probability: 0.2
        max_perc: 0.05
        max_masks: 3
      - augmentation_type: mask_freq
        probability: 0.2
        max_perc: 0.10
        max_masks: 3