# Mirror of https://github.com/macaodha/batdetect2.git
# Synced 2026-01-10 17:19:34 +01:00
# 171 lines, 3.2 KiB, YAML
# Target definitions: one detection target plus a list of named
# classification targets, each described by tag key/value pairs.
# NOTE(review): indentation was lost in this copy and has been reconstructed
# from key semantics; confirm the nesting (in particular that `roi` belongs
# under `targets`) against the upstream file.
targets:
  detection_target:
    name: bat
    # Both conditions are listed under `all_of`: the `event: Echolocation`
    # tag is present and the `class: Unknown` tag is negated via `not`.
    match_if:
      name: all_of
      conditions:
        - name: has_tag
          tag: { key: event, value: Echolocation }
        - name: not
          condition:
            name: has_tag
            tag: { key: class, value: Unknown }
    assign_tags:
      - key: class
        value: Bat

  # Each entry pairs a short name with the `class` tag value it stands for.
  classification_targets:
    - name: myomys
      tags:
        - key: class
          value: Myotis mystacinus
    - name: pippip
      tags:
        - key: class
          value: Pipistrellus pipistrellus
    - name: eptser
      tags:
        - key: class
          value: Eptesicus serotinus
    - name: rhifer
      tags:
        - key: class
          value: Rhinolophus ferrumequinum

  roi:
    name: anchor_bbox
    anchor: top-left

# Preprocessing settings, split into audio loading and spectrogram options.
# NOTE(review): indentation was lost in this copy and has been reconstructed
# from key semantics; confirm the nesting against the upstream file.
preprocess:
  audio:
    samplerate: 256000  # presumably Hz - TODO confirm
    resample:
      enabled: true
      method: "poly"

  spectrogram:
    stft:
      window_duration: 0.002  # presumably seconds - TODO confirm
      window_overlap: 0.75
      window_fn: hann
    frequencies:
      max_freq: 120000
      min_freq: 10000
    size:
      # Same value as `model.input_height` elsewhere in this file.
      height: 128
      resize_factor: 0.5
    transforms:
      - name: pcen
        time_constant: 0.1
        gain: 0.98
        bias: 2
        power: 0.5
      # NOTE(review): "substraction" spelling kept verbatim; it is presumably
      # the identifier the consumer looks up - do not "fix" without checking.
      - name: spectral_mean_substraction

# Postprocessing settings applied to model output: NMS kernel size,
# detection score threshold, and a per-second cap on kept detections.
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  top_k_per_sec: 200

# Model architecture: encoder, bottleneck and decoder layer stacks.
# NOTE(review): indentation was lost in this copy and has been reconstructed
# from key semantics; confirm the nesting against the upstream file, in
# particular which layers sit inside each `LayerGroup`.
model:
  # Same value as `preprocess.spectrogram.size.height` elsewhere in this file.
  input_height: 128
  in_channels: 1
  out_channels: 32
  encoder:
    layers:
      - name: FreqCoordConvDown
        out_channels: 32
      - name: FreqCoordConvDown
        out_channels: 64
      - name: LayerGroup
        layers:
          - name: FreqCoordConvDown
            out_channels: 128
          - name: ConvBlock
            out_channels: 256
  bottleneck:
    # Equals the last encoder layer's `out_channels` (256).
    channels: 256
    layers:
      - name: SelfAttention
        attention_channels: 256
  decoder:
    layers:
      - name: FreqCoordConvUp
        out_channels: 64
      - name: FreqCoordConvUp
        out_channels: 32
      - name: LayerGroup
        layers:
          - name: FreqCoordConvUp
            out_channels: 32
          - name: ConvBlock
            out_channels: 32

# Training settings: optimisation values, label generation, trainer limits,
# data loaders, loss weights, logging and data augmentation.
# NOTE(review): indentation was lost in this copy and has been reconstructed
# from key semantics. The sections below (`labels` through `augmentations`)
# are assumed to be children of `train`; confirm against the upstream file.
train:
  learning_rate: 0.001
  t_max: 100

  labels:
    sigma: 3

  trainer:
    max_epochs: 40

  dataloaders:
    train:
      batch_size: 8
      num_workers: 2
      shuffle: true

    val:
      batch_size: 1
      num_workers: 2

  # Per-term loss weights; the detection and classification terms each carry
  # their own `focal` parameters.
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1

  logger:
    logger_type: csv
    # save_dir: outputs/log/
    # name: logs

  # Augmentations are grouped by the stage they are listed under (`audio` or
  # `spectrogram`); every entry carries its own `probability`.
  augmentations:
    enabled: true
    audio:
      - name: mix_audio
        probability: 0.2
        min_weight: 0.3
        max_weight: 0.7
      - name: add_echo
        probability: 0.2
        max_delay: 0.005
        min_weight: 0.0
        max_weight: 1.0
    spectrogram:
      - name: scale_volume
        probability: 0.2
        min_scaling: 0.0
        max_scaling: 2.0
      - name: warp
        probability: 0.2
        delta: 0.04
      - name: mask_time
        probability: 0.2
        max_perc: 0.05
        max_masks: 3
      - name: mask_freq
        probability: 0.2
        max_perc: 0.10
        max_masks: 3