# Mirror of https://github.com/macaodha/batdetect2.git
# Synced 2026-01-10 00:59:34 +01:00
# 187 lines, 3.7 KiB, YAML
# Audio section: target sample rate and resampling options.
# NOTE(review): nesting was rebuilt from a copy that had lost its
# indentation; confirm against the consumer's schema.
audio:
  samplerate: 256000  # presumably Hz; confirm
  resample:
    # Canonical lowercase boolean, matching the `enabled: true` spelling
    # already used by the augmentations settings in this file.
    enabled: true
    method: "poly"
# Preprocess section: STFT window, frequency bounds, output size and an
# ordered list of spectrogram transforms.
# NOTE(review): nesting was rebuilt from a copy that had lost its
# indentation; confirm against the consumer's schema.
preprocess:
  stft:
    window_duration: 0.002  # presumably seconds; confirm
    window_overlap: 0.75
    window_fn: hann
  frequencies:
    max_freq: 120000
    min_freq: 10000
  size:
    height: 128
    resize_factor: 0.5
  spectrogram_transforms:
    - name: pcen
      time_constant: 0.1
      gain: 0.98
      bias: 2
      power: 0.5
    # NOTE(review): the "substraction" spelling is kept verbatim; it is
    # presumably the identifier the consumer looks up, so verify before
    # renaming.
    - name: spectral_mean_substraction
# Postprocess section: NMS kernel size, detection score threshold and a
# per-second cap on retained detections (as the key names indicate).
postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  top_k_per_sec: 200
# Model section: input/output dimensions plus encoder, bottleneck and
# decoder layer stacks.
# NOTE(review): nesting was rebuilt from a copy that had lost its
# indentation. Both entries following each `LayerGroup`'s `layers:` key are
# placed inside that group; confirm against the consumer's schema.
model:
  # Same value as `height: 128` under the preprocess size settings;
  # presumably the two must agree. TODO confirm.
  input_height: 128
  in_channels: 1
  out_channels: 32

  encoder:
    layers:
      - name: FreqCoordConvDown
        out_channels: 32
      - name: FreqCoordConvDown
        out_channels: 64
      - name: LayerGroup
        layers:
          - name: FreqCoordConvDown
            out_channels: 128
          - name: ConvBlock
            out_channels: 256

  bottleneck:
    channels: 256
    layers:
      - name: SelfAttention
        attention_channels: 256

  decoder:
    layers:
      - name: FreqCoordConvUp
        out_channels: 64
      - name: FreqCoordConvUp
        out_channels: 32
      - name: LayerGroup
        layers:
          - name: FreqCoordConvUp
            out_channels: 32
          - name: ConvBlock
            out_channels: 32
# Train section: optimizer, label generation, trainer limits, data loaders,
# loss weights, logger and the validation tasks.
# NOTE(review): nesting was rebuilt from a copy that had lost its
# indentation. In particular, `augmentations` is placed under
# `train_loader` and `validation` under `train`; confirm both against the
# consumer's schema.
train:
  optimizer:
    learning_rate: 0.001
    t_max: 100

  labels:
    sigma: 3

  trainer:
    max_epochs: 10
    check_val_every_n_epoch: 5

  train_loader:
    batch_size: 8

    num_workers: 2

    # Canonical lowercase boolean, matching `enabled: true` below.
    shuffle: true

    clipping_strategy:
      name: random_subclip
      duration: 0.256  # presumably seconds; confirm

    augmentations:
      enabled: true
      audio:
        - name: mix_audio
          probability: 0.2
          min_weight: 0.3
          max_weight: 0.7
        - name: add_echo
          probability: 0.2
          max_delay: 0.005
          min_weight: 0.0
          max_weight: 1.0
      spectrogram:
        - name: scale_volume
          probability: 0.2
          min_scaling: 0.0
          max_scaling: 2.0
        - name: warp
          probability: 0.2
          delta: 0.04
        - name: mask_time
          probability: 0.2
          max_perc: 0.05
          max_masks: 3
        - name: mask_freq
          probability: 0.2
          max_perc: 0.10
          max_masks: 3

  val_loader:
    num_workers: 2
    clipping_strategy:
      name: whole_audio_padded
      chunk_size: 0.256  # same value as the training clip duration above

  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1

  logger:
    name: csv

  validation:
    tasks:
      - name: sound_event_detection
        metrics:
          - name: average_precision
      - name: sound_event_classification
        metrics:
          - name: average_precision
# Evaluation section: a list of tasks, each with its metrics and plots.
# NOTE(review): nesting was rebuilt from a copy that had lost its
# indentation. An entry is treated as a task when a `metrics:` key follows
# its name; every other `- name:` entry belongs to the preceding `metrics:`
# or `plots:` list. Confirm against the consumer's schema.
evaluation:
  tasks:
    - name: sound_event_detection
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: score_distribution
        - name: example_detection

    - name: sound_event_classification
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve

    - name: top_class_detection
      metrics:
        - name: average_precision
      plots:
        - name: pr_curve
        - name: confusion_matrix
        - name: example_classification

    - name: clip_detection
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: roc_curve
        - name: score_distribution

    - name: clip_classification
      metrics:
        - name: average_precision
        - name: roc_auc
      plots:
        - name: pr_curve
        - name: roc_curve