---
# Detector configuration: target definitions, preprocessing, postprocessing,
# model architecture, and training settings.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been collapsed; confirm the hierarchy against the
# consuming tool's configuration schema before relying on it.

targets:
  # Detection target named "bat": the match rule requires the tag
  # event=Echolocation and the absence of the tag class=Unknown.
  detection_target:
    name: bat
    match_if:
      name: all_of
      conditions:
        - name: has_tag
          tag: { key: event, value: Echolocation }
        - name: not
          condition:
            name: has_tag
            tag: { key: class, value: Unknown }
    assign_tags:
      - key: class
        value: Bat

  # One entry per class; each short name maps to a class tag holding the
  # species' scientific name.
  classification_targets:
    - name: myomys
      tags:
        - key: class
          value: Myotis mystacinus
    - name: pippip
      tags:
        - key: class
          value: Pipistrellus pipistrellus
    - name: eptser
      tags:
        - key: class
          value: Eptesicus serotinus
    - name: rhifer
      tags:
        - key: class
          value: Rhinolophus ferrumequinum

  roi:
    name: anchor_bbox
    anchor: top-left

preprocess:
  audio:
    # presumably in Hz — TODO confirm
    samplerate: 256000
    resample:
      enabled: true
      method: "poly"

  spectrogram:
    stft:
      # presumably seconds and a fractional overlap — TODO confirm
      window_duration: 0.002
      window_overlap: 0.75
      window_fn: hann
    frequencies:
      max_freq: 120000
      min_freq: 10000
    size:
      # Same value as model.input_height below.
      height: 128
      resize_factor: 0.5
    transforms:
      - name: pcen
        time_constant: 0.1
        gain: 0.98
        bias: 2
        power: 0.5
      # Spelling kept as-is: this is an identifier matched by the consumer.
      - name: spectral_mean_substraction

postprocess:
  nms_kernel_size: 9
  detection_threshold: 0.01
  top_k_per_sec: 200

model:
  # Same value as preprocess.spectrogram.size.height above.
  input_height: 128
  in_channels: 1
  out_channels: 32
  encoder:
    layers:
      - name: FreqCoordConvDown
        out_channels: 32
      - name: FreqCoordConvDown
        out_channels: 64
      # NOTE(review): both nested layers are assumed to belong to this
      # group — confirm against the original file.
      - name: LayerGroup
        layers:
          - name: FreqCoordConvDown
            out_channels: 128
          - name: ConvBlock
            out_channels: 256
  bottleneck:
    channels: 256
    layers:
      - name: SelfAttention
        attention_channels: 256
  decoder:
    layers:
      - name: FreqCoordConvUp
        out_channels: 64
      - name: FreqCoordConvUp
        out_channels: 32
      # NOTE(review): both nested layers are assumed to belong to this
      # group — confirm against the original file.
      - name: LayerGroup
        layers:
          - name: FreqCoordConvUp
            out_channels: 32
          - name: ConvBlock
            out_channels: 32

train:
  learning_rate: 0.001
  t_max: 100

  labels:
    sigma: 3

  trainer:
    max_epochs: 5

  train_loader:
    batch_size: 8
    num_workers: 2
    shuffle: true
    clipping_strategy:
      name: random_subclip
      duration: 0.256

  val_loader:
    num_workers: 2
    clipping_strategy:
      name: whole_audio_padded
      # Same value as the training subclip duration above.
      chunk_size: 0.256

  # Per-term loss weights; detection and classification share the same
  # focal parameters.
  loss:
    detection:
      weight: 1.0
      focal:
        beta: 4
        alpha: 2
    classification:
      weight: 2.0
      focal:
        beta: 4
        alpha: 2
    size:
      weight: 0.1

  logger:
    name: csv

  # Every augmentation below is configured with probability 0.2.
  augmentations:
    enabled: true
    audio:
      - name: mix_audio
        probability: 0.2
        min_weight: 0.3
        max_weight: 0.7
      - name: add_echo
        probability: 0.2
        max_delay: 0.005
        min_weight: 0.0
        max_weight: 1.0
    spectrogram:
      - name: scale_volume
        probability: 0.2
        min_scaling: 0.0
        max_scaling: 2.0
      - name: warp
        probability: 0.2
        delta: 0.04
      - name: mask_time
        probability: 0.2
        max_perc: 0.05
        max_masks: 3
      - name: mask_freq
        probability: 0.2
        max_perc: 0.10
        max_masks: 3