In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from typing import List, Optional
import torch

import pytorch_lightning as pl
from batdetect2.train.modules import DetectorModel
from batdetect2.train.augmentations import (
    add_echo,
    select_random_subclip,
    warp_spectrogram,
)
from batdetect2.train.dataset import LabeledDataset, get_files
from batdetect2.train.preprocess import PreprocessingConfig
from soundevent import data
import matplotlib.pyplot as plt
from soundevent.types import ClassMapper
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


## Training Datasets

In [3]:
# Example data folder, located inside the parent of the current working directory.
data_dir = Path.cwd().parent.joinpath("example_data")

In [4]:
# Gather the files found under the "preprocessed" subfolder of the data directory.
files = get_files(data_dir.joinpath("preprocessed"))

In [5]:
# Wrap the collected files in a LabeledDataset; this is the dataset handed to
# the training DataLoader.
train_dataset = LabeledDataset(files)

In [6]:
# Serve the training dataset in shuffled batches of 32 items, loaded by
# 4 worker processes.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    num_workers=4,
    shuffle=True,
)

In [7]:
# List of all possible classes
class Mapper(ClassMapper):
    """Translate sound event annotations to class names and back.

    ``encode`` inspects the ``event`` and ``class`` tags of an annotation and
    returns the class name to use for it, or ``None`` when the annotation
    should be ignored. ``decode`` turns a class name back into the tags that
    describe it.
    """

    # NOTE(review): ``encode`` can also return "social", which is not listed
    # here -- confirm whether it should be part of ``class_labels``.
    class_labels = [
        "Eptesicus serotinus",
        "Myotis mystacinus",
        "Pipistrellus pipistrellus",
        "Rhinolophus ferrumequinum",
    ]

    def encode(self, x: data.SoundEventAnnotation) -> Optional[str]:
        """Return the class name for an annotation, or ``None`` to skip it.

        Parameters
        ----------
        x
            The annotated sound event whose tags are inspected.

        Returns
        -------
        Optional[str]
            ``"social"`` for events tagged ``event == "Social"``, the value
            of the ``class`` tag for events tagged ``event == "Echolocation"``,
            and ``None`` for every other annotation (including annotations
            that lack the required tag).
        """
        event_tag = data.find_tag(x.tags, "event")

        # ``find_tag`` yields None when no tag matches; an annotation without
        # an event tag cannot be classified, so it is ignored instead of
        # failing on ``None.value``.
        if event_tag is None:
            return None

        if event_tag.value == "Social":
            return "social"

        if event_tag.value != "Echolocation":
            # Ignore all other types of calls
            return None

        species_tag = data.find_tag(x.tags, "class")

        # An echolocation call without a species tag has no class to return.
        if species_tag is None:
            return None

        return species_tag.value

    def decode(self, class_name: str) -> List[data.Tag]:
        """Return the tags that describe ``class_name``.

        Parameters
        ----------
        class_name
            A class name as produced by ``encode``.

        Returns
        -------
        List[data.Tag]
            A single ``event`` tag for the ``"social"`` class, otherwise a
            single ``class`` tag holding the class name.
        """
        if class_name == "social":
            # Use the same tag value that ``encode`` matches on ("Social") so
            # that decoding and re-encoding yields the same class.
            return [data.Tag(key="event", value="Social")]

        return [data.Tag(key="class", value=class_name)]

In [8]:
# Instantiate the detection model, handing it a Mapper instance as its class mapper.
detector = DetectorModel(class_mapper=Mapper())

In [9]:
# Short demonstration run: 2 epochs, at most 100 training batches per epoch,
# logging on every training step.
trainer = pl.Trainer(
    max_epochs=2,
    limit_train_batches=100,
    log_every_n_steps=1,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [10]:
# Train the detector on the training dataloader.
# NOTE(review): the recorded output of this cell ends in a RuntimeError (size 5
# vs 4 at dimension 1): the printed class heatmap has 4 channels while the
# class props have 5. Confirm that the number of classes used to build the
# heatmaps matches what the model outputs.
trainer.fit(model=detector, train_dataloaders=train_dataloader)


  | Name              | Type      | Params | Mode 
--------------------------------------------------------
0 | feature_extractor | Net2DFast | 119 K  | train
1 | classifier        | Conv2d    | 54     | train
2 | bbox              | Conv2d    | 18     | train
--------------------------------------------------------
119 K     Trainable params
448       Non-trainable params
119 K     Total params
0.480     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|                                                                                                                                                                                                                                                    | 0/1 [00:00<?, ?it/s]class heatmap shape torch.Size([3, 4, 128, 512])
class props shape torch.Size([3, 5, 128, 512])


RuntimeError: The size of tensor a (5) must match the size of tensor b (4) at non-singleton dimension 1

In [None]:
# Fetch the clip annotation at index 0 of the training dataset.
clip_annotation = train_dataset.get_clip_annotation(0)

In [None]:
# Compute the spectrogram of the annotated clip and run the detector on it.
# Indexing with two leading ``None`` prepends two size-1 axes to the
# spectrogram values (presumably batch and channel -- TODO confirm).
spec = detector.compute_spectrogram(clip_annotation.clip)
outputs = detector(torch.tensor(spec.values)[None, None])

In [None]:
# Draw the spectrogram and overlay the detection probabilities on the same
# time/frequency axes.
_, ax = plt.subplots(figsize=(15, 5))
spec.plot(ax=ax, add_colorbar=False)
# The overlay spans the full time/frequency grid, so it is drawn
# semi-transparent; an opaque mesh would hide the spectrogram underneath.
ax.pcolormesh(
    spec.time,
    spec.frequency,
    outputs.detection_probs.detach().squeeze(),
    alpha=0.5,
)
# Trailing semicolon keeps the cell from echoing the last expression's repr.
ax.set_title("Detection probabilities over the spectrogram");

In [None]:
# Report how many sound events were predicted.
# NOTE(review): `predictions` is not assigned in any cell visible above, so on
# a fresh kernel this line raises NameError unless it is defined elsewhere --
# confirm where it should come from.
print(f"Num predicted soundevents: {len(predictions.sound_events)}")