In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to the project source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from typing import List, Optional

import pytorch_lightning as pl
from soundevent import data
from torch.utils.data import DataLoader

from batdetect2.data.labels import ClassMapper
from batdetect2.models.detectors import DetectorModel
from batdetect2.train.augmentations import (
 add_echo,
 select_random_subclip,
 warp_spectrogram,
)
from batdetect2.train.dataset import LabeledDataset, get_files
from batdetect2.train.preprocess import PreprocessingConfig

 from .autonotebook import tqdm as notebook_tqdm


## Training Datasets

In [3]:
# Example data sits in a sibling folder of the directory the notebook runs from.
data_dir = Path.cwd().parent.joinpath("example_data")

In [4]:
# Collect the training inputs from the `preprocessed` subfolder of data_dir.
# NOTE(review): presumably returns a list of file paths; confirm against
# batdetect2.train.dataset.get_files.
files = get_files(data_dir / "preprocessed")

In [5]:
# Wrap the collected files in the dataset that the DataLoader iterates over.
train_dataset = LabeledDataset(files)

In [6]:
# Serve the training set in shuffled mini-batches of 32, loaded by 4 workers.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

In [7]:
class Mapper(ClassMapper):
    """Translate sound event annotations to training class names and back.

    Social calls are collapsed into a single ``"social"`` class, echolocation
    calls are labelled with the value of their ``"class"`` tag, and every
    other kind of event is ignored.
    """

    # List of all possible classes
    class_labels = [
        "Eptesicus serotinus",
        "Myotis mystacinus",
        "Pipistrellus pipistrellus",
        "Rhinolophus ferrumequinum",
        "social",
    ]

    def encode(self, x: data.SoundEventAnnotation) -> Optional[str]:
        """Return the class name for an annotation, or ``None`` to skip it.

        Args:
            x: The annotation whose ``"event"`` and ``"class"`` tags are read.

        Returns:
            ``"social"`` for social calls, the ``"class"`` tag value for
            echolocation calls, and ``None`` for any other event type or when
            the required tag is missing.
        """
        event_tag = data.find_tag(x.tags, "event")

        if event_tag is None:
            # find_tag gives None when no tag has the requested key; skip the
            # annotation instead of failing on `.value`.
            return None

        if event_tag.value == "Social":
            return "social"

        if event_tag.value != "Echolocation":
            # Ignore all other types of calls
            return None

        species_tag = data.find_tag(x.tags, "class")

        if species_tag is None:
            # Echolocation call without a species label: nothing to learn from.
            return None

        return species_tag.value

    def decode(self, class_name: str) -> List[data.Tag]:
        """Return the tags that represent a predicted class name.

        Args:
            class_name: A class name, normally one of ``class_labels``.

        Returns:
            A single ``"event"`` tag for the social class, otherwise a single
            ``"class"`` tag carrying ``class_name``.
        """
        if class_name == "social":
            # NOTE(review): encode() matches the event value "Social" (capital
            # S) while this emits "social"; confirm which casing downstream
            # consumers expect.
            return [data.Tag(key="event", value="social")]

        return [data.Tag(key="class", value=class_name)]

In [8]:
# Seed the Python, NumPy and PyTorch random generators before the model is
# built, so weight initialisation and data shuffling are repeatable on a
# fresh Restart & Run All. `workers=True` also seeds DataLoader workers.
SEED = 42
pl.seed_everything(SEED, workers=True)

# Detector whose class outputs are defined by the Mapper above.
detector = DetectorModel(class_mapper=Mapper())

In [9]:
# Short demonstration run: two epochs, at most 100 training batches per
# epoch, with metrics logged on every step.
trainer = pl.Trainer(
    max_epochs=2,
    limit_train_batches=100,
    log_every_n_steps=1,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
# Train the detector on the training dataloader (no validation loader is given).
trainer.fit(
    model=detector,
    train_dataloaders=train_dataloader,
)


 | Name | Type | Params
------------------------------------------------
0 | feature_extractor | Net2DFast | 119 K 
1 | classifier | Conv2d | 54 
2 | bbox | Conv2d | 18 
------------------------------------------------
119 K Trainable params
448 Non-trainable params
119 K Total params
0.480 Total estimated model params size (MB)


Epoch 1: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.59it/s, v_num=13]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.54it/s, v_num=13]


In [11]:
# Fetch the annotation behind the first item (index 0) of the training dataset.
clip_annotation = train_dataset.get_clip_annotation(0)

In [12]:
# Run the detector on the clip that annotation refers to.
predictions = detector.compute_clip_predictions(clip_annotation.clip)

In [18]:
# Report how many sound events the detector predicted for the clip.
print(f"Num predicted soundevents: {len(predictions.sound_events)}")

Num predicted soundevents: 50
