batdetect2/bat_detect/detector/model_helpers.py
2023-01-25 19:17:38 +00:00

169 lines
4.8 KiB
Python

import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class SelfAttention(nn.Module):
    """Single-head self-attention applied along the last axis of a feature map.

    The input to ``forward`` must be 4-D with a singleton third axis, i.e.
    ``(batch, ip_dim, 1, steps)``; the output has the same shape.

    Args:
        ip_dim: Number of input (and output) channels.
        att_dim: Width of the key / query / value projections.
    """

    def __init__(self, ip_dim, att_dim):
        super().__init__()
        # Note: no positional information (absolute or relative) is encoded.
        self.temperature = 1.0
        self.att_dim = att_dim
        # Creation order is kept (key, value, query, projection) so that
        # seeded initialisation and state_dict ordering stay the same.
        self.key_fun = nn.Linear(ip_dim, att_dim)
        self.val_fun = nn.Linear(ip_dim, att_dim)
        self.que_fun = nn.Linear(ip_dim, att_dim)
        self.pro_fun = nn.Linear(att_dim, ip_dim)

    @staticmethod
    def _affine(feats, layer):
        """Apply ``layer``'s weight and bias to ``feats`` via an explicit matmul."""
        return torch.matmul(feats, layer.weight.T) + layer.bias[None, None]

    def forward(self, x):
        """Return the attended features, shaped like ``x``."""
        # (batch, ip_dim, 1, steps) -> (batch, steps, ip_dim)
        seq = x.squeeze(2).transpose(1, 2)

        keys = self._affine(seq, self.key_fun)
        queries = self._affine(seq, self.que_fun)
        values = self._affine(seq, self.val_fun)

        # Scores are divided by temperature * att_dim (not its square root).
        scale = self.temperature * self.att_dim
        scores = torch.bmm(keys, queries.transpose(1, 2)) / scale

        # Softmax over dim 1: each column of each attention matrix sums to 1.
        weights = F.softmax(scores, dim=1)

        # (batch, att_dim, steps) -> (batch, steps, att_dim) before projecting.
        attended = torch.bmm(values.transpose(1, 2), weights).transpose(1, 2)
        projected = self._affine(attended, self.pro_fun)

        # Back to the input layout: (batch, ip_dim, 1, steps).
        return projected.transpose(1, 2).unsqueeze(2)
class ConvBlockDownCoordF(nn.Module):
    """Downsampling block with an extra height-coordinate input channel.

    Pipeline: append coordinate channel -> conv -> 2x2 max-pool -> batch
    norm -> ReLU.

    Args:
        in_chn: Channels of the incoming tensor (the conv sees ``in_chn + 1``).
        out_chn: Channels produced by the convolution.
        ip_height: Size of axis 2 of the incoming tensor; fixes the length of
            the coordinate ramp.
        k_size: Convolution kernel size.
        pad_size: Convolution padding.
        stride: Convolution stride.
    """

    def __init__(
        self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1
    ):
        super().__init__()
        # Fixed ramp from -1 to 1 along the height axis, shape
        # (1, 1, ip_height, 1). Stored as a non-trainable parameter.
        ramp = torch.linspace(-1, 1, ip_height).reshape(1, 1, -1, 1)
        self.coords = nn.Parameter(ramp, requires_grad=False)
        self.conv = nn.Conv2d(
            in_chn + 1,
            out_chn,
            kernel_size=k_size,
            padding=pad_size,
            stride=stride,
        )
        self.conv_bn = nn.BatchNorm2d(out_chn)

    def forward(self, x):
        """Apply the block to a 4-D tensor whose axis 2 has ``ip_height`` entries."""
        n_batch, n_cols = x.shape[0], x.shape[3]
        # Broadcast the ramp over batch and width; ``cat`` materialises it.
        coord_channel = self.coords.expand(n_batch, -1, -1, n_cols)
        with_coords = torch.cat([x, coord_channel], dim=1)
        pooled = F.max_pool2d(self.conv(with_coords), kernel_size=2, stride=2)
        return F.relu(self.conv_bn(pooled), inplace=True)
class ConvBlockDownStandard(nn.Module):
    """Plain downsampling block: conv -> 2x2 max-pool -> batch norm -> ReLU.

    Args:
        in_chn: Channels of the incoming tensor.
        out_chn: Channels produced by the convolution.
        ip_height: Accepted but not used by this block.
        k_size: Convolution kernel size.
        pad_size: Convolution padding.
        stride: Convolution stride.
    """

    def __init__(
        self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1
    ):
        super().__init__()
        conv_kwargs = dict(kernel_size=k_size, padding=pad_size, stride=stride)
        self.conv = nn.Conv2d(in_chn, out_chn, **conv_kwargs)
        self.conv_bn = nn.BatchNorm2d(out_chn)

    def forward(self, x):
        """Return the convolved, pooled, normalised and rectified tensor."""
        pooled = F.max_pool2d(self.conv(x), kernel_size=2, stride=2)
        normed = self.conv_bn(pooled)
        return F.relu(normed, inplace=True)
class ConvBlockUpF(nn.Module):
    """Upsampling block with an extra height-coordinate input channel.

    Pipeline: interpolate by ``up_scale`` -> append coordinate channel ->
    conv -> batch norm -> ReLU.

    Args:
        in_chn: Channels of the incoming tensor (the conv sees ``in_chn + 1``).
        out_chn: Channels produced by the convolution.
        ip_height: Size of axis 2 of the incoming tensor *before* upsampling.
        k_size: Convolution kernel size.
        pad_size: Convolution padding.
        up_mode: Interpolation mode forwarded to ``F.interpolate``.
        up_scale: ``(height, width)`` integer upsampling factors.
    """

    # Modes for which ``F.interpolate`` accepts an explicit ``align_corners``;
    # for every other mode (e.g. "nearest", "area") it must be ``None``.
    _ALIGN_CORNERS_MODES = ("linear", "bilinear", "bicubic", "trilinear")

    def __init__(
        self,
        in_chn,
        out_chn,
        ip_height,
        k_size=3,
        pad_size=1,
        up_mode="bilinear",
        up_scale=(2, 2),
    ):
        super().__init__()
        self.up_scale = up_scale
        self.up_mode = up_mode
        # Fixed ramp from -1 to 1 over the *upsampled* height, shape
        # (1, 1, ip_height * up_scale[0], 1). Stored as a non-trainable
        # parameter.
        self.coords = nn.Parameter(
            torch.linspace(-1, 1, ip_height * up_scale[0])[
                None, None, ..., None
            ],
            requires_grad=False,
        )
        self.conv = nn.Conv2d(
            in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size
        )
        self.conv_bn = nn.BatchNorm2d(out_chn)

    def forward(self, x):
        """Upsample ``x``, append the coordinate channel and convolve."""
        # Passing align_corners=False with a non-interpolating mode raises a
        # ValueError inside F.interpolate, so only set it where it is valid.
        align_corners = (
            False if self.up_mode in self._ALIGN_CORNERS_MODES else None
        )
        op = F.interpolate(
            x,
            size=(
                x.shape[-2] * self.up_scale[0],
                x.shape[-1] * self.up_scale[1],
            ),
            mode=self.up_mode,
            align_corners=align_corners,
        )
        # Broadcast the ramp over batch and width; ``cat`` materialises it.
        freq_info = self.coords.expand(op.shape[0], -1, -1, op.shape[3])
        op = torch.cat((op, freq_info), 1)
        op = self.conv(op)
        op = F.relu(self.conv_bn(op), inplace=True)
        return op
class ConvBlockUpStandard(nn.Module):
    """Plain upsampling block: interpolate -> conv -> batch norm -> ReLU.

    Args:
        in_chn: Channels of the incoming tensor.
        out_chn: Channels produced by the convolution.
        ip_height: Accepted but not used by this block.
        k_size: Convolution kernel size.
        pad_size: Convolution padding.
        up_mode: Interpolation mode forwarded to ``F.interpolate``.
        up_scale: ``(height, width)`` integer upsampling factors.
    """

    # Modes for which ``F.interpolate`` accepts an explicit ``align_corners``;
    # for every other mode (e.g. "nearest", "area") it must be ``None``.
    _ALIGN_CORNERS_MODES = ("linear", "bilinear", "bicubic", "trilinear")

    def __init__(
        self,
        in_chn,
        out_chn,
        ip_height=None,
        k_size=3,
        pad_size=1,
        up_mode="bilinear",
        up_scale=(2, 2),
    ):
        super().__init__()
        self.up_scale = up_scale
        self.up_mode = up_mode
        self.conv = nn.Conv2d(
            in_chn, out_chn, kernel_size=k_size, padding=pad_size
        )
        self.conv_bn = nn.BatchNorm2d(out_chn)

    def forward(self, x):
        """Upsample ``x`` by ``up_scale`` and convolve the result."""
        # Passing align_corners=False with a non-interpolating mode raises a
        # ValueError inside F.interpolate, so only set it where it is valid.
        align_corners = (
            False if self.up_mode in self._ALIGN_CORNERS_MODES else None
        )
        op = F.interpolate(
            x,
            size=(
                x.shape[-2] * self.up_scale[0],
                x.shape[-1] * self.up_scale[1],
            ),
            mode=self.up_mode,
            align_corners=align_corners,
        )
        op = self.conv(op)
        op = F.relu(self.conv_bn(op), inplace=True)
        return op