mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 14:41:58 +02:00
formatted with black and isort
This commit is contained in:
parent
bf8230b2f7
commit
9cb6b20949
4
app.py
4
app.py
@ -82,9 +82,7 @@ def generate_results_image(audio_file, anns):
|
|||||||
duration = audio.shape[0] / sampling_rate
|
duration = audio.shape[0] / sampling_rate
|
||||||
|
|
||||||
# generate spec
|
# generate spec
|
||||||
spec, spec_viz = au.generate_spectrogram(
|
spec, spec_viz = au.generate_spectrogram(audio, sampling_rate, params, True, False)
|
||||||
audio, sampling_rate, params, True, False
|
|
||||||
)
|
|
||||||
|
|
||||||
# create fig
|
# create fig
|
||||||
plt.close("all")
|
plt.close("all")
|
||||||
|
@ -3,9 +3,7 @@ import numpy as np
|
|||||||
|
|
||||||
def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq):
|
def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq):
|
||||||
spec_ind = spec_height - spec_ind
|
spec_ind = spec_height - spec_ind
|
||||||
return round(
|
return round((spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2)
|
||||||
(spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_spec_slices(spec, pred_nms, params):
|
def extract_spec_slices(spec, pred_nms, params):
|
||||||
@ -27,9 +25,7 @@ def extract_spec_slices(spec, pred_nms, params):
|
|||||||
for ff in range(len(pred_nms["det_probs"])):
|
for ff in range(len(pred_nms["det_probs"])):
|
||||||
x_start = int(np.maximum(0, x_pos_pad[ff]))
|
x_start = int(np.maximum(0, x_pos_pad[ff]))
|
||||||
x_end = int(
|
x_end = int(
|
||||||
np.minimum(
|
np.minimum(spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff]))
|
||||||
spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff])
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
slices.append(spec[:, x_start:x_end].astype(np.float16))
|
slices.append(spec[:, x_start:x_end].astype(np.float16))
|
||||||
return slices
|
return slices
|
||||||
@ -66,15 +62,11 @@ def get_feats(spec, pred_nms, params):
|
|||||||
|
|
||||||
feature_names = get_feature_names()
|
feature_names = get_feature_names()
|
||||||
num_detections = len(pred_nms["det_probs"])
|
num_detections = len(pred_nms["det_probs"])
|
||||||
features = (
|
features = np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1
|
||||||
np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1
|
|
||||||
)
|
|
||||||
|
|
||||||
for ff in range(num_detections):
|
for ff in range(num_detections):
|
||||||
x_start = int(np.maximum(0, x_pos[ff]))
|
x_start = int(np.maximum(0, x_pos[ff]))
|
||||||
x_end = int(
|
x_end = int(np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff])))
|
||||||
np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff]))
|
|
||||||
)
|
|
||||||
# y low is the lowest freq but it will have a higher value due to array starting at 0 at top
|
# y low is the lowest freq but it will have a higher value due to array starting at 0 at top
|
||||||
y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff]))
|
y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff]))
|
||||||
y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff])))
|
y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff])))
|
||||||
@ -126,8 +118,7 @@ def get_feats(spec, pred_nms, params):
|
|||||||
|
|
||||||
if ff > 0:
|
if ff > 0:
|
||||||
features[ff, 8] = round(
|
features[ff, 8] = round(
|
||||||
pred_nms["start_times"][ff]
|
pred_nms["start_times"][ff] - pred_nms["start_times"][ff - 1],
|
||||||
- pred_nms["start_times"][ff - 1],
|
|
||||||
5,
|
5,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,22 +20,18 @@ class SelfAttention(nn.Module):
|
|||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
x = x.squeeze(2).permute(0, 2, 1)
|
x = x.squeeze(2).permute(0, 2, 1)
|
||||||
|
|
||||||
kk = torch.matmul(
|
kk = torch.matmul(x, self.key_fun.weight.T) + self.key_fun.bias.unsqueeze(
|
||||||
x, self.key_fun.weight.T
|
0
|
||||||
) + self.key_fun.bias.unsqueeze(0).unsqueeze(0)
|
).unsqueeze(0)
|
||||||
qq = torch.matmul(
|
qq = torch.matmul(x, self.que_fun.weight.T) + self.que_fun.bias.unsqueeze(
|
||||||
x, self.que_fun.weight.T
|
0
|
||||||
) + self.que_fun.bias.unsqueeze(0).unsqueeze(0)
|
).unsqueeze(0)
|
||||||
vv = torch.matmul(
|
vv = torch.matmul(x, self.val_fun.weight.T) + self.val_fun.bias.unsqueeze(
|
||||||
x, self.val_fun.weight.T
|
0
|
||||||
) + self.val_fun.bias.unsqueeze(0).unsqueeze(0)
|
).unsqueeze(0)
|
||||||
|
|
||||||
kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (
|
kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (self.temperature * self.att_dim)
|
||||||
self.temperature * self.att_dim
|
att_weights = F.softmax(kk_qq, 1) # each col of each attention matrix sums to 1
|
||||||
)
|
|
||||||
att_weights = F.softmax(
|
|
||||||
kk_qq, 1
|
|
||||||
) # each col of each attention matrix sums to 1
|
|
||||||
att = torch.bmm(vv.permute(0, 2, 1), att_weights)
|
att = torch.bmm(vv.permute(0, 2, 1), att_weights)
|
||||||
|
|
||||||
op = torch.matmul(
|
op = torch.matmul(
|
||||||
@ -47,9 +43,7 @@ class SelfAttention(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
class ConvBlockDownCoordF(nn.Module):
|
class ConvBlockDownCoordF(nn.Module):
|
||||||
def __init__(
|
def __init__(self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1):
|
||||||
self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1
|
|
||||||
):
|
|
||||||
super(ConvBlockDownCoordF, self).__init__()
|
super(ConvBlockDownCoordF, self).__init__()
|
||||||
self.coords = nn.Parameter(
|
self.coords = nn.Parameter(
|
||||||
torch.linspace(-1, 1, ip_height)[None, None, ..., None],
|
torch.linspace(-1, 1, ip_height)[None, None, ..., None],
|
||||||
@ -73,9 +67,7 @@ class ConvBlockDownCoordF(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
class ConvBlockDownStandard(nn.Module):
|
class ConvBlockDownStandard(nn.Module):
|
||||||
def __init__(
|
def __init__(self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1):
|
||||||
self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1
|
|
||||||
):
|
|
||||||
super(ConvBlockDownStandard, self).__init__()
|
super(ConvBlockDownStandard, self).__init__()
|
||||||
self.conv = nn.Conv2d(
|
self.conv = nn.Conv2d(
|
||||||
in_chn,
|
in_chn,
|
||||||
@ -107,14 +99,10 @@ class ConvBlockUpF(nn.Module):
|
|||||||
self.up_scale = up_scale
|
self.up_scale = up_scale
|
||||||
self.up_mode = up_mode
|
self.up_mode = up_mode
|
||||||
self.coords = nn.Parameter(
|
self.coords = nn.Parameter(
|
||||||
torch.linspace(-1, 1, ip_height * up_scale[0])[
|
torch.linspace(-1, 1, ip_height * up_scale[0])[None, None, ..., None],
|
||||||
None, None, ..., None
|
|
||||||
],
|
|
||||||
requires_grad=False,
|
requires_grad=False,
|
||||||
)
|
)
|
||||||
self.conv = nn.Conv2d(
|
self.conv = nn.Conv2d(in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size)
|
||||||
in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size
|
|
||||||
)
|
|
||||||
self.conv_bn = nn.BatchNorm2d(out_chn)
|
self.conv_bn = nn.BatchNorm2d(out_chn)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
@ -148,9 +136,7 @@ class ConvBlockUpStandard(nn.Module):
|
|||||||
super(ConvBlockUpStandard, self).__init__()
|
super(ConvBlockUpStandard, self).__init__()
|
||||||
self.up_scale = up_scale
|
self.up_scale = up_scale
|
||||||
self.up_mode = up_mode
|
self.up_mode = up_mode
|
||||||
self.conv = nn.Conv2d(
|
self.conv = nn.Conv2d(in_chn, out_chn, kernel_size=k_size, padding=pad_size)
|
||||||
in_chn, out_chn, kernel_size=k_size, padding=pad_size
|
|
||||||
)
|
|
||||||
self.conv_bn = nn.BatchNorm2d(out_chn)
|
self.conv_bn = nn.BatchNorm2d(out_chn)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
|
@ -81,17 +81,13 @@ class Net2DFast(nn.Module):
|
|||||||
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
||||||
)
|
)
|
||||||
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
||||||
self.conv_size_op = nn.Conv2d(
|
self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
|
||||||
num_filts // 4, 2, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
self.conv_classes_op = nn.Conv2d(
|
self.conv_classes_op = nn.Conv2d(
|
||||||
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.emb_dim > 0:
|
if self.emb_dim > 0:
|
||||||
self.conv_emb = nn.Conv2d(
|
self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
|
||||||
num_filts, self.emb_dim, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
|
|
||||||
def forward(self, ip, return_feats=False):
|
def forward(self, ip, return_feats=False):
|
||||||
|
|
||||||
@ -198,17 +194,13 @@ class Net2DFastNoAttn(nn.Module):
|
|||||||
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
||||||
)
|
)
|
||||||
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
||||||
self.conv_size_op = nn.Conv2d(
|
self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
|
||||||
num_filts // 4, 2, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
self.conv_classes_op = nn.Conv2d(
|
self.conv_classes_op = nn.Conv2d(
|
||||||
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.emb_dim > 0:
|
if self.emb_dim > 0:
|
||||||
self.conv_emb = nn.Conv2d(
|
self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
|
||||||
num_filts, self.emb_dim, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
|
|
||||||
def forward(self, ip, return_feats=False):
|
def forward(self, ip, return_feats=False):
|
||||||
|
|
||||||
@ -312,17 +304,13 @@ class Net2DFastNoCoordConv(nn.Module):
|
|||||||
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
num_filts // 4, num_filts // 4, kernel_size=3, padding=1
|
||||||
)
|
)
|
||||||
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
|
||||||
self.conv_size_op = nn.Conv2d(
|
self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
|
||||||
num_filts // 4, 2, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
self.conv_classes_op = nn.Conv2d(
|
self.conv_classes_op = nn.Conv2d(
|
||||||
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.emb_dim > 0:
|
if self.emb_dim > 0:
|
||||||
self.conv_emb = nn.Conv2d(
|
self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
|
||||||
num_filts, self.emb_dim, kernel_size=1, padding=0
|
|
||||||
)
|
|
||||||
|
|
||||||
def forward(self, ip, return_feats=False):
|
def forward(self, ip, return_feats=False):
|
||||||
|
|
||||||
|
@ -22,9 +22,7 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
|
|||||||
params["experiment"] = os.path.join(exps_dir, now_str, "")
|
params["experiment"] = os.path.join(exps_dir, now_str, "")
|
||||||
params["model_file_name"] = os.path.join(params["experiment"], model_name)
|
params["model_file_name"] = os.path.join(params["experiment"], model_name)
|
||||||
params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "")
|
params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "")
|
||||||
params["op_im_dir_test"] = os.path.join(
|
params["op_im_dir_test"] = os.path.join(params["experiment"], "op_ims_test", "")
|
||||||
params["experiment"], "op_ims_test", ""
|
|
||||||
)
|
|
||||||
# params['notes'] = '' # can save notes about an experiment here
|
# params['notes'] = '' # can save notes about an experiment here
|
||||||
|
|
||||||
# spec parameters
|
# spec parameters
|
||||||
@ -36,12 +34,8 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
|
|||||||
) # in milliseconds, amount of time per stft time step
|
) # in milliseconds, amount of time per stft time step
|
||||||
params["fft_overlap"] = 0.75 # stft window overlap
|
params["fft_overlap"] = 0.75 # stft window overlap
|
||||||
|
|
||||||
params[
|
params["max_freq"] = 120000 # in Hz, everything above this will be discarded
|
||||||
"max_freq"
|
params["min_freq"] = 10000 # in Hz, everything below this will be discarded
|
||||||
] = 120000 # in Hz, everything above this will be discarded
|
|
||||||
params[
|
|
||||||
"min_freq"
|
|
||||||
] = 10000 # in Hz, everything below this will be discarded
|
|
||||||
|
|
||||||
params[
|
params[
|
||||||
"resize_factor"
|
"resize_factor"
|
||||||
@ -57,13 +51,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
|
|||||||
] = 32 # spectrogram should be divisible by this amount in width and height
|
] = 32 # spectrogram should be divisible by this amount in width and height
|
||||||
|
|
||||||
# spec processing params
|
# spec processing params
|
||||||
params[
|
params["denoise_spec_avg"] = True # removes the mean for each frequency band
|
||||||
"denoise_spec_avg"
|
|
||||||
] = True # removes the mean for each frequency band
|
|
||||||
params["scale_raw_audio"] = False # scales the raw audio to [-1, 1]
|
params["scale_raw_audio"] = False # scales the raw audio to [-1, 1]
|
||||||
params[
|
params["max_scale_spec"] = False # scales the spectrogram so that it is max 1
|
||||||
"max_scale_spec"
|
|
||||||
] = False # scales the spectrogram so that it is max 1
|
|
||||||
params["spec_scale"] = "pcen" # 'log', 'pcen', 'none'
|
params["spec_scale"] = "pcen" # 'log', 'pcen', 'none'
|
||||||
|
|
||||||
# detection params
|
# detection params
|
||||||
@ -83,21 +73,13 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
|
|||||||
params["target_sigma"] = 2.0
|
params["target_sigma"] = 2.0
|
||||||
|
|
||||||
# augmentation params
|
# augmentation params
|
||||||
params[
|
params["aug_prob"] = 0.20 # augmentations will be performed with this probability
|
||||||
"aug_prob"
|
|
||||||
] = 0.20 # augmentations will be performed with this probability
|
|
||||||
params["augment_at_train"] = True
|
params["augment_at_train"] = True
|
||||||
params["augment_at_train_combine"] = True
|
params["augment_at_train_combine"] = True
|
||||||
params[
|
params["echo_max_delay"] = 0.005 # simulate echo by adding copy of raw audio
|
||||||
"echo_max_delay"
|
|
||||||
] = 0.005 # simulate echo by adding copy of raw audio
|
|
||||||
params["stretch_squeeze_delta"] = 0.04 # stretch or squeeze spec
|
params["stretch_squeeze_delta"] = 0.04 # stretch or squeeze spec
|
||||||
params[
|
params["mask_max_time_perc"] = 0.05 # max mask size - here percentage, not ideal
|
||||||
"mask_max_time_perc"
|
params["mask_max_freq_perc"] = 0.10 # max mask size - here percentage, not ideal
|
||||||
] = 0.05 # max mask size - here percentage, not ideal
|
|
||||||
params[
|
|
||||||
"mask_max_freq_perc"
|
|
||||||
] = 0.10 # max mask size - here percentage, not ideal
|
|
||||||
params[
|
params[
|
||||||
"spec_amp_scaling"
|
"spec_amp_scaling"
|
||||||
] = 2.0 # multiply the "volume" by 0:X times current amount
|
] = 2.0 # multiply the "volume" by 0:X times current amount
|
||||||
@ -113,16 +95,12 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
|
|||||||
|
|
||||||
# loss params
|
# loss params
|
||||||
params["train_loss"] = "focal" # mse or focal
|
params["train_loss"] = "focal" # mse or focal
|
||||||
params[
|
params["det_loss_weight"] = 1.0 # weight for the detection part of the loss
|
||||||
"det_loss_weight"
|
|
||||||
] = 1.0 # weight for the detection part of the loss
|
|
||||||
params["size_loss_weight"] = 0.1 # weight for the bbox size loss
|
params["size_loss_weight"] = 0.1 # weight for the bbox size loss
|
||||||
params["class_loss_weight"] = 2.0 # weight for the classification loss
|
params["class_loss_weight"] = 2.0 # weight for the classification loss
|
||||||
params["individual_loss_weight"] = 0.0 # not used
|
params["individual_loss_weight"] = 0.0 # not used
|
||||||
if params["individual_loss_weight"] == 0.0:
|
if params["individual_loss_weight"] == 0.0:
|
||||||
params[
|
params["emb_dim"] = 0 # number of dimensions used for individual id embedding
|
||||||
"emb_dim"
|
|
||||||
] = 0 # number of dimensions used for individual id embedding
|
|
||||||
else:
|
else:
|
||||||
params["emb_dim"] = 3
|
params["emb_dim"] = 3
|
||||||
|
|
||||||
|
@ -24,9 +24,7 @@ def run_nms(outputs, params, sampling_rate):
|
|||||||
pred_size = outputs["pred_size"] # box size
|
pred_size = outputs["pred_size"] # box size
|
||||||
|
|
||||||
pred_det_nms = non_max_suppression(pred_det, params["nms_kernel_size"])
|
pred_det_nms = non_max_suppression(pred_det, params["nms_kernel_size"])
|
||||||
freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[
|
freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[-2]
|
||||||
-2
|
|
||||||
]
|
|
||||||
|
|
||||||
# NOTE there will be small differences depending on which sampling rate is chosen
|
# NOTE there will be small differences depending on which sampling rate is chosen
|
||||||
# as we are choosing the same sampling rate for the entire batch
|
# as we are choosing the same sampling rate for the entire batch
|
||||||
@ -62,8 +60,7 @@ def run_nms(outputs, params, sampling_rate):
|
|||||||
params["fft_overlap"],
|
params["fft_overlap"],
|
||||||
)
|
)
|
||||||
pred["end_times"] = x_coords_to_time(
|
pred["end_times"] = x_coords_to_time(
|
||||||
(pred["x_pos"].float() + pred["bb_width"])
|
(pred["x_pos"].float() + pred["bb_width"]) / params["resize_factor"],
|
||||||
/ params["resize_factor"],
|
|
||||||
sampling_rate[ii].item(),
|
sampling_rate[ii].item(),
|
||||||
params["fft_win_length"],
|
params["fft_win_length"],
|
||||||
params["fft_overlap"],
|
params["fft_overlap"],
|
||||||
@ -71,9 +68,7 @@ def run_nms(outputs, params, sampling_rate):
|
|||||||
pred["low_freqs"] = (
|
pred["low_freqs"] = (
|
||||||
pred_size[ii].shape[1] - pred["y_pos"].float()
|
pred_size[ii].shape[1] - pred["y_pos"].float()
|
||||||
) * freq_rescale + params["min_freq"]
|
) * freq_rescale + params["min_freq"]
|
||||||
pred["high_freqs"] = (
|
pred["high_freqs"] = pred["low_freqs"] + pred["bb_height"] * freq_rescale
|
||||||
pred["low_freqs"] + pred["bb_height"] * freq_rescale
|
|
||||||
)
|
|
||||||
|
|
||||||
# extract the per class votes
|
# extract the per class votes
|
||||||
if "pred_class" in outputs:
|
if "pred_class" in outputs:
|
||||||
|
@ -207,9 +207,7 @@ def load_sonobat_preds(dataset, id, sb_meta, set_class_name=None):
|
|||||||
ann_c["class"] = file_res[id]["species_1"]
|
ann_c["class"] = file_res[id]["species_1"]
|
||||||
else:
|
else:
|
||||||
ann_c["class"] = set_class_name
|
ann_c["class"] = set_class_name
|
||||||
ann_c["start_time"] = np.round(
|
ann_c["start_time"] = np.round(da_c.iloc[aa]["TimeInFile"] / 1000.0, 5)
|
||||||
da_c.iloc[aa]["TimeInFile"] / 1000.0, 5
|
|
||||||
)
|
|
||||||
ann_c["end_time"] = np.round(
|
ann_c["end_time"] = np.round(
|
||||||
ann_c["start_time"] + da_c.iloc[aa]["CallDuration"] / 1000.0, 5
|
ann_c["start_time"] + da_c.iloc[aa]["CallDuration"] / 1000.0, 5
|
||||||
)
|
)
|
||||||
@ -267,9 +265,7 @@ def assign_to_gt(gt, pred, iou_thresh):
|
|||||||
iou_m = np.zeros((num_preds, num_gts))
|
iou_m = np.zeros((num_preds, num_gts))
|
||||||
for ii in range(num_preds):
|
for ii in range(num_preds):
|
||||||
for jj in range(num_gts):
|
for jj in range(num_gts):
|
||||||
iou_m[ii, jj] = bb_overlap(
|
iou_m[ii, jj] = bb_overlap(gt["annotation"][jj], pred["annotation"][ii])
|
||||||
gt["annotation"][jj], pred["annotation"][ii]
|
|
||||||
)
|
|
||||||
|
|
||||||
# greedily assign detections to ground truths
|
# greedily assign detections to ground truths
|
||||||
# needs to be greater than some threshold and we cannot assign GT
|
# needs to be greater than some threshold and we cannot assign GT
|
||||||
@ -278,9 +274,7 @@ def assign_to_gt(gt, pred, iou_thresh):
|
|||||||
for jj in range(num_gts):
|
for jj in range(num_gts):
|
||||||
max_iou = np.argmax(iou_m[:, jj])
|
max_iou = np.argmax(iou_m[:, jj])
|
||||||
if iou_m[max_iou, jj] > iou_thresh:
|
if iou_m[max_iou, jj] > iou_thresh:
|
||||||
pred["annotation"][max_iou]["class"] = gt["annotation"][jj][
|
pred["annotation"][max_iou]["class"] = gt["annotation"][jj]["class"]
|
||||||
"class"
|
|
||||||
]
|
|
||||||
iou_m[max_iou, :] = -1.0
|
iou_m[max_iou, :] = -1.0
|
||||||
|
|
||||||
return pred
|
return pred
|
||||||
@ -290,25 +284,17 @@ def parse_data(data, class_names, non_event_classes, is_pred=False):
|
|||||||
class_names_all = class_names + non_event_classes
|
class_names_all = class_names + non_event_classes
|
||||||
|
|
||||||
data["class_names"] = np.array([aa["class"] for aa in data["annotation"]])
|
data["class_names"] = np.array([aa["class"] for aa in data["annotation"]])
|
||||||
data["start_times"] = np.array(
|
data["start_times"] = np.array([aa["start_time"] for aa in data["annotation"]])
|
||||||
[aa["start_time"] for aa in data["annotation"]]
|
|
||||||
)
|
|
||||||
data["end_times"] = np.array([aa["end_time"] for aa in data["annotation"]])
|
data["end_times"] = np.array([aa["end_time"] for aa in data["annotation"]])
|
||||||
data["high_freqs"] = np.array(
|
data["high_freqs"] = np.array([float(aa["high_freq"]) for aa in data["annotation"]])
|
||||||
[float(aa["high_freq"]) for aa in data["annotation"]]
|
data["low_freqs"] = np.array([float(aa["low_freq"]) for aa in data["annotation"]])
|
||||||
)
|
|
||||||
data["low_freqs"] = np.array(
|
|
||||||
[float(aa["low_freq"]) for aa in data["annotation"]]
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_pred:
|
if is_pred:
|
||||||
# when loading predictions
|
# when loading predictions
|
||||||
data["det_probs"] = np.array(
|
data["det_probs"] = np.array(
|
||||||
[float(aa["det_prob"]) for aa in data["annotation"]]
|
[float(aa["det_prob"]) for aa in data["annotation"]]
|
||||||
)
|
)
|
||||||
data["class_probs"] = np.zeros(
|
data["class_probs"] = np.zeros((len(class_names) + 1, len(data["annotation"])))
|
||||||
(len(class_names) + 1, len(data["annotation"]))
|
|
||||||
)
|
|
||||||
data["class_ids"] = np.array(
|
data["class_ids"] = np.array(
|
||||||
[class_names_all.index(aa["class"]) for aa in data["annotation"]]
|
[class_names_all.index(aa["class"]) for aa in data["annotation"]]
|
||||||
).astype(np.int32)
|
).astype(np.int32)
|
||||||
@ -334,8 +320,7 @@ def load_gt_data(datasets, events_of_interest, class_names, classes_to_ignore):
|
|||||||
[dd], events_of_interest=events_of_interest, verbose=True
|
[dd], events_of_interest=events_of_interest, verbose=True
|
||||||
)
|
)
|
||||||
gt_dataset = [
|
gt_dataset = [
|
||||||
parse_data(gg, class_names, classes_to_ignore, False)
|
parse_data(gg, class_names, classes_to_ignore, False) for gg in gt_dataset
|
||||||
for gg in gt_dataset
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for gt in gt_dataset:
|
for gt in gt_dataset:
|
||||||
@ -371,9 +356,7 @@ def eval_rf_model(clf, pred, un_train_class, num_classes):
|
|||||||
# stores the prediction in place
|
# stores the prediction in place
|
||||||
if pred["feats"].shape[0] > 0:
|
if pred["feats"].shape[0] > 0:
|
||||||
pred["class_probs"] = np.zeros((num_classes, pred["feats"].shape[0]))
|
pred["class_probs"] = np.zeros((num_classes, pred["feats"].shape[0]))
|
||||||
pred["class_probs"][un_train_class, :] = clf.predict_proba(
|
pred["class_probs"][un_train_class, :] = clf.predict_proba(pred["feats"]).T
|
||||||
pred["feats"]
|
|
||||||
).T
|
|
||||||
pred["det_probs"] = pred["class_probs"][:-1, :].sum(0)
|
pred["det_probs"] = pred["class_probs"][:-1, :].sum(0)
|
||||||
else:
|
else:
|
||||||
pred["class_probs"] = np.zeros((num_classes, 0))
|
pred["class_probs"] = np.zeros((num_classes, 0))
|
||||||
@ -474,12 +457,8 @@ if __name__ == "__main__":
|
|||||||
help="Output directory for plots",
|
help="Output directory for plots",
|
||||||
)
|
)
|
||||||
parser.add_argument("data_dir", type=str, help="Path to root of datasets")
|
parser.add_argument("data_dir", type=str, help="Path to root of datasets")
|
||||||
parser.add_argument(
|
parser.add_argument("ann_dir", type=str, help="Path to extracted annotations")
|
||||||
"ann_dir", type=str, help="Path to extracted annotations"
|
parser.add_argument("bd_model_path", type=str, help="Path to BatDetect model")
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"bd_model_path", type=str, help="Path to BatDetect model"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--test_file",
|
"--test_file",
|
||||||
type=str,
|
type=str,
|
||||||
@ -519,9 +498,7 @@ if __name__ == "__main__":
|
|||||||
default="",
|
default="",
|
||||||
help="Text to add as title of plots",
|
help="Text to add as title of plots",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--rand_seed", type=int, default=2001, help="Random seed")
|
||||||
"--rand_seed", type=int, default=2001, help="Random seed"
|
|
||||||
)
|
|
||||||
args = vars(parser.parse_args())
|
args = vars(parser.parse_args())
|
||||||
|
|
||||||
np.random.seed(args["rand_seed"])
|
np.random.seed(args["rand_seed"])
|
||||||
@ -554,9 +531,7 @@ if __name__ == "__main__":
|
|||||||
test_dict["dataset_name"] = args["test_file"].replace(".json", "")
|
test_dict["dataset_name"] = args["test_file"].replace(".json", "")
|
||||||
test_dict["is_test"] = True
|
test_dict["is_test"] = True
|
||||||
test_dict["is_binary"] = True
|
test_dict["is_binary"] = True
|
||||||
test_dict["ann_path"] = os.path.join(
|
test_dict["ann_path"] = os.path.join(args["ann_dir"], args["test_file"])
|
||||||
args["ann_dir"], args["test_file"]
|
|
||||||
)
|
|
||||||
test_dict["wav_path"] = args["data_dir"]
|
test_dict["wav_path"] = args["data_dir"]
|
||||||
test_sets = [test_dict]
|
test_sets = [test_dict]
|
||||||
|
|
||||||
@ -607,9 +582,7 @@ if __name__ == "__main__":
|
|||||||
for ii, gt in enumerate(gt_test):
|
for ii, gt in enumerate(gt_test):
|
||||||
sb_pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta)
|
sb_pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta)
|
||||||
if sb_pred["class_name"] != "":
|
if sb_pred["class_name"] != "":
|
||||||
sb_pred = parse_data(
|
sb_pred = parse_data(sb_pred, class_names, classes_to_ignore, True)
|
||||||
sb_pred, class_names, classes_to_ignore, True
|
|
||||||
)
|
|
||||||
sb_pred["class_probs"][
|
sb_pred["class_probs"][
|
||||||
sb_pred["class_ids"],
|
sb_pred["class_ids"],
|
||||||
np.arange(sb_pred["class_probs"].shape[1]),
|
np.arange(sb_pred["class_probs"].shape[1]),
|
||||||
@ -644,9 +617,7 @@ if __name__ == "__main__":
|
|||||||
x_train = []
|
x_train = []
|
||||||
y_train = []
|
y_train = []
|
||||||
for gt in gt_train:
|
for gt in gt_train:
|
||||||
pred = load_sonobat_preds(
|
pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat")
|
||||||
gt["dataset_name"], gt["id"], sb_meta, "Not Bat"
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(pred["annotation"]) > 0:
|
if len(pred["annotation"]) > 0:
|
||||||
# compute detection overlap with ground truth to determine which are the TP detections
|
# compute detection overlap with ground truth to determine which are the TP detections
|
||||||
@ -663,9 +634,7 @@ if __name__ == "__main__":
|
|||||||
# run the model on the test set
|
# run the model on the test set
|
||||||
preds_sb_rf = []
|
preds_sb_rf = []
|
||||||
for gt in gt_test:
|
for gt in gt_test:
|
||||||
pred = load_sonobat_preds(
|
pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat")
|
||||||
gt["dataset_name"], gt["id"], sb_meta, "Not Bat"
|
|
||||||
)
|
|
||||||
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
||||||
pred = eval_rf_model(clf_sb, pred, un_train_class, num_classes)
|
pred = eval_rf_model(clf_sb, pred, un_train_class, num_classes)
|
||||||
preds_sb_rf.append(pred)
|
preds_sb_rf.append(pred)
|
||||||
@ -697,9 +666,7 @@ if __name__ == "__main__":
|
|||||||
x_train = []
|
x_train = []
|
||||||
y_train = []
|
y_train = []
|
||||||
for gt in gt_train:
|
for gt in gt_train:
|
||||||
pred = load_tadarida_pred(
|
pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"])
|
||||||
args["td_ip_dir"], gt["dataset_name"], gt["id"]
|
|
||||||
)
|
|
||||||
# compute detection overlap with ground truth to determine which are the TP detections
|
# compute detection overlap with ground truth to determine which are the TP detections
|
||||||
assign_to_gt(gt, pred, args["iou_thresh"])
|
assign_to_gt(gt, pred, args["iou_thresh"])
|
||||||
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
||||||
@ -714,9 +681,7 @@ if __name__ == "__main__":
|
|||||||
# run the model on the test set
|
# run the model on the test set
|
||||||
preds_td = []
|
preds_td = []
|
||||||
for gt in gt_test:
|
for gt in gt_test:
|
||||||
pred = load_tadarida_pred(
|
pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"])
|
||||||
args["td_ip_dir"], gt["dataset_name"], gt["id"]
|
|
||||||
)
|
|
||||||
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
pred = parse_data(pred, class_names, classes_to_ignore, True)
|
||||||
pred = eval_rf_model(clf_td, pred, un_train_class, num_classes)
|
pred = eval_rf_model(clf_td, pred, un_train_class, num_classes)
|
||||||
preds_td.append(pred)
|
preds_td.append(pred)
|
||||||
|
@ -28,9 +28,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
print(info_str)
|
print(info_str)
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("audio_path", type=str, help="Input directory for audio")
|
||||||
"audio_path", type=str, help="Input directory for audio"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"train_ann_path",
|
"train_ann_path",
|
||||||
type=str,
|
type=str,
|
||||||
@ -41,9 +39,7 @@ if __name__ == "__main__":
|
|||||||
type=str,
|
type=str,
|
||||||
help="Path to where test annotation file is stored",
|
help="Path to where test annotation file is stored",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("model_path", type=str, help="Path to pretrained model")
|
||||||
"model_path", type=str, help="Path to pretrained model"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--op_model_name",
|
"--op_model_name",
|
||||||
type=str,
|
type=str,
|
||||||
@ -82,9 +78,7 @@ if __name__ == "__main__":
|
|||||||
params["device"] = "cuda"
|
params["device"] = "cuda"
|
||||||
else:
|
else:
|
||||||
params["device"] = "cpu"
|
params["device"] = "cpu"
|
||||||
print(
|
print("\nNote, this will be a lot faster if you use computer with a GPU.\n")
|
||||||
"\nNote, this will be a lot faster if you use computer with a GPU.\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
print("\nAudio directory: " + args["audio_path"])
|
print("\nAudio directory: " + args["audio_path"])
|
||||||
print("Train file: " + args["train_ann_path"])
|
print("Train file: " + args["train_ann_path"])
|
||||||
@ -98,9 +92,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if args["train_from_scratch"]:
|
if args["train_from_scratch"]:
|
||||||
print(
|
print("\nTraining model from scratch i.e. not using pretrained weights")
|
||||||
"\nTraining model from scratch i.e. not using pretrained weights"
|
|
||||||
)
|
|
||||||
model, params_train = du.load_model(args["model_path"], False)
|
model, params_train = du.load_model(args["model_path"], False)
|
||||||
else:
|
else:
|
||||||
model, params_train = du.load_model(args["model_path"], True)
|
model, params_train = du.load_model(args["model_path"], True)
|
||||||
@ -137,17 +129,13 @@ if __name__ == "__main__":
|
|||||||
data_train,
|
data_train,
|
||||||
params["class_names"],
|
params["class_names"],
|
||||||
params["class_inv_freq"],
|
params["class_inv_freq"],
|
||||||
) = tu.load_set_of_anns(
|
) = tu.load_set_of_anns(train_sets, classes_to_ignore, params["events_of_interest"])
|
||||||
train_sets, classes_to_ignore, params["events_of_interest"]
|
|
||||||
)
|
|
||||||
print("Number of files", len(data_train))
|
print("Number of files", len(data_train))
|
||||||
|
|
||||||
params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
|
params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
|
||||||
params["class_names"]
|
params["class_names"]
|
||||||
)
|
)
|
||||||
params["class_names_short"] = tu.get_short_class_names(
|
params["class_names_short"] = tu.get_short_class_names(params["class_names"])
|
||||||
params["class_names"]
|
|
||||||
)
|
|
||||||
|
|
||||||
# load test annotations
|
# load test annotations
|
||||||
test_sets = []
|
test_sets = []
|
||||||
@ -230,9 +218,7 @@ if __name__ == "__main__":
|
|||||||
param.requires_grad = False
|
param.requires_grad = False
|
||||||
|
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
|
optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
|
||||||
scheduler = CosineAnnealingLR(
|
scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader))
|
||||||
optimizer, params["num_epochs"] * len(train_loader)
|
|
||||||
)
|
|
||||||
if params["train_loss"] == "mse":
|
if params["train_loss"] == "mse":
|
||||||
det_criterion = losses.mse_loss
|
det_criterion = losses.mse_loss
|
||||||
elif params["train_loss"] == "focal":
|
elif params["train_loss"] == "focal":
|
||||||
@ -307,9 +293,7 @@ if __name__ == "__main__":
|
|||||||
test_plt_class.update_and_save(
|
test_plt_class.update_and_save(
|
||||||
epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
|
epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
|
||||||
)
|
)
|
||||||
pu.plot_pr_curve_class(
|
pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res)
|
||||||
params["experiment"], "test_pr", "test_pr", test_res
|
|
||||||
)
|
|
||||||
|
|
||||||
# save finetuned model
|
# save finetuned model
|
||||||
print("saving model to: " + params["model_file_name"])
|
print("saving model to: " + params["model_file_name"])
|
||||||
|
@ -58,12 +58,8 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
print(info_str)
|
print(info_str)
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("dataset_name", type=str, help="Name to call your dataset")
|
||||||
"dataset_name", type=str, help="Name to call your dataset"
|
parser.add_argument("audio_dir", type=str, help="Input directory for audio")
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"audio_dir", type=str, help="Input directory for audio"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"ann_dir",
|
"ann_dir",
|
||||||
type=str,
|
type=str,
|
||||||
@ -151,14 +147,10 @@ if __name__ == "__main__":
|
|||||||
test_files = load_file_names(args["test_file"])
|
test_files = load_file_names(args["test_file"])
|
||||||
file_names_all = [dd["id"] for dd in data_all]
|
file_names_all = [dd["id"] for dd in data_all]
|
||||||
train_inds = [
|
train_inds = [
|
||||||
file_names_all.index(ff)
|
file_names_all.index(ff) for ff in train_files if ff in file_names_all
|
||||||
for ff in train_files
|
|
||||||
if ff in file_names_all
|
|
||||||
]
|
]
|
||||||
test_inds = [
|
test_inds = [
|
||||||
file_names_all.index(ff)
|
file_names_all.index(ff) for ff in test_files if ff in file_names_all
|
||||||
for ff in test_files
|
|
||||||
if ff in file_names_all
|
|
||||||
]
|
]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -181,9 +173,7 @@ if __name__ == "__main__":
|
|||||||
op_name_train = op_name + "_TRAIN.json"
|
op_name_train = op_name + "_TRAIN.json"
|
||||||
op_name_test = op_name + "_TEST.json"
|
op_name_test = op_name + "_TEST.json"
|
||||||
|
|
||||||
class_un_train = print_dataset_stats(
|
class_un_train = print_dataset_stats(data_train, "Train", classes_to_ignore)
|
||||||
data_train, "Train", classes_to_ignore
|
|
||||||
)
|
|
||||||
class_un_test = print_dataset_stats(data_test, "Test", classes_to_ignore)
|
class_un_test = print_dataset_stats(data_test, "Test", classes_to_ignore)
|
||||||
|
|
||||||
if len(data_train) > 0 and len(data_test) > 0:
|
if len(data_train) > 0 and len(data_test) > 0:
|
||||||
|
@ -73,9 +73,7 @@ def generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, params):
|
|||||||
y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32)
|
y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32)
|
||||||
y_2d_size = np.zeros((2, op_height, op_width), dtype=np.float32)
|
y_2d_size = np.zeros((2, op_height, op_width), dtype=np.float32)
|
||||||
# num classes and "background" class
|
# num classes and "background" class
|
||||||
y_2d_classes = np.zeros(
|
y_2d_classes = np.zeros((num_classes + 1, op_height, op_width), dtype=np.float32)
|
||||||
(num_classes + 1, op_height, op_width), dtype=np.float32
|
|
||||||
)
|
|
||||||
|
|
||||||
# create 2D ground truth heatmaps
|
# create 2D ground truth heatmaps
|
||||||
for ii in valid_inds:
|
for ii in valid_inds:
|
||||||
@ -128,8 +126,7 @@ def draw_gaussian(heatmap, center, sigmax, sigmay=None):
|
|||||||
x0 = y0 = size // 2
|
x0 = y0 = size // 2
|
||||||
# g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
|
# g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
|
||||||
g = np.exp(
|
g = np.exp(
|
||||||
-((x - x0) ** 2) / (2 * sigmax**2)
|
-((x - x0) ** 2) / (2 * sigmax**2) - ((y - y0) ** 2) / (2 * sigmay**2)
|
||||||
- ((y - y0) ** 2) / (2 * sigmay**2)
|
|
||||||
)
|
)
|
||||||
g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
|
g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
|
||||||
g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
|
g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
|
||||||
@ -278,9 +275,7 @@ def combine_audio_aug(audio, sampling_rate, ann, audio2, sampling_rate2, ann2):
|
|||||||
# when combining calls from different files, assume they come from different individuals
|
# when combining calls from different files, assume they come from different individuals
|
||||||
if kk == "individual_ids":
|
if kk == "individual_ids":
|
||||||
if (ann[kk] > -1).sum() > 0:
|
if (ann[kk] > -1).sum() > 0:
|
||||||
ann2[kk][ann2[kk] > -1] += (
|
ann2[kk][ann2[kk] > -1] += np.max(ann[kk][ann[kk] > -1]) + 1
|
||||||
np.max(ann[kk][ann[kk] > -1]) + 1
|
|
||||||
)
|
|
||||||
|
|
||||||
if (kk != "class_id_file") and (kk != "annotated"):
|
if (kk != "class_id_file") and (kk != "annotated"):
|
||||||
ann[kk] = np.hstack((ann[kk], ann2[kk]))[inds]
|
ann[kk] = np.hstack((ann[kk], ann2[kk]))[inds]
|
||||||
@ -289,9 +284,7 @@ def combine_audio_aug(audio, sampling_rate, ann, audio2, sampling_rate2, ann2):
|
|||||||
|
|
||||||
|
|
||||||
class AudioLoader(torch.utils.data.Dataset):
|
class AudioLoader(torch.utils.data.Dataset):
|
||||||
def __init__(
|
def __init__(self, data_anns_ip, params, dataset_name=None, is_train=False):
|
||||||
self, data_anns_ip, params, dataset_name=None, is_train=False
|
|
||||||
):
|
|
||||||
|
|
||||||
self.data_anns = []
|
self.data_anns = []
|
||||||
self.is_train = is_train
|
self.is_train = is_train
|
||||||
@ -314,9 +307,7 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
|
|
||||||
# convert class name into class label
|
# convert class name into class label
|
||||||
if aa["class"] in self.params["class_names"]:
|
if aa["class"] in self.params["class_names"]:
|
||||||
aa["class_id"] = self.params["class_names"].index(
|
aa["class_id"] = self.params["class_names"].index(aa["class"])
|
||||||
aa["class"]
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
aa["class_id"] = -1
|
aa["class_id"] = -1
|
||||||
|
|
||||||
@ -324,12 +315,8 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
filtered_annotations.append(aa)
|
filtered_annotations.append(aa)
|
||||||
|
|
||||||
dd["annotation"] = filtered_annotations
|
dd["annotation"] = filtered_annotations
|
||||||
dd["start_times"] = np.array(
|
dd["start_times"] = np.array([aa["start_time"] for aa in dd["annotation"]])
|
||||||
[aa["start_time"] for aa in dd["annotation"]]
|
dd["end_times"] = np.array([aa["end_time"] for aa in dd["annotation"]])
|
||||||
)
|
|
||||||
dd["end_times"] = np.array(
|
|
||||||
[aa["end_time"] for aa in dd["annotation"]]
|
|
||||||
)
|
|
||||||
dd["high_freqs"] = np.array(
|
dd["high_freqs"] = np.array(
|
||||||
[float(aa["high_freq"]) for aa in dd["annotation"]]
|
[float(aa["high_freq"]) for aa in dd["annotation"]]
|
||||||
)
|
)
|
||||||
@ -406,18 +393,12 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if audio_raw.shape[0] - length_samples > 0:
|
if audio_raw.shape[0] - length_samples > 0:
|
||||||
sample_crop = np.random.randint(
|
sample_crop = np.random.randint(audio_raw.shape[0] - length_samples)
|
||||||
audio_raw.shape[0] - length_samples
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
sample_crop = 0
|
sample_crop = 0
|
||||||
audio_raw = audio_raw[sample_crop : sample_crop + length_samples]
|
audio_raw = audio_raw[sample_crop : sample_crop + length_samples]
|
||||||
ann["start_times"] = ann["start_times"] - sample_crop / float(
|
ann["start_times"] = ann["start_times"] - sample_crop / float(sampling_rate)
|
||||||
sampling_rate
|
ann["end_times"] = ann["end_times"] - sample_crop / float(sampling_rate)
|
||||||
)
|
|
||||||
ann["end_times"] = ann["end_times"] - sample_crop / float(
|
|
||||||
sampling_rate
|
|
||||||
)
|
|
||||||
|
|
||||||
# pad audio
|
# pad audio
|
||||||
if self.is_train:
|
if self.is_train:
|
||||||
@ -496,9 +477,7 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
spec = scale_vol_aug(spec, self.params)
|
spec = scale_vol_aug(spec, self.params)
|
||||||
|
|
||||||
if np.random.random() < self.params["aug_prob"]:
|
if np.random.random() < self.params["aug_prob"]:
|
||||||
spec = warp_spec_aug(
|
spec = warp_spec_aug(spec, ann, self.return_spec_for_viz, self.params)
|
||||||
spec, ann, self.return_spec_for_viz, self.params
|
|
||||||
)
|
|
||||||
|
|
||||||
if np.random.random() < self.params["aug_prob"]:
|
if np.random.random() < self.params["aug_prob"]:
|
||||||
spec = mask_time_aug(spec, self.params)
|
spec = mask_time_aug(spec, self.params)
|
||||||
@ -509,9 +488,7 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
outputs = {}
|
outputs = {}
|
||||||
outputs["spec"] = spec
|
outputs["spec"] = spec
|
||||||
if self.return_spec_for_viz:
|
if self.return_spec_for_viz:
|
||||||
outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(
|
outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(0)
|
||||||
0
|
|
||||||
)
|
|
||||||
|
|
||||||
# create ground truth heatmaps
|
# create ground truth heatmaps
|
||||||
(
|
(
|
||||||
@ -519,9 +496,7 @@ class AudioLoader(torch.utils.data.Dataset):
|
|||||||
outputs["y_2d_size"],
|
outputs["y_2d_size"],
|
||||||
outputs["y_2d_classes"],
|
outputs["y_2d_classes"],
|
||||||
ann_aug,
|
ann_aug,
|
||||||
) = generate_gt_heatmaps(
|
) = generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, self.params)
|
||||||
spec_op_shape, sampling_rate, ann, self.params
|
|
||||||
)
|
|
||||||
|
|
||||||
# hack to get around requirement that all vectors are the same length in
|
# hack to get around requirement that all vectors are the same length in
|
||||||
# the output batch
|
# the output batch
|
||||||
|
@ -1,10 +1,5 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.metrics import (
|
from sklearn.metrics import accuracy_score, auc, balanced_accuracy_score, roc_curve
|
||||||
accuracy_score,
|
|
||||||
auc,
|
|
||||||
balanced_accuracy_score,
|
|
||||||
roc_curve,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_error_auc(op_str, gt, pred, prob):
|
def compute_error_auc(op_str, gt, pred, prob):
|
||||||
@ -17,10 +12,7 @@ def compute_error_auc(op_str, gt, pred, prob):
|
|||||||
fpr, tpr, thresholds = roc_curve(gt, pred)
|
fpr, tpr, thresholds = roc_curve(gt, pred)
|
||||||
roc_auc = auc(fpr, tpr)
|
roc_auc = auc(fpr, tpr)
|
||||||
|
|
||||||
print(
|
print(op_str + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc))
|
||||||
op_str
|
|
||||||
+ ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc)
|
|
||||||
)
|
|
||||||
# return class_acc, roc_auc
|
# return class_acc, roc_auc
|
||||||
|
|
||||||
|
|
||||||
@ -114,14 +106,10 @@ def compute_pre_rec(
|
|||||||
confidence.append(pp["det_probs"][valid_inds])
|
confidence.append(pp["det_probs"][valid_inds])
|
||||||
elif eval_mode == "per_class":
|
elif eval_mode == "per_class":
|
||||||
# per class
|
# per class
|
||||||
confidence.append(
|
confidence.append(pp["class_probs"].T[valid_inds, class_of_interest])
|
||||||
pp["class_probs"].T[valid_inds, class_of_interest]
|
|
||||||
)
|
|
||||||
elif eval_mode == "top_class":
|
elif eval_mode == "top_class":
|
||||||
# per class - note that sometimes 'class_probs' can be num_classes+1 in size
|
# per class - note that sometimes 'class_probs' can be num_classes+1 in size
|
||||||
top_class = np.argmax(
|
top_class = np.argmax(pp["class_probs"].T[valid_inds, :num_classes], 1)
|
||||||
pp["class_probs"].T[valid_inds, :num_classes], 1
|
|
||||||
)
|
|
||||||
confidence.append(pp["class_probs"].T[valid_inds, top_class])
|
confidence.append(pp["class_probs"].T[valid_inds, top_class])
|
||||||
pred_class.append(top_class)
|
pred_class.append(top_class)
|
||||||
|
|
||||||
@ -170,9 +158,7 @@ def compute_pre_rec(
|
|||||||
num_positives += len(gg["start_times"][valid_inds])
|
num_positives += len(gg["start_times"][valid_inds])
|
||||||
elif eval_mode == "per_class":
|
elif eval_mode == "per_class":
|
||||||
# all valid ones with class of interest
|
# all valid ones with class of interest
|
||||||
num_positives += (
|
num_positives += (gg["class_ids"][valid_inds] == class_of_interest).sum()
|
||||||
gg["class_ids"][valid_inds] == class_of_interest
|
|
||||||
).sum()
|
|
||||||
elif eval_mode == "top_class":
|
elif eval_mode == "top_class":
|
||||||
# all valid ones with non generic class
|
# all valid ones with non generic class
|
||||||
num_positives += (gg["class_ids"][valid_inds] > -1).sum()
|
num_positives += (gg["class_ids"][valid_inds] > -1).sum()
|
||||||
@ -254,9 +240,7 @@ def compute_pre_rec(
|
|||||||
results["avg_prec"] = np.nan
|
results["avg_prec"] = np.nan
|
||||||
results["rec_at_x"] = np.nan
|
results["rec_at_x"] = np.nan
|
||||||
else:
|
else:
|
||||||
results["avg_prec"] = np.round(
|
results["avg_prec"] = np.round(calc_average_precision(recall, precision), 5)
|
||||||
calc_average_precision(recall, precision), 5
|
|
||||||
)
|
|
||||||
results["rec_at_x"] = np.round(calc_recall_at_x(recall, precision), 5)
|
results["rec_at_x"] = np.round(calc_recall_at_x(recall, precision), 5)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
@ -299,20 +283,12 @@ def compute_file_accuracy(gts, preds, num_classes):
|
|||||||
|
|
||||||
# compute min and max scoring range - then threshold
|
# compute min and max scoring range - then threshold
|
||||||
min_val = 0
|
min_val = 0
|
||||||
mins = [
|
mins = [pp["class_probs"].min() for pp in preds if pp["class_probs"].shape[1] > 0]
|
||||||
pp["class_probs"].min()
|
|
||||||
for pp in preds
|
|
||||||
if pp["class_probs"].shape[1] > 0
|
|
||||||
]
|
|
||||||
if len(mins) > 0:
|
if len(mins) > 0:
|
||||||
min_val = np.min(mins)
|
min_val = np.min(mins)
|
||||||
|
|
||||||
max_val = 1.0
|
max_val = 1.0
|
||||||
maxes = [
|
maxes = [pp["class_probs"].max() for pp in preds if pp["class_probs"].shape[1] > 0]
|
||||||
pp["class_probs"].max()
|
|
||||||
for pp in preds
|
|
||||||
if pp["class_probs"].shape[1] > 0
|
|
||||||
]
|
|
||||||
if len(maxes) > 0:
|
if len(maxes) > 0:
|
||||||
max_val = np.max(maxes)
|
max_val = np.max(maxes)
|
||||||
|
|
||||||
@ -334,9 +310,7 @@ def compute_file_accuracy(gts, preds, num_classes):
|
|||||||
|
|
||||||
# pick the result corresponding to the overall best threshold
|
# pick the result corresponding to the overall best threshold
|
||||||
pred_valid_all = np.vstack(pred_valid_all)
|
pred_valid_all = np.vstack(pred_valid_all)
|
||||||
acc_per_thresh = (
|
acc_per_thresh = (np.array(gt_valid)[..., np.newaxis] == pred_valid_all).mean(0)
|
||||||
np.array(gt_valid)[..., np.newaxis] == pred_valid_all
|
|
||||||
).mean(0)
|
|
||||||
best_thresh = np.argmax(acc_per_thresh)
|
best_thresh = np.argmax(acc_per_thresh)
|
||||||
best_acc = acc_per_thresh[best_thresh]
|
best_acc = acc_per_thresh[best_thresh]
|
||||||
pred_valid = pred_valid_all[:, best_thresh].astype(np.int).tolist()
|
pred_valid = pred_valid_all[:, best_thresh].astype(np.int).tolist()
|
||||||
|
@ -62,9 +62,7 @@ def save_images_batch(model, data_loader, params):
|
|||||||
data_loader.dataset.return_spec_for_viz = False
|
data_loader.dataset.return_spec_for_viz = False
|
||||||
|
|
||||||
|
|
||||||
def save_image(
|
def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title):
|
||||||
spec_viz, outputs, ind, inputs, params, op_file_name, plot_title
|
|
||||||
):
|
|
||||||
pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float())
|
pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float())
|
||||||
pred_hm = outputs["pred_det"][ind, 0, :].data.cpu().numpy()
|
pred_hm = outputs["pred_det"][ind, 0, :].data.cpu().numpy()
|
||||||
spec_viz = spec_viz[ind, 0, :]
|
spec_viz = spec_viz[ind, 0, :]
|
||||||
@ -87,14 +85,10 @@ def save_image(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def loss_fun(
|
def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq):
|
||||||
outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq
|
|
||||||
):
|
|
||||||
|
|
||||||
# detection loss
|
# detection loss
|
||||||
loss = params["det_loss_weight"] * det_criterion(
|
loss = params["det_loss_weight"] * det_criterion(outputs["pred_det"], gt_det)
|
||||||
outputs["pred_det"], gt_det
|
|
||||||
)
|
|
||||||
|
|
||||||
# bounding box size loss
|
# bounding box size loss
|
||||||
loss += params["size_loss_weight"] * losses.bbox_size_loss(
|
loss += params["size_loss_weight"] * losses.bbox_size_loss(
|
||||||
@ -111,9 +105,7 @@ def loss_fun(
|
|||||||
return loss
|
return loss
|
||||||
|
|
||||||
|
|
||||||
def train(
|
def train(model, epoch, data_loader, det_criterion, optimizer, scheduler, params):
|
||||||
model, epoch, data_loader, det_criterion, optimizer, scheduler, params
|
|
||||||
):
|
|
||||||
|
|
||||||
model.train()
|
model.train()
|
||||||
|
|
||||||
@ -226,9 +218,7 @@ def test(model, epoch, data_loader, det_criterion, params):
|
|||||||
test_loss.update(loss.item(), data.shape[0])
|
test_loss.update(loss.item(), data.shape[0])
|
||||||
|
|
||||||
# do NMS
|
# do NMS
|
||||||
pred_nms, _ = pp.run_nms(
|
pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float())
|
||||||
outputs, params, inputs["sampling_rate"].float()
|
|
||||||
)
|
|
||||||
predictions.extend(pred_nms)
|
predictions.extend(pred_nms)
|
||||||
|
|
||||||
ground_truths.extend(parse_gt_data(inputs))
|
ground_truths.extend(parse_gt_data(inputs))
|
||||||
@ -338,9 +328,7 @@ if __name__ == "__main__":
|
|||||||
# setup arg parser and populate it with exiting parameters - will not work with lists
|
# setup arg parser and populate it with exiting parameters - will not work with lists
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("data_dir", type=str, help="Path to root of datasets")
|
parser.add_argument("data_dir", type=str, help="Path to root of datasets")
|
||||||
parser.add_argument(
|
parser.add_argument("ann_dir", type=str, help="Path to extracted annotations")
|
||||||
"ann_dir", type=str, help="Path to extracted annotations"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--train_split",
|
"--train_split",
|
||||||
type=str,
|
type=str,
|
||||||
@ -367,9 +355,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# save notes file
|
# save notes file
|
||||||
if params["notes"] != "":
|
if params["notes"] != "":
|
||||||
tu.write_notes_file(
|
tu.write_notes_file(params["experiment"] + "notes.txt", params["notes"])
|
||||||
params["experiment"] + "notes.txt", params["notes"]
|
|
||||||
)
|
|
||||||
|
|
||||||
# load the training and test meta data - there are different splits defined
|
# load the training and test meta data - there are different splits defined
|
||||||
train_sets, test_sets = ts.get_train_test_data(
|
train_sets, test_sets = ts.get_train_test_data(
|
||||||
@ -401,14 +387,12 @@ if __name__ == "__main__":
|
|||||||
params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
|
params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
|
||||||
params["class_names"]
|
params["class_names"]
|
||||||
)
|
)
|
||||||
params["class_names_short"] = tu.get_short_class_names(
|
params["class_names_short"] = tu.get_short_class_names(params["class_names"])
|
||||||
params["class_names"]
|
|
||||||
)
|
|
||||||
|
|
||||||
# standardize the low and high frequency value for specified classes
|
# standardize the low and high frequency value for specified classes
|
||||||
params["standardize_classs_names"] = params[
|
params["standardize_classs_names"] = params["standardize_classs_names_ip"].split(
|
||||||
"standardize_classs_names_ip"
|
";"
|
||||||
].split(";")
|
)
|
||||||
for cc in params["standardize_classs_names"]:
|
for cc in params["standardize_classs_names"]:
|
||||||
if cc in params["class_names"]:
|
if cc in params["class_names"]:
|
||||||
data_train = tu.standardize_low_freq(data_train, cc)
|
data_train = tu.standardize_low_freq(data_train, cc)
|
||||||
@ -458,9 +442,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
|
optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
|
||||||
# optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=0.9)
|
# optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=0.9)
|
||||||
scheduler = CosineAnnealingLR(
|
scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader))
|
||||||
optimizer, params["num_epochs"] * len(train_loader)
|
|
||||||
)
|
|
||||||
if params["train_loss"] == "mse":
|
if params["train_loss"] == "mse":
|
||||||
det_criterion = losses.mse_loss
|
det_criterion = losses.mse_loss
|
||||||
elif params["train_loss"] == "focal":
|
elif params["train_loss"] == "focal":
|
||||||
@ -523,9 +505,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
if epoch % params["num_eval_epochs"] == 0:
|
if epoch % params["num_eval_epochs"] == 0:
|
||||||
# detection accuracy on test set
|
# detection accuracy on test set
|
||||||
test_res, test_loss = test(
|
test_res, test_loss = test(model, epoch, test_loader, det_criterion, params)
|
||||||
model, epoch, test_loader, det_criterion, params
|
|
||||||
)
|
|
||||||
test_plt_ls.update_and_save(epoch, [test_loss["test_loss"]])
|
test_plt_ls.update_and_save(epoch, [test_loss["test_loss"]])
|
||||||
test_plt.update_and_save(
|
test_plt.update_and_save(
|
||||||
epoch,
|
epoch,
|
||||||
@ -540,9 +520,7 @@ if __name__ == "__main__":
|
|||||||
test_plt_class.update_and_save(
|
test_plt_class.update_and_save(
|
||||||
epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
|
epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
|
||||||
)
|
)
|
||||||
pu.plot_pr_curve_class(
|
pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res)
|
||||||
params["experiment"], "test_pr", "test_pr", test_res
|
|
||||||
)
|
|
||||||
|
|
||||||
# save trained model
|
# save trained model
|
||||||
print("saving model to: " + params["model_file_name"])
|
print("saving model to: " + params["model_file_name"])
|
||||||
|
@ -24,8 +24,7 @@ def split_diff(ann_dir, wav_dir, load_extra=True):
|
|||||||
"dataset_name": "BatDetective",
|
"dataset_name": "BatDetective",
|
||||||
"is_test": False,
|
"is_test": False,
|
||||||
"is_binary": True, # just a bat / not bat dataset ie no classes
|
"is_binary": True, # just a bat / not bat dataset ie no classes
|
||||||
"ann_path": ann_dir
|
"ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json",
|
||||||
+ "train_set_bulgaria_batdetective_with_bbs.json",
|
|
||||||
"wav_path": wav_dir + "bat_detective/audio/",
|
"wav_path": wav_dir + "bat_detective/audio/",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -152,8 +151,7 @@ def split_same(ann_dir, wav_dir, load_extra=True):
|
|||||||
"dataset_name": "BatDetective",
|
"dataset_name": "BatDetective",
|
||||||
"is_test": False,
|
"is_test": False,
|
||||||
"is_binary": True,
|
"is_binary": True,
|
||||||
"ann_path": ann_dir
|
"ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json",
|
||||||
+ "train_set_bulgaria_batdetective_with_bbs.json",
|
|
||||||
"wav_path": wav_dir + "bat_detective/audio/",
|
"wav_path": wav_dir + "bat_detective/audio/",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -25,9 +25,7 @@ def get_blank_dataset_dict(dataset_name, is_test, ann_path, wav_path):
|
|||||||
def get_short_class_names(class_names, str_len=3):
|
def get_short_class_names(class_names, str_len=3):
|
||||||
class_names_short = []
|
class_names_short = []
|
||||||
for cc in class_names:
|
for cc in class_names:
|
||||||
class_names_short.append(
|
class_names_short.append(" ".join([sp[:str_len] for sp in cc.split(" ")]))
|
||||||
" ".join([sp[:str_len] for sp in cc.split(" ")])
|
|
||||||
)
|
|
||||||
return class_names_short
|
return class_names_short
|
||||||
|
|
||||||
|
|
||||||
@ -157,9 +155,7 @@ def load_set_of_anns(
|
|||||||
str_len = np.max([len(cc) for cc in class_names]) + 5
|
str_len = np.max([len(cc) for cc in class_names]) + 5
|
||||||
for cc in range(len(class_names)):
|
for cc in range(len(class_names)):
|
||||||
print(
|
print(
|
||||||
str(cc).ljust(5)
|
str(cc).ljust(5) + class_names[cc].ljust(str_len) + str(class_cnts[cc])
|
||||||
+ class_names[cc].ljust(str_len)
|
|
||||||
+ str(class_cnts[cc])
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(classes_to_ignore) == 0:
|
if len(classes_to_ignore) == 0:
|
||||||
|
@ -39,9 +39,7 @@ def generate_spectrogram(
|
|||||||
min_freq = round(params["min_freq"] * params["fft_win_length"])
|
min_freq = round(params["min_freq"] * params["fft_win_length"])
|
||||||
if spec.shape[0] < max_freq:
|
if spec.shape[0] < max_freq:
|
||||||
freq_pad = max_freq - spec.shape[0]
|
freq_pad = max_freq - spec.shape[0]
|
||||||
spec = np.vstack(
|
spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec))
|
||||||
(np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec)
|
|
||||||
)
|
|
||||||
spec_cropped = spec[-max_freq : spec.shape[0] - min_freq, :]
|
spec_cropped = spec[-max_freq : spec.shape[0] - min_freq, :]
|
||||||
|
|
||||||
if params["spec_scale"] == "log":
|
if params["spec_scale"] == "log":
|
||||||
@ -51,11 +49,7 @@ def generate_spectrogram(
|
|||||||
* (
|
* (
|
||||||
1.0
|
1.0
|
||||||
/ (
|
/ (
|
||||||
np.abs(
|
np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
|
||||||
np.hanning(
|
|
||||||
int(params["fft_win_length"] * sampling_rate)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
** 2
|
** 2
|
||||||
).sum()
|
).sum()
|
||||||
)
|
)
|
||||||
@ -88,11 +82,7 @@ def generate_spectrogram(
|
|||||||
* (
|
* (
|
||||||
1.0
|
1.0
|
||||||
/ (
|
/ (
|
||||||
np.abs(
|
np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
|
||||||
np.hanning(
|
|
||||||
int(params["fft_win_length"] * sampling_rate)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
** 2
|
** 2
|
||||||
).sum()
|
).sum()
|
||||||
)
|
)
|
||||||
@ -132,9 +122,7 @@ def load_audio_file(
|
|||||||
|
|
||||||
# clipping maximum duration
|
# clipping maximum duration
|
||||||
if max_duration is not False:
|
if max_duration is not False:
|
||||||
max_duration = np.minimum(
|
max_duration = np.minimum(int(sampling_rate * max_duration), audio_raw.shape[0])
|
||||||
int(sampling_rate * max_duration), audio_raw.shape[0]
|
|
||||||
)
|
|
||||||
audio_raw = audio_raw[:max_duration]
|
audio_raw = audio_raw[:max_duration]
|
||||||
|
|
||||||
# convert to float32 and scale
|
# convert to float32 and scale
|
||||||
@ -171,9 +159,7 @@ def pad_audio(
|
|||||||
# too small
|
# too small
|
||||||
# used during training to ensure all the batches are the same size
|
# used during training to ensure all the batches are the same size
|
||||||
diff = fixed_width * step + noverlap - audio_raw.shape[0]
|
diff = fixed_width * step + noverlap - audio_raw.shape[0]
|
||||||
audio_raw = np.hstack(
|
audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype)))
|
||||||
(audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
|
|
||||||
)
|
|
||||||
|
|
||||||
elif fixed_width is not None and spec_width > fixed_width:
|
elif fixed_width is not None and spec_width > fixed_width:
|
||||||
# too big
|
# too big
|
||||||
@ -181,18 +167,13 @@ def pad_audio(
|
|||||||
diff = fixed_width * step + noverlap - audio_raw.shape[0]
|
diff = fixed_width * step + noverlap - audio_raw.shape[0]
|
||||||
audio_raw = audio_raw[:diff]
|
audio_raw = audio_raw[:diff]
|
||||||
|
|
||||||
elif (
|
elif spec_width_rs < min_size or (np.floor(spec_width_rs) % divide_factor) != 0:
|
||||||
spec_width_rs < min_size
|
|
||||||
or (np.floor(spec_width_rs) % divide_factor) != 0
|
|
||||||
):
|
|
||||||
# need to be at least min_size
|
# need to be at least min_size
|
||||||
div_amt = np.ceil(spec_width_rs / float(divide_factor))
|
div_amt = np.ceil(spec_width_rs / float(divide_factor))
|
||||||
div_amt = np.maximum(1, div_amt)
|
div_amt = np.maximum(1, div_amt)
|
||||||
target_size = int(div_amt * divide_factor * (1.0 / resize_factor))
|
target_size = int(div_amt * divide_factor * (1.0 / resize_factor))
|
||||||
diff = target_size * step + noverlap - audio_raw.shape[0]
|
diff = target_size * step + noverlap - audio_raw.shape[0]
|
||||||
audio_raw = np.hstack(
|
audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype)))
|
||||||
(audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
|
|
||||||
)
|
|
||||||
|
|
||||||
return audio_raw
|
return audio_raw
|
||||||
|
|
||||||
@ -235,7 +216,7 @@ def gen_mag_spectrogram_pt(x, fs, ms, overlap_perc):
|
|||||||
|
|
||||||
def pcen(spec_cropped, sampling_rate):
|
def pcen(spec_cropped, sampling_rate):
|
||||||
# TODO should be passing hop_length too i.e. step
|
# TODO should be passing hop_length too i.e. step
|
||||||
spec = librosa.pcen(
|
spec = librosa.pcen(spec_cropped * (2**31), sr=sampling_rate / 10).astype(
|
||||||
spec_cropped * (2**31), sr=sampling_rate / 10
|
np.float32
|
||||||
).astype(np.float32)
|
)
|
||||||
return spec
|
return spec
|
||||||
|
@ -158,9 +158,7 @@ def convert_results(
|
|||||||
results["spec_feat_names"] = feats.get_feature_names()
|
results["spec_feat_names"] = feats.get_feature_names()
|
||||||
if len(cnn_feats) > 0:
|
if len(cnn_feats) > 0:
|
||||||
results["cnn_feats"] = cnn_feats
|
results["cnn_feats"] = cnn_feats
|
||||||
results["cnn_feat_names"] = [
|
results["cnn_feat_names"] = [str(ii) for ii in range(cnn_feats.shape[1])]
|
||||||
str(ii) for ii in range(cnn_feats.shape[1])
|
|
||||||
]
|
|
||||||
if len(spec_slices) > 0:
|
if len(spec_slices) > 0:
|
||||||
results["spec_slices"] = spec_slices
|
results["spec_slices"] = spec_slices
|
||||||
|
|
||||||
@ -194,9 +192,7 @@ def save_results_to_file(results, op_path):
|
|||||||
|
|
||||||
# save features
|
# save features
|
||||||
if "spec_feats" in results.keys():
|
if "spec_feats" in results.keys():
|
||||||
df = pd.DataFrame(
|
df = pd.DataFrame(results["spec_feats"], columns=results["spec_feat_names"])
|
||||||
results["spec_feats"], columns=results["spec_feat_names"]
|
|
||||||
)
|
|
||||||
df.to_csv(
|
df.to_csv(
|
||||||
op_path + "_spec_features.csv",
|
op_path + "_spec_features.csv",
|
||||||
sep=",",
|
sep=",",
|
||||||
@ -205,9 +201,7 @@ def save_results_to_file(results, op_path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if "cnn_feats" in results.keys():
|
if "cnn_feats" in results.keys():
|
||||||
df = pd.DataFrame(
|
df = pd.DataFrame(results["cnn_feats"], columns=results["cnn_feat_names"])
|
||||||
results["cnn_feats"], columns=results["cnn_feat_names"]
|
|
||||||
)
|
|
||||||
df.to_csv(
|
df.to_csv(
|
||||||
op_path + "_cnn_features.csv",
|
op_path + "_cnn_features.csv",
|
||||||
sep=",",
|
sep=",",
|
||||||
@ -243,9 +237,7 @@ def compute_spectrogram(audio, sampling_rate, params, return_np=False):
|
|||||||
# resize the spec
|
# resize the spec
|
||||||
rs = params["resize_factor"]
|
rs = params["resize_factor"]
|
||||||
spec_op_shape = (int(params["spec_height"] * rs), int(spec.shape[-1] * rs))
|
spec_op_shape = (int(params["spec_height"] * rs), int(spec.shape[-1] * rs))
|
||||||
spec = F.interpolate(
|
spec = F.interpolate(spec, size=spec_op_shape, mode="bilinear", align_corners=False)
|
||||||
spec, size=spec_op_shape, mode="bilinear", align_corners=False
|
|
||||||
)
|
|
||||||
|
|
||||||
if return_np:
|
if return_np:
|
||||||
spec_np = spec[0, 0, :].cpu().data.numpy()
|
spec_np = spec[0, 0, :].cpu().data.numpy()
|
||||||
@ -306,9 +298,7 @@ def process_file(
|
|||||||
chunk_time = args["chunk_size"] * chunk_id
|
chunk_time = args["chunk_size"] * chunk_id
|
||||||
chunk_length = int(sampling_rate * args["chunk_size"])
|
chunk_length = int(sampling_rate * args["chunk_size"])
|
||||||
start_sample = chunk_id * chunk_length
|
start_sample = chunk_id * chunk_length
|
||||||
end_sample = np.minimum(
|
end_sample = np.minimum((chunk_id + 1) * chunk_length, audio_full.shape[0])
|
||||||
(chunk_id + 1) * chunk_length, audio_full.shape[0]
|
|
||||||
)
|
|
||||||
audio = audio_full[start_sample:end_sample]
|
audio = audio_full[start_sample:end_sample]
|
||||||
|
|
||||||
# load audio file and compute spectrogram
|
# load audio file and compute spectrogram
|
||||||
@ -343,9 +333,7 @@ def process_file(
|
|||||||
cnn_feats.append(features[0])
|
cnn_feats.append(features[0])
|
||||||
|
|
||||||
if args["spec_slices"]:
|
if args["spec_slices"]:
|
||||||
spec_slices.extend(
|
spec_slices.extend(feats.extract_spec_slices(spec_np, pred_nms, params))
|
||||||
feats.extract_spec_slices(spec_np, pred_nms, params)
|
|
||||||
)
|
|
||||||
|
|
||||||
# convert the predictions into output dictionary
|
# convert the predictions into output dictionary
|
||||||
file_id = os.path.basename(audio_file)
|
file_id = os.path.basename(audio_file)
|
||||||
@ -366,10 +354,7 @@ def process_file(
|
|||||||
# summarize results
|
# summarize results
|
||||||
if not args["quiet"]:
|
if not args["quiet"]:
|
||||||
num_detections = len(results["pred_dict"]["annotation"])
|
num_detections = len(results["pred_dict"]["annotation"])
|
||||||
print(
|
print("{}".format(num_detections) + " call(s) detected above the threshold.")
|
||||||
"{}".format(num_detections)
|
|
||||||
+ " call(s) detected above the threshold."
|
|
||||||
)
|
|
||||||
|
|
||||||
# print results for top n classes
|
# print results for top n classes
|
||||||
if not args["quiet"] and (num_detections > 0):
|
if not args["quiet"] and (num_detections > 0):
|
||||||
@ -379,8 +364,7 @@ def process_file(
|
|||||||
print("species name".ljust(30) + "probablity present")
|
print("species name".ljust(30) + "probablity present")
|
||||||
for cc in np.argsort(class_overall)[::-1][:top_n]:
|
for cc in np.argsort(class_overall)[::-1][:top_n]:
|
||||||
print(
|
print(
|
||||||
params["class_names"][cc].ljust(30)
|
params["class_names"][cc].ljust(30) + str(round(class_overall[cc], 3))
|
||||||
+ str(round(class_overall[cc], 3))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if return_raw_preds:
|
if return_raw_preds:
|
||||||
|
@ -57,9 +57,7 @@ def create_box_image(
|
|||||||
|
|
||||||
if plot_class_names:
|
if plot_class_names:
|
||||||
for ii, bb in enumerate(boxes):
|
for ii, bb in enumerate(boxes):
|
||||||
txt = " ".join(
|
txt = " ".join([sp[:3] for sp in detections_ip[ii]["class"].split(" ")])
|
||||||
[sp[:3] for sp in detections_ip[ii]["class"].split(" ")]
|
|
||||||
)
|
|
||||||
font_info = {
|
font_info = {
|
||||||
"color": "white",
|
"color": "white",
|
||||||
"size": 10,
|
"size": 10,
|
||||||
@ -89,9 +87,7 @@ def save_ann_spec(
|
|||||||
y_extent = [0, duration, min_freq, max_freq]
|
y_extent = [0, duration, min_freq, max_freq]
|
||||||
|
|
||||||
plt.close("all")
|
plt.close("all")
|
||||||
fig = plt.figure(
|
fig = plt.figure(0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100)
|
||||||
0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100
|
|
||||||
)
|
|
||||||
plt.imshow(
|
plt.imshow(
|
||||||
spec,
|
spec,
|
||||||
aspect="auto",
|
aspect="auto",
|
||||||
@ -128,16 +124,12 @@ def save_ann_spec(
|
|||||||
plt.savefig(op_path)
|
plt.savefig(op_path)
|
||||||
|
|
||||||
|
|
||||||
def plot_pts(
|
def plot_pts(fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False):
|
||||||
fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False
|
|
||||||
):
|
|
||||||
plt.figure(fig_id)
|
plt.figure(fig_id)
|
||||||
un_class, labels = np.unique(class_names, return_inverse=True)
|
un_class, labels = np.unique(class_names, return_inverse=True)
|
||||||
un_labels = np.unique(labels)
|
un_labels = np.unique(labels)
|
||||||
if un_labels.shape[0] > len(colors):
|
if un_labels.shape[0] > len(colors):
|
||||||
colors = [
|
colors = [plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels]
|
||||||
plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels
|
|
||||||
]
|
|
||||||
|
|
||||||
for ii, u in enumerate(un_labels):
|
for ii, u in enumerate(un_labels):
|
||||||
inds = np.where(labels == u)[0]
|
inds = np.where(labels == u)[0]
|
||||||
@ -244,9 +236,7 @@ def plot_spec(
|
|||||||
ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
|
ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
|
||||||
ax0.xaxis.set_ticklabels([])
|
ax0.xaxis.set_ticklabels([])
|
||||||
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
||||||
ax0.text(
|
ax0.text(0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info)
|
||||||
0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info
|
|
||||||
)
|
|
||||||
|
|
||||||
plt.grid(False)
|
plt.grid(False)
|
||||||
if plot_boxes:
|
if plot_boxes:
|
||||||
@ -271,9 +261,7 @@ def plot_spec(
|
|||||||
ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
|
ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
|
||||||
ax1.xaxis.set_ticklabels([])
|
ax1.xaxis.set_ticklabels([])
|
||||||
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
||||||
ax1.text(
|
ax1.text(0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info)
|
||||||
0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info
|
|
||||||
)
|
|
||||||
|
|
||||||
plt.grid(False)
|
plt.grid(False)
|
||||||
if plot_boxes:
|
if plot_boxes:
|
||||||
@ -308,9 +296,7 @@ def plot_spec(
|
|||||||
)
|
)
|
||||||
# ax2.xaxis.set_ticklabels([])
|
# ax2.xaxis.set_ticklabels([])
|
||||||
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
font_info = {"color": "white", "size": 12, "weight": "bold"}
|
||||||
ax2.text(
|
ax2.text(0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info)
|
||||||
0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info
|
|
||||||
)
|
|
||||||
|
|
||||||
plt.grid(False)
|
plt.grid(False)
|
||||||
|
|
||||||
@ -408,21 +394,15 @@ def plot_confusion_matrix(
|
|||||||
# shorten the class names for plotting
|
# shorten the class names for plotting
|
||||||
class_names = []
|
class_names = []
|
||||||
for cc in class_names_long:
|
for cc in class_names_long:
|
||||||
class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[
|
class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[:-1]
|
||||||
:-1
|
|
||||||
]
|
|
||||||
class_names.append(class_name_sm)
|
class_names.append(class_name_sm)
|
||||||
|
|
||||||
num_classes = len(class_names)
|
num_classes = len(class_names)
|
||||||
cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(
|
cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(np.float32)
|
||||||
np.float32
|
|
||||||
)
|
|
||||||
cm_norm = cm.sum(1)
|
cm_norm = cm.sum(1)
|
||||||
|
|
||||||
valid_inds = np.where(cm_norm > 0)[0]
|
valid_inds = np.where(cm_norm > 0)[0]
|
||||||
cm[valid_inds, :] = (
|
cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
|
||||||
cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
|
|
||||||
)
|
|
||||||
cm[np.where(cm_norm == -0)[0], :] = np.nan
|
cm[np.where(cm_norm == -0)[0], :] = np.nan
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
@ -507,9 +487,7 @@ class LossPlotter(object):
|
|||||||
if self.logy:
|
if self.logy:
|
||||||
plt.gca().set_yscale("log")
|
plt.gca().set_yscale("log")
|
||||||
plt.grid(True)
|
plt.grid(True)
|
||||||
plt.legend(
|
plt.legend(bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0)
|
||||||
bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0
|
|
||||||
)
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.savefig(self.op_file_name)
|
plt.savefig(self.op_file_name)
|
||||||
plt.close(0)
|
plt.close(0)
|
||||||
@ -524,19 +502,15 @@ class LossPlotter(object):
|
|||||||
|
|
||||||
def save_confusion_matrix(self, gt, pred):
|
def save_confusion_matrix(self, gt, pred):
|
||||||
plt.figure(0)
|
plt.figure(0)
|
||||||
cm = confusion_matrix(
|
cm = confusion_matrix(gt, pred, np.arange(len(self.class_names))).astype(
|
||||||
gt, pred, np.arange(len(self.class_names))
|
np.float32
|
||||||
).astype(np.float32)
|
)
|
||||||
cm_norm = cm.sum(1)
|
cm_norm = cm.sum(1)
|
||||||
valid_inds = np.where(cm_norm > 0)[0]
|
valid_inds = np.where(cm_norm > 0)[0]
|
||||||
cm[valid_inds, :] = (
|
cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
|
||||||
cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
|
|
||||||
)
|
|
||||||
plt.imshow(cm, vmin=0, vmax=1, cmap="plasma")
|
plt.imshow(cm, vmin=0, vmax=1, cmap="plasma")
|
||||||
plt.colorbar()
|
plt.colorbar()
|
||||||
plt.xticks(
|
plt.xticks(np.arange(cm.shape[1]), self.class_names, rotation="vertical")
|
||||||
np.arange(cm.shape[1]), self.class_names, rotation="vertical"
|
|
||||||
)
|
|
||||||
plt.yticks(np.arange(cm.shape[0]), self.class_names)
|
plt.yticks(np.arange(cm.shape[0]), self.class_names)
|
||||||
plt.xlabel("Predicted")
|
plt.xlabel("Predicted")
|
||||||
plt.ylabel("Ground Truth")
|
plt.ylabel("Ground Truth")
|
||||||
|
@ -56,25 +56,19 @@ class InteractivePlotter:
|
|||||||
self.annotated = np.zeros(
|
self.annotated = np.zeros(
|
||||||
self.labels.shape[0], dtype=np.int
|
self.labels.shape[0], dtype=np.int
|
||||||
) # can populate this with 1's where we have labels
|
) # can populate this with 1's where we have labels
|
||||||
self.labels_cols = [
|
self.labels_cols = [colors[self.labels[ii]] for ii in range(len(self.labels))]
|
||||||
colors[self.labels[ii]] for ii in range(len(self.labels))
|
|
||||||
]
|
|
||||||
self.freq_lims = freq_lims
|
self.freq_lims = freq_lims
|
||||||
|
|
||||||
self.allow_training = allow_training
|
self.allow_training = allow_training
|
||||||
self.pt_size = 5.0
|
self.pt_size = 5.0
|
||||||
self.spec_pad = (
|
self.spec_pad = 0.2 # this much padding has been applied to the spec slices
|
||||||
0.2 # this much padding has been applied to the spec slices
|
|
||||||
)
|
|
||||||
self.fig_width = 12
|
self.fig_width = 12
|
||||||
self.fig_height = 8
|
self.fig_height = 8
|
||||||
|
|
||||||
self.current_id = 0
|
self.current_id = 0
|
||||||
max_ind = np.argmax([ss.shape[1] for ss in self.spec_slices])
|
max_ind = np.argmax([ss.shape[1] for ss in self.spec_slices])
|
||||||
self.max_width = self.spec_slices[max_ind].shape[1]
|
self.max_width = self.spec_slices[max_ind].shape[1]
|
||||||
self.blank_spec = np.zeros(
|
self.blank_spec = np.zeros((self.spec_slices[0].shape[0], self.max_width))
|
||||||
(self.spec_slices[0].shape[0], self.max_width)
|
|
||||||
)
|
|
||||||
|
|
||||||
def plot(self, fig_id):
|
def plot(self, fig_id):
|
||||||
self.fig, self.ax = plt.subplots(
|
self.fig, self.ax = plt.subplots(
|
||||||
@ -147,17 +141,16 @@ class InteractivePlotter:
|
|||||||
) // 2
|
) // 2
|
||||||
new_spec[
|
new_spec[
|
||||||
:,
|
:,
|
||||||
w_diff : self.spec_slices[self.current_id].shape[1]
|
w_diff : self.spec_slices[self.current_id].shape[1] + w_diff,
|
||||||
+ w_diff,
|
|
||||||
] = self.spec_slices[self.current_id]
|
] = self.spec_slices[self.current_id]
|
||||||
self.spec_im.set_data(new_spec)
|
self.spec_im.set_data(new_spec)
|
||||||
self.spec_im.set_clim(vmin=0, vmax=new_spec.max())
|
self.spec_im.set_clim(vmin=0, vmax=new_spec.max())
|
||||||
|
|
||||||
# draw bounding box around call
|
# draw bounding box around call
|
||||||
self.ax[1].patches[0].remove()
|
self.ax[1].patches[0].remove()
|
||||||
spec_width_orig = self.spec_slices[self.current_id].shape[
|
spec_width_orig = self.spec_slices[self.current_id].shape[1] / (
|
||||||
1
|
1.0 + 2.0 * self.spec_pad
|
||||||
] / (1.0 + 2.0 * self.spec_pad)
|
)
|
||||||
xx = w_diff + self.spec_pad * spec_width_orig
|
xx = w_diff + self.spec_pad * spec_width_orig
|
||||||
ww = spec_width_orig
|
ww = spec_width_orig
|
||||||
yy = self.call_info[self.current_id]["low_freq"] / 1000
|
yy = self.call_info[self.current_id]["low_freq"] / 1000
|
||||||
@ -179,13 +172,9 @@ class InteractivePlotter:
|
|||||||
info_str = (
|
info_str = (
|
||||||
self.call_info[self.current_id]["file_name"]
|
self.call_info[self.current_id]["file_name"]
|
||||||
+ ", time="
|
+ ", time="
|
||||||
+ str(
|
+ str(round(self.call_info[self.current_id]["start_time"], 3))
|
||||||
round(self.call_info[self.current_id]["start_time"], 3)
|
|
||||||
)
|
|
||||||
+ ", prob="
|
+ ", prob="
|
||||||
+ str(
|
+ str(round(self.call_info[self.current_id]["det_prob"], 3))
|
||||||
round(self.call_info[self.current_id]["det_prob"], 3)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
self.ax[0].set_xlabel(info_str)
|
self.ax[0].set_xlabel(info_str)
|
||||||
|
|
||||||
|
@ -235,9 +235,7 @@ def write(filename, rate, data):
|
|||||||
# kind of numeric data in the numpy array
|
# kind of numeric data in the numpy array
|
||||||
dkind = data.dtype.kind
|
dkind = data.dtype.kind
|
||||||
if not (
|
if not (
|
||||||
dkind == "i"
|
dkind == "i" or dkind == "f" or (dkind == "u" and data.dtype.itemsize == 1)
|
||||||
or dkind == "f"
|
|
||||||
or (dkind == "u" and data.dtype.itemsize == 1)
|
|
||||||
):
|
):
|
||||||
raise ValueError("Unsupported data type '%s'" % data.dtype)
|
raise ValueError("Unsupported data type '%s'" % data.dtype)
|
||||||
|
|
||||||
@ -270,9 +268,7 @@ def write(filename, rate, data):
|
|||||||
# Write the data (16, comp, noc, etc) in the correct binary format
|
# Write the data (16, comp, noc, etc) in the correct binary format
|
||||||
# for the wav header. the string format (first arg) specifies how many bytes for each
|
# for the wav header. the string format (first arg) specifies how many bytes for each
|
||||||
# value.
|
# value.
|
||||||
fid.write(
|
fid.write(struct.pack("<ihHIIHH", 16, comp, noc, rate, sbytes, ba, bits))
|
||||||
struct.pack("<ihHIIHH", 16, comp, noc, rate, sbytes, ba, bits)
|
|
||||||
)
|
|
||||||
# data chunk: the word 'data' followed by the size followed by the actual data
|
# data chunk: the word 'data' followed by the size followed by the actual data
|
||||||
fid.write(b"data")
|
fid.write(b"data")
|
||||||
fid.write(struct.pack("<i", data.nbytes))
|
fid.write(struct.pack("<i", data.nbytes))
|
||||||
|
@ -56,9 +56,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# setup the arguments\n",
|
"# setup the arguments\n",
|
||||||
"args = du.get_default_bd_args()\n",
|
"args = du.get_default_bd_args()\n",
|
||||||
"args['detection_threshold'] = 0.3\n",
|
"args[\"detection_threshold\"] = 0.3\n",
|
||||||
"args['time_expansion_factor'] = 1\n",
|
"args[\"time_expansion_factor\"] = 1\n",
|
||||||
"args['model_path'] = 'models/Net2DFast_UK_same.pth.tar'\n",
|
"args[\"model_path\"] = \"models/Net2DFast_UK_same.pth.tar\"\n",
|
||||||
"max_duration = 2.0"
|
"max_duration = 2.0"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -69,7 +69,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# load the model\n",
|
"# load the model\n",
|
||||||
"model, params = du.load_model(args['model_path'])"
|
"model, params = du.load_model(args[\"model_path\"])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -86,13 +86,13 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# choose an audio file\n",
|
"# choose an audio file\n",
|
||||||
"audio_file = 'example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav'\n",
|
"audio_file = \"example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# the following lines are only needed in Colab\n",
|
"# the following lines are only needed in Colab\n",
|
||||||
"# alternatively you can upload your own file\n",
|
"# alternatively you can upload your own file\n",
|
||||||
"#from google.colab import files\n",
|
"# from google.colab import files\n",
|
||||||
"#uploaded = files.upload()\n",
|
"# uploaded = files.upload()\n",
|
||||||
"#audio_file = list(uploaded.keys())[0]"
|
"# audio_file = list(uploaded.keys())[0]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -144,13 +144,17 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# print summary info for the individual detections \n",
|
"# print summary info for the individual detections\n",
|
||||||
"print('Results for ' + results['pred_dict']['id'])\n",
|
"print(\"Results for \" + results[\"pred_dict\"][\"id\"])\n",
|
||||||
"print('{} calls detected\\n'.format(len(results['pred_dict']['annotation'])))\n",
|
"print(\"{} calls detected\\n\".format(len(results[\"pred_dict\"][\"annotation\"])))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print('time\\tprob\\tlfreq\\tspecies_name')\n",
|
"print(\"time\\tprob\\tlfreq\\tspecies_name\")\n",
|
||||||
"for ann in results['pred_dict']['annotation']:\n",
|
"for ann in results[\"pred_dict\"][\"annotation\"]:\n",
|
||||||
" print('{}\\t{}\\t{}\\t{}'.format(ann['start_time'], ann['class_prob'], ann['low_freq'], ann['class']))"
|
" print(\n",
|
||||||
|
" \"{}\\t{}\\t{}\\t{}\".format(\n",
|
||||||
|
" ann[\"start_time\"], ann[\"class_prob\"], ann[\"low_freq\"], ann[\"class\"]\n",
|
||||||
|
" )\n",
|
||||||
|
" )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -174,10 +178,16 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# read the audio file \n",
|
"# read the audio file\n",
|
||||||
"sampling_rate, audio = au.load_audio_file(audio_file, args['time_expansion_factor'], params['target_samp_rate'], params['scale_raw_audio'], max_duration=max_duration)\n",
|
"sampling_rate, audio = au.load_audio_file(\n",
|
||||||
|
" audio_file,\n",
|
||||||
|
" args[\"time_expansion_factor\"],\n",
|
||||||
|
" params[\"target_samp_rate\"],\n",
|
||||||
|
" params[\"scale_raw_audio\"],\n",
|
||||||
|
" max_duration=max_duration,\n",
|
||||||
|
")\n",
|
||||||
"duration = audio.shape[0] / sampling_rate\n",
|
"duration = audio.shape[0] / sampling_rate\n",
|
||||||
"print('File duration: {} seconds'.format(duration))"
|
"print(\"File duration: {} seconds\".format(duration))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -210,12 +220,27 @@
|
|||||||
"# display the detections on top of the spectrogram\n",
|
"# display the detections on top of the spectrogram\n",
|
||||||
"# note, if the audio file is very long, this image will be very large - best to crop the audio first\n",
|
"# note, if the audio file is very long, this image will be very large - best to crop the audio first\n",
|
||||||
"start_time = 0.0\n",
|
"start_time = 0.0\n",
|
||||||
"detections = [ann for ann in results['pred_dict']['annotation']]\n",
|
"detections = [ann for ann in results[\"pred_dict\"][\"annotation\"]]\n",
|
||||||
"fig = plt.figure(1, figsize=(spec.shape[1]/100, spec.shape[0]/100), dpi=100, frameon=False)\n",
|
"fig = plt.figure(\n",
|
||||||
"spec_duration = au.x_coords_to_time(spec.shape[1], sampling_rate, params['fft_win_length'], params['fft_overlap'])\n",
|
" 1, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100, frameon=False\n",
|
||||||
"viz.create_box_image(spec, fig, detections, start_time, start_time+spec_duration, spec_duration, params, spec.max()*1.1, False, True)\n",
|
")\n",
|
||||||
"plt.ylabel('Freq - kHz')\n",
|
"spec_duration = au.x_coords_to_time(\n",
|
||||||
"plt.xlabel('Time - secs')\n",
|
" spec.shape[1], sampling_rate, params[\"fft_win_length\"], params[\"fft_overlap\"]\n",
|
||||||
|
")\n",
|
||||||
|
"viz.create_box_image(\n",
|
||||||
|
" spec,\n",
|
||||||
|
" fig,\n",
|
||||||
|
" detections,\n",
|
||||||
|
" start_time,\n",
|
||||||
|
" start_time + spec_duration,\n",
|
||||||
|
" spec_duration,\n",
|
||||||
|
" params,\n",
|
||||||
|
" spec.max() * 1.1,\n",
|
||||||
|
" False,\n",
|
||||||
|
" True,\n",
|
||||||
|
")\n",
|
||||||
|
"plt.ylabel(\"Freq - kHz\")\n",
|
||||||
|
"plt.xlabel(\"Time - secs\")\n",
|
||||||
"plt.title(os.path.basename(audio_file))\n",
|
"plt.title(os.path.basename(audio_file))\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
|
@ -23,9 +23,7 @@ def main(args):
|
|||||||
if args["save_preds_if_empty"] or (
|
if args["save_preds_if_empty"] or (
|
||||||
len(results["pred_dict"]["annotation"]) > 0
|
len(results["pred_dict"]["annotation"]) > 0
|
||||||
):
|
):
|
||||||
results_path = audio_file.replace(
|
results_path = audio_file.replace(args["audio_dir"], args["ann_dir"])
|
||||||
args["audio_dir"], args["ann_dir"]
|
|
||||||
)
|
|
||||||
du.save_results_to_file(results, results_path)
|
du.save_results_to_file(results, results_path)
|
||||||
except:
|
except:
|
||||||
error_files.append(audio_file)
|
error_files.append(audio_file)
|
||||||
@ -50,9 +48,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
print(info_str)
|
print(info_str)
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("audio_dir", type=str, help="Input directory for audio")
|
||||||
"audio_dir", type=str, help="Input directory for audio"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"ann_dir",
|
"ann_dir",
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -20,9 +20,7 @@ import bat_detect.utils.audio_utils as au
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("audio_path", type=str, help="Input directory for audio")
|
||||||
"audio_path", type=str, help="Input directory for audio"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"op_dir",
|
"op_dir",
|
||||||
type=str,
|
type=str,
|
||||||
@ -33,9 +31,7 @@ if __name__ == "__main__":
|
|||||||
type=str,
|
type=str,
|
||||||
help="Path to where single annotation json file is stored",
|
help="Path to where single annotation json file is stored",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--uk_split", type=str, default="", help="Set as: diff or same")
|
||||||
"--uk_split", type=str, default="", help="Set as: diff or same"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--file_type",
|
"--file_type",
|
||||||
type=str,
|
type=str,
|
||||||
@ -67,9 +63,7 @@ if __name__ == "__main__":
|
|||||||
else:
|
else:
|
||||||
# load uk data - special case
|
# load uk data - special case
|
||||||
print("\nLoading:", args["uk_split"], "\n")
|
print("\nLoading:", args["uk_split"], "\n")
|
||||||
dataset_name = (
|
dataset_name = "uk_" + args["uk_split"] # should be uk_diff, or uk_same
|
||||||
"uk_" + args["uk_split"]
|
|
||||||
) # should be uk_diff, or uk_same
|
|
||||||
datasets, _ = ts.get_train_test_data(
|
datasets, _ = ts.get_train_test_data(
|
||||||
args["ann_file"],
|
args["ann_file"],
|
||||||
args["audio_path"],
|
args["audio_path"],
|
||||||
@ -90,9 +84,7 @@ if __name__ == "__main__":
|
|||||||
norm_type=params["norm_type"],
|
norm_type=params["norm_type"],
|
||||||
)
|
)
|
||||||
|
|
||||||
op_file_name = os.path.join(
|
op_file_name = os.path.join(args["op_dir"], dataset_name + "." + args["file_type"])
|
||||||
args["op_dir"], dataset_name + "." + args["file_type"]
|
|
||||||
)
|
|
||||||
vz.save_summary_image(
|
vz.save_summary_image(
|
||||||
x_train, y_train, class_names, params, op_file_name, class_names_order
|
x_train, y_train, class_names, params, op_file_name, class_names_order
|
||||||
)
|
)
|
||||||
|
@ -25,9 +25,7 @@ import bat_detect.utils.plot_utils as viz
|
|||||||
def filter_anns(anns, start_time, stop_time):
|
def filter_anns(anns, start_time, stop_time):
|
||||||
anns_op = []
|
anns_op = []
|
||||||
for aa in anns:
|
for aa in anns:
|
||||||
if (aa["start_time"] >= start_time) and (
|
if (aa["start_time"] >= start_time) and (aa["start_time"] < stop_time - 0.02):
|
||||||
aa["start_time"] < stop_time - 0.02
|
|
||||||
):
|
|
||||||
anns_op.append(aa)
|
anns_op.append(aa)
|
||||||
return anns_op
|
return anns_op
|
||||||
|
|
||||||
@ -132,14 +130,10 @@ if __name__ == "__main__":
|
|||||||
print("File duration: {} seconds".format(duration))
|
print("File duration: {} seconds".format(duration))
|
||||||
|
|
||||||
# create spec for viz
|
# create spec for viz
|
||||||
spec, _ = au.generate_spectrogram(
|
spec, _ = au.generate_spectrogram(audio, sampling_rate, params_bd, True, False)
|
||||||
audio, sampling_rate, params_bd, True, False
|
|
||||||
)
|
|
||||||
|
|
||||||
# run model and filter detections so only keep ones in relevant time range
|
# run model and filter detections so only keep ones in relevant time range
|
||||||
results = du.process_file(
|
results = du.process_file(args_cmd["audio_file"], model, params_bd, bd_args)
|
||||||
args_cmd["audio_file"], model, params_bd, bd_args
|
|
||||||
)
|
|
||||||
pred_anns = filter_anns(
|
pred_anns = filter_anns(
|
||||||
results["pred_dict"]["annotation"],
|
results["pred_dict"]["annotation"],
|
||||||
args_cmd["start_time"],
|
args_cmd["start_time"],
|
||||||
@ -159,9 +153,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean)
|
op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean)
|
||||||
op_path_pred = (
|
op_path_pred = (
|
||||||
os.path.basename(args_cmd["audio_file"])[:-4]
|
os.path.basename(args_cmd["audio_file"])[:-4] + "_pred." + args_cmd["file_type"]
|
||||||
+ "_pred."
|
|
||||||
+ args_cmd["file_type"]
|
|
||||||
)
|
)
|
||||||
op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred)
|
op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred)
|
||||||
|
|
||||||
|
@ -26,12 +26,8 @@ import bat_detect.utils.plot_utils as viz
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument("audio_file", type=str, help="Path to input audio file")
|
||||||
"audio_file", type=str, help="Path to input audio file"
|
parser.add_argument("model_path", type=str, help="Path to trained BatDetect model")
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"model_path", type=str, help="Path to trained BatDetect model"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--op_dir",
|
"--op_dir",
|
||||||
type=str,
|
type=str,
|
||||||
@ -46,9 +42,7 @@ if __name__ == "__main__":
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Do not plot class names",
|
help="Do not plot class names",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--disable_axis", action="store_true", help="Do not plot axis")
|
||||||
"--disable_axis", action="store_true", help="Do not plot axis"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--detection_threshold",
|
"--detection_threshold",
|
||||||
type=float,
|
type=float,
|
||||||
@ -135,9 +129,7 @@ if __name__ == "__main__":
|
|||||||
detections.append(bb)
|
detections.append(bb)
|
||||||
|
|
||||||
# plot boxes
|
# plot boxes
|
||||||
fig = plt.figure(
|
fig = plt.figure(1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi)
|
||||||
1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi
|
|
||||||
)
|
|
||||||
duration = au.x_coords_to_time(
|
duration = au.x_coords_to_time(
|
||||||
spec.shape[1],
|
spec.shape[1],
|
||||||
sampling_rate,
|
sampling_rate,
|
||||||
@ -196,9 +188,7 @@ if __name__ == "__main__":
|
|||||||
if ii > 0:
|
if ii > 0:
|
||||||
spec_op[:, int(col), :] = 1.0
|
spec_op[:, int(col), :] = 1.0
|
||||||
if reveal_boxes:
|
if reveal_boxes:
|
||||||
spec_op[:, int(col) + 1 :, :] = spec_blank[
|
spec_op[:, int(col) + 1 :, :] = spec_blank[:, int(col) + 1 :, :]
|
||||||
:, int(col) + 1 :, :
|
|
||||||
]
|
|
||||||
elif ii == 0 and reveal_boxes:
|
elif ii == 0 and reveal_boxes:
|
||||||
spec_op = spec_blank
|
spec_op = spec_blank
|
||||||
|
|
||||||
|
@ -23,9 +23,7 @@ def generate_spectrogram_data(
|
|||||||
# spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy()
|
# spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy()
|
||||||
if spec.shape[0] < max_freq:
|
if spec.shape[0] < max_freq:
|
||||||
freq_pad = max_freq - spec.shape[0]
|
freq_pad = max_freq - spec.shape[0]
|
||||||
spec = np.vstack(
|
spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec))
|
||||||
(np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec)
|
|
||||||
)
|
|
||||||
spec = spec[-max_freq : spec.shape[0] - min_freq, :]
|
spec = spec[-max_freq : spec.shape[0] - min_freq, :]
|
||||||
|
|
||||||
if norm_type == "log":
|
if norm_type == "log":
|
||||||
@ -35,11 +33,7 @@ def generate_spectrogram_data(
|
|||||||
* (
|
* (
|
||||||
1.0
|
1.0
|
||||||
/ (
|
/ (
|
||||||
np.abs(
|
np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
|
||||||
np.hanning(
|
|
||||||
int(params["fft_win_length"] * sampling_rate)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
** 2
|
** 2
|
||||||
).sum()
|
).sum()
|
||||||
)
|
)
|
||||||
@ -112,9 +106,7 @@ def load_data(
|
|||||||
max_samps = params["spec_width"] * (nfft - noverlap) + noverlap
|
max_samps = params["spec_width"] * (nfft - noverlap) + noverlap
|
||||||
|
|
||||||
if max_samps > audio.shape[0]:
|
if max_samps > audio.shape[0]:
|
||||||
audio = np.hstack(
|
audio = np.hstack((audio, np.zeros(max_samps - audio.shape[0])))
|
||||||
(audio, np.zeros(max_samps - audio.shape[0]))
|
|
||||||
)
|
|
||||||
audio = audio[:max_samps].astype(np.float32)
|
audio = audio[:max_samps].astype(np.float32)
|
||||||
|
|
||||||
audio = au.pad_audio(
|
audio = au.pad_audio(
|
||||||
@ -147,9 +139,7 @@ def load_data(
|
|||||||
params["fft_overlap"],
|
params["fft_overlap"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
y1 = (ann["low_freq"] - params["min_freq"]) * params[
|
y1 = (ann["low_freq"] - params["min_freq"]) * params["fft_win_length"]
|
||||||
"fft_win_length"
|
|
||||||
]
|
|
||||||
coords.append((y1, x1))
|
coords.append((y1, x1))
|
||||||
|
|
||||||
_, file_ids = np.unique(file_names, return_inverse=True)
|
_, file_ids = np.unique(file_names, return_inverse=True)
|
||||||
@ -215,9 +205,7 @@ def save_summary_image(
|
|||||||
)
|
)
|
||||||
col.grid(color="w", alpha=0.3, linewidth=0.3)
|
col.grid(color="w", alpha=0.3, linewidth=0.3)
|
||||||
col.set_xticks([])
|
col.set_xticks([])
|
||||||
col.title.set_text(
|
col.title.set_text(str(ii + 1) + " " + species_names[order[ii]])
|
||||||
str(ii + 1) + " " + species_names[order[ii]]
|
|
||||||
)
|
|
||||||
col.tick_params(axis="both", which="major", labelsize=7)
|
col.tick_params(axis="both", which="major", labelsize=7)
|
||||||
ii += 1
|
ii += 1
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user