formatted with black and isort

Santiago Martinez 2023-02-22 15:06:02 +00:00
parent eaecf7ba45
commit 53100f51e0
26 changed files with 461 additions and 165 deletions
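
Every hunk below is a pure re-wrap: no logic changes, only long lines split and imports regrouped. The exact formatter invocation and configuration are not recorded in this view, so the following is an assumed reconstruction, not the command taken from the repository. The line length of 79 is inferred from where the diff wraps lines:

    # assumed invocation; line length inferred from the wrapping seen below
    python -m pip install black isort
    python -m isort --profile black --line-length 79 .
    python -m black --line-length 79 .

Running isort with its black-compatible profile keeps the two tools from undoing each other's import formatting.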

app.py

@@ -82,7 +82,9 @@ def generate_results_image(audio_file, anns):
     duration = audio.shape[0] / sampling_rate
     # generate spec
-    spec, spec_viz = au.generate_spectrogram(audio, sampling_rate, params, True, False)
+    spec, spec_viz = au.generate_spectrogram(
+        audio, sampling_rate, params, True, False
+    )
     # create fig
     plt.close("all")

(next file)

@@ -99,7 +99,9 @@ def main():
             if args["save_preds_if_empty"] or (
                 len(results["pred_dict"]["annotation"]) > 0
             ):
-                results_path = audio_file.replace(args["audio_dir"], args["ann_dir"])
+                results_path = audio_file.replace(
+                    args["audio_dir"], args["ann_dir"]
+                )
                 du.save_results_to_file(results, results_path)
         except:
             error_files.append(audio_file)

(next file)

@@ -3,7 +3,9 @@ import numpy as np

 def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq):
     spec_ind = spec_height - spec_ind
-    return round((spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2)
+    return round(
+        (spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2
+    )


 def extract_spec_slices(spec, pred_nms, params):
@@ -25,7 +27,9 @@ def extract_spec_slices(spec, pred_nms, params):
     for ff in range(len(pred_nms["det_probs"])):
         x_start = int(np.maximum(0, x_pos_pad[ff]))
-        x_end = int(
-            np.minimum(spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff]))
-        )
+        x_end = int(
+            np.minimum(
+                spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff])
+            )
+        )
         slices.append(spec[:, x_start:x_end].astype(np.float16))
     return slices
@@ -62,11 +66,15 @@ def get_feats(spec, pred_nms, params):
     feature_names = get_feature_names()
     num_detections = len(pred_nms["det_probs"])
-    features = np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1
+    features = (
+        np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1
+    )

     for ff in range(num_detections):
         x_start = int(np.maximum(0, x_pos[ff]))
-        x_end = int(np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff])))
+        x_end = int(
+            np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff]))
+        )

         # y low is the lowest freq but it will have a higher value due to array starting at 0 at top
         y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff]))
         y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff])))
@@ -118,7 +126,8 @@ def get_feats(spec, pred_nms, params):
         if ff > 0:
             features[ff, 8] = round(
-                pred_nms["start_times"][ff] - pred_nms["start_times"][ff - 1],
+                pred_nms["start_times"][ff]
+                - pred_nms["start_times"][ff - 1],
                 5,
             )

(next file)

@@ -1,7 +1,6 @@
 import torch
-from torch import nn
 import torch.nn.functional as F
+from torch import nn

 __all__ = [
     "SelfAttention",
@@ -26,18 +25,22 @@ class SelfAttention(nn.Module):
     def forward(self, x):
         x = x.squeeze(2).permute(0, 2, 1)
-        kk = torch.matmul(x, self.key_fun.weight.T) + self.key_fun.bias.unsqueeze(
-            0
-        ).unsqueeze(0)
-        qq = torch.matmul(x, self.que_fun.weight.T) + self.que_fun.bias.unsqueeze(
-            0
-        ).unsqueeze(0)
-        vv = torch.matmul(x, self.val_fun.weight.T) + self.val_fun.bias.unsqueeze(
-            0
-        ).unsqueeze(0)
-        kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (self.temperature * self.att_dim)
-        att_weights = F.softmax(kk_qq, 1)  # each col of each attention matrix sums to 1
+        kk = torch.matmul(
+            x, self.key_fun.weight.T
+        ) + self.key_fun.bias.unsqueeze(0).unsqueeze(0)
+        qq = torch.matmul(
+            x, self.que_fun.weight.T
+        ) + self.que_fun.bias.unsqueeze(0).unsqueeze(0)
+        vv = torch.matmul(
+            x, self.val_fun.weight.T
+        ) + self.val_fun.bias.unsqueeze(0).unsqueeze(0)
+        kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (
+            self.temperature * self.att_dim
+        )
+        att_weights = F.softmax(
+            kk_qq, 1
+        )  # each col of each attention matrix sums to 1
         att = torch.bmm(vv.permute(0, 2, 1), att_weights)

         op = torch.matmul(
@@ -49,7 +52,9 @@ class SelfAttention(nn.Module):

 class ConvBlockDownCoordF(nn.Module):
-    def __init__(self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1):
+    def __init__(
+        self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1
+    ):
         super(ConvBlockDownCoordF, self).__init__()
         self.coords = nn.Parameter(
             torch.linspace(-1, 1, ip_height)[None, None, ..., None],
@@ -73,7 +78,9 @@ class ConvBlockDownCoordF(nn.Module):

 class ConvBlockDownStandard(nn.Module):
-    def __init__(self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1):
+    def __init__(
+        self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1
+    ):
         super(ConvBlockDownStandard, self).__init__()
         self.conv = nn.Conv2d(
             in_chn,
@@ -105,10 +112,14 @@ class ConvBlockUpF(nn.Module):
         self.up_scale = up_scale
         self.up_mode = up_mode
         self.coords = nn.Parameter(
-            torch.linspace(-1, 1, ip_height * up_scale[0])[None, None, ..., None],
+            torch.linspace(-1, 1, ip_height * up_scale[0])[
+                None, None, ..., None
+            ],
             requires_grad=False,
         )
-        self.conv = nn.Conv2d(in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size)
+        self.conv = nn.Conv2d(
+            in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size
+        )
         self.conv_bn = nn.BatchNorm2d(out_chn)

     def forward(self, x):
@@ -142,7 +153,9 @@ class ConvBlockUpStandard(nn.Module):
         super(ConvBlockUpStandard, self).__init__()
         self.up_scale = up_scale
         self.up_mode = up_mode
-        self.conv = nn.Conv2d(in_chn, out_chn, kernel_size=k_size, padding=pad_size)
+        self.conv = nn.Conv2d(
+            in_chn, out_chn, kernel_size=k_size, padding=pad_size
+        )
         self.conv_bn = nn.BatchNorm2d(out_chn)

     def forward(self, x):

(next file)

@@ -4,14 +4,13 @@ import torch.nn.functional as F
 from torch import nn

 from .model_helpers import (
-    SelfAttention,
     ConvBlockDownCoordF,
     ConvBlockDownStandard,
     ConvBlockUpF,
     ConvBlockUpStandard,
+    SelfAttention,
 )

 __all__ = [
     "Net2DFast",
     "Net2DFastNoAttn",
@@ -91,13 +90,17 @@ class Net2DFast(nn.Module):
             num_filts // 4, num_filts // 4, kernel_size=3, padding=1
         )
         self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
-        self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
+        self.conv_size_op = nn.Conv2d(
+            num_filts // 4, 2, kernel_size=1, padding=0
+        )
         self.conv_classes_op = nn.Conv2d(
             num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
         )

         if self.emb_dim > 0:
-            self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
+            self.conv_emb = nn.Conv2d(
+                num_filts, self.emb_dim, kernel_size=1, padding=0
+            )

     def forward(self, ip, return_feats=False):
@@ -204,13 +207,17 @@ class Net2DFastNoAttn(nn.Module):
             num_filts // 4, num_filts // 4, kernel_size=3, padding=1
         )
         self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
-        self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
+        self.conv_size_op = nn.Conv2d(
+            num_filts // 4, 2, kernel_size=1, padding=0
+        )
         self.conv_classes_op = nn.Conv2d(
             num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
         )

         if self.emb_dim > 0:
-            self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
+            self.conv_emb = nn.Conv2d(
+                num_filts, self.emb_dim, kernel_size=1, padding=0
+            )

     def forward(self, ip, return_feats=False):
@@ -314,13 +321,17 @@ class Net2DFastNoCoordConv(nn.Module):
             num_filts // 4, num_filts // 4, kernel_size=3, padding=1
         )
         self.conv_op_bn = nn.BatchNorm2d(num_filts // 4)
-        self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0)
+        self.conv_size_op = nn.Conv2d(
+            num_filts // 4, 2, kernel_size=1, padding=0
+        )
         self.conv_classes_op = nn.Conv2d(
             num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0
         )

         if self.emb_dim > 0:
-            self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0)
+            self.conv_emb = nn.Conv2d(
+                num_filts, self.emb_dim, kernel_size=1, padding=0
+            )

     def forward(self, ip, return_feats=False):

(next file)

@@ -22,7 +22,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
     params["experiment"] = os.path.join(exps_dir, now_str, "")
     params["model_file_name"] = os.path.join(params["experiment"], model_name)
     params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "")
-    params["op_im_dir_test"] = os.path.join(params["experiment"], "op_ims_test", "")
+    params["op_im_dir_test"] = os.path.join(
+        params["experiment"], "op_ims_test", ""
+    )
     # params['notes'] = ''  # can save notes about an experiment here

     # spec parameters
@@ -34,7 +36,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
     )  # in milliseconds, amount of time per stft time step
     params["fft_overlap"] = 0.75  # stft window overlap

-    params["max_freq"] = 120000  # in Hz, everything above this will be discarded
+    params[
+        "max_freq"
+    ] = 120000  # in Hz, everything above this will be discarded
     params["min_freq"] = 10000  # in Hz, everything below this will be discarded

     params[
@@ -51,9 +55,13 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
     ] = 32  # spectrogram should be divisible by this amount in width and height

     # spec processing params
-    params["denoise_spec_avg"] = True  # removes the mean for each frequency band
+    params[
+        "denoise_spec_avg"
+    ] = True  # removes the mean for each frequency band
     params["scale_raw_audio"] = False  # scales the raw audio to [-1, 1]
-    params["max_scale_spec"] = False  # scales the spectrogram so that it is max 1
+    params[
+        "max_scale_spec"
+    ] = False  # scales the spectrogram so that it is max 1
     params["spec_scale"] = "pcen"  # 'log', 'pcen', 'none'

     # detection params
@@ -73,13 +81,21 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
     params["target_sigma"] = 2.0

     # augmentation params
-    params["aug_prob"] = 0.20  # augmentations will be performed with this probability
+    params[
+        "aug_prob"
+    ] = 0.20  # augmentations will be performed with this probability
     params["augment_at_train"] = True
     params["augment_at_train_combine"] = True
-    params["echo_max_delay"] = 0.005  # simulate echo by adding copy of raw audio
+    params[
+        "echo_max_delay"
+    ] = 0.005  # simulate echo by adding copy of raw audio
     params["stretch_squeeze_delta"] = 0.04  # stretch or squeeze spec
-    params["mask_max_time_perc"] = 0.05  # max mask size - here percentage, not ideal
-    params["mask_max_freq_perc"] = 0.10  # max mask size - here percentage, not ideal
+    params[
+        "mask_max_time_perc"
+    ] = 0.05  # max mask size - here percentage, not ideal
+    params[
+        "mask_max_freq_perc"
+    ] = 0.10  # max mask size - here percentage, not ideal
     params[
         "spec_amp_scaling"
     ] = 2.0  # multiply the "volume" by 0:X times current amount
@@ -100,7 +116,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"):
     params["class_loss_weight"] = 2.0  # weight for the classification loss
     params["individual_loss_weight"] = 0.0  # not used
     if params["individual_loss_weight"] == 0.0:
-        params["emb_dim"] = 0  # number of dimensions used for individual id embedding
+        params[
+            "emb_dim"
+        ] = 0  # number of dimensions used for individual id embedding
     else:
         params["emb_dim"] = 3

(next file)

@@ -24,7 +24,9 @@ def run_nms(outputs, params, sampling_rate):
     pred_size = outputs["pred_size"]  # box size

     pred_det_nms = non_max_suppression(pred_det, params["nms_kernel_size"])
-    freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[-2]
+    freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[
+        -2
+    ]

     # NOTE there will be small differences depending on which sampling rate is chosen
     # as we are choosing the same sampling rate for the entire batch
@@ -60,7 +62,8 @@ def run_nms(outputs, params, sampling_rate):
             params["fft_overlap"],
         )
         pred["end_times"] = x_coords_to_time(
-            (pred["x_pos"].float() + pred["bb_width"]) / params["resize_factor"],
+            (pred["x_pos"].float() + pred["bb_width"])
+            / params["resize_factor"],
             sampling_rate[ii].item(),
             params["fft_win_length"],
             params["fft_overlap"],
@@ -68,7 +71,9 @@ def run_nms(outputs, params, sampling_rate):
         pred["low_freqs"] = (
             pred_size[ii].shape[1] - pred["y_pos"].float()
         ) * freq_rescale + params["min_freq"]
-        pred["high_freqs"] = pred["low_freqs"] + pred["bb_height"] * freq_rescale
+        pred["high_freqs"] = (
+            pred["low_freqs"] + pred["bb_height"] * freq_rescale
+        )

         # extract the per class votes
         if "pred_class" in outputs:

(next file)

@@ -207,7 +207,9 @@ def load_sonobat_preds(dataset, id, sb_meta, set_class_name=None):
                 ann_c["class"] = file_res[id]["species_1"]
             else:
                 ann_c["class"] = set_class_name
-            ann_c["start_time"] = np.round(da_c.iloc[aa]["TimeInFile"] / 1000.0, 5)
+            ann_c["start_time"] = np.round(
+                da_c.iloc[aa]["TimeInFile"] / 1000.0, 5
+            )
             ann_c["end_time"] = np.round(
                 ann_c["start_time"] + da_c.iloc[aa]["CallDuration"] / 1000.0, 5
             )
@@ -265,7 +267,9 @@ def assign_to_gt(gt, pred, iou_thresh):
     iou_m = np.zeros((num_preds, num_gts))
     for ii in range(num_preds):
         for jj in range(num_gts):
-            iou_m[ii, jj] = bb_overlap(gt["annotation"][jj], pred["annotation"][ii])
+            iou_m[ii, jj] = bb_overlap(
+                gt["annotation"][jj], pred["annotation"][ii]
+            )

     # greedily assign detections to ground truths
     # needs to be greater than some threshold and we cannot assign GT
@@ -274,7 +278,9 @@ def assign_to_gt(gt, pred, iou_thresh):
     for jj in range(num_gts):
         max_iou = np.argmax(iou_m[:, jj])
         if iou_m[max_iou, jj] > iou_thresh:
-            pred["annotation"][max_iou]["class"] = gt["annotation"][jj]["class"]
+            pred["annotation"][max_iou]["class"] = gt["annotation"][jj][
+                "class"
+            ]
             iou_m[max_iou, :] = -1.0

     return pred
@@ -284,17 +290,25 @@ def parse_data(data, class_names, non_event_classes, is_pred=False):
     class_names_all = class_names + non_event_classes

     data["class_names"] = np.array([aa["class"] for aa in data["annotation"]])
-    data["start_times"] = np.array([aa["start_time"] for aa in data["annotation"]])
+    data["start_times"] = np.array(
+        [aa["start_time"] for aa in data["annotation"]]
+    )
     data["end_times"] = np.array([aa["end_time"] for aa in data["annotation"]])
-    data["high_freqs"] = np.array([float(aa["high_freq"]) for aa in data["annotation"]])
-    data["low_freqs"] = np.array([float(aa["low_freq"]) for aa in data["annotation"]])
+    data["high_freqs"] = np.array(
+        [float(aa["high_freq"]) for aa in data["annotation"]]
+    )
+    data["low_freqs"] = np.array(
+        [float(aa["low_freq"]) for aa in data["annotation"]]
+    )

     if is_pred:
         # when loading predictions
         data["det_probs"] = np.array(
             [float(aa["det_prob"]) for aa in data["annotation"]]
         )
-        data["class_probs"] = np.zeros((len(class_names) + 1, len(data["annotation"])))
+        data["class_probs"] = np.zeros(
+            (len(class_names) + 1, len(data["annotation"]))
+        )
         data["class_ids"] = np.array(
             [class_names_all.index(aa["class"]) for aa in data["annotation"]]
         ).astype(np.int32)
@@ -320,7 +334,8 @@ def load_gt_data(datasets, events_of_interest, class_names, classes_to_ignore):
         [dd], events_of_interest=events_of_interest, verbose=True
     )
     gt_dataset = [
-        parse_data(gg, class_names, classes_to_ignore, False) for gg in gt_dataset
+        parse_data(gg, class_names, classes_to_ignore, False)
+        for gg in gt_dataset
     ]

     for gt in gt_dataset:
@@ -356,7 +371,9 @@ def eval_rf_model(clf, pred, un_train_class, num_classes):
     # stores the prediction in place
     if pred["feats"].shape[0] > 0:
         pred["class_probs"] = np.zeros((num_classes, pred["feats"].shape[0]))
-        pred["class_probs"][un_train_class, :] = clf.predict_proba(pred["feats"]).T
+        pred["class_probs"][un_train_class, :] = clf.predict_proba(
+            pred["feats"]
+        ).T
         pred["det_probs"] = pred["class_probs"][:-1, :].sum(0)
     else:
         pred["class_probs"] = np.zeros((num_classes, 0))
@@ -457,8 +474,12 @@ if __name__ == "__main__":
         help="Output directory for plots",
     )
     parser.add_argument("data_dir", type=str, help="Path to root of datasets")
-    parser.add_argument("ann_dir", type=str, help="Path to extracted annotations")
-    parser.add_argument("bd_model_path", type=str, help="Path to BatDetect model")
+    parser.add_argument(
+        "ann_dir", type=str, help="Path to extracted annotations"
+    )
+    parser.add_argument(
+        "bd_model_path", type=str, help="Path to BatDetect model"
+    )
     parser.add_argument(
         "--test_file",
         type=str,
@@ -498,7 +519,9 @@ if __name__ == "__main__":
         default="",
         help="Text to add as title of plots",
     )
-    parser.add_argument("--rand_seed", type=int, default=2001, help="Random seed")
+    parser.add_argument(
+        "--rand_seed", type=int, default=2001, help="Random seed"
+    )
     args = vars(parser.parse_args())

     np.random.seed(args["rand_seed"])
@@ -582,7 +605,9 @@ if __name__ == "__main__":
         for ii, gt in enumerate(gt_test):
             sb_pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta)
             if sb_pred["class_name"] != "":
-                sb_pred = parse_data(sb_pred, class_names, classes_to_ignore, True)
+                sb_pred = parse_data(
+                    sb_pred, class_names, classes_to_ignore, True
+                )
                 sb_pred["class_probs"][
                     sb_pred["class_ids"],
                     np.arange(sb_pred["class_probs"].shape[1]),
@@ -617,7 +642,9 @@ if __name__ == "__main__":
         x_train = []
         y_train = []
         for gt in gt_train:
-            pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat")
+            pred = load_sonobat_preds(
+                gt["dataset_name"], gt["id"], sb_meta, "Not Bat"
+            )

             if len(pred["annotation"]) > 0:
                 # compute detection overlap with ground truth to determine which are the TP detections
@@ -634,7 +661,9 @@ if __name__ == "__main__":
         # run the model on the test set
         preds_sb_rf = []
         for gt in gt_test:
-            pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat")
+            pred = load_sonobat_preds(
+                gt["dataset_name"], gt["id"], sb_meta, "Not Bat"
+            )
             pred = parse_data(pred, class_names, classes_to_ignore, True)
             pred = eval_rf_model(clf_sb, pred, un_train_class, num_classes)
             preds_sb_rf.append(pred)
@@ -666,7 +695,9 @@ if __name__ == "__main__":
         x_train = []
         y_train = []
         for gt in gt_train:
-            pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"])
+            pred = load_tadarida_pred(
+                args["td_ip_dir"], gt["dataset_name"], gt["id"]
+            )
             # compute detection overlap with ground truth to determine which are the TP detections
             assign_to_gt(gt, pred, args["iou_thresh"])
             pred = parse_data(pred, class_names, classes_to_ignore, True)
@@ -681,7 +712,9 @@ if __name__ == "__main__":
         # run the model on the test set
         preds_td = []
         for gt in gt_test:
-            pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"])
+            pred = load_tadarida_pred(
+                args["td_ip_dir"], gt["dataset_name"], gt["id"]
+            )
             pred = parse_data(pred, class_names, classes_to_ignore, True)
             pred = eval_rf_model(clf_td, pred, un_train_class, num_classes)
             preds_td.append(pred)

(next file)

@@ -28,7 +28,9 @@ if __name__ == "__main__":
     print(info_str)

     parser = argparse.ArgumentParser()
-    parser.add_argument("audio_path", type=str, help="Input directory for audio")
+    parser.add_argument(
+        "audio_path", type=str, help="Input directory for audio"
+    )
     parser.add_argument(
         "train_ann_path",
         type=str,
@@ -78,7 +80,9 @@ if __name__ == "__main__":
         params["device"] = "cuda"
     else:
         params["device"] = "cpu"
-        print("\nNote, this will be a lot faster if you use computer with a GPU.\n")
+        print(
+            "\nNote, this will be a lot faster if you use computer with a GPU.\n"
+        )

     print("\nAudio directory: " + args["audio_path"])
     print("Train file: " + args["train_ann_path"])
@@ -129,13 +133,17 @@ if __name__ == "__main__":
         data_train,
         params["class_names"],
         params["class_inv_freq"],
-    ) = tu.load_set_of_anns(train_sets, classes_to_ignore, params["events_of_interest"])
+    ) = tu.load_set_of_anns(
+        train_sets, classes_to_ignore, params["events_of_interest"]
+    )
     print("Number of files", len(data_train))

     params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
         params["class_names"]
     )
-    params["class_names_short"] = tu.get_short_class_names(params["class_names"])
+    params["class_names_short"] = tu.get_short_class_names(
+        params["class_names"]
+    )

     # load test annotations
     test_sets = []
@@ -218,7 +226,9 @@ if __name__ == "__main__":
             param.requires_grad = False

     optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
-    scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader))
+    scheduler = CosineAnnealingLR(
+        optimizer, params["num_epochs"] * len(train_loader)
+    )
     if params["train_loss"] == "mse":
         det_criterion = losses.mse_loss
     elif params["train_loss"] == "focal":
@@ -293,7 +303,9 @@ if __name__ == "__main__":
             test_plt_class.update_and_save(
                 epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
             )
-            pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res)
+            pu.plot_pr_curve_class(
+                params["experiment"], "test_pr", "test_pr", test_res
+            )

     # save finetuned model
     print("saving model to: " + params["model_file_name"])

(next file)

@@ -58,7 +58,9 @@ if __name__ == "__main__":
     print(info_str)

     parser = argparse.ArgumentParser()
-    parser.add_argument("dataset_name", type=str, help="Name to call your dataset")
+    parser.add_argument(
+        "dataset_name", type=str, help="Name to call your dataset"
+    )
     parser.add_argument("audio_dir", type=str, help="Input directory for audio")
     parser.add_argument(
         "ann_dir",
@@ -147,10 +149,14 @@ if __name__ == "__main__":
         test_files = load_file_names(args["test_file"])
         file_names_all = [dd["id"] for dd in data_all]
         train_inds = [
-            file_names_all.index(ff) for ff in train_files if ff in file_names_all
+            file_names_all.index(ff)
+            for ff in train_files
+            if ff in file_names_all
         ]
         test_inds = [
-            file_names_all.index(ff) for ff in test_files if ff in file_names_all
+            file_names_all.index(ff)
+            for ff in test_files
+            if ff in file_names_all
         ]

     else:

(next file)

@@ -73,7 +73,9 @@ def generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, params):
     y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32)
     y_2d_size = np.zeros((2, op_height, op_width), dtype=np.float32)
     # num classes and "background" class
-    y_2d_classes = np.zeros((num_classes + 1, op_height, op_width), dtype=np.float32)
+    y_2d_classes = np.zeros(
+        (num_classes + 1, op_height, op_width), dtype=np.float32
+    )

     # create 2D ground truth heatmaps
     for ii in valid_inds:
@@ -126,7 +128,8 @@ def draw_gaussian(heatmap, center, sigmax, sigmay=None):
     x0 = y0 = size // 2
     # g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
     g = np.exp(
-        -((x - x0) ** 2) / (2 * sigmax**2) - ((y - y0) ** 2) / (2 * sigmay**2)
+        -((x - x0) ** 2) / (2 * sigmax**2)
+        - ((y - y0) ** 2) / (2 * sigmay**2)
     )
     g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
     g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
@@ -307,7 +310,9 @@ class AudioLoader(torch.utils.data.Dataset):

                 # convert class name into class label
                 if aa["class"] in self.params["class_names"]:
-                    aa["class_id"] = self.params["class_names"].index(aa["class"])
+                    aa["class_id"] = self.params["class_names"].index(
+                        aa["class"]
+                    )
                 else:
                     aa["class_id"] = -1
@@ -315,8 +320,12 @@ class AudioLoader(torch.utils.data.Dataset):
                     filtered_annotations.append(aa)

             dd["annotation"] = filtered_annotations
-            dd["start_times"] = np.array([aa["start_time"] for aa in dd["annotation"]])
-            dd["end_times"] = np.array([aa["end_time"] for aa in dd["annotation"]])
+            dd["start_times"] = np.array(
+                [aa["start_time"] for aa in dd["annotation"]]
+            )
+            dd["end_times"] = np.array(
+                [aa["end_time"] for aa in dd["annotation"]]
+            )
             dd["high_freqs"] = np.array(
                 [float(aa["high_freq"]) for aa in dd["annotation"]]
             )
@@ -393,12 +402,18 @@ class AudioLoader(torch.utils.data.Dataset):
             )

             if audio_raw.shape[0] - length_samples > 0:
-                sample_crop = np.random.randint(audio_raw.shape[0] - length_samples)
+                sample_crop = np.random.randint(
+                    audio_raw.shape[0] - length_samples
+                )
             else:
                 sample_crop = 0
             audio_raw = audio_raw[sample_crop : sample_crop + length_samples]
-            ann["start_times"] = ann["start_times"] - sample_crop / float(sampling_rate)
-            ann["end_times"] = ann["end_times"] - sample_crop / float(sampling_rate)
+            ann["start_times"] = ann["start_times"] - sample_crop / float(
+                sampling_rate
+            )
+            ann["end_times"] = ann["end_times"] - sample_crop / float(
+                sampling_rate
+            )

         # pad audio
         if self.is_train:
@@ -477,7 +492,9 @@ class AudioLoader(torch.utils.data.Dataset):
                 spec = scale_vol_aug(spec, self.params)

             if np.random.random() < self.params["aug_prob"]:
-                spec = warp_spec_aug(spec, ann, self.return_spec_for_viz, self.params)
+                spec = warp_spec_aug(
+                    spec, ann, self.return_spec_for_viz, self.params
+                )

             if np.random.random() < self.params["aug_prob"]:
                 spec = mask_time_aug(spec, self.params)
@@ -488,7 +505,9 @@ class AudioLoader(torch.utils.data.Dataset):
         outputs = {}
         outputs["spec"] = spec
         if self.return_spec_for_viz:
-            outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(0)
+            outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(
+                0
+            )

         # create ground truth heatmaps
         (

(next file)

@@ -1,5 +1,10 @@
 import numpy as np
-from sklearn.metrics import accuracy_score, auc, balanced_accuracy_score, roc_curve
+from sklearn.metrics import (
+    accuracy_score,
+    auc,
+    balanced_accuracy_score,
+    roc_curve,
+)


 def compute_error_auc(op_str, gt, pred, prob):
@@ -12,7 +17,10 @@ def compute_error_auc(op_str, gt, pred, prob):

     fpr, tpr, thresholds = roc_curve(gt, pred)
     roc_auc = auc(fpr, tpr)
-    print(op_str + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc))
+    print(
+        op_str
+        + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc)
+    )

     # return class_acc, roc_auc
@@ -106,10 +114,14 @@ def compute_pre_rec(
             confidence.append(pp["det_probs"][valid_inds])
         elif eval_mode == "per_class":
             # per class
-            confidence.append(pp["class_probs"].T[valid_inds, class_of_interest])
+            confidence.append(
+                pp["class_probs"].T[valid_inds, class_of_interest]
+            )
         elif eval_mode == "top_class":
             # per class - note that sometimes 'class_probs' can be num_classes+1 in size
-            top_class = np.argmax(pp["class_probs"].T[valid_inds, :num_classes], 1)
+            top_class = np.argmax(
+                pp["class_probs"].T[valid_inds, :num_classes], 1
+            )
             confidence.append(pp["class_probs"].T[valid_inds, top_class])
             pred_class.append(top_class)
@@ -158,7 +170,9 @@ def compute_pre_rec(
             num_positives += len(gg["start_times"][valid_inds])
         elif eval_mode == "per_class":
             # all valid ones with class of interest
-            num_positives += (gg["class_ids"][valid_inds] == class_of_interest).sum()
+            num_positives += (
+                gg["class_ids"][valid_inds] == class_of_interest
+            ).sum()
         elif eval_mode == "top_class":
            # all valid ones with non generic class
            num_positives += (gg["class_ids"][valid_inds] > -1).sum()
@@ -240,7 +254,9 @@ def compute_pre_rec(
         results["avg_prec"] = np.nan
         results["rec_at_x"] = np.nan
     else:
-        results["avg_prec"] = np.round(calc_average_precision(recall, precision), 5)
+        results["avg_prec"] = np.round(
+            calc_average_precision(recall, precision), 5
+        )
         results["rec_at_x"] = np.round(calc_recall_at_x(recall, precision), 5)

     return results
@@ -283,12 +299,20 @@ def compute_file_accuracy(gts, preds, num_classes):

     # compute min and max scoring range - then threshold
     min_val = 0
-    mins = [pp["class_probs"].min() for pp in preds if pp["class_probs"].shape[1] > 0]
+    mins = [
+        pp["class_probs"].min()
+        for pp in preds
+        if pp["class_probs"].shape[1] > 0
+    ]
     if len(mins) > 0:
         min_val = np.min(mins)

     max_val = 1.0
-    maxes = [pp["class_probs"].max() for pp in preds if pp["class_probs"].shape[1] > 0]
+    maxes = [
+        pp["class_probs"].max()
+        for pp in preds
+        if pp["class_probs"].shape[1] > 0
+    ]
     if len(maxes) > 0:
         max_val = np.max(maxes)
@@ -310,7 +334,9 @@ def compute_file_accuracy(gts, preds, num_classes):

     # pick the result corresponding to the overall best threshold
     pred_valid_all = np.vstack(pred_valid_all)
-    acc_per_thresh = (np.array(gt_valid)[..., np.newaxis] == pred_valid_all).mean(0)
+    acc_per_thresh = (
+        np.array(gt_valid)[..., np.newaxis] == pred_valid_all
+    ).mean(0)
     best_thresh = np.argmax(acc_per_thresh)
     best_acc = acc_per_thresh[best_thresh]
     pred_valid = pred_valid_all[:, best_thresh].astype(np.int).tolist()

(next file)

@@ -62,7 +62,9 @@ def save_images_batch(model, data_loader, params):
     data_loader.dataset.return_spec_for_viz = False


-def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title):
+def save_image(
+    spec_viz, outputs, ind, inputs, params, op_file_name, plot_title
+):
     pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float())
     pred_hm = outputs["pred_det"][ind, 0, :].data.cpu().numpy()
     spec_viz = spec_viz[ind, 0, :]
@@ -85,10 +87,14 @@ def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title)
     )


-def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq):
+def loss_fun(
+    outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq
+):
     # detection loss
-    loss = params["det_loss_weight"] * det_criterion(outputs["pred_det"], gt_det)
+    loss = params["det_loss_weight"] * det_criterion(
+        outputs["pred_det"], gt_det
+    )

     # bounding box size loss
     loss += params["size_loss_weight"] * losses.bbox_size_loss(
@@ -105,7 +111,9 @@ def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_in
     return loss


-def train(model, epoch, data_loader, det_criterion, optimizer, scheduler, params):
+def train(
+    model, epoch, data_loader, det_criterion, optimizer, scheduler, params
+):
     model.train()
@@ -218,7 +226,9 @@ def test(model, epoch, data_loader, det_criterion, params):
             test_loss.update(loss.item(), data.shape[0])

             # do NMS
-            pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float())
+            pred_nms, _ = pp.run_nms(
+                outputs, params, inputs["sampling_rate"].float()
+            )
             predictions.extend(pred_nms)
             ground_truths.extend(parse_gt_data(inputs))
@@ -328,7 +338,9 @@ if __name__ == "__main__":
     # setup arg parser and populate it with exiting parameters - will not work with lists
     parser = argparse.ArgumentParser()
     parser.add_argument("data_dir", type=str, help="Path to root of datasets")
-    parser.add_argument("ann_dir", type=str, help="Path to extracted annotations")
+    parser.add_argument(
+        "ann_dir", type=str, help="Path to extracted annotations"
+    )
     parser.add_argument(
         "--train_split",
         type=str,
@@ -387,12 +399,14 @@ if __name__ == "__main__":
     params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping(
         params["class_names"]
     )
-    params["class_names_short"] = tu.get_short_class_names(params["class_names"])
+    params["class_names_short"] = tu.get_short_class_names(
+        params["class_names"]
+    )

     # standardize the low and high frequency value for specified classes
-    params["standardize_classs_names"] = params["standardize_classs_names_ip"].split(
-        ";"
-    )
+    params["standardize_classs_names"] = params[
+        "standardize_classs_names_ip"
+    ].split(";")
     for cc in params["standardize_classs_names"]:
         if cc in params["class_names"]:
             data_train = tu.standardize_low_freq(data_train, cc)
@@ -442,7 +456,9 @@ if __name__ == "__main__":

     optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
     # optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=0.9)
-    scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader))
+    scheduler = CosineAnnealingLR(
+        optimizer, params["num_epochs"] * len(train_loader)
+    )
     if params["train_loss"] == "mse":
         det_criterion = losses.mse_loss
     elif params["train_loss"] == "focal":
@@ -505,7 +521,9 @@ if __name__ == "__main__":

         if epoch % params["num_eval_epochs"] == 0:
             # detection accuracy on test set
-            test_res, test_loss = test(model, epoch, test_loader, det_criterion, params)
+            test_res, test_loss = test(
+                model, epoch, test_loader, det_criterion, params
+            )
             test_plt_ls.update_and_save(epoch, [test_loss["test_loss"]])
             test_plt.update_and_save(
                 epoch,
@@ -520,7 +538,9 @@ if __name__ == "__main__":
             test_plt_class.update_and_save(
                 epoch, [rs["avg_prec"] for rs in test_res["class_pr"]]
             )
-            pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res)
+            pu.plot_pr_curve_class(
+                params["experiment"], "test_pr", "test_pr", test_res
+            )

     # save trained model
     print("saving model to: " + params["model_file_name"])

(next file)

@@ -24,7 +24,8 @@ def split_diff(ann_dir, wav_dir, load_extra=True):
             "dataset_name": "BatDetective",
             "is_test": False,
             "is_binary": True,  # just a bat / not bat dataset ie no classes
-            "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json",
+            "ann_path": ann_dir
+            + "train_set_bulgaria_batdetective_with_bbs.json",
             "wav_path": wav_dir + "bat_detective/audio/",
         }
     )
@@ -151,7 +152,8 @@ def split_same(ann_dir, wav_dir, load_extra=True):
             "dataset_name": "BatDetective",
             "is_test": False,
             "is_binary": True,
-            "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json",
+            "ann_path": ann_dir
+            + "train_set_bulgaria_batdetective_with_bbs.json",
             "wav_path": wav_dir + "bat_detective/audio/",
         }
    )

(next file)

@@ -25,7 +25,9 @@ def get_blank_dataset_dict(dataset_name, is_test, ann_path, wav_path):

 def get_short_class_names(class_names, str_len=3):
     class_names_short = []
     for cc in class_names:
-        class_names_short.append(" ".join([sp[:str_len] for sp in cc.split(" ")]))
+        class_names_short.append(
+            " ".join([sp[:str_len] for sp in cc.split(" ")])
+        )
     return class_names_short
@@ -155,7 +157,9 @@ def load_set_of_anns(
         str_len = np.max([len(cc) for cc in class_names]) + 5
         for cc in range(len(class_names)):
             print(
-                str(cc).ljust(5) + class_names[cc].ljust(str_len) + str(class_cnts[cc])
+                str(cc).ljust(5)
+                + class_names[cc].ljust(str_len)
+                + str(class_cnts[cc])
             )

     if len(classes_to_ignore) == 0:

(next file)

@@ -39,7 +39,9 @@ def generate_spectrogram(
     min_freq = round(params["min_freq"] * params["fft_win_length"])
     if spec.shape[0] < max_freq:
         freq_pad = max_freq - spec.shape[0]
-        spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec))
+        spec = np.vstack(
+            (np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec)
+        )
     spec_cropped = spec[-max_freq : spec.shape[0] - min_freq, :]

     if params["spec_scale"] == "log":
@@ -49,7 +51,11 @@ def generate_spectrogram(
             * (
                 1.0
                 / (
-                    np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
+                    np.abs(
+                        np.hanning(
+                            int(params["fft_win_length"] * sampling_rate)
+                        )
+                    )
                     ** 2
                 ).sum()
             )
@@ -82,7 +88,11 @@ def generate_spectrogram(
             * (
                 1.0
                 / (
-                    np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
+                    np.abs(
+                        np.hanning(
+                            int(params["fft_win_length"] * sampling_rate)
+                        )
+                    )
                     ** 2
                 ).sum()
             )
@@ -122,7 +132,9 @@ def load_audio_file(

     # clipping maximum duration
     if max_duration is not False:
-        max_duration = np.minimum(int(sampling_rate * max_duration), audio_raw.shape[0])
+        max_duration = np.minimum(
+            int(sampling_rate * max_duration), audio_raw.shape[0]
+        )
         audio_raw = audio_raw[:max_duration]

     # convert to float32 and scale
@@ -159,7 +171,9 @@ def pad_audio(
         # too small
         # used during training to ensure all the batches are the same size
         diff = fixed_width * step + noverlap - audio_raw.shape[0]
-        audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype)))
+        audio_raw = np.hstack(
+            (audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
+        )

     elif fixed_width is not None and spec_width > fixed_width:
         # too big
@@ -167,13 +181,18 @@ def pad_audio(
         diff = fixed_width * step + noverlap - audio_raw.shape[0]
         audio_raw = audio_raw[:diff]

-    elif spec_width_rs < min_size or (np.floor(spec_width_rs) % divide_factor) != 0:
+    elif (
+        spec_width_rs < min_size
+        or (np.floor(spec_width_rs) % divide_factor) != 0
+    ):
         # need to be at least min_size
         div_amt = np.ceil(spec_width_rs / float(divide_factor))
         div_amt = np.maximum(1, div_amt)
         target_size = int(div_amt * divide_factor * (1.0 / resize_factor))
         diff = target_size * step + noverlap - audio_raw.shape[0]
-        audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype)))
+        audio_raw = np.hstack(
+            (audio_raw, np.zeros(diff, dtype=audio_raw.dtype))
+        )

     return audio_raw

(next file)

@@ -67,6 +67,7 @@ def get_audio_files(ip_dir: str) -> List[str]:

 class ModelParameters(TypedDict):
     """Model parameters."""
+
     model_name: str
     num_filters: int
     emb_dim: int
@@ -77,8 +78,7 @@ class ModelParameters(TypedDict):


 def load_model(
-    model_path: str=DEFAULT_MODEL_PATH,
-    load_weights: bool=True
+    model_path: str = DEFAULT_MODEL_PATH, load_weights: bool = True
 ) -> Tuple[torch.nn.Module, ModelParameters]:
     """Load model from file.
@@ -211,7 +211,9 @@ def convert_results(
     results["spec_feat_names"] = feats.get_feature_names()
     if len(cnn_feats) > 0:
         results["cnn_feats"] = cnn_feats
-        results["cnn_feat_names"] = [str(ii) for ii in range(cnn_feats.shape[1])]
+        results["cnn_feat_names"] = [
+            str(ii) for ii in range(cnn_feats.shape[1])
+        ]
     if len(spec_slices) > 0:
         results["spec_slices"] = spec_slices
@@ -245,7 +247,9 @@ def save_results_to_file(results, op_path):

     # save features
     if "spec_feats" in results.keys():
-        df = pd.DataFrame(results["spec_feats"], columns=results["spec_feat_names"])
+        df = pd.DataFrame(
+            results["spec_feats"], columns=results["spec_feat_names"]
+        )
         df.to_csv(
             op_path + "_spec_features.csv",
             sep=",",
@@ -254,7 +258,9 @@ def save_results_to_file(results, op_path):
         )

     if "cnn_feats" in results.keys():
-        df = pd.DataFrame(results["cnn_feats"], columns=results["cnn_feat_names"])
+        df = pd.DataFrame(
+            results["cnn_feats"], columns=results["cnn_feat_names"]
+        )
         df.to_csv(
             op_path + "_cnn_features.csv",
             sep=",",
@@ -289,7 +295,9 @@ def compute_spectrogram(audio, sampling_rate, params, return_np=False):
     # resize the spec
     rs = params["resize_factor"]
     spec_op_shape = (int(params["spec_height"] * rs), int(spec.shape[-1] * rs))
-    spec = F.interpolate(spec, size=spec_op_shape, mode="bilinear", align_corners=False)
+    spec = F.interpolate(
+        spec, size=spec_op_shape, mode="bilinear", align_corners=False
+    )

     if return_np:
         spec_np = spec[0, 0, :].cpu().data.numpy()
@@ -350,7 +358,9 @@ def process_file(
         chunk_time = args["chunk_size"] * chunk_id
         chunk_length = int(sampling_rate * args["chunk_size"])
         start_sample = chunk_id * chunk_length
-        end_sample = np.minimum((chunk_id + 1) * chunk_length, audio_full.shape[0])
+        end_sample = np.minimum(
+            (chunk_id + 1) * chunk_length, audio_full.shape[0]
+        )
         audio = audio_full[start_sample:end_sample]

         # load audio file and compute spectrogram
@@ -385,7 +395,9 @@ def process_file(
                 cnn_feats.append(features[0])

             if args["spec_slices"]:
-                spec_slices.extend(feats.extract_spec_slices(spec_np, pred_nms, params))
+                spec_slices.extend(
+                    feats.extract_spec_slices(spec_np, pred_nms, params)
+                )

     # convert the predictions into output dictionary
     file_id = os.path.basename(audio_file)
@@ -406,7 +418,10 @@ def process_file(
     # summarize results
     if not args["quiet"]:
         num_detections = len(results["pred_dict"]["annotation"])
-        print("{}".format(num_detections) + " call(s) detected above the threshold.")
+        print(
+            "{}".format(num_detections)
+            + " call(s) detected above the threshold."
+        )

     # print results for top n classes
     if not args["quiet"] and (num_detections > 0):
@@ -416,7 +431,8 @@ def process_file(
         print("species name".ljust(30) + "probablity present")
         for cc in np.argsort(class_overall)[::-1][:top_n]:
             print(
-                params["class_names"][cc].ljust(30) + str(round(class_overall[cc], 3))
+                params["class_names"][cc].ljust(30)
+                + str(round(class_overall[cc], 3))
             )

     if return_raw_preds:

View File

@ -57,7 +57,9 @@ def create_box_image(
if plot_class_names: if plot_class_names:
for ii, bb in enumerate(boxes): for ii, bb in enumerate(boxes):
txt = " ".join([sp[:3] for sp in detections_ip[ii]["class"].split(" ")]) txt = " ".join(
[sp[:3] for sp in detections_ip[ii]["class"].split(" ")]
)
font_info = { font_info = {
"color": "white", "color": "white",
"size": 10, "size": 10,
@ -87,7 +89,9 @@ def save_ann_spec(
y_extent = [0, duration, min_freq, max_freq] y_extent = [0, duration, min_freq, max_freq]
plt.close("all") plt.close("all")
fig = plt.figure(0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100) fig = plt.figure(
0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100
)
plt.imshow( plt.imshow(
spec, spec,
aspect="auto", aspect="auto",
@ -124,12 +128,16 @@ def save_ann_spec(
plt.savefig(op_path) plt.savefig(op_path)
def plot_pts(fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False): def plot_pts(
fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False
):
plt.figure(fig_id) plt.figure(fig_id)
un_class, labels = np.unique(class_names, return_inverse=True) un_class, labels = np.unique(class_names, return_inverse=True)
un_labels = np.unique(labels) un_labels = np.unique(labels)
if un_labels.shape[0] > len(colors): if un_labels.shape[0] > len(colors):
colors = [plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels] colors = [
plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels
]
for ii, u in enumerate(un_labels): for ii, u in enumerate(un_labels):
inds = np.where(labels == u)[0] inds = np.where(labels == u)[0]
@ -236,7 +244,9 @@ def plot_spec(
ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
ax0.xaxis.set_ticklabels([]) ax0.xaxis.set_ticklabels([])
font_info = {"color": "white", "size": 12, "weight": "bold"} font_info = {"color": "white", "size": 12, "weight": "bold"}
ax0.text(0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info) ax0.text(
0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info
)
plt.grid(False) plt.grid(False)
if plot_boxes: if plot_boxes:
@ -261,7 +271,9 @@ def plot_spec(
ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent)
ax1.xaxis.set_ticklabels([]) ax1.xaxis.set_ticklabels([])
font_info = {"color": "white", "size": 12, "weight": "bold"} font_info = {"color": "white", "size": 12, "weight": "bold"}
ax1.text(0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info) ax1.text(
0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info
)
plt.grid(False) plt.grid(False)
if plot_boxes: if plot_boxes:
@ -296,7 +308,9 @@ def plot_spec(
) )
# ax2.xaxis.set_ticklabels([]) # ax2.xaxis.set_ticklabels([])
font_info = {"color": "white", "size": 12, "weight": "bold"} font_info = {"color": "white", "size": 12, "weight": "bold"}
ax2.text(0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info) ax2.text(
0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info
)
plt.grid(False) plt.grid(False)
@@ -394,11 +408,15 @@ def plot_confusion_matrix(
     # shorten the class names for plotting
     class_names = []
     for cc in class_names_long:
-        class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[:-1]
+        class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[
+            :-1
+        ]
         class_names.append(class_name_sm)
     num_classes = len(class_names)

-    cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(np.float32)
+    cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(
+        np.float32
+    )
     cm_norm = cm.sum(1)
     valid_inds = np.where(cm_norm > 0)[0]

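The [:-1] above strips the trailing space the join leaves behind. A minimal worked example of the abbreviation, with a hypothetical species label:

cc = "Myotis daubentonii"  # hypothetical class name
class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[:-1]
print(class_name_sm)  # -> "Myo dau"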
@@ -487,7 +505,9 @@ class LossPlotter(object):
         if self.logy:
             plt.gca().set_yscale("log")
         plt.grid(True)
-        plt.legend(bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0)
+        plt.legend(
+            bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0
+        )
         plt.tight_layout()
         plt.savefig(self.op_file_name)
         plt.close(0)
@@ -502,15 +522,19 @@ class LossPlotter(object):
     def save_confusion_matrix(self, gt, pred):
         plt.figure(0)
-        cm = confusion_matrix(gt, pred, np.arange(len(self.class_names))).astype(
-            np.float32
-        )
+        cm = confusion_matrix(
+            gt, pred, np.arange(len(self.class_names))
+        ).astype(np.float32)
         cm_norm = cm.sum(1)
         valid_inds = np.where(cm_norm > 0)[0]
-        cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
+        cm[valid_inds, :] = (
+            cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis]
+        )

         plt.imshow(cm, vmin=0, vmax=1, cmap="plasma")
         plt.colorbar()
-        plt.xticks(np.arange(cm.shape[1]), self.class_names, rotation="vertical")
+        plt.xticks(
+            np.arange(cm.shape[1]), self.class_names, rotation="vertical"
+        )
         plt.yticks(np.arange(cm.shape[0]), self.class_names)
         plt.xlabel("Predicted")
         plt.ylabel("Ground Truth")
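Note that, unlike the labels= keyword used in plot_confusion_matrix above, this method still passes the labels positionally; scikit-learn 1.0+ makes everything after y_true and y_pred keyword-only, so the call now raises a TypeError there. A minimal sketch of the keyword form, with placeholder data:

import numpy as np
from sklearn.metrics import confusion_matrix

class_names = ["a", "b", "c"]   # placeholder labels
gt = np.array([0, 1, 2, 2])     # placeholder ground truth
pred = np.array([0, 1, 1, 2])   # placeholder predictions
cm = confusion_matrix(
    gt, pred, labels=np.arange(len(class_names))
).astype(np.float32)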

View File

@@ -56,19 +56,25 @@ class InteractivePlotter:
         self.annotated = np.zeros(
             self.labels.shape[0], dtype=np.int
         )  # can populate this with 1's where we have labels
-        self.labels_cols = [colors[self.labels[ii]] for ii in range(len(self.labels))]
+        self.labels_cols = [
+            colors[self.labels[ii]] for ii in range(len(self.labels))
+        ]
         self.freq_lims = freq_lims
         self.allow_training = allow_training
         self.pt_size = 5.0
-        self.spec_pad = 0.2  # this much padding has been applied to the spec slices
+        self.spec_pad = (
+            0.2  # this much padding has been applied to the spec slices
+        )
         self.fig_width = 12
         self.fig_height = 8
         self.current_id = 0

         max_ind = np.argmax([ss.shape[1] for ss in self.spec_slices])
         self.max_width = self.spec_slices[max_ind].shape[1]
-        self.blank_spec = np.zeros((self.spec_slices[0].shape[0], self.max_width))
+        self.blank_spec = np.zeros(
+            (self.spec_slices[0].shape[0], self.max_width)
+        )

     def plot(self, fig_id):
         self.fig, self.ax = plt.subplots(
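Note that dtype=np.int comes through the reformat untouched; np.int is a deprecated alias of the builtin int and was removed in NumPy 1.24, so this line fails on current NumPy. A minimal sketch of the drop-in replacement:

import numpy as np

num_labels = 10  # stands in for self.labels.shape[0]
annotated = np.zeros(num_labels, dtype=int)  # or np.int64 for an explicit width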
@@ -141,7 +147,8 @@ class InteractivePlotter:
             ) // 2
             new_spec[
                 :,
-                w_diff : self.spec_slices[self.current_id].shape[1] + w_diff,
+                w_diff : self.spec_slices[self.current_id].shape[1]
+                + w_diff,
             ] = self.spec_slices[self.current_id]
             self.spec_im.set_data(new_spec)
             self.spec_im.set_clim(vmin=0, vmax=new_spec.max())
@@ -172,7 +179,9 @@ class InteractivePlotter:
             info_str = (
                 self.call_info[self.current_id]["file_name"]
                 + ", time="
-                + str(round(self.call_info[self.current_id]["start_time"], 3))
+                + str(
+                    round(self.call_info[self.current_id]["start_time"], 3)
+                )
                 + ", prob="
                 + str(round(self.call_info[self.current_id]["det_prob"], 3))
             )

View File

@@ -235,7 +235,9 @@ def write(filename, rate, data):
     # kind of numeric data in the numpy array
     dkind = data.dtype.kind
     if not (
-        dkind == "i" or dkind == "f" or (dkind == "u" and data.dtype.itemsize == 1)
+        dkind == "i"
+        or dkind == "f"
+        or (dkind == "u" and data.dtype.itemsize == 1)
     ):
         raise ValueError("Unsupported data type '%s'" % data.dtype)

@@ -268,7 +270,9 @@ def write(filename, rate, data):
     # Write the data (16, comp, noc, etc) in the correct binary format
     # for the wav header. the string format (first arg) specifies how many bytes for each
     # value.
-    fid.write(struct.pack("<ihHIIHH", 16, comp, noc, rate, sbytes, ba, bits))
+    fid.write(
+        struct.pack("<ihHIIHH", 16, comp, noc, rate, sbytes, ba, bits)
+    )
     # data chunk: the word 'data' followed by the size followed by the actual data
     fid.write(b"data")
     fid.write(struct.pack("<i", data.nbytes))
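The "<ihHIIHH" format spells out the 16-byte PCM fmt chunk one field per letter: little-endian byte order (<), a 4-byte chunk size (i), 2-byte compression code (h), 2-byte channel count (H), 4-byte sample rate and byte rate (II), and 2-byte block align and bit depth (HH). A minimal sketch with illustrative mono 16-bit 44.1 kHz values:

import struct

# byte rate = sample rate * block align; block align = channels * bits // 8
fmt = struct.pack("<ihHIIHH", 16, 1, 1, 44100, 88200, 2, 16)
assert len(fmt) == 20  # 4-byte size field plus the 16-byte chunk body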

View File

@@ -102,7 +102,9 @@
    "outputs": [],
    "source": [
     "# run the model\n",
-    "results = du.process_file(audio_file, model, params, args, max_duration=max_duration)"
+    "results = du.process_file(\n",
+    "    audio_file, model, params, args, max_duration=max_duration\n",
+    ")"
    ]
   },
   {
@@ -197,7 +199,9 @@
    "outputs": [],
    "source": [
     "# generate spectrogram for visualization\n",
-    "spec, spec_viz = au.generate_spectrogram(audio, sampling_rate, params, True, False)"
+    "spec, spec_viz = au.generate_spectrogram(\n",
+    "    audio, sampling_rate, params, True, False\n",
+    ")"
    ]
   },
   {
@@ -222,10 +226,16 @@
     "start_time = 0.0\n",
     "detections = [ann for ann in results[\"pred_dict\"][\"annotation\"]]\n",
     "fig = plt.figure(\n",
-    "    1, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100, frameon=False\n",
+    "    1,\n",
+    "    figsize=(spec.shape[1] / 100, spec.shape[0] / 100),\n",
+    "    dpi=100,\n",
+    "    frameon=False,\n",
     ")\n",
     "spec_duration = au.x_coords_to_time(\n",
-    "    spec.shape[1], sampling_rate, params[\"fft_win_length\"], params[\"fft_overlap\"]\n",
+    "    spec.shape[1],\n",
+    "    sampling_rate,\n",
+    "    params[\"fft_win_length\"],\n",
+    "    params[\"fft_overlap\"],\n",
     ")\n",
     "viz.create_box_image(\n",
     "    spec,\n",

View File

@@ -23,7 +23,9 @@ def main(args):
                 if args["save_preds_if_empty"] or (
                     len(results["pred_dict"]["annotation"]) > 0
                 ):
-                    results_path = audio_file.replace(args["audio_dir"], args["ann_dir"])
+                    results_path = audio_file.replace(
+                        args["audio_dir"], args["ann_dir"]
+                    )
                     du.save_results_to_file(results, results_path)
         except:
             error_files.append(audio_file)
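The bare except: above (untouched by the formatter) swallows every failure, including KeyboardInterrupt, and records only the file name. A minimal sketch of a narrower variant that also keeps the cause; the stub stands in for du.process_file:

def process(path):  # stands in for du.process_file(...)
    raise IOError("unreadable: " + path)

error_files = []
for audio_file in ["a.wav", "b.wav"]:  # placeholder file list
    try:
        results = process(audio_file)
    except Exception as err:  # lets Ctrl-C and SystemExit propagate
        error_files.append((audio_file, str(err)))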

View File

@@ -20,7 +20,9 @@ import bat_detect.utils.audio_utils as au

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("audio_path", type=str, help="Input directory for audio")
+    parser.add_argument(
+        "audio_path", type=str, help="Input directory for audio"
+    )
     parser.add_argument(
         "op_dir",
         type=str,
@@ -31,7 +33,9 @@ if __name__ == "__main__":
         type=str,
         help="Path to where single annotation json file is stored",
     )
-    parser.add_argument("--uk_split", type=str, default="", help="Set as: diff or same")
+    parser.add_argument(
+        "--uk_split", type=str, default="", help="Set as: diff or same"
+    )
     parser.add_argument(
         "--file_type",
         type=str,
@@ -84,7 +88,9 @@ if __name__ == "__main__":
         norm_type=params["norm_type"],
     )

-    op_file_name = os.path.join(args["op_dir"], dataset_name + "." + args["file_type"])
+    op_file_name = os.path.join(
+        args["op_dir"], dataset_name + "." + args["file_type"]
+    )
     vz.save_summary_image(
         x_train, y_train, class_names, params, op_file_name, class_names_order
     )

View File

@@ -25,7 +25,9 @@ import bat_detect.utils.plot_utils as viz

 def filter_anns(anns, start_time, stop_time):
     anns_op = []
     for aa in anns:
-        if (aa["start_time"] >= start_time) and (aa["start_time"] < stop_time - 0.02):
+        if (aa["start_time"] >= start_time) and (
+            aa["start_time"] < stop_time - 0.02
+        ):
             anns_op.append(aa)
     return anns_op
@@ -130,7 +132,9 @@ if __name__ == "__main__":
     print("File duration: {} seconds".format(duration))

     # create spec for viz
-    spec, _ = au.generate_spectrogram(audio, sampling_rate, params_bd, True, False)
+    spec, _ = au.generate_spectrogram(
+        audio, sampling_rate, params_bd, True, False
+    )

     # run model and filter detections so only keep ones in relevant time range
     results = du.process_file(args_cmd["audio_file"], model, params_bd, bd_args)
@@ -153,7 +157,9 @@ if __name__ == "__main__":
     )
     op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean)

     op_path_pred = (
-        os.path.basename(args_cmd["audio_file"])[:-4] + "_pred." + args_cmd["file_type"]
+        os.path.basename(args_cmd["audio_file"])[:-4]
+        + "_pred."
+        + args_cmd["file_type"]
     )
     op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred)
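The [:-4] slice assumes a three-character extension on the input file; os.path.splitext is the robust way to build these output names. A minimal sketch with a hypothetical path:

import os

audio_file = "recordings/example.wav"  # hypothetical input
file_type = "png"                      # stands in for args_cmd["file_type"]
base = os.path.splitext(os.path.basename(audio_file))[0]
op_path_pred = base + "_pred." + file_type  # -> "example_pred.png"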

View File

@@ -27,7 +27,9 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("audio_file", type=str, help="Path to input audio file")
-    parser.add_argument("model_path", type=str, help="Path to trained BatDetect model")
+    parser.add_argument(
+        "model_path", type=str, help="Path to trained BatDetect model"
+    )
     parser.add_argument(
         "--op_dir",
         type=str,
@@ -42,7 +44,9 @@ if __name__ == "__main__":
         action="store_true",
         help="Do not plot class names",
     )
-    parser.add_argument("--disable_axis", action="store_true", help="Do not plot axis")
+    parser.add_argument(
+        "--disable_axis", action="store_true", help="Do not plot axis"
+    )
     parser.add_argument(
         "--detection_threshold",
         type=float,
@@ -129,7 +133,9 @@ if __name__ == "__main__":
         detections.append(bb)

     # plot boxes
-    fig = plt.figure(1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi)
+    fig = plt.figure(
+        1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi
+    )
     duration = au.x_coords_to_time(
         spec.shape[1],
         sampling_rate,
@@ -188,7 +194,9 @@ if __name__ == "__main__":
             if ii > 0:
                 spec_op[:, int(col), :] = 1.0
                 if reveal_boxes:
-                    spec_op[:, int(col) + 1 :, :] = spec_blank[:, int(col) + 1 :, :]
+                    spec_op[:, int(col) + 1 :, :] = spec_blank[
+                        :, int(col) + 1 :, :
+                    ]
             elif ii == 0 and reveal_boxes:
                 spec_op = spec_blank


View File

@@ -23,7 +23,9 @@ def generate_spectrogram_data(
     # spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy()
     if spec.shape[0] < max_freq:
         freq_pad = max_freq - spec.shape[0]
-        spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec))
+        spec = np.vstack(
+            (np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec)
+        )
     spec = spec[-max_freq : spec.shape[0] - min_freq, :]

     if norm_type == "log":
@@ -33,7 +35,11 @@ def generate_spectrogram_data(
         * (
             1.0
             / (
-                np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate)))
+                np.abs(
+                    np.hanning(
+                        int(params["fft_win_length"] * sampling_rate)
+                    )
+                )
                 ** 2
             ).sum()
         )
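The deeper nesting here is purely black's wrapping; the value is still the inverse summed energy of the Hann analysis window, one factor of the scaling applied to the magnitude spectrogram before the log. A minimal sketch of just this factor, with illustrative parameters:

import numpy as np

fft_win_length = 0.02   # seconds, illustrative
sampling_rate = 256000  # Hz, illustrative
window = np.hanning(int(fft_win_length * sampling_rate))
scale = 1.0 / (np.abs(window) ** 2).sum()  # one factor of the full expression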
@@ -106,7 +112,9 @@ def load_data(

         max_samps = params["spec_width"] * (nfft - noverlap) + noverlap
         if max_samps > audio.shape[0]:
-            audio = np.hstack((audio, np.zeros(max_samps - audio.shape[0])))
+            audio = np.hstack(
+                (audio, np.zeros(max_samps - audio.shape[0]))
+            )
         audio = audio[:max_samps].astype(np.float32)

         audio = au.pad_audio(
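max_samps inverts the STFT frame-count arithmetic: spec_width columns at hop nfft - noverlap consume spec_width * (nfft - noverlap) + noverlap samples, so shorter clips are zero-padded up to a fixed spectrogram width. Worked numbers, with illustrative values:

nfft, noverlap, spec_width = 512, 384, 256  # 75% overlap, illustrative
hop = nfft - noverlap                       # 128 samples per column
max_samps = spec_width * hop + noverlap     # 256 * 128 + 384 = 33152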
@@ -139,7 +147,9 @@ def load_data(
                     params["fft_overlap"],
                 )
             )
-            y1 = (ann["low_freq"] - params["min_freq"]) * params["fft_win_length"]
+            y1 = (ann["low_freq"] - params["min_freq"]) * params[
+                "fft_win_length"
+            ]
             coords.append((y1, x1))

     _, file_ids = np.unique(file_names, return_inverse=True)