diff --git a/app.py b/app.py index 8e1f69c..1c884f0 100644 --- a/app.py +++ b/app.py @@ -82,7 +82,9 @@ def generate_results_image(audio_file, anns): duration = audio.shape[0] / sampling_rate # generate spec - spec, spec_viz = au.generate_spectrogram(audio, sampling_rate, params, True, False) + spec, spec_viz = au.generate_spectrogram( + audio, sampling_rate, params, True, False + ) # create fig plt.close("all") diff --git a/bat_detect/command.py b/bat_detect/command.py index 09b4a85..9996832 100644 --- a/bat_detect/command.py +++ b/bat_detect/command.py @@ -99,7 +99,9 @@ def main(): if args["save_preds_if_empty"] or ( len(results["pred_dict"]["annotation"]) > 0 ): - results_path = audio_file.replace(args["audio_dir"], args["ann_dir"]) + results_path = audio_file.replace( + args["audio_dir"], args["ann_dir"] + ) du.save_results_to_file(results, results_path) except: error_files.append(audio_file) diff --git a/bat_detect/detector/compute_features.py b/bat_detect/detector/compute_features.py index b24dd77..368c2db 100644 --- a/bat_detect/detector/compute_features.py +++ b/bat_detect/detector/compute_features.py @@ -3,7 +3,9 @@ import numpy as np def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq): spec_ind = spec_height - spec_ind - return round((spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2) + return round( + (spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2 + ) def extract_spec_slices(spec, pred_nms, params): @@ -25,7 +27,9 @@ def extract_spec_slices(spec, pred_nms, params): for ff in range(len(pred_nms["det_probs"])): x_start = int(np.maximum(0, x_pos_pad[ff])) x_end = int( - np.minimum(spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff])) + np.minimum( + spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff]) + ) ) slices.append(spec[:, x_start:x_end].astype(np.float16)) return slices @@ -62,11 +66,15 @@ def get_feats(spec, pred_nms, params): feature_names = get_feature_names() num_detections = len(pred_nms["det_probs"]) - features = np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1 + features = ( + np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1 + ) for ff in range(num_detections): x_start = int(np.maximum(0, x_pos[ff])) - x_end = int(np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff]))) + x_end = int( + np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff])) + ) # y low is the lowest freq but it will have a higher value due to array starting at 0 at top y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff])) y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff]))) @@ -118,7 +126,8 @@ def get_feats(spec, pred_nms, params): if ff > 0: features[ff, 8] = round( - pred_nms["start_times"][ff] - pred_nms["start_times"][ff - 1], + pred_nms["start_times"][ff] + - pred_nms["start_times"][ff - 1], 5, ) diff --git a/bat_detect/detector/model_helpers.py b/bat_detect/detector/model_helpers.py index b05f361..789bdb6 100644 --- a/bat_detect/detector/model_helpers.py +++ b/bat_detect/detector/model_helpers.py @@ -1,7 +1,6 @@ import torch -from torch import nn import torch.nn.functional as F - +from torch import nn __all__ = [ "SelfAttention", @@ -26,18 +25,22 @@ class SelfAttention(nn.Module): def forward(self, x): x = x.squeeze(2).permute(0, 2, 1) - kk = torch.matmul(x, self.key_fun.weight.T) + self.key_fun.bias.unsqueeze( - 0 - ).unsqueeze(0) - qq = torch.matmul(x, self.que_fun.weight.T) + self.que_fun.bias.unsqueeze( - 0 - ).unsqueeze(0) - vv = torch.matmul(x, self.val_fun.weight.T) + self.val_fun.bias.unsqueeze( - 0 - ).unsqueeze(0) + kk = torch.matmul( + x, self.key_fun.weight.T + ) + self.key_fun.bias.unsqueeze(0).unsqueeze(0) + qq = torch.matmul( + x, self.que_fun.weight.T + ) + self.que_fun.bias.unsqueeze(0).unsqueeze(0) + vv = torch.matmul( + x, self.val_fun.weight.T + ) + self.val_fun.bias.unsqueeze(0).unsqueeze(0) - kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (self.temperature * self.att_dim) - att_weights = F.softmax(kk_qq, 1) # each col of each attention matrix sums to 1 + kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / ( + self.temperature * self.att_dim + ) + att_weights = F.softmax( + kk_qq, 1 + ) # each col of each attention matrix sums to 1 att = torch.bmm(vv.permute(0, 2, 1), att_weights) op = torch.matmul( @@ -49,7 +52,9 @@ class SelfAttention(nn.Module): class ConvBlockDownCoordF(nn.Module): - def __init__(self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1): + def __init__( + self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1 + ): super(ConvBlockDownCoordF, self).__init__() self.coords = nn.Parameter( torch.linspace(-1, 1, ip_height)[None, None, ..., None], @@ -73,7 +78,9 @@ class ConvBlockDownCoordF(nn.Module): class ConvBlockDownStandard(nn.Module): - def __init__(self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1): + def __init__( + self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1 + ): super(ConvBlockDownStandard, self).__init__() self.conv = nn.Conv2d( in_chn, @@ -105,10 +112,14 @@ class ConvBlockUpF(nn.Module): self.up_scale = up_scale self.up_mode = up_mode self.coords = nn.Parameter( - torch.linspace(-1, 1, ip_height * up_scale[0])[None, None, ..., None], + torch.linspace(-1, 1, ip_height * up_scale[0])[ + None, None, ..., None + ], requires_grad=False, ) - self.conv = nn.Conv2d(in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size) + self.conv = nn.Conv2d( + in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size + ) self.conv_bn = nn.BatchNorm2d(out_chn) def forward(self, x): @@ -142,7 +153,9 @@ class ConvBlockUpStandard(nn.Module): super(ConvBlockUpStandard, self).__init__() self.up_scale = up_scale self.up_mode = up_mode - self.conv = nn.Conv2d(in_chn, out_chn, kernel_size=k_size, padding=pad_size) + self.conv = nn.Conv2d( + in_chn, out_chn, kernel_size=k_size, padding=pad_size + ) self.conv_bn = nn.BatchNorm2d(out_chn) def forward(self, x): diff --git a/bat_detect/detector/models.py b/bat_detect/detector/models.py index 4f76fe6..94b98ad 100644 --- a/bat_detect/detector/models.py +++ b/bat_detect/detector/models.py @@ -4,14 +4,13 @@ import torch.nn.functional as F from torch import nn from .model_helpers import ( - SelfAttention, ConvBlockDownCoordF, ConvBlockDownStandard, ConvBlockUpF, ConvBlockUpStandard, + SelfAttention, ) - __all__ = [ "Net2DFast", "Net2DFastNoAttn", @@ -91,13 +90,17 @@ class Net2DFast(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) + self.conv_size_op = nn.Conv2d( + num_filts // 4, 2, kernel_size=1, padding=0 + ) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) + self.conv_emb = nn.Conv2d( + num_filts, self.emb_dim, kernel_size=1, padding=0 + ) def forward(self, ip, return_feats=False): @@ -204,13 +207,17 @@ class Net2DFastNoAttn(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) + self.conv_size_op = nn.Conv2d( + num_filts // 4, 2, kernel_size=1, padding=0 + ) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) + self.conv_emb = nn.Conv2d( + num_filts, self.emb_dim, kernel_size=1, padding=0 + ) def forward(self, ip, return_feats=False): @@ -314,13 +321,17 @@ class Net2DFastNoCoordConv(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) + self.conv_size_op = nn.Conv2d( + num_filts // 4, 2, kernel_size=1, padding=0 + ) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) + self.conv_emb = nn.Conv2d( + num_filts, self.emb_dim, kernel_size=1, padding=0 + ) def forward(self, ip, return_feats=False): diff --git a/bat_detect/detector/parameters.py b/bat_detect/detector/parameters.py index b6edd47..a1fe9c7 100644 --- a/bat_detect/detector/parameters.py +++ b/bat_detect/detector/parameters.py @@ -22,7 +22,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): params["experiment"] = os.path.join(exps_dir, now_str, "") params["model_file_name"] = os.path.join(params["experiment"], model_name) params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "") - params["op_im_dir_test"] = os.path.join(params["experiment"], "op_ims_test", "") + params["op_im_dir_test"] = os.path.join( + params["experiment"], "op_ims_test", "" + ) # params['notes'] = '' # can save notes about an experiment here # spec parameters @@ -34,7 +36,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): ) # in milliseconds, amount of time per stft time step params["fft_overlap"] = 0.75 # stft window overlap - params["max_freq"] = 120000 # in Hz, everything above this will be discarded + params[ + "max_freq" + ] = 120000 # in Hz, everything above this will be discarded params["min_freq"] = 10000 # in Hz, everything below this will be discarded params[ @@ -51,9 +55,13 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): ] = 32 # spectrogram should be divisible by this amount in width and height # spec processing params - params["denoise_spec_avg"] = True # removes the mean for each frequency band + params[ + "denoise_spec_avg" + ] = True # removes the mean for each frequency band params["scale_raw_audio"] = False # scales the raw audio to [-1, 1] - params["max_scale_spec"] = False # scales the spectrogram so that it is max 1 + params[ + "max_scale_spec" + ] = False # scales the spectrogram so that it is max 1 params["spec_scale"] = "pcen" # 'log', 'pcen', 'none' # detection params @@ -73,13 +81,21 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): params["target_sigma"] = 2.0 # augmentation params - params["aug_prob"] = 0.20 # augmentations will be performed with this probability + params[ + "aug_prob" + ] = 0.20 # augmentations will be performed with this probability params["augment_at_train"] = True params["augment_at_train_combine"] = True - params["echo_max_delay"] = 0.005 # simulate echo by adding copy of raw audio + params[ + "echo_max_delay" + ] = 0.005 # simulate echo by adding copy of raw audio params["stretch_squeeze_delta"] = 0.04 # stretch or squeeze spec - params["mask_max_time_perc"] = 0.05 # max mask size - here percentage, not ideal - params["mask_max_freq_perc"] = 0.10 # max mask size - here percentage, not ideal + params[ + "mask_max_time_perc" + ] = 0.05 # max mask size - here percentage, not ideal + params[ + "mask_max_freq_perc" + ] = 0.10 # max mask size - here percentage, not ideal params[ "spec_amp_scaling" ] = 2.0 # multiply the "volume" by 0:X times current amount @@ -100,7 +116,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): params["class_loss_weight"] = 2.0 # weight for the classification loss params["individual_loss_weight"] = 0.0 # not used if params["individual_loss_weight"] == 0.0: - params["emb_dim"] = 0 # number of dimensions used for individual id embedding + params[ + "emb_dim" + ] = 0 # number of dimensions used for individual id embedding else: params["emb_dim"] = 3 diff --git a/bat_detect/detector/post_process.py b/bat_detect/detector/post_process.py index 5bdb643..2745cdf 100644 --- a/bat_detect/detector/post_process.py +++ b/bat_detect/detector/post_process.py @@ -24,7 +24,9 @@ def run_nms(outputs, params, sampling_rate): pred_size = outputs["pred_size"] # box size pred_det_nms = non_max_suppression(pred_det, params["nms_kernel_size"]) - freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[-2] + freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[ + -2 + ] # NOTE there will be small differences depending on which sampling rate is chosen # as we are choosing the same sampling rate for the entire batch @@ -60,7 +62,8 @@ def run_nms(outputs, params, sampling_rate): params["fft_overlap"], ) pred["end_times"] = x_coords_to_time( - (pred["x_pos"].float() + pred["bb_width"]) / params["resize_factor"], + (pred["x_pos"].float() + pred["bb_width"]) + / params["resize_factor"], sampling_rate[ii].item(), params["fft_win_length"], params["fft_overlap"], @@ -68,7 +71,9 @@ def run_nms(outputs, params, sampling_rate): pred["low_freqs"] = ( pred_size[ii].shape[1] - pred["y_pos"].float() ) * freq_rescale + params["min_freq"] - pred["high_freqs"] = pred["low_freqs"] + pred["bb_height"] * freq_rescale + pred["high_freqs"] = ( + pred["low_freqs"] + pred["bb_height"] * freq_rescale + ) # extract the per class votes if "pred_class" in outputs: diff --git a/bat_detect/evaluate/evaluate_models.py b/bat_detect/evaluate/evaluate_models.py index 97ded25..e7ce249 100644 --- a/bat_detect/evaluate/evaluate_models.py +++ b/bat_detect/evaluate/evaluate_models.py @@ -207,7 +207,9 @@ def load_sonobat_preds(dataset, id, sb_meta, set_class_name=None): ann_c["class"] = file_res[id]["species_1"] else: ann_c["class"] = set_class_name - ann_c["start_time"] = np.round(da_c.iloc[aa]["TimeInFile"] / 1000.0, 5) + ann_c["start_time"] = np.round( + da_c.iloc[aa]["TimeInFile"] / 1000.0, 5 + ) ann_c["end_time"] = np.round( ann_c["start_time"] + da_c.iloc[aa]["CallDuration"] / 1000.0, 5 ) @@ -265,7 +267,9 @@ def assign_to_gt(gt, pred, iou_thresh): iou_m = np.zeros((num_preds, num_gts)) for ii in range(num_preds): for jj in range(num_gts): - iou_m[ii, jj] = bb_overlap(gt["annotation"][jj], pred["annotation"][ii]) + iou_m[ii, jj] = bb_overlap( + gt["annotation"][jj], pred["annotation"][ii] + ) # greedily assign detections to ground truths # needs to be greater than some threshold and we cannot assign GT @@ -274,7 +278,9 @@ def assign_to_gt(gt, pred, iou_thresh): for jj in range(num_gts): max_iou = np.argmax(iou_m[:, jj]) if iou_m[max_iou, jj] > iou_thresh: - pred["annotation"][max_iou]["class"] = gt["annotation"][jj]["class"] + pred["annotation"][max_iou]["class"] = gt["annotation"][jj][ + "class" + ] iou_m[max_iou, :] = -1.0 return pred @@ -284,17 +290,25 @@ def parse_data(data, class_names, non_event_classes, is_pred=False): class_names_all = class_names + non_event_classes data["class_names"] = np.array([aa["class"] for aa in data["annotation"]]) - data["start_times"] = np.array([aa["start_time"] for aa in data["annotation"]]) + data["start_times"] = np.array( + [aa["start_time"] for aa in data["annotation"]] + ) data["end_times"] = np.array([aa["end_time"] for aa in data["annotation"]]) - data["high_freqs"] = np.array([float(aa["high_freq"]) for aa in data["annotation"]]) - data["low_freqs"] = np.array([float(aa["low_freq"]) for aa in data["annotation"]]) + data["high_freqs"] = np.array( + [float(aa["high_freq"]) for aa in data["annotation"]] + ) + data["low_freqs"] = np.array( + [float(aa["low_freq"]) for aa in data["annotation"]] + ) if is_pred: # when loading predictions data["det_probs"] = np.array( [float(aa["det_prob"]) for aa in data["annotation"]] ) - data["class_probs"] = np.zeros((len(class_names) + 1, len(data["annotation"]))) + data["class_probs"] = np.zeros( + (len(class_names) + 1, len(data["annotation"])) + ) data["class_ids"] = np.array( [class_names_all.index(aa["class"]) for aa in data["annotation"]] ).astype(np.int32) @@ -320,7 +334,8 @@ def load_gt_data(datasets, events_of_interest, class_names, classes_to_ignore): [dd], events_of_interest=events_of_interest, verbose=True ) gt_dataset = [ - parse_data(gg, class_names, classes_to_ignore, False) for gg in gt_dataset + parse_data(gg, class_names, classes_to_ignore, False) + for gg in gt_dataset ] for gt in gt_dataset: @@ -356,7 +371,9 @@ def eval_rf_model(clf, pred, un_train_class, num_classes): # stores the prediction in place if pred["feats"].shape[0] > 0: pred["class_probs"] = np.zeros((num_classes, pred["feats"].shape[0])) - pred["class_probs"][un_train_class, :] = clf.predict_proba(pred["feats"]).T + pred["class_probs"][un_train_class, :] = clf.predict_proba( + pred["feats"] + ).T pred["det_probs"] = pred["class_probs"][:-1, :].sum(0) else: pred["class_probs"] = np.zeros((num_classes, 0)) @@ -457,8 +474,12 @@ if __name__ == "__main__": help="Output directory for plots", ) parser.add_argument("data_dir", type=str, help="Path to root of datasets") - parser.add_argument("ann_dir", type=str, help="Path to extracted annotations") - parser.add_argument("bd_model_path", type=str, help="Path to BatDetect model") + parser.add_argument( + "ann_dir", type=str, help="Path to extracted annotations" + ) + parser.add_argument( + "bd_model_path", type=str, help="Path to BatDetect model" + ) parser.add_argument( "--test_file", type=str, @@ -498,7 +519,9 @@ if __name__ == "__main__": default="", help="Text to add as title of plots", ) - parser.add_argument("--rand_seed", type=int, default=2001, help="Random seed") + parser.add_argument( + "--rand_seed", type=int, default=2001, help="Random seed" + ) args = vars(parser.parse_args()) np.random.seed(args["rand_seed"]) @@ -582,7 +605,9 @@ if __name__ == "__main__": for ii, gt in enumerate(gt_test): sb_pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta) if sb_pred["class_name"] != "": - sb_pred = parse_data(sb_pred, class_names, classes_to_ignore, True) + sb_pred = parse_data( + sb_pred, class_names, classes_to_ignore, True + ) sb_pred["class_probs"][ sb_pred["class_ids"], np.arange(sb_pred["class_probs"].shape[1]), @@ -617,7 +642,9 @@ if __name__ == "__main__": x_train = [] y_train = [] for gt in gt_train: - pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat") + pred = load_sonobat_preds( + gt["dataset_name"], gt["id"], sb_meta, "Not Bat" + ) if len(pred["annotation"]) > 0: # compute detection overlap with ground truth to determine which are the TP detections @@ -634,7 +661,9 @@ if __name__ == "__main__": # run the model on the test set preds_sb_rf = [] for gt in gt_test: - pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat") + pred = load_sonobat_preds( + gt["dataset_name"], gt["id"], sb_meta, "Not Bat" + ) pred = parse_data(pred, class_names, classes_to_ignore, True) pred = eval_rf_model(clf_sb, pred, un_train_class, num_classes) preds_sb_rf.append(pred) @@ -666,7 +695,9 @@ if __name__ == "__main__": x_train = [] y_train = [] for gt in gt_train: - pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"]) + pred = load_tadarida_pred( + args["td_ip_dir"], gt["dataset_name"], gt["id"] + ) # compute detection overlap with ground truth to determine which are the TP detections assign_to_gt(gt, pred, args["iou_thresh"]) pred = parse_data(pred, class_names, classes_to_ignore, True) @@ -681,7 +712,9 @@ if __name__ == "__main__": # run the model on the test set preds_td = [] for gt in gt_test: - pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"]) + pred = load_tadarida_pred( + args["td_ip_dir"], gt["dataset_name"], gt["id"] + ) pred = parse_data(pred, class_names, classes_to_ignore, True) pred = eval_rf_model(clf_td, pred, un_train_class, num_classes) preds_td.append(pred) diff --git a/bat_detect/finetune/finetune_model.py b/bat_detect/finetune/finetune_model.py index 0209670..8988096 100644 --- a/bat_detect/finetune/finetune_model.py +++ b/bat_detect/finetune/finetune_model.py @@ -28,7 +28,9 @@ if __name__ == "__main__": print(info_str) parser = argparse.ArgumentParser() - parser.add_argument("audio_path", type=str, help="Input directory for audio") + parser.add_argument( + "audio_path", type=str, help="Input directory for audio" + ) parser.add_argument( "train_ann_path", type=str, @@ -78,7 +80,9 @@ if __name__ == "__main__": params["device"] = "cuda" else: params["device"] = "cpu" - print("\nNote, this will be a lot faster if you use computer with a GPU.\n") + print( + "\nNote, this will be a lot faster if you use computer with a GPU.\n" + ) print("\nAudio directory: " + args["audio_path"]) print("Train file: " + args["train_ann_path"]) @@ -129,13 +133,17 @@ if __name__ == "__main__": data_train, params["class_names"], params["class_inv_freq"], - ) = tu.load_set_of_anns(train_sets, classes_to_ignore, params["events_of_interest"]) + ) = tu.load_set_of_anns( + train_sets, classes_to_ignore, params["events_of_interest"] + ) print("Number of files", len(data_train)) params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping( params["class_names"] ) - params["class_names_short"] = tu.get_short_class_names(params["class_names"]) + params["class_names_short"] = tu.get_short_class_names( + params["class_names"] + ) # load test annotations test_sets = [] @@ -218,7 +226,9 @@ if __name__ == "__main__": param.requires_grad = False optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"]) - scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader)) + scheduler = CosineAnnealingLR( + optimizer, params["num_epochs"] * len(train_loader) + ) if params["train_loss"] == "mse": det_criterion = losses.mse_loss elif params["train_loss"] == "focal": @@ -293,7 +303,9 @@ if __name__ == "__main__": test_plt_class.update_and_save( epoch, [rs["avg_prec"] for rs in test_res["class_pr"]] ) - pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res) + pu.plot_pr_curve_class( + params["experiment"], "test_pr", "test_pr", test_res + ) # save finetuned model print("saving model to: " + params["model_file_name"]) diff --git a/bat_detect/finetune/prep_data_finetune.py b/bat_detect/finetune/prep_data_finetune.py index 1ee4ceb..d8d1df8 100644 --- a/bat_detect/finetune/prep_data_finetune.py +++ b/bat_detect/finetune/prep_data_finetune.py @@ -58,7 +58,9 @@ if __name__ == "__main__": print(info_str) parser = argparse.ArgumentParser() - parser.add_argument("dataset_name", type=str, help="Name to call your dataset") + parser.add_argument( + "dataset_name", type=str, help="Name to call your dataset" + ) parser.add_argument("audio_dir", type=str, help="Input directory for audio") parser.add_argument( "ann_dir", @@ -147,10 +149,14 @@ if __name__ == "__main__": test_files = load_file_names(args["test_file"]) file_names_all = [dd["id"] for dd in data_all] train_inds = [ - file_names_all.index(ff) for ff in train_files if ff in file_names_all + file_names_all.index(ff) + for ff in train_files + if ff in file_names_all ] test_inds = [ - file_names_all.index(ff) for ff in test_files if ff in file_names_all + file_names_all.index(ff) + for ff in test_files + if ff in file_names_all ] else: diff --git a/bat_detect/train/audio_dataloader.py b/bat_detect/train/audio_dataloader.py index 697339b..70ba5b8 100644 --- a/bat_detect/train/audio_dataloader.py +++ b/bat_detect/train/audio_dataloader.py @@ -73,7 +73,9 @@ def generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, params): y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32) y_2d_size = np.zeros((2, op_height, op_width), dtype=np.float32) # num classes and "background" class - y_2d_classes = np.zeros((num_classes + 1, op_height, op_width), dtype=np.float32) + y_2d_classes = np.zeros( + (num_classes + 1, op_height, op_width), dtype=np.float32 + ) # create 2D ground truth heatmaps for ii in valid_inds: @@ -126,7 +128,8 @@ def draw_gaussian(heatmap, center, sigmax, sigmay=None): x0 = y0 = size // 2 # g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) g = np.exp( - -((x - x0) ** 2) / (2 * sigmax**2) - ((y - y0) ** 2) / (2 * sigmay**2) + -((x - x0) ** 2) / (2 * sigmax**2) + - ((y - y0) ** 2) / (2 * sigmay**2) ) g_x = max(0, -ul[0]), min(br[0], h) - ul[0] g_y = max(0, -ul[1]), min(br[1], w) - ul[1] @@ -307,7 +310,9 @@ class AudioLoader(torch.utils.data.Dataset): # convert class name into class label if aa["class"] in self.params["class_names"]: - aa["class_id"] = self.params["class_names"].index(aa["class"]) + aa["class_id"] = self.params["class_names"].index( + aa["class"] + ) else: aa["class_id"] = -1 @@ -315,8 +320,12 @@ class AudioLoader(torch.utils.data.Dataset): filtered_annotations.append(aa) dd["annotation"] = filtered_annotations - dd["start_times"] = np.array([aa["start_time"] for aa in dd["annotation"]]) - dd["end_times"] = np.array([aa["end_time"] for aa in dd["annotation"]]) + dd["start_times"] = np.array( + [aa["start_time"] for aa in dd["annotation"]] + ) + dd["end_times"] = np.array( + [aa["end_time"] for aa in dd["annotation"]] + ) dd["high_freqs"] = np.array( [float(aa["high_freq"]) for aa in dd["annotation"]] ) @@ -393,12 +402,18 @@ class AudioLoader(torch.utils.data.Dataset): ) if audio_raw.shape[0] - length_samples > 0: - sample_crop = np.random.randint(audio_raw.shape[0] - length_samples) + sample_crop = np.random.randint( + audio_raw.shape[0] - length_samples + ) else: sample_crop = 0 audio_raw = audio_raw[sample_crop : sample_crop + length_samples] - ann["start_times"] = ann["start_times"] - sample_crop / float(sampling_rate) - ann["end_times"] = ann["end_times"] - sample_crop / float(sampling_rate) + ann["start_times"] = ann["start_times"] - sample_crop / float( + sampling_rate + ) + ann["end_times"] = ann["end_times"] - sample_crop / float( + sampling_rate + ) # pad audio if self.is_train: @@ -477,7 +492,9 @@ class AudioLoader(torch.utils.data.Dataset): spec = scale_vol_aug(spec, self.params) if np.random.random() < self.params["aug_prob"]: - spec = warp_spec_aug(spec, ann, self.return_spec_for_viz, self.params) + spec = warp_spec_aug( + spec, ann, self.return_spec_for_viz, self.params + ) if np.random.random() < self.params["aug_prob"]: spec = mask_time_aug(spec, self.params) @@ -488,7 +505,9 @@ class AudioLoader(torch.utils.data.Dataset): outputs = {} outputs["spec"] = spec if self.return_spec_for_viz: - outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(0) + outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze( + 0 + ) # create ground truth heatmaps ( diff --git a/bat_detect/train/evaluate.py b/bat_detect/train/evaluate.py index 47fb26b..a926fbb 100755 --- a/bat_detect/train/evaluate.py +++ b/bat_detect/train/evaluate.py @@ -1,5 +1,10 @@ import numpy as np -from sklearn.metrics import accuracy_score, auc, balanced_accuracy_score, roc_curve +from sklearn.metrics import ( + accuracy_score, + auc, + balanced_accuracy_score, + roc_curve, +) def compute_error_auc(op_str, gt, pred, prob): @@ -12,7 +17,10 @@ def compute_error_auc(op_str, gt, pred, prob): fpr, tpr, thresholds = roc_curve(gt, pred) roc_auc = auc(fpr, tpr) - print(op_str + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc)) + print( + op_str + + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc) + ) # return class_acc, roc_auc @@ -106,10 +114,14 @@ def compute_pre_rec( confidence.append(pp["det_probs"][valid_inds]) elif eval_mode == "per_class": # per class - confidence.append(pp["class_probs"].T[valid_inds, class_of_interest]) + confidence.append( + pp["class_probs"].T[valid_inds, class_of_interest] + ) elif eval_mode == "top_class": # per class - note that sometimes 'class_probs' can be num_classes+1 in size - top_class = np.argmax(pp["class_probs"].T[valid_inds, :num_classes], 1) + top_class = np.argmax( + pp["class_probs"].T[valid_inds, :num_classes], 1 + ) confidence.append(pp["class_probs"].T[valid_inds, top_class]) pred_class.append(top_class) @@ -158,7 +170,9 @@ def compute_pre_rec( num_positives += len(gg["start_times"][valid_inds]) elif eval_mode == "per_class": # all valid ones with class of interest - num_positives += (gg["class_ids"][valid_inds] == class_of_interest).sum() + num_positives += ( + gg["class_ids"][valid_inds] == class_of_interest + ).sum() elif eval_mode == "top_class": # all valid ones with non generic class num_positives += (gg["class_ids"][valid_inds] > -1).sum() @@ -240,7 +254,9 @@ def compute_pre_rec( results["avg_prec"] = np.nan results["rec_at_x"] = np.nan else: - results["avg_prec"] = np.round(calc_average_precision(recall, precision), 5) + results["avg_prec"] = np.round( + calc_average_precision(recall, precision), 5 + ) results["rec_at_x"] = np.round(calc_recall_at_x(recall, precision), 5) return results @@ -283,12 +299,20 @@ def compute_file_accuracy(gts, preds, num_classes): # compute min and max scoring range - then threshold min_val = 0 - mins = [pp["class_probs"].min() for pp in preds if pp["class_probs"].shape[1] > 0] + mins = [ + pp["class_probs"].min() + for pp in preds + if pp["class_probs"].shape[1] > 0 + ] if len(mins) > 0: min_val = np.min(mins) max_val = 1.0 - maxes = [pp["class_probs"].max() for pp in preds if pp["class_probs"].shape[1] > 0] + maxes = [ + pp["class_probs"].max() + for pp in preds + if pp["class_probs"].shape[1] > 0 + ] if len(maxes) > 0: max_val = np.max(maxes) @@ -310,7 +334,9 @@ def compute_file_accuracy(gts, preds, num_classes): # pick the result corresponding to the overall best threshold pred_valid_all = np.vstack(pred_valid_all) - acc_per_thresh = (np.array(gt_valid)[..., np.newaxis] == pred_valid_all).mean(0) + acc_per_thresh = ( + np.array(gt_valid)[..., np.newaxis] == pred_valid_all + ).mean(0) best_thresh = np.argmax(acc_per_thresh) best_acc = acc_per_thresh[best_thresh] pred_valid = pred_valid_all[:, best_thresh].astype(np.int).tolist() diff --git a/bat_detect/train/train_model.py b/bat_detect/train/train_model.py index cca7011..f7504b0 100644 --- a/bat_detect/train/train_model.py +++ b/bat_detect/train/train_model.py @@ -62,7 +62,9 @@ def save_images_batch(model, data_loader, params): data_loader.dataset.return_spec_for_viz = False -def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title): +def save_image( + spec_viz, outputs, ind, inputs, params, op_file_name, plot_title +): pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float()) pred_hm = outputs["pred_det"][ind, 0, :].data.cpu().numpy() spec_viz = spec_viz[ind, 0, :] @@ -85,10 +87,14 @@ def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title) ) -def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq): +def loss_fun( + outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq +): # detection loss - loss = params["det_loss_weight"] * det_criterion(outputs["pred_det"], gt_det) + loss = params["det_loss_weight"] * det_criterion( + outputs["pred_det"], gt_det + ) # bounding box size loss loss += params["size_loss_weight"] * losses.bbox_size_loss( @@ -105,7 +111,9 @@ def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_in return loss -def train(model, epoch, data_loader, det_criterion, optimizer, scheduler, params): +def train( + model, epoch, data_loader, det_criterion, optimizer, scheduler, params +): model.train() @@ -218,7 +226,9 @@ def test(model, epoch, data_loader, det_criterion, params): test_loss.update(loss.item(), data.shape[0]) # do NMS - pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float()) + pred_nms, _ = pp.run_nms( + outputs, params, inputs["sampling_rate"].float() + ) predictions.extend(pred_nms) ground_truths.extend(parse_gt_data(inputs)) @@ -328,7 +338,9 @@ if __name__ == "__main__": # setup arg parser and populate it with exiting parameters - will not work with lists parser = argparse.ArgumentParser() parser.add_argument("data_dir", type=str, help="Path to root of datasets") - parser.add_argument("ann_dir", type=str, help="Path to extracted annotations") + parser.add_argument( + "ann_dir", type=str, help="Path to extracted annotations" + ) parser.add_argument( "--train_split", type=str, @@ -387,12 +399,14 @@ if __name__ == "__main__": params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping( params["class_names"] ) - params["class_names_short"] = tu.get_short_class_names(params["class_names"]) + params["class_names_short"] = tu.get_short_class_names( + params["class_names"] + ) # standardize the low and high frequency value for specified classes - params["standardize_classs_names"] = params["standardize_classs_names_ip"].split( - ";" - ) + params["standardize_classs_names"] = params[ + "standardize_classs_names_ip" + ].split(";") for cc in params["standardize_classs_names"]: if cc in params["class_names"]: data_train = tu.standardize_low_freq(data_train, cc) @@ -442,7 +456,9 @@ if __name__ == "__main__": optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"]) # optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=0.9) - scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader)) + scheduler = CosineAnnealingLR( + optimizer, params["num_epochs"] * len(train_loader) + ) if params["train_loss"] == "mse": det_criterion = losses.mse_loss elif params["train_loss"] == "focal": @@ -505,7 +521,9 @@ if __name__ == "__main__": if epoch % params["num_eval_epochs"] == 0: # detection accuracy on test set - test_res, test_loss = test(model, epoch, test_loader, det_criterion, params) + test_res, test_loss = test( + model, epoch, test_loader, det_criterion, params + ) test_plt_ls.update_and_save(epoch, [test_loss["test_loss"]]) test_plt.update_and_save( epoch, @@ -520,7 +538,9 @@ if __name__ == "__main__": test_plt_class.update_and_save( epoch, [rs["avg_prec"] for rs in test_res["class_pr"]] ) - pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res) + pu.plot_pr_curve_class( + params["experiment"], "test_pr", "test_pr", test_res + ) # save trained model print("saving model to: " + params["model_file_name"]) diff --git a/bat_detect/train/train_split.py b/bat_detect/train/train_split.py index 2036223..01b5c03 100644 --- a/bat_detect/train/train_split.py +++ b/bat_detect/train/train_split.py @@ -24,7 +24,8 @@ def split_diff(ann_dir, wav_dir, load_extra=True): "dataset_name": "BatDetective", "is_test": False, "is_binary": True, # just a bat / not bat dataset ie no classes - "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json", + "ann_path": ann_dir + + "train_set_bulgaria_batdetective_with_bbs.json", "wav_path": wav_dir + "bat_detective/audio/", } ) @@ -151,7 +152,8 @@ def split_same(ann_dir, wav_dir, load_extra=True): "dataset_name": "BatDetective", "is_test": False, "is_binary": True, - "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json", + "ann_path": ann_dir + + "train_set_bulgaria_batdetective_with_bbs.json", "wav_path": wav_dir + "bat_detective/audio/", } ) diff --git a/bat_detect/train/train_utils.py b/bat_detect/train/train_utils.py index 53f91c2..62441a7 100644 --- a/bat_detect/train/train_utils.py +++ b/bat_detect/train/train_utils.py @@ -25,7 +25,9 @@ def get_blank_dataset_dict(dataset_name, is_test, ann_path, wav_path): def get_short_class_names(class_names, str_len=3): class_names_short = [] for cc in class_names: - class_names_short.append(" ".join([sp[:str_len] for sp in cc.split(" ")])) + class_names_short.append( + " ".join([sp[:str_len] for sp in cc.split(" ")]) + ) return class_names_short @@ -155,7 +157,9 @@ def load_set_of_anns( str_len = np.max([len(cc) for cc in class_names]) + 5 for cc in range(len(class_names)): print( - str(cc).ljust(5) + class_names[cc].ljust(str_len) + str(class_cnts[cc]) + str(cc).ljust(5) + + class_names[cc].ljust(str_len) + + str(class_cnts[cc]) ) if len(classes_to_ignore) == 0: diff --git a/bat_detect/utils/audio_utils.py b/bat_detect/utils/audio_utils.py index 47bf103..318b790 100644 --- a/bat_detect/utils/audio_utils.py +++ b/bat_detect/utils/audio_utils.py @@ -39,7 +39,9 @@ def generate_spectrogram( min_freq = round(params["min_freq"] * params["fft_win_length"]) if spec.shape[0] < max_freq: freq_pad = max_freq - spec.shape[0] - spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec)) + spec = np.vstack( + (np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec) + ) spec_cropped = spec[-max_freq : spec.shape[0] - min_freq, :] if params["spec_scale"] == "log": @@ -49,7 +51,11 @@ def generate_spectrogram( * ( 1.0 / ( - np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) + np.abs( + np.hanning( + int(params["fft_win_length"] * sampling_rate) + ) + ) ** 2 ).sum() ) @@ -82,7 +88,11 @@ def generate_spectrogram( * ( 1.0 / ( - np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) + np.abs( + np.hanning( + int(params["fft_win_length"] * sampling_rate) + ) + ) ** 2 ).sum() ) @@ -122,7 +132,9 @@ def load_audio_file( # clipping maximum duration if max_duration is not False: - max_duration = np.minimum(int(sampling_rate * max_duration), audio_raw.shape[0]) + max_duration = np.minimum( + int(sampling_rate * max_duration), audio_raw.shape[0] + ) audio_raw = audio_raw[:max_duration] # convert to float32 and scale @@ -159,7 +171,9 @@ def pad_audio( # too small # used during training to ensure all the batches are the same size diff = fixed_width * step + noverlap - audio_raw.shape[0] - audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype))) + audio_raw = np.hstack( + (audio_raw, np.zeros(diff, dtype=audio_raw.dtype)) + ) elif fixed_width is not None and spec_width > fixed_width: # too big @@ -167,13 +181,18 @@ def pad_audio( diff = fixed_width * step + noverlap - audio_raw.shape[0] audio_raw = audio_raw[:diff] - elif spec_width_rs < min_size or (np.floor(spec_width_rs) % divide_factor) != 0: + elif ( + spec_width_rs < min_size + or (np.floor(spec_width_rs) % divide_factor) != 0 + ): # need to be at least min_size div_amt = np.ceil(spec_width_rs / float(divide_factor)) div_amt = np.maximum(1, div_amt) target_size = int(div_amt * divide_factor * (1.0 / resize_factor)) diff = target_size * step + noverlap - audio_raw.shape[0] - audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype))) + audio_raw = np.hstack( + (audio_raw, np.zeros(diff, dtype=audio_raw.dtype)) + ) return audio_raw diff --git a/bat_detect/utils/detector_utils.py b/bat_detect/utils/detector_utils.py index b4c0e47..3186ab6 100644 --- a/bat_detect/utils/detector_utils.py +++ b/bat_detect/utils/detector_utils.py @@ -67,6 +67,7 @@ def get_audio_files(ip_dir: str) -> List[str]: class ModelParameters(TypedDict): """Model parameters.""" + model_name: str num_filters: int emb_dim: int @@ -77,8 +78,7 @@ class ModelParameters(TypedDict): def load_model( - model_path: str=DEFAULT_MODEL_PATH, - load_weights: bool=True + model_path: str = DEFAULT_MODEL_PATH, load_weights: bool = True ) -> Tuple[torch.nn.Module, ModelParameters]: """Load model from file. @@ -211,7 +211,9 @@ def convert_results( results["spec_feat_names"] = feats.get_feature_names() if len(cnn_feats) > 0: results["cnn_feats"] = cnn_feats - results["cnn_feat_names"] = [str(ii) for ii in range(cnn_feats.shape[1])] + results["cnn_feat_names"] = [ + str(ii) for ii in range(cnn_feats.shape[1]) + ] if len(spec_slices) > 0: results["spec_slices"] = spec_slices @@ -245,7 +247,9 @@ def save_results_to_file(results, op_path): # save features if "spec_feats" in results.keys(): - df = pd.DataFrame(results["spec_feats"], columns=results["spec_feat_names"]) + df = pd.DataFrame( + results["spec_feats"], columns=results["spec_feat_names"] + ) df.to_csv( op_path + "_spec_features.csv", sep=",", @@ -254,7 +258,9 @@ def save_results_to_file(results, op_path): ) if "cnn_feats" in results.keys(): - df = pd.DataFrame(results["cnn_feats"], columns=results["cnn_feat_names"]) + df = pd.DataFrame( + results["cnn_feats"], columns=results["cnn_feat_names"] + ) df.to_csv( op_path + "_cnn_features.csv", sep=",", @@ -289,7 +295,9 @@ def compute_spectrogram(audio, sampling_rate, params, return_np=False): # resize the spec rs = params["resize_factor"] spec_op_shape = (int(params["spec_height"] * rs), int(spec.shape[-1] * rs)) - spec = F.interpolate(spec, size=spec_op_shape, mode="bilinear", align_corners=False) + spec = F.interpolate( + spec, size=spec_op_shape, mode="bilinear", align_corners=False + ) if return_np: spec_np = spec[0, 0, :].cpu().data.numpy() @@ -350,7 +358,9 @@ def process_file( chunk_time = args["chunk_size"] * chunk_id chunk_length = int(sampling_rate * args["chunk_size"]) start_sample = chunk_id * chunk_length - end_sample = np.minimum((chunk_id + 1) * chunk_length, audio_full.shape[0]) + end_sample = np.minimum( + (chunk_id + 1) * chunk_length, audio_full.shape[0] + ) audio = audio_full[start_sample:end_sample] # load audio file and compute spectrogram @@ -385,7 +395,9 @@ def process_file( cnn_feats.append(features[0]) if args["spec_slices"]: - spec_slices.extend(feats.extract_spec_slices(spec_np, pred_nms, params)) + spec_slices.extend( + feats.extract_spec_slices(spec_np, pred_nms, params) + ) # convert the predictions into output dictionary file_id = os.path.basename(audio_file) @@ -406,7 +418,10 @@ def process_file( # summarize results if not args["quiet"]: num_detections = len(results["pred_dict"]["annotation"]) - print("{}".format(num_detections) + " call(s) detected above the threshold.") + print( + "{}".format(num_detections) + + " call(s) detected above the threshold." + ) # print results for top n classes if not args["quiet"] and (num_detections > 0): @@ -416,7 +431,8 @@ def process_file( print("species name".ljust(30) + "probablity present") for cc in np.argsort(class_overall)[::-1][:top_n]: print( - params["class_names"][cc].ljust(30) + str(round(class_overall[cc], 3)) + params["class_names"][cc].ljust(30) + + str(round(class_overall[cc], 3)) ) if return_raw_preds: diff --git a/bat_detect/utils/plot_utils.py b/bat_detect/utils/plot_utils.py index 8b1945a..afbbc5f 100644 --- a/bat_detect/utils/plot_utils.py +++ b/bat_detect/utils/plot_utils.py @@ -57,7 +57,9 @@ def create_box_image( if plot_class_names: for ii, bb in enumerate(boxes): - txt = " ".join([sp[:3] for sp in detections_ip[ii]["class"].split(" ")]) + txt = " ".join( + [sp[:3] for sp in detections_ip[ii]["class"].split(" ")] + ) font_info = { "color": "white", "size": 10, @@ -87,7 +89,9 @@ def save_ann_spec( y_extent = [0, duration, min_freq, max_freq] plt.close("all") - fig = plt.figure(0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100) + fig = plt.figure( + 0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100 + ) plt.imshow( spec, aspect="auto", @@ -124,12 +128,16 @@ def save_ann_spec( plt.savefig(op_path) -def plot_pts(fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False): +def plot_pts( + fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False +): plt.figure(fig_id) un_class, labels = np.unique(class_names, return_inverse=True) un_labels = np.unique(labels) if un_labels.shape[0] > len(colors): - colors = [plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels] + colors = [ + plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels + ] for ii, u in enumerate(un_labels): inds = np.where(labels == u)[0] @@ -236,7 +244,9 @@ def plot_spec( ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax0.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax0.text(0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info) + ax0.text( + 0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info + ) plt.grid(False) if plot_boxes: @@ -261,7 +271,9 @@ def plot_spec( ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax1.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax1.text(0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info) + ax1.text( + 0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info + ) plt.grid(False) if plot_boxes: @@ -296,7 +308,9 @@ def plot_spec( ) # ax2.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax2.text(0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info) + ax2.text( + 0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info + ) plt.grid(False) @@ -394,11 +408,15 @@ def plot_confusion_matrix( # shorten the class names for plotting class_names = [] for cc in class_names_long: - class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[:-1] + class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[ + :-1 + ] class_names.append(class_name_sm) num_classes = len(class_names) - cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(np.float32) + cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype( + np.float32 + ) cm_norm = cm.sum(1) valid_inds = np.where(cm_norm > 0)[0] @@ -487,7 +505,9 @@ class LossPlotter(object): if self.logy: plt.gca().set_yscale("log") plt.grid(True) - plt.legend(bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0) + plt.legend( + bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0 + ) plt.tight_layout() plt.savefig(self.op_file_name) plt.close(0) @@ -502,15 +522,19 @@ class LossPlotter(object): def save_confusion_matrix(self, gt, pred): plt.figure(0) - cm = confusion_matrix(gt, pred, np.arange(len(self.class_names))).astype( - np.float32 - ) + cm = confusion_matrix( + gt, pred, np.arange(len(self.class_names)) + ).astype(np.float32) cm_norm = cm.sum(1) valid_inds = np.where(cm_norm > 0)[0] - cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] + cm[valid_inds, :] = ( + cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] + ) plt.imshow(cm, vmin=0, vmax=1, cmap="plasma") plt.colorbar() - plt.xticks(np.arange(cm.shape[1]), self.class_names, rotation="vertical") + plt.xticks( + np.arange(cm.shape[1]), self.class_names, rotation="vertical" + ) plt.yticks(np.arange(cm.shape[0]), self.class_names) plt.xlabel("Predicted") plt.ylabel("Ground Truth") diff --git a/bat_detect/utils/visualize.py b/bat_detect/utils/visualize.py index 9b5b4b2..d79f322 100644 --- a/bat_detect/utils/visualize.py +++ b/bat_detect/utils/visualize.py @@ -56,19 +56,25 @@ class InteractivePlotter: self.annotated = np.zeros( self.labels.shape[0], dtype=np.int ) # can populate this with 1's where we have labels - self.labels_cols = [colors[self.labels[ii]] for ii in range(len(self.labels))] + self.labels_cols = [ + colors[self.labels[ii]] for ii in range(len(self.labels)) + ] self.freq_lims = freq_lims self.allow_training = allow_training self.pt_size = 5.0 - self.spec_pad = 0.2 # this much padding has been applied to the spec slices + self.spec_pad = ( + 0.2 # this much padding has been applied to the spec slices + ) self.fig_width = 12 self.fig_height = 8 self.current_id = 0 max_ind = np.argmax([ss.shape[1] for ss in self.spec_slices]) self.max_width = self.spec_slices[max_ind].shape[1] - self.blank_spec = np.zeros((self.spec_slices[0].shape[0], self.max_width)) + self.blank_spec = np.zeros( + (self.spec_slices[0].shape[0], self.max_width) + ) def plot(self, fig_id): self.fig, self.ax = plt.subplots( @@ -141,7 +147,8 @@ class InteractivePlotter: ) // 2 new_spec[ :, - w_diff : self.spec_slices[self.current_id].shape[1] + w_diff, + w_diff : self.spec_slices[self.current_id].shape[1] + + w_diff, ] = self.spec_slices[self.current_id] self.spec_im.set_data(new_spec) self.spec_im.set_clim(vmin=0, vmax=new_spec.max()) @@ -172,7 +179,9 @@ class InteractivePlotter: info_str = ( self.call_info[self.current_id]["file_name"] + ", time=" - + str(round(self.call_info[self.current_id]["start_time"], 3)) + + str( + round(self.call_info[self.current_id]["start_time"], 3) + ) + ", prob=" + str(round(self.call_info[self.current_id]["det_prob"], 3)) ) diff --git a/bat_detect/utils/wavfile.py b/bat_detect/utils/wavfile.py index 532a8c9..7fee660 100644 --- a/bat_detect/utils/wavfile.py +++ b/bat_detect/utils/wavfile.py @@ -235,7 +235,9 @@ def write(filename, rate, data): # kind of numeric data in the numpy array dkind = data.dtype.kind if not ( - dkind == "i" or dkind == "f" or (dkind == "u" and data.dtype.itemsize == 1) + dkind == "i" + or dkind == "f" + or (dkind == "u" and data.dtype.itemsize == 1) ): raise ValueError("Unsupported data type '%s'" % data.dtype) @@ -268,7 +270,9 @@ def write(filename, rate, data): # Write the data (16, comp, noc, etc) in the correct binary format # for the wav header. the string format (first arg) specifies how many bytes for each # value. - fid.write(struct.pack(" 0 ): - results_path = audio_file.replace(args["audio_dir"], args["ann_dir"]) + results_path = audio_file.replace( + args["audio_dir"], args["ann_dir"] + ) du.save_results_to_file(results, results_path) except: error_files.append(audio_file) diff --git a/scripts/gen_dataset_summary_image.py b/scripts/gen_dataset_summary_image.py index 086e5b2..7e424ad 100644 --- a/scripts/gen_dataset_summary_image.py +++ b/scripts/gen_dataset_summary_image.py @@ -20,7 +20,9 @@ import bat_detect.utils.audio_utils as au if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("audio_path", type=str, help="Input directory for audio") + parser.add_argument( + "audio_path", type=str, help="Input directory for audio" + ) parser.add_argument( "op_dir", type=str, @@ -31,7 +33,9 @@ if __name__ == "__main__": type=str, help="Path to where single annotation json file is stored", ) - parser.add_argument("--uk_split", type=str, default="", help="Set as: diff or same") + parser.add_argument( + "--uk_split", type=str, default="", help="Set as: diff or same" + ) parser.add_argument( "--file_type", type=str, @@ -84,7 +88,9 @@ if __name__ == "__main__": norm_type=params["norm_type"], ) - op_file_name = os.path.join(args["op_dir"], dataset_name + "." + args["file_type"]) + op_file_name = os.path.join( + args["op_dir"], dataset_name + "." + args["file_type"] + ) vz.save_summary_image( x_train, y_train, class_names, params, op_file_name, class_names_order ) diff --git a/scripts/gen_spec_image.py b/scripts/gen_spec_image.py index 8821459..ba69481 100644 --- a/scripts/gen_spec_image.py +++ b/scripts/gen_spec_image.py @@ -25,7 +25,9 @@ import bat_detect.utils.plot_utils as viz def filter_anns(anns, start_time, stop_time): anns_op = [] for aa in anns: - if (aa["start_time"] >= start_time) and (aa["start_time"] < stop_time - 0.02): + if (aa["start_time"] >= start_time) and ( + aa["start_time"] < stop_time - 0.02 + ): anns_op.append(aa) return anns_op @@ -130,7 +132,9 @@ if __name__ == "__main__": print("File duration: {} seconds".format(duration)) # create spec for viz - spec, _ = au.generate_spectrogram(audio, sampling_rate, params_bd, True, False) + spec, _ = au.generate_spectrogram( + audio, sampling_rate, params_bd, True, False + ) # run model and filter detections so only keep ones in relevant time range results = du.process_file(args_cmd["audio_file"], model, params_bd, bd_args) @@ -153,7 +157,9 @@ if __name__ == "__main__": ) op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean) op_path_pred = ( - os.path.basename(args_cmd["audio_file"])[:-4] + "_pred." + args_cmd["file_type"] + os.path.basename(args_cmd["audio_file"])[:-4] + + "_pred." + + args_cmd["file_type"] ) op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred) diff --git a/scripts/gen_spec_video.py b/scripts/gen_spec_video.py index 813db20..17354f6 100644 --- a/scripts/gen_spec_video.py +++ b/scripts/gen_spec_video.py @@ -27,7 +27,9 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("audio_file", type=str, help="Path to input audio file") - parser.add_argument("model_path", type=str, help="Path to trained BatDetect model") + parser.add_argument( + "model_path", type=str, help="Path to trained BatDetect model" + ) parser.add_argument( "--op_dir", type=str, @@ -42,7 +44,9 @@ if __name__ == "__main__": action="store_true", help="Do not plot class names", ) - parser.add_argument("--disable_axis", action="store_true", help="Do not plot axis") + parser.add_argument( + "--disable_axis", action="store_true", help="Do not plot axis" + ) parser.add_argument( "--detection_threshold", type=float, @@ -129,7 +133,9 @@ if __name__ == "__main__": detections.append(bb) # plot boxes - fig = plt.figure(1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi) + fig = plt.figure( + 1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi + ) duration = au.x_coords_to_time( spec.shape[1], sampling_rate, @@ -188,7 +194,9 @@ if __name__ == "__main__": if ii > 0: spec_op[:, int(col), :] = 1.0 if reveal_boxes: - spec_op[:, int(col) + 1 :, :] = spec_blank[:, int(col) + 1 :, :] + spec_op[:, int(col) + 1 :, :] = spec_blank[ + :, int(col) + 1 :, : + ] elif ii == 0 and reveal_boxes: spec_op = spec_blank diff --git a/scripts/viz_helpers.py b/scripts/viz_helpers.py index f36cd94..a286037 100644 --- a/scripts/viz_helpers.py +++ b/scripts/viz_helpers.py @@ -23,7 +23,9 @@ def generate_spectrogram_data( # spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy() if spec.shape[0] < max_freq: freq_pad = max_freq - spec.shape[0] - spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec)) + spec = np.vstack( + (np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec) + ) spec = spec[-max_freq : spec.shape[0] - min_freq, :] if norm_type == "log": @@ -33,7 +35,11 @@ def generate_spectrogram_data( * ( 1.0 / ( - np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) + np.abs( + np.hanning( + int(params["fft_win_length"] * sampling_rate) + ) + ) ** 2 ).sum() ) @@ -106,7 +112,9 @@ def load_data( max_samps = params["spec_width"] * (nfft - noverlap) + noverlap if max_samps > audio.shape[0]: - audio = np.hstack((audio, np.zeros(max_samps - audio.shape[0]))) + audio = np.hstack( + (audio, np.zeros(max_samps - audio.shape[0])) + ) audio = audio[:max_samps].astype(np.float32) audio = au.pad_audio( @@ -139,7 +147,9 @@ def load_data( params["fft_overlap"], ) ) - y1 = (ann["low_freq"] - params["min_freq"]) * params["fft_win_length"] + y1 = (ann["low_freq"] - params["min_freq"]) * params[ + "fft_win_length" + ] coords.append((y1, x1)) _, file_ids = np.unique(file_names, return_inverse=True)