diff --git a/app.py b/app.py index 1c884f0..8e1f69c 100644 --- a/app.py +++ b/app.py @@ -82,9 +82,7 @@ def generate_results_image(audio_file, anns): duration = audio.shape[0] / sampling_rate # generate spec - spec, spec_viz = au.generate_spectrogram( - audio, sampling_rate, params, True, False - ) + spec, spec_viz = au.generate_spectrogram(audio, sampling_rate, params, True, False) # create fig plt.close("all") diff --git a/bat_detect/detector/compute_features.py b/bat_detect/detector/compute_features.py index 368c2db..b24dd77 100644 --- a/bat_detect/detector/compute_features.py +++ b/bat_detect/detector/compute_features.py @@ -3,9 +3,7 @@ import numpy as np def convert_int_to_freq(spec_ind, spec_height, min_freq, max_freq): spec_ind = spec_height - spec_ind - return round( - (spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2 - ) + return round((spec_ind / float(spec_height)) * (max_freq - min_freq) + min_freq, 2) def extract_spec_slices(spec, pred_nms, params): @@ -27,9 +25,7 @@ def extract_spec_slices(spec, pred_nms, params): for ff in range(len(pred_nms["det_probs"])): x_start = int(np.maximum(0, x_pos_pad[ff])) x_end = int( - np.minimum( - spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff]) - ) + np.minimum(spec.shape[1] - 1, np.round(x_pos_pad[ff] + bb_width_pad[ff])) ) slices.append(spec[:, x_start:x_end].astype(np.float16)) return slices @@ -66,15 +62,11 @@ def get_feats(spec, pred_nms, params): feature_names = get_feature_names() num_detections = len(pred_nms["det_probs"]) - features = ( - np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1 - ) + features = np.ones((num_detections, len(feature_names)), dtype=np.float32) * -1 for ff in range(num_detections): x_start = int(np.maximum(0, x_pos[ff])) - x_end = int( - np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff])) - ) + x_end = int(np.minimum(spec.shape[1] - 1, np.round(x_pos[ff] + bb_width[ff]))) # y low is the lowest freq but it will have a higher value due to array starting at 0 at top y_low = int(np.minimum(spec.shape[0] - 1, y_pos[ff])) y_high = int(np.maximum(0, np.round(y_pos[ff] - bb_height[ff]))) @@ -126,8 +118,7 @@ def get_feats(spec, pred_nms, params): if ff > 0: features[ff, 8] = round( - pred_nms["start_times"][ff] - - pred_nms["start_times"][ff - 1], + pred_nms["start_times"][ff] - pred_nms["start_times"][ff - 1], 5, ) diff --git a/bat_detect/detector/model_helpers.py b/bat_detect/detector/model_helpers.py index e237f7c..94657be 100644 --- a/bat_detect/detector/model_helpers.py +++ b/bat_detect/detector/model_helpers.py @@ -20,22 +20,18 @@ class SelfAttention(nn.Module): def forward(self, x): x = x.squeeze(2).permute(0, 2, 1) - kk = torch.matmul( - x, self.key_fun.weight.T - ) + self.key_fun.bias.unsqueeze(0).unsqueeze(0) - qq = torch.matmul( - x, self.que_fun.weight.T - ) + self.que_fun.bias.unsqueeze(0).unsqueeze(0) - vv = torch.matmul( - x, self.val_fun.weight.T - ) + self.val_fun.bias.unsqueeze(0).unsqueeze(0) + kk = torch.matmul(x, self.key_fun.weight.T) + self.key_fun.bias.unsqueeze( + 0 + ).unsqueeze(0) + qq = torch.matmul(x, self.que_fun.weight.T) + self.que_fun.bias.unsqueeze( + 0 + ).unsqueeze(0) + vv = torch.matmul(x, self.val_fun.weight.T) + self.val_fun.bias.unsqueeze( + 0 + ).unsqueeze(0) - kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / ( - self.temperature * self.att_dim - ) - att_weights = F.softmax( - kk_qq, 1 - ) # each col of each attention matrix sums to 1 + kk_qq = torch.bmm(kk, qq.permute(0, 2, 1)) / (self.temperature * self.att_dim) + 
att_weights = F.softmax(kk_qq, 1) # each col of each attention matrix sums to 1 att = torch.bmm(vv.permute(0, 2, 1), att_weights) op = torch.matmul( @@ -47,9 +43,7 @@ class SelfAttention(nn.Module): class ConvBlockDownCoordF(nn.Module): - def __init__( - self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1 - ): + def __init__(self, in_chn, out_chn, ip_height, k_size=3, pad_size=1, stride=1): super(ConvBlockDownCoordF, self).__init__() self.coords = nn.Parameter( torch.linspace(-1, 1, ip_height)[None, None, ..., None], @@ -73,9 +67,7 @@ class ConvBlockDownCoordF(nn.Module): class ConvBlockDownStandard(nn.Module): - def __init__( - self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1 - ): + def __init__(self, in_chn, out_chn, ip_height=None, k_size=3, pad_size=1, stride=1): super(ConvBlockDownStandard, self).__init__() self.conv = nn.Conv2d( in_chn, @@ -107,14 +99,10 @@ class ConvBlockUpF(nn.Module): self.up_scale = up_scale self.up_mode = up_mode self.coords = nn.Parameter( - torch.linspace(-1, 1, ip_height * up_scale[0])[ - None, None, ..., None - ], + torch.linspace(-1, 1, ip_height * up_scale[0])[None, None, ..., None], requires_grad=False, ) - self.conv = nn.Conv2d( - in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size - ) + self.conv = nn.Conv2d(in_chn + 1, out_chn, kernel_size=k_size, padding=pad_size) self.conv_bn = nn.BatchNorm2d(out_chn) def forward(self, x): @@ -148,9 +136,7 @@ class ConvBlockUpStandard(nn.Module): super(ConvBlockUpStandard, self).__init__() self.up_scale = up_scale self.up_mode = up_mode - self.conv = nn.Conv2d( - in_chn, out_chn, kernel_size=k_size, padding=pad_size - ) + self.conv = nn.Conv2d(in_chn, out_chn, kernel_size=k_size, padding=pad_size) self.conv_bn = nn.BatchNorm2d(out_chn) def forward(self, x): diff --git a/bat_detect/detector/models.py b/bat_detect/detector/models.py index b39cbf4..e9f8941 100644 --- a/bat_detect/detector/models.py +++ b/bat_detect/detector/models.py @@ -81,17 +81,13 @@ class Net2DFast(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d( - num_filts // 4, 2, kernel_size=1, padding=0 - ) + self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d( - num_filts, self.emb_dim, kernel_size=1, padding=0 - ) + self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) def forward(self, ip, return_feats=False): @@ -198,17 +194,13 @@ class Net2DFastNoAttn(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d( - num_filts // 4, 2, kernel_size=1, padding=0 - ) + self.conv_size_op = nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d( - num_filts, self.emb_dim, kernel_size=1, padding=0 - ) + self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) def forward(self, ip, return_feats=False): @@ -312,17 +304,13 @@ class Net2DFastNoCoordConv(nn.Module): num_filts // 4, num_filts // 4, kernel_size=3, padding=1 ) self.conv_op_bn = nn.BatchNorm2d(num_filts // 4) - self.conv_size_op = nn.Conv2d( - num_filts // 4, 2, kernel_size=1, padding=0 - ) + self.conv_size_op = 
nn.Conv2d(num_filts // 4, 2, kernel_size=1, padding=0) self.conv_classes_op = nn.Conv2d( num_filts // 4, self.num_classes + 1, kernel_size=1, padding=0 ) if self.emb_dim > 0: - self.conv_emb = nn.Conv2d( - num_filts, self.emb_dim, kernel_size=1, padding=0 - ) + self.conv_emb = nn.Conv2d(num_filts, self.emb_dim, kernel_size=1, padding=0) def forward(self, ip, return_feats=False): diff --git a/bat_detect/detector/parameters.py b/bat_detect/detector/parameters.py index d93ac8c..b6edd47 100644 --- a/bat_detect/detector/parameters.py +++ b/bat_detect/detector/parameters.py @@ -22,9 +22,7 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): params["experiment"] = os.path.join(exps_dir, now_str, "") params["model_file_name"] = os.path.join(params["experiment"], model_name) params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "") - params["op_im_dir_test"] = os.path.join( - params["experiment"], "op_ims_test", "" - ) + params["op_im_dir_test"] = os.path.join(params["experiment"], "op_ims_test", "") # params['notes'] = '' # can save notes about an experiment here # spec parameters @@ -36,12 +34,8 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): ) # in milliseconds, amount of time per stft time step params["fft_overlap"] = 0.75 # stft window overlap - params[ - "max_freq" - ] = 120000 # in Hz, everything above this will be discarded - params[ - "min_freq" - ] = 10000 # in Hz, everything below this will be discarded + params["max_freq"] = 120000 # in Hz, everything above this will be discarded + params["min_freq"] = 10000 # in Hz, everything below this will be discarded params[ "resize_factor" @@ -57,13 +51,9 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): ] = 32 # spectrogram should be divisible by this amount in width and height # spec processing params - params[ - "denoise_spec_avg" - ] = True # removes the mean for each frequency band + params["denoise_spec_avg"] = True # removes the mean for each frequency band params["scale_raw_audio"] = False # scales the raw audio to [-1, 1] - params[ - "max_scale_spec" - ] = False # scales the spectrogram so that it is max 1 + params["max_scale_spec"] = False # scales the spectrogram so that it is max 1 params["spec_scale"] = "pcen" # 'log', 'pcen', 'none' # detection params @@ -83,21 +73,13 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): params["target_sigma"] = 2.0 # augmentation params - params[ - "aug_prob" - ] = 0.20 # augmentations will be performed with this probability + params["aug_prob"] = 0.20 # augmentations will be performed with this probability params["augment_at_train"] = True params["augment_at_train_combine"] = True - params[ - "echo_max_delay" - ] = 0.005 # simulate echo by adding copy of raw audio + params["echo_max_delay"] = 0.005 # simulate echo by adding copy of raw audio params["stretch_squeeze_delta"] = 0.04 # stretch or squeeze spec - params[ - "mask_max_time_perc" - ] = 0.05 # max mask size - here percentage, not ideal - params[ - "mask_max_freq_perc" - ] = 0.10 # max mask size - here percentage, not ideal + params["mask_max_time_perc"] = 0.05 # max mask size - here percentage, not ideal + params["mask_max_freq_perc"] = 0.10 # max mask size - here percentage, not ideal params[ "spec_amp_scaling" ] = 2.0 # multiply the "volume" by 0:X times current amount @@ -113,16 +95,12 @@ def get_params(make_dirs=False, exps_dir="../../experiments/"): # loss params params["train_loss"] = "focal" # mse or focal - params[ - "det_loss_weight" - ] = 1.0 # weight for 
the detection part of the loss + params["det_loss_weight"] = 1.0 # weight for the detection part of the loss params["size_loss_weight"] = 0.1 # weight for the bbox size loss params["class_loss_weight"] = 2.0 # weight for the classification loss params["individual_loss_weight"] = 0.0 # not used if params["individual_loss_weight"] == 0.0: - params[ - "emb_dim" - ] = 0 # number of dimensions used for individual id embedding + params["emb_dim"] = 0 # number of dimensions used for individual id embedding else: params["emb_dim"] = 3 diff --git a/bat_detect/detector/post_process.py b/bat_detect/detector/post_process.py index 2745cdf..5bdb643 100644 --- a/bat_detect/detector/post_process.py +++ b/bat_detect/detector/post_process.py @@ -24,9 +24,7 @@ def run_nms(outputs, params, sampling_rate): pred_size = outputs["pred_size"] # box size pred_det_nms = non_max_suppression(pred_det, params["nms_kernel_size"]) - freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[ - -2 - ] + freq_rescale = (params["max_freq"] - params["min_freq"]) / pred_det.shape[-2] # NOTE there will be small differences depending on which sampling rate is chosen # as we are choosing the same sampling rate for the entire batch @@ -62,8 +60,7 @@ def run_nms(outputs, params, sampling_rate): params["fft_overlap"], ) pred["end_times"] = x_coords_to_time( - (pred["x_pos"].float() + pred["bb_width"]) - / params["resize_factor"], + (pred["x_pos"].float() + pred["bb_width"]) / params["resize_factor"], sampling_rate[ii].item(), params["fft_win_length"], params["fft_overlap"], @@ -71,9 +68,7 @@ def run_nms(outputs, params, sampling_rate): pred["low_freqs"] = ( pred_size[ii].shape[1] - pred["y_pos"].float() ) * freq_rescale + params["min_freq"] - pred["high_freqs"] = ( - pred["low_freqs"] + pred["bb_height"] * freq_rescale - ) + pred["high_freqs"] = pred["low_freqs"] + pred["bb_height"] * freq_rescale # extract the per class votes if "pred_class" in outputs: diff --git a/bat_detect/evaluate/evaluate_models.py b/bat_detect/evaluate/evaluate_models.py index 6b7c460..97ded25 100644 --- a/bat_detect/evaluate/evaluate_models.py +++ b/bat_detect/evaluate/evaluate_models.py @@ -207,9 +207,7 @@ def load_sonobat_preds(dataset, id, sb_meta, set_class_name=None): ann_c["class"] = file_res[id]["species_1"] else: ann_c["class"] = set_class_name - ann_c["start_time"] = np.round( - da_c.iloc[aa]["TimeInFile"] / 1000.0, 5 - ) + ann_c["start_time"] = np.round(da_c.iloc[aa]["TimeInFile"] / 1000.0, 5) ann_c["end_time"] = np.round( ann_c["start_time"] + da_c.iloc[aa]["CallDuration"] / 1000.0, 5 ) @@ -267,9 +265,7 @@ def assign_to_gt(gt, pred, iou_thresh): iou_m = np.zeros((num_preds, num_gts)) for ii in range(num_preds): for jj in range(num_gts): - iou_m[ii, jj] = bb_overlap( - gt["annotation"][jj], pred["annotation"][ii] - ) + iou_m[ii, jj] = bb_overlap(gt["annotation"][jj], pred["annotation"][ii]) # greedily assign detections to ground truths # needs to be greater than some threshold and we cannot assign GT @@ -278,9 +274,7 @@ def assign_to_gt(gt, pred, iou_thresh): for jj in range(num_gts): max_iou = np.argmax(iou_m[:, jj]) if iou_m[max_iou, jj] > iou_thresh: - pred["annotation"][max_iou]["class"] = gt["annotation"][jj][ - "class" - ] + pred["annotation"][max_iou]["class"] = gt["annotation"][jj]["class"] iou_m[max_iou, :] = -1.0 return pred @@ -290,25 +284,17 @@ def parse_data(data, class_names, non_event_classes, is_pred=False): class_names_all = class_names + non_event_classes data["class_names"] = np.array([aa["class"] for aa in 
data["annotation"]]) - data["start_times"] = np.array( - [aa["start_time"] for aa in data["annotation"]] - ) + data["start_times"] = np.array([aa["start_time"] for aa in data["annotation"]]) data["end_times"] = np.array([aa["end_time"] for aa in data["annotation"]]) - data["high_freqs"] = np.array( - [float(aa["high_freq"]) for aa in data["annotation"]] - ) - data["low_freqs"] = np.array( - [float(aa["low_freq"]) for aa in data["annotation"]] - ) + data["high_freqs"] = np.array([float(aa["high_freq"]) for aa in data["annotation"]]) + data["low_freqs"] = np.array([float(aa["low_freq"]) for aa in data["annotation"]]) if is_pred: # when loading predictions data["det_probs"] = np.array( [float(aa["det_prob"]) for aa in data["annotation"]] ) - data["class_probs"] = np.zeros( - (len(class_names) + 1, len(data["annotation"])) - ) + data["class_probs"] = np.zeros((len(class_names) + 1, len(data["annotation"]))) data["class_ids"] = np.array( [class_names_all.index(aa["class"]) for aa in data["annotation"]] ).astype(np.int32) @@ -334,8 +320,7 @@ def load_gt_data(datasets, events_of_interest, class_names, classes_to_ignore): [dd], events_of_interest=events_of_interest, verbose=True ) gt_dataset = [ - parse_data(gg, class_names, classes_to_ignore, False) - for gg in gt_dataset + parse_data(gg, class_names, classes_to_ignore, False) for gg in gt_dataset ] for gt in gt_dataset: @@ -371,9 +356,7 @@ def eval_rf_model(clf, pred, un_train_class, num_classes): # stores the prediction in place if pred["feats"].shape[0] > 0: pred["class_probs"] = np.zeros((num_classes, pred["feats"].shape[0])) - pred["class_probs"][un_train_class, :] = clf.predict_proba( - pred["feats"] - ).T + pred["class_probs"][un_train_class, :] = clf.predict_proba(pred["feats"]).T pred["det_probs"] = pred["class_probs"][:-1, :].sum(0) else: pred["class_probs"] = np.zeros((num_classes, 0)) @@ -474,12 +457,8 @@ if __name__ == "__main__": help="Output directory for plots", ) parser.add_argument("data_dir", type=str, help="Path to root of datasets") - parser.add_argument( - "ann_dir", type=str, help="Path to extracted annotations" - ) - parser.add_argument( - "bd_model_path", type=str, help="Path to BatDetect model" - ) + parser.add_argument("ann_dir", type=str, help="Path to extracted annotations") + parser.add_argument("bd_model_path", type=str, help="Path to BatDetect model") parser.add_argument( "--test_file", type=str, @@ -519,9 +498,7 @@ if __name__ == "__main__": default="", help="Text to add as title of plots", ) - parser.add_argument( - "--rand_seed", type=int, default=2001, help="Random seed" - ) + parser.add_argument("--rand_seed", type=int, default=2001, help="Random seed") args = vars(parser.parse_args()) np.random.seed(args["rand_seed"]) @@ -554,9 +531,7 @@ if __name__ == "__main__": test_dict["dataset_name"] = args["test_file"].replace(".json", "") test_dict["is_test"] = True test_dict["is_binary"] = True - test_dict["ann_path"] = os.path.join( - args["ann_dir"], args["test_file"] - ) + test_dict["ann_path"] = os.path.join(args["ann_dir"], args["test_file"]) test_dict["wav_path"] = args["data_dir"] test_sets = [test_dict] @@ -607,9 +582,7 @@ if __name__ == "__main__": for ii, gt in enumerate(gt_test): sb_pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta) if sb_pred["class_name"] != "": - sb_pred = parse_data( - sb_pred, class_names, classes_to_ignore, True - ) + sb_pred = parse_data(sb_pred, class_names, classes_to_ignore, True) sb_pred["class_probs"][ sb_pred["class_ids"], 
np.arange(sb_pred["class_probs"].shape[1]), @@ -644,9 +617,7 @@ if __name__ == "__main__": x_train = [] y_train = [] for gt in gt_train: - pred = load_sonobat_preds( - gt["dataset_name"], gt["id"], sb_meta, "Not Bat" - ) + pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat") if len(pred["annotation"]) > 0: # compute detection overlap with ground truth to determine which are the TP detections @@ -663,9 +634,7 @@ if __name__ == "__main__": # run the model on the test set preds_sb_rf = [] for gt in gt_test: - pred = load_sonobat_preds( - gt["dataset_name"], gt["id"], sb_meta, "Not Bat" - ) + pred = load_sonobat_preds(gt["dataset_name"], gt["id"], sb_meta, "Not Bat") pred = parse_data(pred, class_names, classes_to_ignore, True) pred = eval_rf_model(clf_sb, pred, un_train_class, num_classes) preds_sb_rf.append(pred) @@ -697,9 +666,7 @@ if __name__ == "__main__": x_train = [] y_train = [] for gt in gt_train: - pred = load_tadarida_pred( - args["td_ip_dir"], gt["dataset_name"], gt["id"] - ) + pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"]) # compute detection overlap with ground truth to determine which are the TP detections assign_to_gt(gt, pred, args["iou_thresh"]) pred = parse_data(pred, class_names, classes_to_ignore, True) @@ -714,9 +681,7 @@ if __name__ == "__main__": # run the model on the test set preds_td = [] for gt in gt_test: - pred = load_tadarida_pred( - args["td_ip_dir"], gt["dataset_name"], gt["id"] - ) + pred = load_tadarida_pred(args["td_ip_dir"], gt["dataset_name"], gt["id"]) pred = parse_data(pred, class_names, classes_to_ignore, True) pred = eval_rf_model(clf_td, pred, un_train_class, num_classes) preds_td.append(pred) diff --git a/bat_detect/finetune/finetune_model.py b/bat_detect/finetune/finetune_model.py index 8c20e22..0209670 100644 --- a/bat_detect/finetune/finetune_model.py +++ b/bat_detect/finetune/finetune_model.py @@ -28,9 +28,7 @@ if __name__ == "__main__": print(info_str) parser = argparse.ArgumentParser() - parser.add_argument( - "audio_path", type=str, help="Input directory for audio" - ) + parser.add_argument("audio_path", type=str, help="Input directory for audio") parser.add_argument( "train_ann_path", type=str, @@ -41,9 +39,7 @@ if __name__ == "__main__": type=str, help="Path to where test annotation file is stored", ) - parser.add_argument( - "model_path", type=str, help="Path to pretrained model" - ) + parser.add_argument("model_path", type=str, help="Path to pretrained model") parser.add_argument( "--op_model_name", type=str, @@ -82,9 +78,7 @@ if __name__ == "__main__": params["device"] = "cuda" else: params["device"] = "cpu" - print( - "\nNote, this will be a lot faster if you use computer with a GPU.\n" - ) + print("\nNote, this will be a lot faster if you use computer with a GPU.\n") print("\nAudio directory: " + args["audio_path"]) print("Train file: " + args["train_ann_path"]) @@ -98,9 +92,7 @@ if __name__ == "__main__": ) if args["train_from_scratch"]: - print( - "\nTraining model from scratch i.e. not using pretrained weights" - ) + print("\nTraining model from scratch i.e. 
not using pretrained weights") model, params_train = du.load_model(args["model_path"], False) else: model, params_train = du.load_model(args["model_path"], True) @@ -137,17 +129,13 @@ if __name__ == "__main__": data_train, params["class_names"], params["class_inv_freq"], - ) = tu.load_set_of_anns( - train_sets, classes_to_ignore, params["events_of_interest"] - ) + ) = tu.load_set_of_anns(train_sets, classes_to_ignore, params["events_of_interest"]) print("Number of files", len(data_train)) params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping( params["class_names"] ) - params["class_names_short"] = tu.get_short_class_names( - params["class_names"] - ) + params["class_names_short"] = tu.get_short_class_names(params["class_names"]) # load test annotations test_sets = [] @@ -230,9 +218,7 @@ if __name__ == "__main__": param.requires_grad = False optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"]) - scheduler = CosineAnnealingLR( - optimizer, params["num_epochs"] * len(train_loader) - ) + scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader)) if params["train_loss"] == "mse": det_criterion = losses.mse_loss elif params["train_loss"] == "focal": @@ -307,9 +293,7 @@ if __name__ == "__main__": test_plt_class.update_and_save( epoch, [rs["avg_prec"] for rs in test_res["class_pr"]] ) - pu.plot_pr_curve_class( - params["experiment"], "test_pr", "test_pr", test_res - ) + pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res) # save finetuned model print("saving model to: " + params["model_file_name"]) diff --git a/bat_detect/finetune/prep_data_finetune.py b/bat_detect/finetune/prep_data_finetune.py index bf86e97..1ee4ceb 100644 --- a/bat_detect/finetune/prep_data_finetune.py +++ b/bat_detect/finetune/prep_data_finetune.py @@ -58,12 +58,8 @@ if __name__ == "__main__": print(info_str) parser = argparse.ArgumentParser() - parser.add_argument( - "dataset_name", type=str, help="Name to call your dataset" - ) - parser.add_argument( - "audio_dir", type=str, help="Input directory for audio" - ) + parser.add_argument("dataset_name", type=str, help="Name to call your dataset") + parser.add_argument("audio_dir", type=str, help="Input directory for audio") parser.add_argument( "ann_dir", type=str, @@ -151,14 +147,10 @@ if __name__ == "__main__": test_files = load_file_names(args["test_file"]) file_names_all = [dd["id"] for dd in data_all] train_inds = [ - file_names_all.index(ff) - for ff in train_files - if ff in file_names_all + file_names_all.index(ff) for ff in train_files if ff in file_names_all ] test_inds = [ - file_names_all.index(ff) - for ff in test_files - if ff in file_names_all + file_names_all.index(ff) for ff in test_files if ff in file_names_all ] else: @@ -181,9 +173,7 @@ if __name__ == "__main__": op_name_train = op_name + "_TRAIN.json" op_name_test = op_name + "_TEST.json" - class_un_train = print_dataset_stats( - data_train, "Train", classes_to_ignore - ) + class_un_train = print_dataset_stats(data_train, "Train", classes_to_ignore) class_un_test = print_dataset_stats(data_test, "Test", classes_to_ignore) if len(data_train) > 0 and len(data_test) > 0: diff --git a/bat_detect/train/audio_dataloader.py b/bat_detect/train/audio_dataloader.py index ffd8086..697339b 100644 --- a/bat_detect/train/audio_dataloader.py +++ b/bat_detect/train/audio_dataloader.py @@ -73,9 +73,7 @@ def generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, params): y_2d_det = np.zeros((1, op_height, op_width), dtype=np.float32) y_2d_size = 
np.zeros((2, op_height, op_width), dtype=np.float32) # num classes and "background" class - y_2d_classes = np.zeros( - (num_classes + 1, op_height, op_width), dtype=np.float32 - ) + y_2d_classes = np.zeros((num_classes + 1, op_height, op_width), dtype=np.float32) # create 2D ground truth heatmaps for ii in valid_inds: @@ -128,8 +126,7 @@ def draw_gaussian(heatmap, center, sigmax, sigmay=None): x0 = y0 = size // 2 # g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) g = np.exp( - -((x - x0) ** 2) / (2 * sigmax**2) - - ((y - y0) ** 2) / (2 * sigmay**2) + -((x - x0) ** 2) / (2 * sigmax**2) - ((y - y0) ** 2) / (2 * sigmay**2) ) g_x = max(0, -ul[0]), min(br[0], h) - ul[0] g_y = max(0, -ul[1]), min(br[1], w) - ul[1] @@ -278,9 +275,7 @@ def combine_audio_aug(audio, sampling_rate, ann, audio2, sampling_rate2, ann2): # when combining calls from different files, assume they come from different individuals if kk == "individual_ids": if (ann[kk] > -1).sum() > 0: - ann2[kk][ann2[kk] > -1] += ( - np.max(ann[kk][ann[kk] > -1]) + 1 - ) + ann2[kk][ann2[kk] > -1] += np.max(ann[kk][ann[kk] > -1]) + 1 if (kk != "class_id_file") and (kk != "annotated"): ann[kk] = np.hstack((ann[kk], ann2[kk]))[inds] @@ -289,9 +284,7 @@ def combine_audio_aug(audio, sampling_rate, ann, audio2, sampling_rate2, ann2): class AudioLoader(torch.utils.data.Dataset): - def __init__( - self, data_anns_ip, params, dataset_name=None, is_train=False - ): + def __init__(self, data_anns_ip, params, dataset_name=None, is_train=False): self.data_anns = [] self.is_train = is_train @@ -314,9 +307,7 @@ class AudioLoader(torch.utils.data.Dataset): # convert class name into class label if aa["class"] in self.params["class_names"]: - aa["class_id"] = self.params["class_names"].index( - aa["class"] - ) + aa["class_id"] = self.params["class_names"].index(aa["class"]) else: aa["class_id"] = -1 @@ -324,12 +315,8 @@ class AudioLoader(torch.utils.data.Dataset): filtered_annotations.append(aa) dd["annotation"] = filtered_annotations - dd["start_times"] = np.array( - [aa["start_time"] for aa in dd["annotation"]] - ) - dd["end_times"] = np.array( - [aa["end_time"] for aa in dd["annotation"]] - ) + dd["start_times"] = np.array([aa["start_time"] for aa in dd["annotation"]]) + dd["end_times"] = np.array([aa["end_time"] for aa in dd["annotation"]]) dd["high_freqs"] = np.array( [float(aa["high_freq"]) for aa in dd["annotation"]] ) @@ -406,18 +393,12 @@ class AudioLoader(torch.utils.data.Dataset): ) if audio_raw.shape[0] - length_samples > 0: - sample_crop = np.random.randint( - audio_raw.shape[0] - length_samples - ) + sample_crop = np.random.randint(audio_raw.shape[0] - length_samples) else: sample_crop = 0 audio_raw = audio_raw[sample_crop : sample_crop + length_samples] - ann["start_times"] = ann["start_times"] - sample_crop / float( - sampling_rate - ) - ann["end_times"] = ann["end_times"] - sample_crop / float( - sampling_rate - ) + ann["start_times"] = ann["start_times"] - sample_crop / float(sampling_rate) + ann["end_times"] = ann["end_times"] - sample_crop / float(sampling_rate) # pad audio if self.is_train: @@ -496,9 +477,7 @@ class AudioLoader(torch.utils.data.Dataset): spec = scale_vol_aug(spec, self.params) if np.random.random() < self.params["aug_prob"]: - spec = warp_spec_aug( - spec, ann, self.return_spec_for_viz, self.params - ) + spec = warp_spec_aug(spec, ann, self.return_spec_for_viz, self.params) if np.random.random() < self.params["aug_prob"]: spec = mask_time_aug(spec, self.params) @@ -509,9 +488,7 @@ class 
AudioLoader(torch.utils.data.Dataset): outputs = {} outputs["spec"] = spec if self.return_spec_for_viz: - outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze( - 0 - ) + outputs["spec_for_viz"] = torch.from_numpy(spec_for_viz).unsqueeze(0) # create ground truth heatmaps ( @@ -519,9 +496,7 @@ class AudioLoader(torch.utils.data.Dataset): outputs["y_2d_size"], outputs["y_2d_classes"], ann_aug, - ) = generate_gt_heatmaps( - spec_op_shape, sampling_rate, ann, self.params - ) + ) = generate_gt_heatmaps(spec_op_shape, sampling_rate, ann, self.params) # hack to get around requirement that all vectors are the same length in # the output batch diff --git a/bat_detect/train/evaluate.py b/bat_detect/train/evaluate.py index a926fbb..47fb26b 100755 --- a/bat_detect/train/evaluate.py +++ b/bat_detect/train/evaluate.py @@ -1,10 +1,5 @@ import numpy as np -from sklearn.metrics import ( - accuracy_score, - auc, - balanced_accuracy_score, - roc_curve, -) +from sklearn.metrics import accuracy_score, auc, balanced_accuracy_score, roc_curve def compute_error_auc(op_str, gt, pred, prob): @@ -17,10 +12,7 @@ def compute_error_auc(op_str, gt, pred, prob): fpr, tpr, thresholds = roc_curve(gt, pred) roc_auc = auc(fpr, tpr) - print( - op_str - + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc) - ) + print(op_str + ", class acc = {:.3f}, ROC AUC = {:.3f}".format(class_acc, roc_auc)) # return class_acc, roc_auc @@ -114,14 +106,10 @@ def compute_pre_rec( confidence.append(pp["det_probs"][valid_inds]) elif eval_mode == "per_class": # per class - confidence.append( - pp["class_probs"].T[valid_inds, class_of_interest] - ) + confidence.append(pp["class_probs"].T[valid_inds, class_of_interest]) elif eval_mode == "top_class": # per class - note that sometimes 'class_probs' can be num_classes+1 in size - top_class = np.argmax( - pp["class_probs"].T[valid_inds, :num_classes], 1 - ) + top_class = np.argmax(pp["class_probs"].T[valid_inds, :num_classes], 1) confidence.append(pp["class_probs"].T[valid_inds, top_class]) pred_class.append(top_class) @@ -170,9 +158,7 @@ def compute_pre_rec( num_positives += len(gg["start_times"][valid_inds]) elif eval_mode == "per_class": # all valid ones with class of interest - num_positives += ( - gg["class_ids"][valid_inds] == class_of_interest - ).sum() + num_positives += (gg["class_ids"][valid_inds] == class_of_interest).sum() elif eval_mode == "top_class": # all valid ones with non generic class num_positives += (gg["class_ids"][valid_inds] > -1).sum() @@ -254,9 +240,7 @@ def compute_pre_rec( results["avg_prec"] = np.nan results["rec_at_x"] = np.nan else: - results["avg_prec"] = np.round( - calc_average_precision(recall, precision), 5 - ) + results["avg_prec"] = np.round(calc_average_precision(recall, precision), 5) results["rec_at_x"] = np.round(calc_recall_at_x(recall, precision), 5) return results @@ -299,20 +283,12 @@ def compute_file_accuracy(gts, preds, num_classes): # compute min and max scoring range - then threshold min_val = 0 - mins = [ - pp["class_probs"].min() - for pp in preds - if pp["class_probs"].shape[1] > 0 - ] + mins = [pp["class_probs"].min() for pp in preds if pp["class_probs"].shape[1] > 0] if len(mins) > 0: min_val = np.min(mins) max_val = 1.0 - maxes = [ - pp["class_probs"].max() - for pp in preds - if pp["class_probs"].shape[1] > 0 - ] + maxes = [pp["class_probs"].max() for pp in preds if pp["class_probs"].shape[1] > 0] if len(maxes) > 0: max_val = np.max(maxes) @@ -334,9 +310,7 @@ def compute_file_accuracy(gts, preds, num_classes): # 
pick the result corresponding to the overall best threshold pred_valid_all = np.vstack(pred_valid_all) - acc_per_thresh = ( - np.array(gt_valid)[..., np.newaxis] == pred_valid_all - ).mean(0) + acc_per_thresh = (np.array(gt_valid)[..., np.newaxis] == pred_valid_all).mean(0) best_thresh = np.argmax(acc_per_thresh) best_acc = acc_per_thresh[best_thresh] pred_valid = pred_valid_all[:, best_thresh].astype(np.int).tolist() diff --git a/bat_detect/train/train_model.py b/bat_detect/train/train_model.py index 2fd33fe..cca7011 100644 --- a/bat_detect/train/train_model.py +++ b/bat_detect/train/train_model.py @@ -62,9 +62,7 @@ def save_images_batch(model, data_loader, params): data_loader.dataset.return_spec_for_viz = False -def save_image( - spec_viz, outputs, ind, inputs, params, op_file_name, plot_title -): +def save_image(spec_viz, outputs, ind, inputs, params, op_file_name, plot_title): pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float()) pred_hm = outputs["pred_det"][ind, 0, :].data.cpu().numpy() spec_viz = spec_viz[ind, 0, :] @@ -87,14 +85,10 @@ def save_image( ) -def loss_fun( - outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq -): +def loss_fun(outputs, gt_det, gt_size, gt_class, det_criterion, params, class_inv_freq): # detection loss - loss = params["det_loss_weight"] * det_criterion( - outputs["pred_det"], gt_det - ) + loss = params["det_loss_weight"] * det_criterion(outputs["pred_det"], gt_det) # bounding box size loss loss += params["size_loss_weight"] * losses.bbox_size_loss( @@ -111,9 +105,7 @@ def loss_fun( return loss -def train( - model, epoch, data_loader, det_criterion, optimizer, scheduler, params -): +def train(model, epoch, data_loader, det_criterion, optimizer, scheduler, params): model.train() @@ -226,9 +218,7 @@ def test(model, epoch, data_loader, det_criterion, params): test_loss.update(loss.item(), data.shape[0]) # do NMS - pred_nms, _ = pp.run_nms( - outputs, params, inputs["sampling_rate"].float() - ) + pred_nms, _ = pp.run_nms(outputs, params, inputs["sampling_rate"].float()) predictions.extend(pred_nms) ground_truths.extend(parse_gt_data(inputs)) @@ -338,9 +328,7 @@ if __name__ == "__main__": # setup arg parser and populate it with exiting parameters - will not work with lists parser = argparse.ArgumentParser() parser.add_argument("data_dir", type=str, help="Path to root of datasets") - parser.add_argument( - "ann_dir", type=str, help="Path to extracted annotations" - ) + parser.add_argument("ann_dir", type=str, help="Path to extracted annotations") parser.add_argument( "--train_split", type=str, @@ -367,9 +355,7 @@ if __name__ == "__main__": # save notes file if params["notes"] != "": - tu.write_notes_file( - params["experiment"] + "notes.txt", params["notes"] - ) + tu.write_notes_file(params["experiment"] + "notes.txt", params["notes"]) # load the training and test meta data - there are different splits defined train_sets, test_sets = ts.get_train_test_data( @@ -401,14 +387,12 @@ if __name__ == "__main__": params["genus_names"], params["genus_mapping"] = tu.get_genus_mapping( params["class_names"] ) - params["class_names_short"] = tu.get_short_class_names( - params["class_names"] - ) + params["class_names_short"] = tu.get_short_class_names(params["class_names"]) # standardize the low and high frequency value for specified classes - params["standardize_classs_names"] = params[ - "standardize_classs_names_ip" - ].split(";") + params["standardize_classs_names"] = params["standardize_classs_names_ip"].split( + ";" + ) for 
cc in params["standardize_classs_names"]: if cc in params["class_names"]: data_train = tu.standardize_low_freq(data_train, cc) @@ -458,9 +442,7 @@ if __name__ == "__main__": optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"]) # optimizer = torch.optim.SGD(model.parameters(), lr=params['lr'], momentum=0.9) - scheduler = CosineAnnealingLR( - optimizer, params["num_epochs"] * len(train_loader) - ) + scheduler = CosineAnnealingLR(optimizer, params["num_epochs"] * len(train_loader)) if params["train_loss"] == "mse": det_criterion = losses.mse_loss elif params["train_loss"] == "focal": @@ -523,9 +505,7 @@ if __name__ == "__main__": if epoch % params["num_eval_epochs"] == 0: # detection accuracy on test set - test_res, test_loss = test( - model, epoch, test_loader, det_criterion, params - ) + test_res, test_loss = test(model, epoch, test_loader, det_criterion, params) test_plt_ls.update_and_save(epoch, [test_loss["test_loss"]]) test_plt.update_and_save( epoch, @@ -540,9 +520,7 @@ if __name__ == "__main__": test_plt_class.update_and_save( epoch, [rs["avg_prec"] for rs in test_res["class_pr"]] ) - pu.plot_pr_curve_class( - params["experiment"], "test_pr", "test_pr", test_res - ) + pu.plot_pr_curve_class(params["experiment"], "test_pr", "test_pr", test_res) # save trained model print("saving model to: " + params["model_file_name"]) diff --git a/bat_detect/train/train_split.py b/bat_detect/train/train_split.py index 01b5c03..2036223 100644 --- a/bat_detect/train/train_split.py +++ b/bat_detect/train/train_split.py @@ -24,8 +24,7 @@ def split_diff(ann_dir, wav_dir, load_extra=True): "dataset_name": "BatDetective", "is_test": False, "is_binary": True, # just a bat / not bat dataset ie no classes - "ann_path": ann_dir - + "train_set_bulgaria_batdetective_with_bbs.json", + "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json", "wav_path": wav_dir + "bat_detective/audio/", } ) @@ -152,8 +151,7 @@ def split_same(ann_dir, wav_dir, load_extra=True): "dataset_name": "BatDetective", "is_test": False, "is_binary": True, - "ann_path": ann_dir - + "train_set_bulgaria_batdetective_with_bbs.json", + "ann_path": ann_dir + "train_set_bulgaria_batdetective_with_bbs.json", "wav_path": wav_dir + "bat_detective/audio/", } ) diff --git a/bat_detect/train/train_utils.py b/bat_detect/train/train_utils.py index 62441a7..53f91c2 100644 --- a/bat_detect/train/train_utils.py +++ b/bat_detect/train/train_utils.py @@ -25,9 +25,7 @@ def get_blank_dataset_dict(dataset_name, is_test, ann_path, wav_path): def get_short_class_names(class_names, str_len=3): class_names_short = [] for cc in class_names: - class_names_short.append( - " ".join([sp[:str_len] for sp in cc.split(" ")]) - ) + class_names_short.append(" ".join([sp[:str_len] for sp in cc.split(" ")])) return class_names_short @@ -157,9 +155,7 @@ def load_set_of_anns( str_len = np.max([len(cc) for cc in class_names]) + 5 for cc in range(len(class_names)): print( - str(cc).ljust(5) - + class_names[cc].ljust(str_len) - + str(class_cnts[cc]) + str(cc).ljust(5) + class_names[cc].ljust(str_len) + str(class_cnts[cc]) ) if len(classes_to_ignore) == 0: diff --git a/bat_detect/utils/audio_utils.py b/bat_detect/utils/audio_utils.py index 3ad648b..47bf103 100644 --- a/bat_detect/utils/audio_utils.py +++ b/bat_detect/utils/audio_utils.py @@ -39,9 +39,7 @@ def generate_spectrogram( min_freq = round(params["min_freq"] * params["fft_win_length"]) if spec.shape[0] < max_freq: freq_pad = max_freq - spec.shape[0] - spec = np.vstack( - (np.zeros((freq_pad, 
spec.shape[1]), dtype=spec.dtype), spec) - ) + spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=spec.dtype), spec)) spec_cropped = spec[-max_freq : spec.shape[0] - min_freq, :] if params["spec_scale"] == "log": @@ -51,11 +49,7 @@ def generate_spectrogram( * ( 1.0 / ( - np.abs( - np.hanning( - int(params["fft_win_length"] * sampling_rate) - ) - ) + np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) ** 2 ).sum() ) @@ -88,11 +82,7 @@ def generate_spectrogram( * ( 1.0 / ( - np.abs( - np.hanning( - int(params["fft_win_length"] * sampling_rate) - ) - ) + np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) ** 2 ).sum() ) @@ -132,9 +122,7 @@ def load_audio_file( # clipping maximum duration if max_duration is not False: - max_duration = np.minimum( - int(sampling_rate * max_duration), audio_raw.shape[0] - ) + max_duration = np.minimum(int(sampling_rate * max_duration), audio_raw.shape[0]) audio_raw = audio_raw[:max_duration] # convert to float32 and scale @@ -171,9 +159,7 @@ def pad_audio( # too small # used during training to ensure all the batches are the same size diff = fixed_width * step + noverlap - audio_raw.shape[0] - audio_raw = np.hstack( - (audio_raw, np.zeros(diff, dtype=audio_raw.dtype)) - ) + audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype))) elif fixed_width is not None and spec_width > fixed_width: # too big @@ -181,18 +167,13 @@ def pad_audio( diff = fixed_width * step + noverlap - audio_raw.shape[0] audio_raw = audio_raw[:diff] - elif ( - spec_width_rs < min_size - or (np.floor(spec_width_rs) % divide_factor) != 0 - ): + elif spec_width_rs < min_size or (np.floor(spec_width_rs) % divide_factor) != 0: # need to be at least min_size div_amt = np.ceil(spec_width_rs / float(divide_factor)) div_amt = np.maximum(1, div_amt) target_size = int(div_amt * divide_factor * (1.0 / resize_factor)) diff = target_size * step + noverlap - audio_raw.shape[0] - audio_raw = np.hstack( - (audio_raw, np.zeros(diff, dtype=audio_raw.dtype)) - ) + audio_raw = np.hstack((audio_raw, np.zeros(diff, dtype=audio_raw.dtype))) return audio_raw @@ -235,7 +216,7 @@ def gen_mag_spectrogram_pt(x, fs, ms, overlap_perc): def pcen(spec_cropped, sampling_rate): # TODO should be passing hop_length too i.e. 
step - spec = librosa.pcen( - spec_cropped * (2**31), sr=sampling_rate / 10 - ).astype(np.float32) + spec = librosa.pcen(spec_cropped * (2**31), sr=sampling_rate / 10).astype( + np.float32 + ) return spec diff --git a/bat_detect/utils/detector_utils.py b/bat_detect/utils/detector_utils.py index 7d2470f..2815cc0 100644 --- a/bat_detect/utils/detector_utils.py +++ b/bat_detect/utils/detector_utils.py @@ -158,9 +158,7 @@ def convert_results( results["spec_feat_names"] = feats.get_feature_names() if len(cnn_feats) > 0: results["cnn_feats"] = cnn_feats - results["cnn_feat_names"] = [ - str(ii) for ii in range(cnn_feats.shape[1]) - ] + results["cnn_feat_names"] = [str(ii) for ii in range(cnn_feats.shape[1])] if len(spec_slices) > 0: results["spec_slices"] = spec_slices @@ -194,9 +192,7 @@ def save_results_to_file(results, op_path): # save features if "spec_feats" in results.keys(): - df = pd.DataFrame( - results["spec_feats"], columns=results["spec_feat_names"] - ) + df = pd.DataFrame(results["spec_feats"], columns=results["spec_feat_names"]) df.to_csv( op_path + "_spec_features.csv", sep=",", @@ -205,9 +201,7 @@ def save_results_to_file(results, op_path): ) if "cnn_feats" in results.keys(): - df = pd.DataFrame( - results["cnn_feats"], columns=results["cnn_feat_names"] - ) + df = pd.DataFrame(results["cnn_feats"], columns=results["cnn_feat_names"]) df.to_csv( op_path + "_cnn_features.csv", sep=",", @@ -243,9 +237,7 @@ def compute_spectrogram(audio, sampling_rate, params, return_np=False): # resize the spec rs = params["resize_factor"] spec_op_shape = (int(params["spec_height"] * rs), int(spec.shape[-1] * rs)) - spec = F.interpolate( - spec, size=spec_op_shape, mode="bilinear", align_corners=False - ) + spec = F.interpolate(spec, size=spec_op_shape, mode="bilinear", align_corners=False) if return_np: spec_np = spec[0, 0, :].cpu().data.numpy() @@ -306,9 +298,7 @@ def process_file( chunk_time = args["chunk_size"] * chunk_id chunk_length = int(sampling_rate * args["chunk_size"]) start_sample = chunk_id * chunk_length - end_sample = np.minimum( - (chunk_id + 1) * chunk_length, audio_full.shape[0] - ) + end_sample = np.minimum((chunk_id + 1) * chunk_length, audio_full.shape[0]) audio = audio_full[start_sample:end_sample] # load audio file and compute spectrogram @@ -343,9 +333,7 @@ def process_file( cnn_feats.append(features[0]) if args["spec_slices"]: - spec_slices.extend( - feats.extract_spec_slices(spec_np, pred_nms, params) - ) + spec_slices.extend(feats.extract_spec_slices(spec_np, pred_nms, params)) # convert the predictions into output dictionary file_id = os.path.basename(audio_file) @@ -366,10 +354,7 @@ def process_file( # summarize results if not args["quiet"]: num_detections = len(results["pred_dict"]["annotation"]) - print( - "{}".format(num_detections) - + " call(s) detected above the threshold." 
- ) + print("{}".format(num_detections) + " call(s) detected above the threshold.") # print results for top n classes if not args["quiet"] and (num_detections > 0): @@ -379,8 +364,7 @@ def process_file( print("species name".ljust(30) + "probablity present") for cc in np.argsort(class_overall)[::-1][:top_n]: print( - params["class_names"][cc].ljust(30) - + str(round(class_overall[cc], 3)) + params["class_names"][cc].ljust(30) + str(round(class_overall[cc], 3)) ) if return_raw_preds: diff --git a/bat_detect/utils/plot_utils.py b/bat_detect/utils/plot_utils.py index ce88375..8b1945a 100644 --- a/bat_detect/utils/plot_utils.py +++ b/bat_detect/utils/plot_utils.py @@ -57,9 +57,7 @@ def create_box_image( if plot_class_names: for ii, bb in enumerate(boxes): - txt = " ".join( - [sp[:3] for sp in detections_ip[ii]["class"].split(" ")] - ) + txt = " ".join([sp[:3] for sp in detections_ip[ii]["class"].split(" ")]) font_info = { "color": "white", "size": 10, @@ -89,9 +87,7 @@ def save_ann_spec( y_extent = [0, duration, min_freq, max_freq] plt.close("all") - fig = plt.figure( - 0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100 - ) + fig = plt.figure(0, figsize=(spec.shape[1] / 100, spec.shape[0] / 100), dpi=100) plt.imshow( spec, aspect="auto", @@ -128,16 +124,12 @@ def save_ann_spec( plt.savefig(op_path) -def plot_pts( - fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False -): +def plot_pts(fig_id, feats, class_names, colors, marker_size=4.0, plot_legend=False): plt.figure(fig_id) un_class, labels = np.unique(class_names, return_inverse=True) un_labels = np.unique(labels) if un_labels.shape[0] > len(colors): - colors = [ - plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels - ] + colors = [plt.cm.jet(float(ii) / un_labels.shape[0]) for ii in un_labels] for ii, u in enumerate(un_labels): inds = np.where(labels == u)[0] @@ -244,9 +236,7 @@ def plot_spec( ax0.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax0.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax0.text( - 0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info - ) + ax0.text(0, params["min_freq"] // freq_scale, "Ground Truth", fontdict=font_info) plt.grid(False) if plot_boxes: @@ -271,9 +261,7 @@ def plot_spec( ax1.imshow(spec, aspect="auto", cmap="plasma", extent=y_extent) ax1.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax1.text( - 0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info - ) + ax1.text(0, params["min_freq"] // freq_scale, "Prediction", fontdict=font_info) plt.grid(False) if plot_boxes: @@ -308,9 +296,7 @@ def plot_spec( ) # ax2.xaxis.set_ticklabels([]) font_info = {"color": "white", "size": 12, "weight": "bold"} - ax2.text( - 0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info - ) + ax2.text(0, params["min_freq"] // freq_scale, "Heatmap", fontdict=font_info) plt.grid(False) @@ -408,21 +394,15 @@ def plot_confusion_matrix( # shorten the class names for plotting class_names = [] for cc in class_names_long: - class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[ - :-1 - ] + class_name_sm = "".join([cc_sm[:3] + " " for cc_sm in cc.split(" ")])[:-1] class_names.append(class_name_sm) num_classes = len(class_names) - cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype( - np.float32 - ) + cm = confusion_matrix(gt, pred, labels=np.arange(num_classes)).astype(np.float32) cm_norm = cm.sum(1) valid_inds = np.where(cm_norm 
> 0)[0] - cm[valid_inds, :] = ( - cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] - ) + cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] cm[np.where(cm_norm == -0)[0], :] = np.nan if verbose: @@ -507,9 +487,7 @@ class LossPlotter(object): if self.logy: plt.gca().set_yscale("log") plt.grid(True) - plt.legend( - bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0 - ) + plt.legend(bbox_to_anchor=(1.01, 1), loc="upper left", borderaxespad=0.0) plt.tight_layout() plt.savefig(self.op_file_name) plt.close(0) @@ -524,19 +502,15 @@ class LossPlotter(object): def save_confusion_matrix(self, gt, pred): plt.figure(0) - cm = confusion_matrix( - gt, pred, np.arange(len(self.class_names)) - ).astype(np.float32) + cm = confusion_matrix(gt, pred, np.arange(len(self.class_names))).astype( + np.float32 + ) cm_norm = cm.sum(1) valid_inds = np.where(cm_norm > 0)[0] - cm[valid_inds, :] = ( - cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] - ) + cm[valid_inds, :] = cm[valid_inds, :] / cm_norm[valid_inds][..., np.newaxis] plt.imshow(cm, vmin=0, vmax=1, cmap="plasma") plt.colorbar() - plt.xticks( - np.arange(cm.shape[1]), self.class_names, rotation="vertical" - ) + plt.xticks(np.arange(cm.shape[1]), self.class_names, rotation="vertical") plt.yticks(np.arange(cm.shape[0]), self.class_names) plt.xlabel("Predicted") plt.ylabel("Ground Truth") diff --git a/bat_detect/utils/visualize.py b/bat_detect/utils/visualize.py index 54be1df..9b5b4b2 100644 --- a/bat_detect/utils/visualize.py +++ b/bat_detect/utils/visualize.py @@ -56,25 +56,19 @@ class InteractivePlotter: self.annotated = np.zeros( self.labels.shape[0], dtype=np.int ) # can populate this with 1's where we have labels - self.labels_cols = [ - colors[self.labels[ii]] for ii in range(len(self.labels)) - ] + self.labels_cols = [colors[self.labels[ii]] for ii in range(len(self.labels))] self.freq_lims = freq_lims self.allow_training = allow_training self.pt_size = 5.0 - self.spec_pad = ( - 0.2 # this much padding has been applied to the spec slices - ) + self.spec_pad = 0.2 # this much padding has been applied to the spec slices self.fig_width = 12 self.fig_height = 8 self.current_id = 0 max_ind = np.argmax([ss.shape[1] for ss in self.spec_slices]) self.max_width = self.spec_slices[max_ind].shape[1] - self.blank_spec = np.zeros( - (self.spec_slices[0].shape[0], self.max_width) - ) + self.blank_spec = np.zeros((self.spec_slices[0].shape[0], self.max_width)) def plot(self, fig_id): self.fig, self.ax = plt.subplots( @@ -147,17 +141,16 @@ class InteractivePlotter: ) // 2 new_spec[ :, - w_diff : self.spec_slices[self.current_id].shape[1] - + w_diff, + w_diff : self.spec_slices[self.current_id].shape[1] + w_diff, ] = self.spec_slices[self.current_id] self.spec_im.set_data(new_spec) self.spec_im.set_clim(vmin=0, vmax=new_spec.max()) # draw bounding box around call self.ax[1].patches[0].remove() - spec_width_orig = self.spec_slices[self.current_id].shape[ - 1 - ] / (1.0 + 2.0 * self.spec_pad) + spec_width_orig = self.spec_slices[self.current_id].shape[1] / ( + 1.0 + 2.0 * self.spec_pad + ) xx = w_diff + self.spec_pad * spec_width_orig ww = spec_width_orig yy = self.call_info[self.current_id]["low_freq"] / 1000 @@ -179,13 +172,9 @@ class InteractivePlotter: info_str = ( self.call_info[self.current_id]["file_name"] + ", time=" - + str( - round(self.call_info[self.current_id]["start_time"], 3) - ) + + str(round(self.call_info[self.current_id]["start_time"], 3)) + ", prob=" - + str( - 
round(self.call_info[self.current_id]["det_prob"], 3) - ) + + str(round(self.call_info[self.current_id]["det_prob"], 3)) ) self.ax[0].set_xlabel(info_str) diff --git a/bat_detect/utils/wavfile.py b/bat_detect/utils/wavfile.py index 7fee660..532a8c9 100644 --- a/bat_detect/utils/wavfile.py +++ b/bat_detect/utils/wavfile.py @@ -235,9 +235,7 @@ def write(filename, rate, data): # kind of numeric data in the numpy array dkind = data.dtype.kind if not ( - dkind == "i" - or dkind == "f" - or (dkind == "u" and data.dtype.itemsize == 1) + dkind == "i" or dkind == "f" or (dkind == "u" and data.dtype.itemsize == 1) ): raise ValueError("Unsupported data type '%s'" % data.dtype) @@ -270,9 +268,7 @@ def write(filename, rate, data): # Write the data (16, comp, noc, etc) in the correct binary format # for the wav header. the string format (first arg) specifies how many bytes for each # value. - fid.write( - struct.pack(" 0 ): - results_path = audio_file.replace( - args["audio_dir"], args["ann_dir"] - ) + results_path = audio_file.replace(args["audio_dir"], args["ann_dir"]) du.save_results_to_file(results, results_path) except: error_files.append(audio_file) @@ -50,9 +48,7 @@ if __name__ == "__main__": print(info_str) parser = argparse.ArgumentParser() - parser.add_argument( - "audio_dir", type=str, help="Input directory for audio" - ) + parser.add_argument("audio_dir", type=str, help="Input directory for audio") parser.add_argument( "ann_dir", type=str, diff --git a/scripts/gen_dataset_summary_image.py b/scripts/gen_dataset_summary_image.py index cb823d6..086e5b2 100644 --- a/scripts/gen_dataset_summary_image.py +++ b/scripts/gen_dataset_summary_image.py @@ -20,9 +20,7 @@ import bat_detect.utils.audio_utils as au if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - "audio_path", type=str, help="Input directory for audio" - ) + parser.add_argument("audio_path", type=str, help="Input directory for audio") parser.add_argument( "op_dir", type=str, @@ -33,9 +31,7 @@ if __name__ == "__main__": type=str, help="Path to where single annotation json file is stored", ) - parser.add_argument( - "--uk_split", type=str, default="", help="Set as: diff or same" - ) + parser.add_argument("--uk_split", type=str, default="", help="Set as: diff or same") parser.add_argument( "--file_type", type=str, @@ -67,9 +63,7 @@ if __name__ == "__main__": else: # load uk data - special case print("\nLoading:", args["uk_split"], "\n") - dataset_name = ( - "uk_" + args["uk_split"] - ) # should be uk_diff, or uk_same + dataset_name = "uk_" + args["uk_split"] # should be uk_diff, or uk_same datasets, _ = ts.get_train_test_data( args["ann_file"], args["audio_path"], @@ -90,9 +84,7 @@ if __name__ == "__main__": norm_type=params["norm_type"], ) - op_file_name = os.path.join( - args["op_dir"], dataset_name + "." + args["file_type"] - ) + op_file_name = os.path.join(args["op_dir"], dataset_name + "." 
+ args["file_type"]) vz.save_summary_image( x_train, y_train, class_names, params, op_file_name, class_names_order ) diff --git a/scripts/gen_spec_image.py b/scripts/gen_spec_image.py index 3d4cffa..8821459 100644 --- a/scripts/gen_spec_image.py +++ b/scripts/gen_spec_image.py @@ -25,9 +25,7 @@ import bat_detect.utils.plot_utils as viz def filter_anns(anns, start_time, stop_time): anns_op = [] for aa in anns: - if (aa["start_time"] >= start_time) and ( - aa["start_time"] < stop_time - 0.02 - ): + if (aa["start_time"] >= start_time) and (aa["start_time"] < stop_time - 0.02): anns_op.append(aa) return anns_op @@ -132,14 +130,10 @@ if __name__ == "__main__": print("File duration: {} seconds".format(duration)) # create spec for viz - spec, _ = au.generate_spectrogram( - audio, sampling_rate, params_bd, True, False - ) + spec, _ = au.generate_spectrogram(audio, sampling_rate, params_bd, True, False) # run model and filter detections so only keep ones in relevant time range - results = du.process_file( - args_cmd["audio_file"], model, params_bd, bd_args - ) + results = du.process_file(args_cmd["audio_file"], model, params_bd, bd_args) pred_anns = filter_anns( results["pred_dict"]["annotation"], args_cmd["start_time"], @@ -159,9 +153,7 @@ if __name__ == "__main__": ) op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean) op_path_pred = ( - os.path.basename(args_cmd["audio_file"])[:-4] - + "_pred." - + args_cmd["file_type"] + os.path.basename(args_cmd["audio_file"])[:-4] + "_pred." + args_cmd["file_type"] ) op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred) diff --git a/scripts/gen_spec_video.py b/scripts/gen_spec_video.py index 3c055ec..813db20 100644 --- a/scripts/gen_spec_video.py +++ b/scripts/gen_spec_video.py @@ -26,12 +26,8 @@ import bat_detect.utils.plot_utils as viz if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - "audio_file", type=str, help="Path to input audio file" - ) - parser.add_argument( - "model_path", type=str, help="Path to trained BatDetect model" - ) + parser.add_argument("audio_file", type=str, help="Path to input audio file") + parser.add_argument("model_path", type=str, help="Path to trained BatDetect model") parser.add_argument( "--op_dir", type=str, @@ -46,9 +42,7 @@ if __name__ == "__main__": action="store_true", help="Do not plot class names", ) - parser.add_argument( - "--disable_axis", action="store_true", help="Do not plot axis" - ) + parser.add_argument("--disable_axis", action="store_true", help="Do not plot axis") parser.add_argument( "--detection_threshold", type=float, @@ -135,9 +129,7 @@ if __name__ == "__main__": detections.append(bb) # plot boxes - fig = plt.figure( - 1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi - ) + fig = plt.figure(1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi) duration = au.x_coords_to_time( spec.shape[1], sampling_rate, @@ -196,9 +188,7 @@ if __name__ == "__main__": if ii > 0: spec_op[:, int(col), :] = 1.0 if reveal_boxes: - spec_op[:, int(col) + 1 :, :] = spec_blank[ - :, int(col) + 1 :, : - ] + spec_op[:, int(col) + 1 :, :] = spec_blank[:, int(col) + 1 :, :] elif ii == 0 and reveal_boxes: spec_op = spec_blank diff --git a/scripts/viz_helpers.py b/scripts/viz_helpers.py index 667bb9c..f36cd94 100644 --- a/scripts/viz_helpers.py +++ b/scripts/viz_helpers.py @@ -23,9 +23,7 @@ def generate_spectrogram_data( # spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy() if spec.shape[0] < max_freq: 
freq_pad = max_freq - spec.shape[0] - spec = np.vstack( - (np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec) - ) + spec = np.vstack((np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec)) spec = spec[-max_freq : spec.shape[0] - min_freq, :] if norm_type == "log": @@ -35,11 +33,7 @@ def generate_spectrogram_data( * ( 1.0 / ( - np.abs( - np.hanning( - int(params["fft_win_length"] * sampling_rate) - ) - ) + np.abs(np.hanning(int(params["fft_win_length"] * sampling_rate))) ** 2 ).sum() ) @@ -112,9 +106,7 @@ def load_data( max_samps = params["spec_width"] * (nfft - noverlap) + noverlap if max_samps > audio.shape[0]: - audio = np.hstack( - (audio, np.zeros(max_samps - audio.shape[0])) - ) + audio = np.hstack((audio, np.zeros(max_samps - audio.shape[0]))) audio = audio[:max_samps].astype(np.float32) audio = au.pad_audio( @@ -147,9 +139,7 @@ def load_data( params["fft_overlap"], ) ) - y1 = (ann["low_freq"] - params["min_freq"]) * params[ - "fft_win_length" - ] + y1 = (ann["low_freq"] - params["min_freq"]) * params["fft_win_length"] coords.append((y1, x1)) _, file_ids = np.unique(file_names, return_inverse=True) @@ -215,9 +205,7 @@ def save_summary_image( ) col.grid(color="w", alpha=0.3, linewidth=0.3) col.set_xticks([]) - col.title.set_text( - str(ii + 1) + " " + species_names[order[ii]] - ) + col.title.set_text(str(ii + 1) + " " + species_names[order[ii]]) col.tick_params(axis="both", which="major", labelsize=7) ii += 1
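
Reviewer note: every hunk in this patch is a pure re-wrap. Expressions that were previously split across several lines (apparently to satisfy a 79-character limit) are collapsed onto single lines of at most 88 characters, which matches Black's default line length. No identifiers, literals, or control flow are changed anywhere in the diff, so the patch should be behavior-preserving by construction.

A formatting-only change like this can be checked mechanically instead of by eye: Python's ast module ignores line wrapping and comments, so each touched file should parse to an identical tree before and after the patch. Below is a minimal sketch of such a check; old.py/new.py are placeholder paths standing in for the pre- and post-patch versions of any file in the diff, not files that exist in this repository.

    import ast

    def is_pure_reformat(old_path: str, new_path: str) -> bool:
        """Return True when two Python sources parse to the same AST.

        Line wrapping, indentation inside expressions, and comments are
        not part of the AST, so a formatting-only patch should compare
        equal for every touched file.
        """
        with open(old_path) as f_old, open(new_path) as f_new:
            old_tree = ast.parse(f_old.read())
            new_tree = ast.parse(f_new.read())
        # ast.dump() omits line/column attributes by default, so the
        # comparison sees structure only, never layout.
        return ast.dump(old_tree) == ast.dump(new_tree)

    if __name__ == "__main__":
        # e.g. compare bat_detect/detector/models.py before and after
        print(is_pure_reformat("old.py", "new.py"))

If the intent was indeed Black's defaults, running `black --check .` on the patched tree should also pass with no further changes reported.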