From 78ede31b8bec12245acb884c6310a0a6f8bc19f5 Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Thu, 19 Mar 2026 14:44:05 +0000 Subject: [PATCH] Remove stale scripts --- scripts/README.md | 17 -- scripts/gen_dataset_summary_image.py | 96 --------- scripts/gen_spec_image.py | 211 -------------------- scripts/gen_spec_video.py | 278 --------------------------- scripts/viz_helpers.py | 226 ---------------------- 5 files changed, 828 deletions(-) delete mode 100644 scripts/README.md delete mode 100644 scripts/gen_dataset_summary_image.py delete mode 100644 scripts/gen_spec_image.py delete mode 100644 scripts/gen_spec_video.py delete mode 100644 scripts/viz_helpers.py diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index bcc4692..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,17 +0,0 @@ -This directory contains some scripts for visualizing the raw data and model outputs. - - -`gen_spec_image.py`: saves the model predictions on a spectrogram of the input audio file. -e.g. -`python gen_spec_image.py ../example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav ../models/Net2DFast_UK_same.pth.tar` - - -`gen_spec_video.py`: generates a video showing the model predictions for a file. -e.g. -`python gen_spec_video.py ../example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav ../models/Net2DFast_UK_same.pth.tar` - - - -`gen_dataset_summary_image.py`: generates an image displaying the mean spectrogram for each class in a specified dataset. -e.g. -`python gen_dataset_summary_image.py --ann_file PATH_TO_ANN/australia_TRAIN.json PATH_TO_AUDIO/audio/ ../plots/australia/` diff --git a/scripts/gen_dataset_summary_image.py b/scripts/gen_dataset_summary_image.py deleted file mode 100644 index 3e0a26b..0000000 --- a/scripts/gen_dataset_summary_image.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Loads a set of annotations corresponding to a dataset and saves an image which -is the mean spectrogram for each class. -""" - -import argparse -import os - -import matplotlib.pyplot as plt -import numpy as np -import viz_helpers as vz - -import batdetect2.detector.parameters as parameters -import batdetect2.train.train_split as ts -import batdetect2.train.train_utils as tu -import batdetect2.utils.audio_utils as au - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "audio_path", type=str, help="Input directory for audio" - ) - parser.add_argument( - "op_dir", - type=str, - help="Path to where single annotation json file is stored", - ) - parser.add_argument( - "--ann_file", - type=str, - help="Path to where single annotation json file is stored", - ) - parser.add_argument( - "--uk_split", type=str, default="", help="Set as: diff or same" - ) - parser.add_argument( - "--file_type", - type=str, - default="png", - help="Type of image to save png or pdf", - ) - args = vars(parser.parse_args()) - - if not os.path.isdir(args["op_dir"]): - os.makedirs(args["op_dir"]) - - params = parameters.get_params(False) - params["smooth_spec"] = False - params["spec_width"] = 48 - params["norm_type"] = "log" # log, pcen - params["aud_pad"] = 0.005 - classes_to_ignore = params["classes_to_ignore"] + params["generic_class"] - - # load train annotations - if args["uk_split"] == "": - print("\nLoading:", args["ann_file"], "\n") - dataset_name = os.path.basename(args["ann_file"]).replace(".json", "") - datasets = [] - datasets.append( - tu.get_blank_dataset_dict( - dataset_name, False, args["ann_file"], args["audio_path"] - ) - ) - else: - # load uk data - special case - print("\nLoading:", args["uk_split"], "\n") - dataset_name = ( - "uk_" + args["uk_split"] - ) # should be uk_diff, or uk_same - datasets, _ = ts.get_train_test_data( - args["ann_file"], - args["audio_path"], - args["uk_split"], - load_extra=False, - ) - - anns, class_names, _ = tu.load_set_of_anns( - datasets, classes_to_ignore, params["events_of_interest"] - ) - class_names_order = range(len(class_names)) - - x_train, y_train = vz.load_data( - anns, - params, - class_names, - smooth_spec=params["smooth_spec"], - norm_type=params["norm_type"], - ) - - op_file_name = os.path.join( - args["op_dir"], dataset_name + "." + args["file_type"] - ) - vz.save_summary_image( - x_train, y_train, class_names, params, op_file_name, class_names_order - ) - print("\nImage saved to:", op_file_name) diff --git a/scripts/gen_spec_image.py b/scripts/gen_spec_image.py deleted file mode 100644 index 490cad3..0000000 --- a/scripts/gen_spec_image.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -Visualize predctions on top of spectrogram. - -Will save images with: -1) raw spectrogram -2) spectrogram with GT boxes -3) spectrogram with predicted boxes -""" - -import argparse -import json -import os -import sys - -import torch -import matplotlib.pyplot as plt -import numpy as np - -import batdetect2.evaluate.evaluate_models as evlm -import batdetect2.utils.audio_utils as au -import batdetect2.utils.detector_utils as du -import batdetect2.utils.plot_utils as viz - - -def filter_anns(anns, start_time, stop_time): - anns_op = [] - for aa in anns: - if (aa["start_time"] >= start_time) and ( - aa["start_time"] < stop_time - 0.02 - ): - anns_op.append(aa) - return anns_op - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("audio_file", type=str, help="Path to audio file") - parser.add_argument("model_path", type=str, help="Path to BatDetect model") - parser.add_argument( - "--ann_file", type=str, default="", help="Path to annotation file" - ) - parser.add_argument( - "--op_dir", - type=str, - default="plots/", - help="Output directory for plots", - ) - parser.add_argument( - "--file_type", - type=str, - default="png", - help="Type of image to save png or pdf", - ) - parser.add_argument( - "--title_text", - type=str, - default="", - help="Text to add as title of plots", - ) - parser.add_argument( - "--detection_threshold", - type=float, - default=0.2, - help="Threshold for output detections", - ) - parser.add_argument( - "--start_time", - type=float, - default=0.0, - help="Start time for cropped file", - ) - parser.add_argument( - "--stop_time", - type=float, - default=0.5, - help="End time for cropped file", - ) - parser.add_argument( - "--time_expansion_factor", - type=int, - default=1, - help="Time expansion factor", - ) - - args_cmd = vars(parser.parse_args()) - - # load the model - bd_args = du.get_default_bd_args() - model, params_bd = du.load_model(args_cmd["model_path"]) - bd_args["detection_threshold"] = args_cmd["detection_threshold"] - bd_args["time_expansion_factor"] = args_cmd["time_expansion_factor"] - - # load the annotation if it exists - gt_present = False - if args_cmd["ann_file"] != "": - if os.path.isfile(args_cmd["ann_file"]): - with open(args_cmd["ann_file"]) as da: - gt_anns = json.load(da) - gt_anns = filter_anns( - gt_anns["annotation"], - args_cmd["start_time"], - args_cmd["stop_time"], - ) - gt_present = True - else: - print("Annotation file not found: ", args_cmd["ann_file"]) - - # load the audio file - if not os.path.isfile(args_cmd["audio_file"]): - print("Audio file not found: ", args_cmd["audio_file"]) - sys.exit() - - # load audio and crop - print("\nProcessing: " + os.path.basename(args_cmd["audio_file"])) - print("\nOutput directory: " + args_cmd["op_dir"]) - sampling_rate, audio = au.load_audio( - args_cmd["audio_file"], - args_cmd["time_exp"], - params_bd["target_samp_rate"], - params_bd["scale_raw_audio"], - ) - st_samp = int(sampling_rate * args_cmd["start_time"]) - en_samp = int(sampling_rate * args_cmd["stop_time"]) - if en_samp > audio.shape[0]: - audio = np.hstack( - (audio, np.zeros((en_samp) - audio.shape[0], dtype=audio.dtype)) - ) - audio = audio[st_samp:en_samp] - - duration = audio.shape[0] / sampling_rate - print("File duration: {} seconds".format(duration)) - - # create spec for viz - spec, _ = au.generate_spectrogram( - audio, sampling_rate, params_bd, True, False - ) - - run_config = { - **params_bd, - **bd_args, - } - - # run model and filter detections so only keep ones in relevant time range - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - results = du.process_file( - args_cmd["audio_file"], model, run_config, device - ) - pred_anns = filter_anns( - results["pred_dict"]["annotation"], - args_cmd["start_time"], - args_cmd["stop_time"], - ) - print(len(pred_anns), "Detections") - - # save output - if not os.path.isdir(args_cmd["op_dir"]): - os.makedirs(args_cmd["op_dir"]) - - # create output file names - op_path_clean = ( - os.path.basename(args_cmd["audio_file"])[:-4] - + "_clean." - + args_cmd["file_type"] - ) - op_path_clean = os.path.join(args_cmd["op_dir"], op_path_clean) - op_path_pred = ( - os.path.basename(args_cmd["audio_file"])[:-4] - + "_pred." - + args_cmd["file_type"] - ) - op_path_pred = os.path.join(args_cmd["op_dir"], op_path_pred) - - # create and save iamges - viz.save_ann_spec( - op_path_clean, - spec, - params_bd["min_freq"], - params_bd["max_freq"], - duration, - args_cmd["start_time"], - "", - None, - ) - viz.save_ann_spec( - op_path_pred, - spec, - params_bd["min_freq"], - params_bd["max_freq"], - duration, - args_cmd["start_time"], - "", - pred_anns, - ) - - if gt_present: - op_path_gt = ( - os.path.basename(args_cmd["audio_file"])[:-4] - + "_gt." - + args_cmd["file_type"] - ) - op_path_gt = os.path.join(args_cmd["op_dir"], op_path_gt) - viz.save_ann_spec( - op_path_gt, - spec, - params_bd["min_freq"], - params_bd["max_freq"], - duration, - args_cmd["start_time"], - "", - gt_anns, - ) diff --git a/scripts/gen_spec_video.py b/scripts/gen_spec_video.py deleted file mode 100644 index 625ba1a..0000000 --- a/scripts/gen_spec_video.py +++ /dev/null @@ -1,278 +0,0 @@ -""" -This script takes an audio file as input, runs the detector, and makes a video output - -Notes: - It needs ffmpeg installed to make the videos - Sometimes conda can overwrite the default ffmpeg path set this to use system one. - Check which one is being used with `which ffmpeg`. If conda version, can thow an error. - Best to use system one - see ffmpeg_path. -""" - -import argparse -import os -import shutil -import sys - -import matplotlib.pyplot as plt -import numpy as np -import torch -from scipy.io import wavfile - -import batdetect2.detector.parameters as parameters -import batdetect2.utils.audio_utils as au -import batdetect2.utils.detector_utils as du -import batdetect2.utils.plot_utils as viz - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "audio_file", type=str, help="Path to input audio file" - ) - parser.add_argument( - "model_path", type=str, help="Path to trained BatDetect model" - ) - parser.add_argument( - "--op_dir", - type=str, - default="generated_vids/", - help="Path to output directory", - ) - parser.add_argument( - "--no_detector", action="store_true", help="Do not run detector" - ) - parser.add_argument( - "--plot_class_names_off", - action="store_true", - help="Do not plot class names", - ) - parser.add_argument( - "--disable_axis", action="store_true", help="Do not plot axis" - ) - parser.add_argument( - "--detection_threshold", - type=float, - default=0.2, - help="Cut-off probability for detector", - ) - parser.add_argument( - "--time_expansion_factor", - type=int, - default=1, - dest="time_expansion_factor", - help="The time expansion factor used for all files (default is 1)", - ) - args_cmd = vars(parser.parse_args()) - - # file of interest - audio_file = args_cmd["audio_file"] - op_dir = args_cmd["op_dir"] - op_str = "_output" - ffmpeg_path = "/usr/bin/" - - if not os.path.isfile(audio_file): - print("Audio file not found: ", audio_file) - sys.exit() - - if not os.path.isfile(args_cmd["model_path"]): - print("Model not found: ", args_cmd["model_path"]) - sys.exit() - - start_time = 0.0 - duration = 0.5 - reveal_boxes = True # makes the boxes appear one at a time - fps = 24 - dpi = 100 - - op_dir_tmp = os.path.join(op_dir, "op_tmp_vids", "") - if not os.path.isdir(op_dir_tmp): - os.makedirs(op_dir_tmp) - if not os.path.isdir(op_dir): - os.makedirs(op_dir) - - params = parameters.get_params(False) - args = du.get_default_bd_args() - args["time_expansion_factor"] = args_cmd["time_expansion_factor"] - args["detection_threshold"] = args_cmd["detection_threshold"] - - # load audio file - print("\nProcessing: " + os.path.basename(audio_file)) - print("\nOutput directory: " + op_dir) - sampling_rate, audio = au.load_audio( - audio_file, args["time_expansion_factor"], params["target_samp_rate"] - ) - audio = audio[ - int(sampling_rate * start_time) : int( - sampling_rate * start_time + sampling_rate * duration - ) - ] - audio_orig = audio.copy() - audio = au.pad_audio( - audio, - sampling_rate, - params["fft_win_length"], - params["fft_overlap"], - params["resize_factor"], - params["spec_divide_factor"], - ) - - # generate spectrogram - spec, _ = au.generate_spectrogram(audio, sampling_rate, params, True) - max_val = spec.max() * 1.1 - - if not args_cmd["no_detector"]: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - print(" Loading model and running detector on entire file ...") - model, det_params = du.load_model(args_cmd["model_path"]) - det_params["detection_threshold"] = args["detection_threshold"] - - run_config = { - **det_params, - **args, - } - results = du.process_file( - audio_file, - model, - run_config, - device, - ) - - print(" Processing detections and plotting ...") - detections = [] - for bb in results["pred_dict"]["annotation"]: - if (bb["start_time"] >= start_time) and ( - bb["end_time"] < start_time + duration - ): - detections.append(bb) - - # plot boxes - fig = plt.figure( - 1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi - ) - duration = au.x_coords_to_time( - spec.shape[1], - sampling_rate, - params["fft_win_length"], - params["fft_overlap"], - ) - viz.create_box_image( - spec, - fig, - detections, - start_time, - start_time + duration, - duration, - params, - max_val, - plot_class_names=not args_cmd["plot_class_names_off"], - ) - op_im_file_boxes = os.path.join( - op_dir, os.path.basename(audio_file)[:-4] + op_str + "_boxes.png" - ) - fig.savefig(op_im_file_boxes, dpi=dpi) - plt.close(1) - spec_with_boxes = plt.imread(op_im_file_boxes) - - print(" Saving audio file ...") - if args["time_expansion_factor"] == 1: - sampling_rate_op = int(sampling_rate / 10.0) - else: - sampling_rate_op = sampling_rate - op_audio_file = os.path.join( - op_dir, os.path.basename(audio_file)[:-4] + op_str + ".wav" - ) - wavfile.write(op_audio_file, sampling_rate_op, audio_orig) - - print(" Saving image ...") - op_im_file = os.path.join( - op_dir, os.path.basename(audio_file)[:-4] + op_str + ".png" - ) - plt.imsave(op_im_file, spec, vmin=0, vmax=max_val, cmap="plasma") - spec_blank = plt.imread(op_im_file) - - # create figure - freq_scale = 1000 # turn Hz to kHz - min_freq = params["min_freq"] // freq_scale - max_freq = params["max_freq"] // freq_scale - y_extent = [0, duration, min_freq, max_freq] - - print(" Saving video frames ...") - # save images that will be combined into video - # will either plot with or without boxes - for ii, col in enumerate( - np.linspace(0, spec.shape[1] - 1, int(fps * duration * 10)) - ): - if not args_cmd["no_detector"]: - spec_op = spec_with_boxes.copy() - if ii > 0: - spec_op[:, int(col), :] = 1.0 - if reveal_boxes: - spec_op[:, int(col) + 1 :, :] = spec_blank[ - :, int(col) + 1 :, : - ] - elif ii == 0 and reveal_boxes: - spec_op = spec_blank - - if not args_cmd["disable_axis"]: - plt.close("all") - fig = plt.figure( - ii, - figsize=( - 1.2 * (spec_op.shape[1] / dpi), - 1.5 * (spec_op.shape[0] / dpi), - ), - dpi=dpi, - ) - plt.xlabel("Time - seconds") - plt.ylabel("Frequency - kHz") - plt.imshow( - spec_op, - vmin=0, - vmax=1.0, - cmap="plasma", - extent=y_extent, - aspect="auto", - ) - plt.tight_layout() - fig.savefig(op_dir_tmp + str(ii).zfill(4) + ".png", dpi=dpi) - else: - plt.imsave( - op_dir_tmp + str(ii).zfill(4) + ".png", - spec_op, - vmin=0, - vmax=1.0, - cmap="plasma", - ) - else: - spec_op = spec.copy() - if ii > 0: - spec_op[:, int(col)] = max_val - plt.imsave( - op_dir_tmp + str(ii).zfill(4) + ".png", - spec_op, - vmin=0, - vmax=max_val, - cmap="plasma", - ) - - print(" Creating video ...") - op_vid_file = os.path.join( - op_dir, os.path.basename(audio_file)[:-4] + op_str + ".avi" - ) - ffmpeg_cmd = ( - "ffmpeg -hide_banner -loglevel panic -y -r {} -f image2 -s {}x{} -i {}%04d.png -i {} -vcodec libx264 " - "-crf 25 -pix_fmt yuv420p -acodec copy {}".format( - fps, - spec.shape[1], - spec.shape[0], - op_dir_tmp, - op_audio_file, - op_vid_file, - ) - ) - ffmpeg_cmd = ffmpeg_path + ffmpeg_cmd - os.system(ffmpeg_cmd) - - print(" Deleting temporary files ...") - if os.path.isdir(op_dir_tmp): - shutil.rmtree(op_dir_tmp) diff --git a/scripts/viz_helpers.py b/scripts/viz_helpers.py deleted file mode 100644 index 4d86283..0000000 --- a/scripts/viz_helpers.py +++ /dev/null @@ -1,226 +0,0 @@ -import os -import sys - -import matplotlib.pyplot as plt -import numpy as np -from scipy import ndimage - -sys.path.append(os.path.join("..")) - -import batdetect2.utils.audio_utils as au - - -def generate_spectrogram_data( - audio, sampling_rate, params, norm_type="log", smooth_spec=False -): - max_freq = round(params["max_freq"] * params["fft_win_length"]) - min_freq = round(params["min_freq"] * params["fft_win_length"]) - - # create spectrogram - numpy - spec = au.gen_mag_spectrogram( - audio, sampling_rate, params["fft_win_length"], params["fft_overlap"] - ) - # spec = au.gen_mag_spectrogram_pt(audio, sampling_rate, params['fft_win_length'], params['fft_overlap']).numpy() - if spec.shape[0] < max_freq: - freq_pad = max_freq - spec.shape[0] - spec = np.vstack( - (np.zeros((freq_pad, spec.shape[1]), dtype=np.float32), spec) - ) - spec = spec[-max_freq : spec.shape[0] - min_freq, :] - - if norm_type == "log": - log_scaling = ( - 2.0 - * (1.0 / sampling_rate) - * ( - 1.0 - / ( - np.abs( - np.hanning( - int(params["fft_win_length"] * sampling_rate) - ) - ) - ** 2 - ).sum() - ) - ) - ##log_scaling = 0.01 - spec = np.log(1.0 + log_scaling * spec).astype(np.float32) - elif norm_type == "pcen": - spec = au.pcen(spec, sampling_rate) - else: - pass - - if smooth_spec: - spec = ndimage.gaussian_filter(spec, 1) - - return spec - - -def load_data( - anns, - params, - class_names, - smooth_spec=False, - norm_type="log", - extract_bg=False, -): - specs = [] - labels = [] - coords = [] - audios = [] - sampling_rates = [] - file_names = [] - for cur_file in anns: - sampling_rate, audio_orig = au.load_audio( - cur_file["file_path"], - cur_file["time_exp"], - params["target_samp_rate"], - params["scale_raw_audio"], - ) - - for ann in cur_file["annotation"]: - if ( - ann["class"] not in params["classes_to_ignore"] - and ann["class"] in class_names - ): - # clip out of bounds - if ann["low_freq"] < params["min_freq"]: - ann["low_freq"] = params["min_freq"] - if ann["high_freq"] > params["max_freq"]: - ann["high_freq"] = params["max_freq"] - - # load cropped audio - start_samp_diff = int(sampling_rate * ann["start_time"]) - int( - sampling_rate * params["aud_pad"] - ) - start_samp = np.maximum(0, start_samp_diff) - end_samp = np.minimum( - audio_orig.shape[0], - int(sampling_rate * ann["end_time"]) * 2 - + int(sampling_rate * params["aud_pad"]), - ) - audio = audio_orig[start_samp:end_samp] - if start_samp_diff < 0: - # need to pad at start if the call is at the very begining - audio = np.hstack( - (np.zeros(-start_samp_diff, dtype=np.float32), audio) - ) - - nfft = int(params["fft_win_length"] * sampling_rate) - noverlap = int(params["fft_overlap"] * nfft) - max_samps = params["spec_width"] * (nfft - noverlap) + noverlap - - if max_samps > audio.shape[0]: - audio = np.hstack( - (audio, np.zeros(max_samps - audio.shape[0])) - ) - audio = audio[:max_samps].astype(np.float32) - - audio = au.pad_audio( - audio, - sampling_rate, - params["fft_win_length"], - params["fft_overlap"], - params["resize_factor"], - params["spec_divide_factor"], - ) - - # generate spectrogram - spec = generate_spectrogram_data( - audio, sampling_rate, params, norm_type, smooth_spec - )[:, : params["spec_width"]] - - specs.append(spec[np.newaxis, ...]) - labels.append(ann["class"]) - - audios.append(audio) - sampling_rates.append(sampling_rate) - file_names.append(cur_file["file_path"]) - - # position in crop - x1 = int( - au.time_to_x_coords( - np.array(params["aud_pad"]), - sampling_rate, - params["fft_win_length"], - params["fft_overlap"], - ) - ) - y1 = (ann["low_freq"] - params["min_freq"]) * params[ - "fft_win_length" - ] - coords.append((y1, x1)) - - _, file_ids = np.unique(file_names, return_inverse=True) - labels = np.array([class_names.index(ll) for ll in labels]) - - # return np.vstack(specs), labels, coords, audios, sampling_rates, file_ids, file_names - return np.vstack(specs), labels - - -def save_summary_image( - specs, - labels, - species_names, - params, - op_file_name="plots/all_species.png", - order=None, -): - # takes the mean for each class and plots it on a grid - mean_specs = [] - max_band = [] - for ii in range(len(species_names)): - inds = np.where(labels == ii)[0] - mu = specs[inds, :].mean(0) - max_band.append(np.argmax(mu.sum(1))) - mean_specs.append(mu) - - # control the order in which classes are printed - if order is None: - order = np.arange(len(species_names)) - - max_cols = 6 - nrows = int(np.ceil(len(species_names) / max_cols)) - ncols = np.minimum(len(species_names), max_cols) - - fig, ax = plt.subplots( - nrows=nrows, - ncols=ncols, - figsize=(ncols * 3.3, nrows * 6), - gridspec_kw={"wspace": 0, "hspace": 0.2}, - ) - spec_min_max = ( - 0, - mean_specs[0].shape[1], - params["min_freq"] / 1000, - params["max_freq"] / 1000, - ) - ii = 0 - for row in ax: - if type(row) != np.ndarray: - row = np.array([row]) - - for col in row: - if ii >= len(species_names): - col.axis("off") - else: - inds = np.where(labels == order[ii])[0] - col.imshow( - mean_specs[order[ii]], - extent=spec_min_max, - cmap="plasma", - aspect="equal", - ) - col.grid(color="w", alpha=0.3, linewidth=0.3) - col.set_xticks([]) - col.title.set_text( - str(ii + 1) + " " + species_names[order[ii]] - ) - col.tick_params(axis="both", which="major", labelsize=7) - ii += 1 - - # plt.tight_layout() - # plt.show() - plt.savefig(op_file_name) - plt.close("all")