""" This script takes an audio file as input, runs the detector, and makes a video output Notes: It needs ffmpeg installed to make the videos Sometimes conda can overwrite the default ffmpeg path set this to use system one. Check which one is being used with `which ffmpeg`. If conda version, can thow an error. Best to use system one - see ffmpeg_path. """ import argparse import os import shutil import sys import matplotlib.pyplot as plt import numpy as np from scipy.io import wavfile sys.path.append(os.path.join("..")) import bat_detect.detector.parameters as parameters import bat_detect.utils.audio_utils as au import bat_detect.utils.detector_utils as du import bat_detect.utils.plot_utils as viz if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "audio_file", type=str, help="Path to input audio file" ) parser.add_argument( "model_path", type=str, help="Path to trained BatDetect model" ) parser.add_argument( "--op_dir", type=str, default="generated_vids/", help="Path to output directory", ) parser.add_argument( "--no_detector", action="store_true", help="Do not run detector" ) parser.add_argument( "--plot_class_names_off", action="store_true", help="Do not plot class names", ) parser.add_argument( "--disable_axis", action="store_true", help="Do not plot axis" ) parser.add_argument( "--detection_threshold", type=float, default=0.2, help="Cut-off probability for detector", ) parser.add_argument( "--time_expansion_factor", type=int, default=1, dest="time_expansion_factor", help="The time expansion factor used for all files (default is 1)", ) args_cmd = vars(parser.parse_args()) # file of interest audio_file = args_cmd["audio_file"] op_dir = args_cmd["op_dir"] op_str = "_output" ffmpeg_path = "/usr/bin/" if not os.path.isfile(audio_file): print("Audio file not found: ", audio_file) sys.exit() if not os.path.isfile(args_cmd["model_path"]): print("Model not found: ", model_path) sys.exit() start_time = 0.0 duration = 0.5 reveal_boxes = True # makes the boxes appear one at a time fps = 24 dpi = 100 op_dir_tmp = os.path.join(op_dir, "op_tmp_vids", "") if not os.path.isdir(op_dir_tmp): os.makedirs(op_dir_tmp) if not os.path.isdir(op_dir): os.makedirs(op_dir) params = parameters.get_params(False) args = du.get_default_bd_args() args["time_expansion_factor"] = args_cmd["time_expansion_factor"] args["detection_threshold"] = args_cmd["detection_threshold"] # load audio file print("\nProcessing: " + os.path.basename(audio_file)) print("\nOutput directory: " + op_dir) sampling_rate, audio = au.load_audio_file( audio_file, args["time_expansion_factor"], params["target_samp_rate"] ) audio = audio[ int(sampling_rate * start_time) : int( sampling_rate * start_time + sampling_rate * duration ) ] audio_orig = audio.copy() audio = au.pad_audio( audio, sampling_rate, params["fft_win_length"], params["fft_overlap"], params["resize_factor"], params["spec_divide_factor"], ) # generate spectrogram spec, _ = au.generate_spectrogram(audio, sampling_rate, params, True) max_val = spec.max() * 1.1 if not args_cmd["no_detector"]: print(" Loading model and running detector on entire file ...") model, det_params = du.load_model(args_cmd["model_path"]) det_params["detection_threshold"] = args["detection_threshold"] results = du.process_file(audio_file, model, det_params, args) print(" Processing detections and plotting ...") detections = [] for bb in results["pred_dict"]["annotation"]: if (bb["start_time"] >= start_time) and ( bb["end_time"] < start_time + duration ): detections.append(bb) # 
        # plot boxes
        fig = plt.figure(
            1, figsize=(spec.shape[1] / dpi, spec.shape[0] / dpi), dpi=dpi
        )
        duration = au.x_coords_to_time(
            spec.shape[1],
            sampling_rate,
            params["fft_win_length"],
            params["fft_overlap"],
        )
        viz.create_box_image(
            spec,
            fig,
            detections,
            start_time,
            start_time + duration,
            duration,
            params,
            max_val,
            plot_class_names=not args_cmd["plot_class_names_off"],
        )
        op_im_file_boxes = os.path.join(
            op_dir, os.path.basename(audio_file)[:-4] + op_str + "_boxes.png"
        )
        fig.savefig(op_im_file_boxes, dpi=dpi)
        plt.close(1)
        spec_with_boxes = plt.imread(op_im_file_boxes)

    print(" Saving audio file ...")
    if args["time_expansion_factor"] == 1:
        sampling_rate_op = int(sampling_rate / 10.0)
    else:
        sampling_rate_op = sampling_rate
    op_audio_file = os.path.join(
        op_dir, os.path.basename(audio_file)[:-4] + op_str + ".wav"
    )
    wavfile.write(op_audio_file, sampling_rate_op, audio_orig)

    print(" Saving image ...")
    op_im_file = os.path.join(
        op_dir, os.path.basename(audio_file)[:-4] + op_str + ".png"
    )
    plt.imsave(op_im_file, spec, vmin=0, vmax=max_val, cmap="plasma")
    spec_blank = plt.imread(op_im_file)

    # create figure
    freq_scale = 1000  # turn Hz to kHz
    min_freq = params["min_freq"] // freq_scale
    max_freq = params["max_freq"] // freq_scale
    y_extent = [0, duration, min_freq, max_freq]

    print(" Saving video frames ...")
    # save images that will be combined into video
    # will either plot with or without boxes
    for ii, col in enumerate(
        np.linspace(0, spec.shape[1] - 1, int(fps * duration * 10))
    ):
        if not args_cmd["no_detector"]:
            spec_op = spec_with_boxes.copy()
            if ii > 0:
                spec_op[:, int(col), :] = 1.0
                if reveal_boxes:
                    spec_op[:, int(col) + 1 :, :] = spec_blank[
                        :, int(col) + 1 :, :
                    ]
            elif ii == 0 and reveal_boxes:
                spec_op = spec_blank

            if not args_cmd["disable_axis"]:
                plt.close("all")
                fig = plt.figure(
                    ii,
                    figsize=(
                        1.2 * (spec_op.shape[1] / dpi),
                        1.5 * (spec_op.shape[0] / dpi),
                    ),
                    dpi=dpi,
                )
                plt.xlabel("Time - seconds")
                plt.ylabel("Frequency - kHz")
                plt.imshow(
                    spec_op,
                    vmin=0,
                    vmax=1.0,
                    cmap="plasma",
                    extent=y_extent,
                    aspect="auto",
                )
                plt.tight_layout()
                fig.savefig(op_dir_tmp + str(ii).zfill(4) + ".png", dpi=dpi)
            else:
                plt.imsave(
                    op_dir_tmp + str(ii).zfill(4) + ".png",
                    spec_op,
                    vmin=0,
                    vmax=1.0,
                    cmap="plasma",
                )
        else:
            spec_op = spec.copy()
            if ii > 0:
                spec_op[:, int(col)] = max_val
            plt.imsave(
                op_dir_tmp + str(ii).zfill(4) + ".png",
                spec_op,
                vmin=0,
                vmax=max_val,
                cmap="plasma",
            )

    print(" Creating video ...")
    op_vid_file = os.path.join(
        op_dir, os.path.basename(audio_file)[:-4] + op_str + ".avi"
    )
    ffmpeg_cmd = (
        "ffmpeg -hide_banner -loglevel panic -y -r {} -f image2 -s {}x{} -i {}%04d.png -i {} -vcodec libx264 "
        "-crf 25 -pix_fmt yuv420p -acodec copy {}".format(
            fps,
            spec.shape[1],
            spec.shape[0],
            op_dir_tmp,
            op_audio_file,
            op_vid_file,
        )
    )
    ffmpeg_cmd = ffmpeg_path + ffmpeg_cmd
    os.system(ffmpeg_cmd)

    print(" Deleting temporary files ...")
    if os.path.isdir(op_dir_tmp):
        shutil.rmtree(op_dir_tmp)
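
# Example invocation (script and file names below are illustrative; point them
# at your own recording and trained model checkpoint):
#
#   python gen_spec_video.py example_audio.wav model.pth.tar \
#       --op_dir generated_vids/ --detection_threshold 0.3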