"""
This script takes an audio file as input, runs the detector, and makes a video output.

Notes:
    It needs ffmpeg installed to make the videos.
    Sometimes conda can overwrite the default ffmpeg path; set `ffmpeg_path` to use the
    system one. Check which one is being used with `which ffmpeg`. The conda version
    can throw an error, so it is best to use the system one - see ffmpeg_path.
"""

import argparse
import os
import shutil
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile

sys.path.append(os.path.join('..'))
import bat_detect.detector.parameters as parameters
import bat_detect.utils.audio_utils as au
import bat_detect.utils.plot_utils as viz
import bat_detect.utils.detector_utils as du


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('audio_file', type=str, help='Path to input audio file')
    parser.add_argument('model_path', type=str, help='Path to trained BatDetect model')
    parser.add_argument('--op_dir', type=str, default='generated_vids/',
                        help='Path to output directory')
    parser.add_argument('--no_detector', action='store_true',
                        help='Do not run detector')
    parser.add_argument('--plot_class_names_off', action='store_true',
                        help='Do not plot class names')
    parser.add_argument('--disable_axis', action='store_true',
                        help='Do not plot axis')
    parser.add_argument('--detection_threshold', type=float, default=0.2,
                        help='Cut-off probability for detector')
    parser.add_argument('--time_expansion_factor', type=int, default=1,
                        dest='time_expansion_factor',
                        help='The time expansion factor used for all files (default is 1)')
    args_cmd = vars(parser.parse_args())

    # file of interest
    audio_file = args_cmd['audio_file']
    op_dir = args_cmd['op_dir']
    op_str = '_output'
    ffmpeg_path = '/usr/bin/'  # directory holding the ffmpeg binary to use

    if not os.path.isfile(audio_file):
        print('Audio file not found: ', audio_file)
        sys.exit()

    if not os.path.isfile(args_cmd['model_path']):
        # the path only lives in args_cmd; there is no standalone model_path variable
        print('Model not found: ', args_cmd['model_path'])
        sys.exit()

    start_time = 0.0
    duration = 0.5
    reveal_boxes = True  # makes the boxes appear one at a time
    fps = 24
    dpi = 100

    # the trailing '' keeps a path separator on the end so that frame file names
    # can be appended directly; creating the temp dir also creates op_dir
    op_dir_tmp = os.path.join(op_dir, 'op_tmp_vids', '')
    os.makedirs(op_dir_tmp, exist_ok=True)

    # common prefix of every output file: <op_dir>/<input name without extension>_output
    # splitext is used so that extensions of any length are stripped correctly
    file_stem = os.path.splitext(os.path.basename(audio_file))[0]
    op_prefix = os.path.join(op_dir, file_stem + op_str)

    params = parameters.get_params(False)
    args = du.get_default_bd_args()
    args['time_expansion_factor'] = args_cmd['time_expansion_factor']
    args['detection_threshold'] = args_cmd['detection_threshold']

    # load audio file
    print('\nProcessing: ' + os.path.basename(audio_file))
    print('\nOutput directory: ' + op_dir)
    sampling_rate, audio = au.load_audio_file(audio_file, args['time_expansion_factor'],
                                              params['target_samp_rate'])

    # keep only the [start_time, start_time + duration) clip
    clip_start = int(sampling_rate*start_time)
    clip_end = int(sampling_rate*start_time + sampling_rate*duration)
    audio = audio[clip_start:clip_end]
    audio_orig = audio.copy()  # unpadded clip, written out later as the video sound track
    audio = au.pad_audio(audio, sampling_rate, params['fft_win_length'],
                         params['fft_overlap'], params['resize_factor'],
                         params['spec_divide_factor'])

    # generate spectrogram
    spec, _ = au.generate_spectrogram(audio, sampling_rate, params, True)
    max_val = spec.max()*1.1

    if not args_cmd['no_detector']:
        print(' Loading model and running detector on entire file ...')
        model, det_params = du.load_model(args_cmd['model_path'])
        det_params['detection_threshold'] = args['detection_threshold']
        results = du.process_file(audio_file, model, det_params, args)

        print(' Processing detections and plotting ...')
        # keep the detections that lie entirely inside the clip
        detections = [bb for bb in results['pred_dict']['annotation']
                      if (bb['start_time'] >= start_time) and (bb['end_time'] < start_time+duration)]

        # plot boxes
        fig = plt.figure(1, figsize=(spec.shape[1]/dpi, spec.shape[0]/dpi), dpi=dpi)
        # from here on, duration is the time span of the (padded) spectrogram
        duration = au.x_coords_to_time(spec.shape[1], sampling_rate,
                                       params['fft_win_length'], params['fft_overlap'])
        viz.create_box_image(spec, fig, detections, start_time, start_time+duration,
                             duration, params, max_val,
                             plot_class_names=not args_cmd['plot_class_names_off'])
        op_im_file_boxes = op_prefix + '_boxes.png'
        fig.savefig(op_im_file_boxes, dpi=dpi)
        plt.close(1)
        spec_with_boxes = plt.imread(op_im_file_boxes)

    print(' Saving audio file ...')
    # non time-expanded input is written with a tenth of its sampling rate
    # NOTE(review): presumably this slows playback 10x to make the calls audible - confirm
    if args['time_expansion_factor'] == 1:
        sampling_rate_op = int(sampling_rate/10.0)
    else:
        sampling_rate_op = sampling_rate
    op_audio_file = op_prefix + '.wav'
    wavfile.write(op_audio_file, sampling_rate_op, audio_orig)

    print(' Saving image ...')
    op_im_file = op_prefix + '.png'
    plt.imsave(op_im_file, spec, vmin=0, vmax=max_val, cmap='plasma')
    spec_blank = plt.imread(op_im_file)

    # create figure
    freq_scale = 1000  # turn Hz to kHz
    min_freq = params['min_freq']//freq_scale
    max_freq = params['max_freq']//freq_scale
    y_extent = [0, duration, min_freq, max_freq]

    print(' Saving video frames ...')
    # save images that will be combined into video
    # will either plot with or without boxes
    num_frames = int(fps*duration*10)
    for ii, col in enumerate(np.linspace(0, spec.shape[1]-1, num_frames)):
        cursor = int(col)  # spectrogram column of the moving cursor line
        frame_file = op_dir_tmp + str(ii).zfill(4) + '.png'

        if not args_cmd['no_detector']:
            spec_op = spec_with_boxes.copy()
            if ii > 0:
                spec_op[:, cursor, :] = 1.0
                if reveal_boxes:
                    # everything right of the cursor comes from the box-free image
                    spec_op[:, cursor+1:, :] = spec_blank[:, cursor+1:, :]
            elif ii == 0 and reveal_boxes:
                spec_op = spec_blank

            if not args_cmd['disable_axis']:
                plt.close('all')
                fig = plt.figure(ii, figsize=(1.2*(spec_op.shape[1]/dpi),
                                              1.5*(spec_op.shape[0]/dpi)), dpi=dpi)
                plt.xlabel('Time - seconds')
                plt.ylabel('Frequency - kHz')
                plt.imshow(spec_op, vmin=0, vmax=1.0, cmap='plasma',
                           extent=y_extent, aspect='auto')
                plt.tight_layout()
                fig.savefig(frame_file, dpi=dpi)
            else:
                plt.imsave(frame_file, spec_op, vmin=0, vmax=1.0, cmap='plasma')
        else:
            spec_op = spec.copy()
            if ii > 0:
                spec_op[:, cursor] = max_val
            plt.imsave(frame_file, spec_op, vmin=0, vmax=max_val, cmap='plasma')

    print(' Creating video ...')
    op_vid_file = op_prefix + '.avi'
    # an argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and prevents them from being interpreted
    ffmpeg_cmd = [
        os.path.join(ffmpeg_path, 'ffmpeg'),
        '-hide_banner', '-loglevel', 'panic', '-y',
        '-r', str(fps),
        '-f', 'image2',
        '-s', '{}x{}'.format(spec.shape[1], spec.shape[0]),
        '-i', op_dir_tmp + '%04d.png',
        '-i', op_audio_file,
        '-vcodec', 'libx264',
        '-crf', '25',
        '-pix_fmt', 'yuv420p',
        '-acodec', 'copy',
        op_vid_file,
    ]
    # report failures but carry on so that the temporary frames still get removed
    try:
        ffmpeg_result = subprocess.run(ffmpeg_cmd)
    except OSError as err:
        print(' Unable to run ffmpeg: ' + str(err))
    else:
        if ffmpeg_result.returncode != 0:
            print(' ffmpeg failed with exit status ' + str(ffmpeg_result.returncode))

    print(' Deleting temporary files ...')
    if os.path.isdir(op_dir_tmp):
        shutil.rmtree(op_dir_tmp)