import datetime import os from bat_detect.types import ( ProcessingConfiguration, SpectrogramParameters, ) TARGET_SAMPLERATE_HZ = 256000 FFT_WIN_LENGTH_S = 512 / 256000.0 FFT_OVERLAP = 0.75 MAX_FREQ_HZ = 120000 MIN_FREQ_HZ = 10000 RESIZE_FACTOR = 0.5 SPEC_DIVIDE_FACTOR = 32 SPEC_HEIGHT = 256 SCALE_RAW_AUDIO = False DETECTION_THRESHOLD = 0.01 NMS_KERNEL_SIZE = 9 NMS_TOP_K_PER_SEC = 200 SPEC_SCALE = "pcen" DENOISE_SPEC_AVG = True MAX_SCALE_SPEC = False DEFAULT_MODEL_PATH = os.path.join( os.path.dirname(os.path.dirname(__file__)), "models", "Net2DFast_UK_same.pth.tar", ) DEFAULT_SPECTROGRAM_PARAMETERS: SpectrogramParameters = { "fft_win_length": FFT_WIN_LENGTH_S, "fft_overlap": FFT_OVERLAP, "spec_height": SPEC_HEIGHT, "resize_factor": RESIZE_FACTOR, "spec_divide_factor": SPEC_DIVIDE_FACTOR, "max_freq": MAX_FREQ_HZ, "min_freq": MIN_FREQ_HZ, "spec_scale": SPEC_SCALE, "denoise_spec_avg": DENOISE_SPEC_AVG, "max_scale_spec": MAX_SCALE_SPEC, } DEFAULT_PROCESSING_CONFIGURATIONS: ProcessingConfiguration = { "detection_threshold": DETECTION_THRESHOLD, "spec_slices": False, "chunk_size": 3, "spec_features": False, "cnn_features": False, "quiet": True, "target_samp_rate": TARGET_SAMPLERATE_HZ, "fft_win_length": FFT_WIN_LENGTH_S, "fft_overlap": FFT_OVERLAP, "resize_factor": RESIZE_FACTOR, "spec_divide_factor": SPEC_DIVIDE_FACTOR, "spec_height": SPEC_HEIGHT, "scale_raw_audio": SCALE_RAW_AUDIO, "class_names": [], "time_expansion": 1, "top_n": 3, "return_raw_preds": False, "max_duration": None, "nms_kernel_size": NMS_KERNEL_SIZE, "max_freq": MAX_FREQ_HZ, "min_freq": MIN_FREQ_HZ, "nms_top_k_per_sec": NMS_TOP_K_PER_SEC, "spec_scale": SPEC_SCALE, "denoise_spec_avg": DENOISE_SPEC_AVG, "max_scale_spec": MAX_SCALE_SPEC, } def mk_dir(path): if not os.path.isdir(path): os.makedirs(path) def get_params(make_dirs=False, exps_dir="../../experiments/"): params = {} params[ "model_name" ] = "Net2DFast" # Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN params["num_filters"] = 128 now_str = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S") model_name = now_str + ".pth.tar" params["experiment"] = os.path.join(exps_dir, now_str, "") params["model_file_name"] = os.path.join(params["experiment"], model_name) params["op_im_dir"] = os.path.join(params["experiment"], "op_ims", "") params["op_im_dir_test"] = os.path.join( params["experiment"], "op_ims_test", "" ) # params['notes'] = '' # can save notes about an experiment here # spec parameters params[ "target_samp_rate" ] = TARGET_SAMPLERATE_HZ # resamples all audio so that it is at this rate params[ "fft_win_length" ] = FFT_WIN_LENGTH_S # in milliseconds, amount of time per stft time step params["fft_overlap"] = FFT_OVERLAP # stft window overlap params[ "max_freq" ] = MAX_FREQ_HZ # in Hz, everything above this will be discarded params[ "min_freq" ] = MIN_FREQ_HZ # in Hz, everything below this will be discarded params[ "resize_factor" ] = RESIZE_FACTOR # resize so the spectrogram at the input of the network params[ "spec_height" ] = SPEC_HEIGHT # units are number of frequency bins (before resizing is performed) params[ "spec_train_width" ] = 512 # units are number of time steps (before resizing is performed) params[ "spec_divide_factor" ] = SPEC_DIVIDE_FACTOR # spectrogram should be divisible by this amount in width and height # spec processing params params[ "denoise_spec_avg" ] = DENOISE_SPEC_AVG # removes the mean for each frequency band params[ "scale_raw_audio" ] = SCALE_RAW_AUDIO # scales the raw audio to [-1, 1] params[ "max_scale_spec" ] = MAX_SCALE_SPEC # scales the spectrogram so that it is max 1 params["spec_scale"] = SPEC_SCALE # 'log', 'pcen', 'none' # detection params params[ "detection_overlap" ] = 0.01 # has to be within this number of ms to count as detection params[ "ignore_start_end" ] = 0.01 # if start of GT calls are within this time from the start/end of file ignore params[ "detection_threshold" ] = DETECTION_THRESHOLD # the smaller this is the better the recall will be params[ "nms_kernel_size" ] = NMS_KERNEL_SIZE # size of the kernel for non-max suppression params[ "nms_top_k_per_sec" ] = NMS_TOP_K_PER_SEC # keep top K highest predictions per second of audio params["target_sigma"] = 2.0 # augmentation params params[ "aug_prob" ] = 0.20 # augmentations will be performed with this probability params["augment_at_train"] = True params["augment_at_train_combine"] = True params[ "echo_max_delay" ] = 0.005 # simulate echo by adding copy of raw audio params["stretch_squeeze_delta"] = 0.04 # stretch or squeeze spec params[ "mask_max_time_perc" ] = 0.05 # max mask size - here percentage, not ideal params[ "mask_max_freq_perc" ] = 0.10 # max mask size - here percentage, not ideal params[ "spec_amp_scaling" ] = 2.0 # multiply the "volume" by 0:X times current amount params["aug_sampling_rates"] = [ 220500, 256000, 300000, 312500, 384000, 441000, 500000, ] # loss params params["train_loss"] = "focal" # mse or focal params["det_loss_weight"] = 1.0 # weight for the detection part of the loss params["size_loss_weight"] = 0.1 # weight for the bbox size loss params["class_loss_weight"] = 2.0 # weight for the classification loss params["individual_loss_weight"] = 0.0 # not used if params["individual_loss_weight"] == 0.0: params[ "emb_dim" ] = 0 # number of dimensions used for individual id embedding else: params["emb_dim"] = 3 # train params params["lr"] = 0.001 params["batch_size"] = 8 params["num_workers"] = 4 params["num_epochs"] = 200 params["num_eval_epochs"] = 5 # run evaluation every X epochs params["device"] = "cuda" params["save_test_image_during_train"] = False params["save_test_image_after_train"] = True params["convert_to_genus"] = False params["genus_mapping"] = [] params["class_names"] = [] params["classes_to_ignore"] = ["", " ", "Unknown", "Not Bat"] params["generic_class"] = ["Bat"] params["events_of_interest"] = [ "Echolocation" ] # will ignore all other types of events e.g. social calls # the classes in this list are standardized during training so that the same low and high freq are used params["standardize_classs_names"] = [] # create directories if make_dirs: print("Model name : " + params["model_name"]) print("Model file : " + params["model_file_name"]) print("Experiment : " + params["experiment"]) mk_dir(params["experiment"]) if params["save_test_image_during_train"]: mk_dir(params["op_im_dir"]) if params["save_test_image_after_train"]: mk_dir(params["op_im_dir_test"]) mk_dir(os.path.dirname(params["model_file_name"])) return params