# batdetect2/bat_detect/detector/parameters.py

import datetime
import os

import numpy as np


def mk_dir(path):
    """Create the directory *path*, including any missing parents.

    Does nothing when the directory already exists. ``exist_ok=True`` is
    used instead of a separate ``os.path.isdir`` check so that a directory
    created by another process between the check and the creation does not
    cause a ``FileExistsError``.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


def get_params(make_dirs=False, exps_dir="../../experiments/"):
    """Assemble the default parameter dictionary for a detector experiment.

    The experiment directory and the model checkpoint file are both named
    after a timestamp (second resolution) taken when this function is
    called.

    Args:
        make_dirs: When true, print the model name, model file and
            experiment directory, then create the experiment directory and
            whichever image output directories are enabled by the
            ``save_test_image_*`` flags.
        exps_dir: Root directory under which the timestamped experiment
            directory is placed.

    Returns:
        A dict mapping parameter names to their default values.
    """
    # the call-time timestamp names both the experiment folder and the checkpoint
    stamp = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
    experiment_dir = os.path.join(exps_dir, stamp, "")

    # model and output locations
    params = {
        # Net2DFast, Net2DSkip, Net2DSimple, Net2DSkipDS, Net2DRN
        "model_name": "Net2DFast",
        "num_filters": 128,
        "experiment": experiment_dir,
        "model_file_name": os.path.join(experiment_dir, stamp + ".pth.tar"),
        "op_im_dir": os.path.join(experiment_dir, "op_ims", ""),
        "op_im_dir_test": os.path.join(experiment_dir, "op_ims_test", ""),
    }
    # a free-text 'notes' entry could be added here to describe an experiment

    # spec parameters
    params.update(
        {
            # resamples all audio so that it is at this rate
            "target_samp_rate": 256000,
            # amount of time per stft time step: 512 / 256000 = 0.002
            # NOTE(review): originally documented as milliseconds, but the
            # value looks like seconds (512 samples at 256 kHz) - confirm
            "fft_win_length": 512 / 256000.0,
            # stft window overlap
            "fft_overlap": 0.75,
            # in Hz, everything above this will be discarded
            "max_freq": 120000,
            # in Hz, everything below this will be discarded
            "min_freq": 10000,
            # resize factor applied to the spectrogram at the network input
            "resize_factor": 0.5,
            # number of frequency bins (before resizing is performed)
            "spec_height": 256,
            # number of time steps (before resizing is performed)
            "spec_train_width": 512,
            # spectrogram width and height should be divisible by this
            "spec_divide_factor": 32,
        }
    )

    # spec processing params
    params.update(
        {
            # removes the mean for each frequency band
            "denoise_spec_avg": True,
            # scales the raw audio to [-1, 1]
            "scale_raw_audio": False,
            # scales the spectrogram so that it is max 1
            "max_scale_spec": False,
            # 'log', 'pcen', 'none'
            "spec_scale": "pcen",
        }
    )

    # detection params
    params.update(
        {
            # a prediction has to be within this time to count as a detection
            # NOTE(review): originally documented as ms; 0.01 may be
            # seconds - confirm
            "detection_overlap": 0.01,
            # GT calls starting within this time of the start/end of the
            # file are ignored
            "ignore_start_end": 0.01,
            # the smaller this is the better the recall will be
            "detection_threshold": 0.01,
            "nms_kernel_size": 9,
            # keep top K highest predictions per second of audio
            "nms_top_k_per_sec": 200,
            "target_sigma": 2.0,
        }
    )

    # augmentation params
    params.update(
        {
            # augmentations will be performed with this probability
            "aug_prob": 0.20,
            "augment_at_train": True,
            "augment_at_train_combine": True,
            # simulate echo by adding copy of raw audio
            "echo_max_delay": 0.005,
            # stretch or squeeze spec
            "stretch_squeeze_delta": 0.04,
            # max mask size - here percentage, not ideal
            "mask_max_time_perc": 0.05,
            # max mask size - here percentage, not ideal
            "mask_max_freq_perc": 0.10,
            # multiply the "volume" by 0:X times current amount
            "spec_amp_scaling": 2.0,
            "aug_sampling_rates": [
                220500,
                256000,
                300000,
                312500,
                384000,
                441000,
                500000,
            ],
        }
    )

    # loss params
    params.update(
        {
            # mse or focal
            "train_loss": "focal",
            # weight for the detection part of the loss
            "det_loss_weight": 1.0,
            # weight for the bbox size loss
            "size_loss_weight": 0.1,
            # weight for the classification loss
            "class_loss_weight": 2.0,
            # not used
            "individual_loss_weight": 0.0,
        }
    )
    # number of dimensions used for individual id embedding; zero whenever
    # the individual loss is switched off
    params["emb_dim"] = 0 if params["individual_loss_weight"] == 0.0 else 3

    # train params
    params.update(
        {
            "lr": 0.001,
            "batch_size": 8,
            "num_workers": 4,
            "num_epochs": 200,
            # run evaluation every X epochs
            "num_eval_epochs": 5,
            "device": "cuda",
            "save_test_image_during_train": False,
            "save_test_image_after_train": True,
        }
    )

    # class handling
    params.update(
        {
            "convert_to_genus": False,
            "genus_mapping": [],
            "class_names": [],
            "classes_to_ignore": ["", " ", "Unknown", "Not Bat"],
            "generic_class": ["Bat"],
            # will ignore all other types of events e.g. social calls
            "events_of_interest": ["Echolocation"],
            # the classes in this list are standardized during training so
            # that the same low and high freq are used
            "standardize_classs_names": [],
        }
    )

    if not make_dirs:
        return params

    # report where things will be written, then create the directories
    for label, key in (
        ("Model name", "model_name"),
        ("Model file", "model_file_name"),
        ("Experiment", "experiment"),
    ):
        print(label + " : " + params[key])

    wanted_dirs = [params["experiment"]]
    if params["save_test_image_during_train"]:
        wanted_dirs.append(params["op_im_dir"])
    if params["save_test_image_after_train"]:
        wanted_dirs.append(params["op_im_dir_test"])
    wanted_dirs.append(os.path.dirname(params["model_file_name"]))
    for directory in wanted_dirs:
        mk_dir(directory)

    return params