mirror of
https://github.com/macaodha/batdetect2.git
synced 2025-06-29 22:51:58 +02:00
Added load_audio_data(), which also returns the file's original sample rate. Changed the load_audio() implementation so that it calls load_audio_data() but retains its signature. du.process_file() no longer needs to call get_samplerate().
This commit is contained in:
parent
f62bc99ab2
commit
230b6167bc
@ -17,6 +17,7 @@ from . import wavfile
|
||||
|
||||
__all__ = [
|
||||
"load_audio",
|
||||
"load_audio_data",
|
||||
"generate_spectrogram",
|
||||
"pad_audio",
|
||||
]
|
||||
@ -145,16 +146,6 @@ def generate_spectrogram(
|
||||
|
||||
return spec, spec_for_viz
|
||||
|
||||
def get_samplerate(
|
||||
path: Union[
|
||||
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
|
||||
]):
|
||||
if isinstance(path, (BinaryIO, io.BytesIO)):
|
||||
path.seek(0)
|
||||
|
||||
with sf.SoundFile(path) as f:
|
||||
return f.samplerate
|
||||
|
||||
def load_audio(
|
||||
path: Union[
|
||||
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
|
||||
@ -169,6 +160,37 @@ def load_audio(
|
||||
The audio is also scaled to [-1, 1] and clipped to the maximum duration.
|
||||
Only mono files are supported.
|
||||
|
||||
Args:
|
||||
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
|
||||
target_samp_rate (int): Target sampling rate.
|
||||
scale (bool): Whether to scale the audio to [-1, 1].
|
||||
max_duration (float): Maximum duration of the audio in seconds.
|
||||
|
||||
Returns:
|
||||
sampling_rate: The sampling rate of the audio.
|
||||
audio_raw: The audio signal in a numpy array.
|
||||
|
||||
Raises:
|
||||
ValueError: If the audio file is stereo.
|
||||
|
||||
"""
|
||||
sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
|
||||
return sample_rate, audio_data
|
||||
|
||||
def load_audio_data(
|
||||
path: Union[
|
||||
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
|
||||
],
|
||||
time_exp_fact: float,
|
||||
target_samp_rate: int,
|
||||
scale: bool = False,
|
||||
max_duration: Optional[float] = None,
|
||||
) -> Tuple[int, np.ndarray, int | float]:
|
||||
"""Load an audio file and resample it to the target sampling rate.
|
||||
|
||||
The audio is also scaled to [-1, 1] and clipped to the maximum duration.
|
||||
Only mono files are supported.
|
||||
|
||||
Args:
|
||||
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
|
||||
target_samp_rate (int): Target sampling rate.
|
||||
@ -223,7 +245,7 @@ def load_audio(
|
||||
audio_raw = audio_raw - audio_raw.mean()
|
||||
audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)
|
||||
|
||||
return sampling_rate, audio_raw
|
||||
return sampling_rate, audio_raw, file_sampling_rate
|
||||
|
||||
|
||||
def compute_spectrogram_width(
|
||||
|
@ -773,14 +773,13 @@ def process_file(
|
||||
spec_slices = []
|
||||
|
||||
# load audio file
|
||||
sampling_rate, audio_full = au.load_audio(
|
||||
sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
|
||||
path,
|
||||
time_exp_fact=config.get("time_expansion", 1) or 1,
|
||||
target_samp_rate=config["target_samp_rate"],
|
||||
scale=config["scale_raw_audio"],
|
||||
max_duration=config.get("max_duration"),
|
||||
)
|
||||
file_samp_rate = au.get_samplerate(path)
|
||||
|
||||
orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)
|
||||
|
||||
|
@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
|
||||
resize_factor=params["resize_factor"],
|
||||
)
|
||||
assert expected_width == width
|
||||
|
||||
def test_get_samplerate_using_bytesio():
|
||||
with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
|
||||
audio_bytes = io.BytesIO(f.read())
|
||||
|
||||
sample_rate = audio_utils.get_samplerate(audio_bytes)
|
||||
|
||||
expected_sample_rate = 500000
|
||||
assert expected_sample_rate == sample_rate
|
||||
|
||||
|
||||
|
||||
def test_load_audio_using_bytes():
|
||||
filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
audio_bytes = io.BytesIO(f.read())
|
||||
|
||||
sample_rate, audio_data = audio_utils.load_audio(audio_bytes, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
|
||||
expected_sample_rate, expected_audio_data = audio_utils.load_audio(filename, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
|
||||
assert expected_sample_rate == sample_rate
|
||||
|
||||
assert np.array_equal(audio_data, expected_audio_data)
|
||||
|
||||
|
||||
|
||||
def test_get_samplerate_using_bytesio_2():
|
||||
basename = "20230322_172000_selec2.wav"
|
||||
path = os.path.join(DATA_DIR, basename)
|
||||
|
||||
with open(path, "rb") as f:
|
||||
audio_bytes = io.BytesIO(f.read())
|
||||
|
||||
sample_rate = audio_utils.get_samplerate(audio_bytes)
|
||||
|
||||
expected_sample_rate = 192_000
|
||||
assert expected_sample_rate == sample_rate
|
||||
|
||||
def test_load_audio_using_bytes_2():
|
||||
def test_load_audio_using_bytesio():
|
||||
basename = "20230322_172000_selec2.wav"
|
||||
path = os.path.join(DATA_DIR, basename)
|
||||
|
||||
with open(path, "rb") as f:
|
||||
data = io.BytesIO(f.read())
|
||||
|
||||
sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
|
||||
expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
|
||||
|
||||
assert expected_sample_rate == sample_rate
|
||||
assert exp_file_sample_rate == file_sample_rate
|
||||
|
||||
assert np.array_equal(audio_data, expected_audio_data)
|
Loading…
Reference in New Issue
Block a user