Added load_audio_data(), which also returns the original sample rate of the file. Changed the load_audio() implementation so that it delegates to load_audio_data() while retaining its signature. du.process_file() no longer needs to call get_samplerate().

This commit is contained in:
Kavi 2025-02-27 08:10:27 +01:00
parent f62bc99ab2
commit 230b6167bc
3 changed files with 38 additions and 54 deletions

View File

@ -17,6 +17,7 @@ from . import wavfile
__all__ = [
"load_audio",
"load_audio_data",
"generate_spectrogram",
"pad_audio",
]
@ -145,16 +146,6 @@ def generate_spectrogram(
return spec, spec_for_viz
def get_samplerate(
    path: Union[
        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
    ]) -> int:
    """Return the sampling rate reported by ``soundfile`` for an audio input.

    Args:
        path: The audio source, handed to ``sf.SoundFile`` as-is. If it is
            a seekable binary stream it is rewound to position 0 first.

    Returns:
        The ``samplerate`` attribute of the opened ``sf.SoundFile``.
    """
    # typing.BinaryIO is an annotation helper: concrete streams such as
    # io.BytesIO or the reader returned by open(..., "rb") are not instances
    # of it. io.IOBase is the real common base of stdlib streams, so test
    # against it to make sure a partially consumed stream is rewound.
    is_stream = isinstance(path, (BinaryIO, io.IOBase))
    if is_stream and path.seekable():
        path.seek(0)

    with sf.SoundFile(path) as f:
        return f.samplerate
def load_audio(
path: Union[
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
@ -169,6 +160,37 @@ def load_audio(
The audio is also scaled to [-1, 1] and clipped to the maximum duration.
Only mono files are supported.
Args:
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
target_samp_rate (int): Target sampling rate.
scale (bool): Whether to scale the audio to [-1, 1].
max_duration (float): Maximum duration of the audio in seconds.
Returns:
sampling_rate: The sampling rate of the audio.
audio_raw: The audio signal in a numpy array.
Raises:
ValueError: If the audio file is stereo.
"""
sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
return sample_rate, audio_data
def load_audio_data(
path: Union[
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
time_exp_fact: float,
target_samp_rate: int,
scale: bool = False,
max_duration: Optional[float] = None,
) -> Tuple[int, np.ndarray, int | float]:
"""Load an audio file and resample it to the target sampling rate.
The audio is also scaled to [-1, 1] and clipped to the maximum duration.
Only mono files are supported.
Args:
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
target_samp_rate (int): Target sampling rate.
@ -223,7 +245,7 @@ def load_audio(
audio_raw = audio_raw - audio_raw.mean()
audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)
return sampling_rate, audio_raw
return sampling_rate, audio_raw, file_sampling_rate
def compute_spectrogram_width(

View File

@ -773,14 +773,13 @@ def process_file(
spec_slices = []
# load audio file
sampling_rate, audio_full = au.load_audio(
sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
path,
time_exp_fact=config.get("time_expansion", 1) or 1,
target_samp_rate=config["target_samp_rate"],
scale=config["scale_raw_audio"],
max_duration=config.get("max_duration"),
)
file_samp_rate = au.get_samplerate(path)
orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)

View File

@ -137,57 +137,20 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
resize_factor=params["resize_factor"],
)
assert expected_width == width
def test_get_samplerate_using_bytesio():
    """get_samplerate reports 500 kHz for an in-memory copy of the example WAV."""
    wav_path = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
    with open(wav_path, "rb") as handle:
        buffer = io.BytesIO(handle.read())

    observed = audio_utils.get_samplerate(buffer)

    assert 500000 == observed
def test_load_audio_using_bytes():
    """load_audio yields the same rate and samples from BytesIO as from a path."""
    wav_path = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
    with open(wav_path, "rb") as handle:
        buffer = io.BytesIO(handle.read())

    # Both loads use identical settings so only the input type differs.
    load_kwargs = {
        "time_exp_fact": 1,
        "target_samp_rate": parameters.TARGET_SAMPLERATE_HZ,
    }
    rate_from_bytes, samples_from_bytes = audio_utils.load_audio(
        buffer, **load_kwargs
    )
    rate_from_path, samples_from_path = audio_utils.load_audio(
        wav_path, **load_kwargs
    )

    assert rate_from_path == rate_from_bytes
    assert np.array_equal(samples_from_bytes, samples_from_path)
def test_get_samplerate_using_bytesio_2():
    """get_samplerate reports 192 kHz for an in-memory copy of the DATA_DIR WAV."""
    wav_path = os.path.join(DATA_DIR, "20230322_172000_selec2.wav")
    with open(wav_path, "rb") as handle:
        buffer = io.BytesIO(handle.read())

    observed = audio_utils.get_samplerate(buffer)

    assert 192_000 == observed
def test_load_audio_using_bytes_2():
def test_load_audio_using_bytesio():
basename = "20230322_172000_selec2.wav"
path = os.path.join(DATA_DIR, basename)
with open(path, "rb") as f:
data = io.BytesIO(f.read())
sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
assert expected_sample_rate == sample_rate
assert exp_file_sample_rate == file_sample_rate
assert np.array_equal(audio_data, expected_audio_data)