Compare commits

...

2 Commits

3 changed files with 38 additions and 54 deletions

View File

@ -17,6 +17,7 @@ from . import wavfile
__all__ = [ __all__ = [
"load_audio", "load_audio",
"load_audio_data",
"generate_spectrogram", "generate_spectrogram",
"pad_audio", "pad_audio",
] ]
@ -145,16 +146,6 @@ def generate_spectrogram(
return spec, spec_for_viz return spec, spec_for_viz
def get_samplerate(
    path: Union[
        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
    ]) -> int:
    """Return the sampling rate stored in an audio file.

    Args:
        path: Location of the audio, or an already opened file-like object.
            It is handed to ``sf.SoundFile`` unchanged.

    Returns:
        The ``samplerate`` attribute reported by ``sf.SoundFile``.
    """
    # ``typing.BinaryIO`` is an abstract typing class: handles returned by
    # ``open(..., "rb")`` are NOT instances of it, so checking against it alone
    # only ever rewound ``io.BytesIO`` objects. ``io.IOBase`` covers every
    # concrete stream from the ``io`` module (BytesIO, BufferedReader, ...).
    if isinstance(path, (io.IOBase, BinaryIO)):
        # The stream may already have been consumed by an earlier read;
        # rewind so the header can be parsed from the start.
        path.seek(0)

    with sf.SoundFile(path) as f:
        return f.samplerate
def load_audio( def load_audio(
path: Union[ path: Union[
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
@ -169,6 +160,37 @@ def load_audio(
The audio is also scaled to [-1, 1] and clipped to the maximum duration. The audio is also scaled to [-1, 1] and clipped to the maximum duration.
Only mono files are supported. Only mono files are supported.
Args:
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
target_samp_rate (int): Target sampling rate.
scale (bool): Whether to scale the audio to [-1, 1].
max_duration (float): Maximum duration of the audio in seconds.
Returns:
sampling_rate: The sampling rate of the audio.
audio_raw: The audio signal in a numpy array.
Raises:
ValueError: If the audio file is stereo.
"""
sample_rate, audio_data, _ = load_audio_data(path, time_exp_fact, target_samp_rate, scale, max_duration)
return sample_rate, audio_data
def load_audio_data(
path: Union[
str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
],
time_exp_fact: float,
target_samp_rate: int,
scale: bool = False,
max_duration: Optional[float] = None,
) -> Tuple[int, np.ndarray, int | float]:
"""Load an audio file and resample it to the target sampling rate.
The audio is also scaled to [-1, 1] and clipped to the maximum duration.
Only mono files are supported.
Args: Args:
path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file. path (string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object): path to the input file.
target_samp_rate (int): Target sampling rate. target_samp_rate (int): Target sampling rate.
@ -223,7 +245,7 @@ def load_audio(
audio_raw = audio_raw - audio_raw.mean() audio_raw = audio_raw - audio_raw.mean()
audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6) audio_raw = audio_raw / (np.abs(audio_raw).max() + 10e-6)
return sampling_rate, audio_raw return sampling_rate, audio_raw, file_sampling_rate
def compute_spectrogram_width( def compute_spectrogram_width(

View File

@ -773,14 +773,13 @@ def process_file(
spec_slices = [] spec_slices = []
# load audio file # load audio file
sampling_rate, audio_full = au.load_audio( sampling_rate, audio_full, file_samp_rate = au.load_audio_data(
path, path,
time_exp_fact=config.get("time_expansion", 1) or 1, time_exp_fact=config.get("time_expansion", 1) or 1,
target_samp_rate=config["target_samp_rate"], target_samp_rate=config["target_samp_rate"],
scale=config["scale_raw_audio"], scale=config["scale_raw_audio"],
max_duration=config.get("max_duration"), max_duration=config.get("max_duration"),
) )
file_samp_rate = au.get_samplerate(path)
orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1) orig_samp_rate = file_samp_rate * (config.get("time_expansion") or 1)

View File

@ -138,56 +138,19 @@ def test_pad_audio_with_fixed_width(duration: float, width: int):
) )
assert expected_width == width assert expected_width == width
def test_get_samplerate_using_bytesio():
with open("example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav", "rb") as f:
audio_bytes = io.BytesIO(f.read())
sample_rate = audio_utils.get_samplerate(audio_bytes) def test_load_audio_using_bytesio():
expected_sample_rate = 500000
assert expected_sample_rate == sample_rate
def test_load_audio_using_bytes():
    """Loading from an in-memory copy must match loading from the file path."""
    filename = "example_data/audio/20170701_213954-MYOMYS-LR_0_0.5.wav"
    load_kwargs = {
        "time_exp_fact": 1,
        "target_samp_rate": parameters.TARGET_SAMPLERATE_HZ,
    }

    with open(filename, "rb") as handle:
        in_memory = io.BytesIO(handle.read())

    # Same loader, same settings: once fed the buffer, once fed the path.
    rate_from_bytes, samples_from_bytes = audio_utils.load_audio(
        in_memory, **load_kwargs
    )
    rate_from_path, samples_from_path = audio_utils.load_audio(
        filename, **load_kwargs
    )

    assert rate_from_path == rate_from_bytes
    assert np.array_equal(samples_from_bytes, samples_from_path)
def test_get_samplerate_using_bytesio_2():
    """get_samplerate on an in-memory copy of the recording yields 192000."""
    wav_path = os.path.join(DATA_DIR, "20230322_172000_selec2.wav")

    with open(wav_path, "rb") as handle:
        in_memory = io.BytesIO(handle.read())

    detected_rate = audio_utils.get_samplerate(in_memory)

    assert 192_000 == detected_rate
def test_load_audio_using_bytes_2():
basename = "20230322_172000_selec2.wav" basename = "20230322_172000_selec2.wav"
path = os.path.join(DATA_DIR, basename) path = os.path.join(DATA_DIR, basename)
with open(path, "rb") as f: with open(path, "rb") as f:
data = io.BytesIO(f.read()) data = io.BytesIO(f.read())
sample_rate, audio_data = audio_utils.load_audio(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) sample_rate, audio_data, file_sample_rate = audio_utils.load_audio_data(data, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
expected_sample_rate, expected_audio_data = audio_utils.load_audio(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ) expected_sample_rate, expected_audio_data, exp_file_sample_rate = audio_utils.load_audio_data(path, time_exp_fact=1, target_samp_rate=parameters.TARGET_SAMPLERATE_HZ)
assert expected_sample_rate == sample_rate assert expected_sample_rate == sample_rate
assert exp_file_sample_rate == file_sample_rate
assert np.array_equal(audio_data, expected_audio_data) assert np.array_equal(audio_data, expected_audio_data)