Fix 'unknown' id by providing a _generate_id() function.

This commit is contained in:
Kavi 2025-02-26 14:11:11 +01:00
parent 838a1ade0d
commit 6af7fef316

View File

@ -33,8 +33,10 @@ from batdetect2.types import (
import audioread import audioread
import os import os
import io
import soundfile as sf import soundfile as sf
import hashlib
import uuid
__all__ = [ __all__ = [
"load_model", "load_model",
@ -832,7 +834,7 @@ def process_file(
_file_id = file_id _file_id = file_id
if _file_id is None: if _file_id is None:
_file_id = os.path.basename(path) if isinstance(path, str) else "unknown" _file_id = _generate_id(path)
# convert results to a dictionary in the right format # convert results to a dictionary in the right format
results = convert_results( results = convert_results(
@ -856,6 +858,24 @@ def process_file(
return results return results
def _generate_id(path: Union[
    str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
]) -> str:
    """Generate a file id for the given audio source.

    The id is chosen according to the kind of object received:

    * ``str`` / ``os.PathLike``: the basename of the path. This matches the
      id that was produced for string paths before this helper existed, so
      existing callers keep getting the same ids.
    * binary streams (``io.BytesIO``, files opened in binary mode, raw
      streams): the hexadecimal MD5 digest of the full stream content, so
      the same bytes always yield the same id. The stream is left rewound
      to position 0 afterwards.
    * anything else (file descriptors, decoder objects, non-seekable
      streams): a random UUID4 string, unique per call.

    Parameters
    ----------
    path
        The object the audio was loaded from.

    Returns
    -------
    str
        The generated identifier.
    """
    if isinstance(path, (str, os.PathLike)):
        return os.path.basename(path)

    # ``typing.BinaryIO`` is only an annotation helper: concrete file objects
    # do not inherit from it, so the runtime check must be made against the
    # ``io`` base classes. It is kept in the tuple for objects that subclass
    # it explicitly.
    if isinstance(path, (BinaryIO, io.RawIOBase, io.BufferedIOBase)):
        try:
            path.seek(0)
        except OSError:
            # Non-seekable stream (pipe, socket, ...): the content cannot be
            # hashed without consuming it, so use the random fallback.
            return str(uuid.uuid4())

        digest = hashlib.md5()
        try:
            # Hash in fixed-size chunks to avoid loading the whole stream
            # into memory at once.
            while True:
                chunk = path.read(65536)
                if not chunk:
                    break
                digest.update(chunk)
        finally:
            # Always leave the stream rewound for whoever uses it next.
            path.seek(0)
        return digest.hexdigest()

    return str(uuid.uuid4())
def summarize_results(results, predictions, config): def summarize_results(results, predictions, config):
"""Print summary of results.""" """Print summary of results."""