From: Marcel van der Veldt <m.vanderveldt@outlook.com>
Date: Tue, 18 Feb 2025 00:20:06 +0000 (+0100)
Subject: Parse additional (v2.4) tags with mutagen
X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=c597dc391b57ab332cbff82e7c667d9a70c8f32e;p=music-assistant-server.git

Parse additional (v2.4) tags with mutagen

Replaces eyeD3 with mutagen for reading extra tags from local files, which solves issues with multi-value tags.
---

diff --git a/music_assistant/helpers/tags.py b/music_assistant/helpers/tags.py
index cd802845..fd55969b 100644
--- a/music_assistant/helpers/tags.py
+++ b/music_assistant/helpers/tags.py
@@ -13,7 +13,7 @@ from dataclasses import dataclass
 from json import JSONDecodeError
 from typing import Any
 
-import eyed3
+import mutagen
 from music_assistant_models.enums import AlbumType
 from music_assistant_models.errors import InvalidDataError
 
@@ -24,9 +24,6 @@ from music_assistant.helpers.util import try_parse_int
 
 LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.tags")
 
-# silence the eyed3 logger because it is too verbose
-logging.getLogger("eyed3").setLevel(logging.ERROR)
-
 
 # the only multi-item splitter we accept is the semicolon,
 # which is also the default in Musicbrainz Picard.
@@ -427,7 +424,7 @@ async def async_parse_tags(input_file: str, file_size: int | None = None) -> Aud
     return await asyncio.to_thread(parse_tags, input_file, file_size)
 
 
-def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:  # noqa: PLR0915
+def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:
     """
     Parse tags from a media file (or URL). NOT Async friendly.
 
@@ -466,52 +463,12 @@ def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:  # n
         if not tags.duration and tags.raw.get("format", {}).get("duration"):
             tags.duration = float(tags.raw["format"]["duration"])
 
-        if (
-            not input_file.startswith("http")
-            and input_file.endswith(".mp3")
-            and os.path.isfile(input_file)
-        ):
-            # eyed3 is able to extract the musicbrainzrecordingid from the unique file id
-            # this is actually a bug in ffmpeg/ffprobe which does not expose this tag
-            # so we use this as alternative approach for mp3 files
-            # TODO: Convert all the tag reading to Mutagen!
-            audiofile = eyed3.load(input_file)
-            if audiofile is not None and audiofile.tag is not None:
-                for uf_id in audiofile.tag.unique_file_ids:
-                    if uf_id.owner_id == b"http://musicbrainz.org" and uf_id.uniq_id:
-                        tags.tags["musicbrainzrecordingid"] = uf_id.uniq_id.decode()
-                        break
-                if audiofile.tag.version == (2, 4, 0):
-                    # ffmpeg messes up reading ID3v2.4 tags from mp3 files, especially
-                    # on multi-item tags. We need to read the TXXX frames manually
-                    if frameset := audiofile.tag.frame_set.get(b"TXXX"):
-                        for raw_tag in frameset:
-                            if not hasattr(raw_tag, "description"):
-                                continue
-                            if raw_tag.description.upper() == "ARTISTS":
-                                tags.tags["artists"] = raw_tag.text.split("\x00\ufeff")
-                            if raw_tag.description == "MusicBrainz Artist Id":
-                                tags.tags["musicbrainzartistid"] = raw_tag.text.split("\x00\ufeff")
-                            if raw_tag.description == "MusicBrainz Album Artist Id":
-                                tags.tags["musicbrainzalbumartistid"] = raw_tag.text.split(
-                                    "\x00\ufeff"
-                                )
-                    if frameset := audiofile.tag.frame_set.get(b"TSOP"):
-                        for raw_tag in frameset:
-                            if not hasattr(raw_tag, "text"):
-                                continue
-                            tags.tags["artistsort"] = raw_tag.text.split("\x00\ufeff")
-                    if frameset := audiofile.tag.frame_set.get(b"TSO2"):
-                        for raw_tag in frameset:
-                            if not hasattr(raw_tag, "text"):
-                                continue
-                            tags.tags["albumartistsort"] = raw_tag.text.split("\x00\ufeff")
-                    if frameset := audiofile.tag.frame_set.get(b"TCON"):
-                        for raw_tag in frameset:
-                            if not hasattr(raw_tag, "text"):
-                                continue
-                            tags.tags["genre"] = raw_tag.text.split("\x00\ufeff")
-            del audiofile
+        # we parse all (basic) tags for all file formats using ffmpeg
+        # but we also try to extract some extra tags for local files using mutagen
+        if not input_file.startswith("http") and os.path.isfile(input_file):
+            extra_tags = parse_tags_mutagen(input_file)
+            if extra_tags:
+                tags.tags.update(extra_tags)
         return tags
     except subprocess.CalledProcessError as err:
         error_msg = f"Unable to retrieve info for {input_file}"
@@ -525,6 +482,64 @@ def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:  # n
         raise InvalidDataError(msg) from err
 
 
+def parse_tags_mutagen(input_file: str) -> dict[str, Any]:
+    """
+    Parse extra tags from a local audio file using Mutagen.
+
+    Only ID3 frames are handled for now. Of single-value frames only the first
+    value is returned, (potentially) multi-value frames are returned as list.
+    Parsing is best-effort: on errors a debug message is logged and the tags
+    collected so far are returned.
+
+    NOT Async friendly.
+    """
+    result: dict[str, Any] = {}
+    # ID3 frames of which only the first value is used
+    single_value = {
+        "TIT2": "title",
+        "TPE1": "artist",
+        "TPE2": "albumartist",
+        "TALB": "album",
+        "TXXX:MusicBrainz Album Id": "musicbrainzalbumid",
+        "TXXX:MusicBrainz Release Group Id": "musicbrainzreleasegroupid",
+        "TXXX:MusicBrainz Track Id": "musicbrainztrackid",
+    }
+    # ID3 frames of which all values are kept (as list)
+    multi_value = {
+        "TCON": "genre",
+        "TXXX:ARTISTS": "artists",
+        "TXXX:MusicBrainz Album Artist Id": "musicbrainzalbumartistid",
+        "TXXX:MusicBrainz Artist Id": "musicbrainzartistid",
+        "TXXX:BARCODE": "barcode",
+        "TXXX:TSRC": "tsrc",
+        "TSOP": "artistsort",
+        "TSO2": "albumartistsort",
+        "TSOT": "titlesort",
+        "TSOA": "albumsort",
+    }
+    try:
+        # TODO: extend with more tags and file types!
+        audiofile = mutagen.File(input_file)
+        if audiofile is None or audiofile.tags is None:
+            # unsupported file type, or a file without any tags at all
+            return result
+        tags = dict(audiofile.tags)
+        for frame_id, key in single_value.items():
+            # skip frames with an empty value list instead of failing on [0]
+            if frame_id in tags and tags[frame_id].text:
+                result[key] = tags[frame_id].text[0]
+        for frame_id, key in multi_value.items():
+            if frame_id in tags:
+                result[key] = tags[frame_id].text
+        # the MusicBrainz recording id is stored (as bytes) in the UFID frame
+        if "UFID:http://musicbrainz.org" in tags:
+            result["musicbrainzrecordingid"] = tags["UFID:http://musicbrainz.org"].data.decode()
+    except Exception as err:
+        # deliberate best-effort: these tags are only an addition to the ffmpeg tags
+        LOGGER.debug("Error parsing mutagen tags for %s: %s", input_file, err)
+    return result
+
+
 async def get_embedded_image(input_file: str) -> bytes | None:
     """Return embedded image data.
 
diff --git a/pyproject.toml b/pyproject.toml
index e5c6abc4..4dc657ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,13 +19,13 @@ dependencies = [
   "certifi==2025.1.31",
   "colorlog==6.9.0",
   "cryptography==44.0.1",
-  "eyeD3==0.9.7",
   "faust-cchardet>=2.1.18",
   "ifaddr==0.2.0",
   "mashumaro==3.15",
   "memory-tempfile==2.2.3",
   "music-assistant-frontend==2.11.5",
   "music-assistant-models==1.1.27",
+  "mutagen==1.47.0",
   "orjson==3.10.12",
   "pillow==11.1.0",
   "podcastparser==0.6.10",
diff --git a/requirements_all.txt b/requirements_all.txt
index 126f5924..d85cd324 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -19,7 +19,6 @@ cryptography==44.0.1
 deezer-python-async==0.3.0
 defusedxml==0.7.1
 duration-parser==1.0.1
-eyeD3==0.9.7
 faust-cchardet>=2.1.18
 hass-client==1.2.0
 ibroadcastaio==0.4.0
@@ -28,6 +27,7 @@ mashumaro==3.15
 memory-tempfile==2.2.3
 music-assistant-frontend==2.11.5
 music-assistant-models==1.1.27
+mutagen==1.47.0
 orjson==3.10.12
 pillow==11.1.0
 pkce==1.0.3