Fix: Temporary fix for parsing multi-value tags in ID3v2.4 tags
authorMarcel van der Veldt <m.vanderveldt@outlook.com>
Mon, 17 Feb 2025 19:06:09 +0000 (20:06 +0100)
committerMarcel van der Veldt <m.vanderveldt@outlook.com>
Mon, 17 Feb 2025 19:06:09 +0000 (20:06 +0100)
music_assistant/helpers/tags.py

index 987cdf115d5760fc38dcbfc1d996c70d5c6d4c6f..cd802845f607c34480ae5a339fded851962a9fa4 100644 (file)
@@ -45,7 +45,7 @@ def split_items(org_str: str, allow_unsafe_splitters: bool = False) -> tuple[str
     if org_str is None:
         return ()
     if isinstance(org_str, list):
-        return (x.strip() for x in org_str)
+        return tuple(x.strip() for x in org_str)
     org_str = org_str.strip()
     if TAG_SPLITTER in org_str:
         return clean_tuple(org_str.split(TAG_SPLITTER))
@@ -427,7 +427,7 @@ async def async_parse_tags(input_file: str, file_size: int | None = None) -> Aud
     return await asyncio.to_thread(parse_tags, input_file, file_size)
 
 
-def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:
+def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:  # noqa: PLR0915
     """
     Parse tags from a media file (or URL). NOT Async friendly.
 
@@ -469,7 +469,6 @@ def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:
         if (
             not input_file.startswith("http")
             and input_file.endswith(".mp3")
-            and "musicbrainzrecordingid" not in tags.tags
             and os.path.isfile(input_file)
         ):
             # eyed3 is able to extract the musicbrainzrecordingid from the unique file id
@@ -482,6 +481,36 @@ def parse_tags(input_file: str, file_size: int | None = None) -> AudioTags:
                     if uf_id.owner_id == b"http://musicbrainz.org" and uf_id.uniq_id:
                         tags.tags["musicbrainzrecordingid"] = uf_id.uniq_id.decode()
                         break
+                if audiofile.tag.version == (2, 4, 0):
+                    # ffmpeg messes up reading ID3v2.4 tags from mp3 files, especially
+                    # on multi-item tags. We need to read the TXXX frames manually
+                    if frameset := audiofile.tag.frame_set.get(b"TXXX"):
+                        for raw_tag in frameset:
+                            if not hasattr(raw_tag, "description"):
+                                continue
+                            if raw_tag.description.upper() == "ARTISTS":
+                                tags.tags["artists"] = raw_tag.text.split("\x00\ufeff")
+                            if raw_tag.description == "MusicBrainz Artist Id":
+                                tags.tags["musicbrainzartistid"] = raw_tag.text.split("\x00\ufeff")
+                            if raw_tag.description == "MusicBrainz Album Artist Id":
+                                tags.tags["musicbrainzalbumartistid"] = raw_tag.text.split(
+                                    "\x00\ufeff"
+                                )
+                    if frameset := audiofile.tag.frame_set.get(b"TSOP"):
+                        for raw_tag in frameset:
+                            if not hasattr(raw_tag, "text"):
+                                continue
+                            tags.tags["artistsort"] = raw_tag.text.split("\x00\ufeff")
+                    if frameset := audiofile.tag.frame_set.get(b"TSO2"):
+                        for raw_tag in frameset:
+                            if not hasattr(raw_tag, "text"):
+                                continue
+                            tags.tags["albumartistsort"] = raw_tag.text.split("\x00\ufeff")
+                    if frameset := audiofile.tag.frame_set.get(b"TCON"):
+                        for raw_tag in frameset:
+                            if not hasattr(raw_tag, "text"):
+                                continue
+                            tags.tags["genre"] = raw_tag.text.split("\x00\ufeff")
             del audiofile
         return tags
     except subprocess.CalledProcessError as err: