Add AAC/M4A support to tag parsing (#3004)
authorOzGav <gavnosp@hotmail.com>
Mon, 26 Jan 2026 11:39:22 +0000 (22:39 +1100)
committerGitHub <noreply@github.com>
Mon, 26 Jan 2026 11:39:22 +0000 (12:39 +0100)
Fix AAC/M4A tag parsing in parse_tags_mutagen

Add comprehensive MP4/AAC tag parsing support to the mutagen fallback parser.
Previously only ID3 tags were handled, leaving M4A/AAC files with missing metadata.

Changes:
- Add _parse_mp4_tags() function to handle MP4Tags format
- Support basic tags: title, artist, album, albumartist, genre
- Support sort tags: titlesort, artistsort, albumsort, albumartistsort
- Support MusicBrainz IDs via freeform tags (----:com.apple.iTunes:*)
- Support track/disc numbers with totals stored separately
- Support date, lyrics, compilation flag, album type, and ReplayGain tags
- Add helper functions for decoding MP4 freeform tags (bytes to strings)
- Add M4A test fixture and test_parse_metadata_from_mp4tags test

Co-authored-by: Claude <noreply@anthropic.com>
music_assistant/helpers/tags.py
pyproject.toml
tests/core/test_tags.py
tests/fixtures/MyArtist - MyTitle.m4a [new file with mode: 0644]

index 9c88174af8012ea162b804f3d48d02ae0b951ec1..2623251ad34669f19cd3a1e46e5773adbd7776fe 100644 (file)
@@ -1,4 +1,4 @@
-"""Helpers/utilities to parse ID3 tags from audio files with ffmpeg."""
+"""Helpers/utilities to parse tags from audio files with ffmpeg and mutagen."""
 
 from __future__ import annotations
 
@@ -16,6 +16,7 @@ from typing import Any
 import mutagen
 from music_assistant_models.enums import AlbumType
 from music_assistant_models.errors import InvalidDataError
+from mutagen.mp4 import MP4Tags
 
 from music_assistant.constants import MASS_LOGGER_NAME, UNKNOWN_ARTIST
 from music_assistant.helpers.json import json_loads
@@ -603,59 +604,231 @@ def get_file_duration(input_file: str) -> float:
         raise InvalidDataError(error_msg) from err
 
 
-def parse_tags_mutagen(input_file: str) -> dict[str, Any]:
+def _decode_mp4_freeform_single(values: list[Any]) -> str:
+    """Decode a single-value MP4 freeform tag (bytes to string).
+
+    :param values: List of MP4FreeForm values (typically contains one item).
     """
-    Parse tags from an audio file using Mutagen.
+    if not values:
+        return ""
+    val = values[0]
+    if isinstance(val, bytes):
+        return val.decode("utf-8", errors="replace")
+    return str(val)
 
-    NOT Async friendly.
+
+def _decode_mp4_freeform_list(values: list[Any]) -> list[str]:
+    """Decode a multi-value MP4 freeform tag (bytes to strings).
+
+    :param values: List of MP4FreeForm values.
+    """
+    result = []
+    for val in values:
+        if isinstance(val, bytes):
+            result.append(val.decode("utf-8", errors="replace"))
+        else:
+            result.append(str(val))
+    return result
+
+
+def _parse_mp4_tags(tags: MP4Tags) -> dict[str, Any]:  # noqa: PLR0915
+    """Parse MP4/M4A/AAC tags from mutagen MP4Tags object.
+
+    :param tags: The MP4Tags object from mutagen.
+    """
+    result: dict[str, Any] = {}
+
+    # Basic text tags (single value - extract first element)
+    if "©nam" in tags:
+        result["title"] = tags["©nam"][0]
+    if "©ART" in tags:
+        result["artist"] = tags["©ART"][0]
+    if "aART" in tags:
+        result["albumartist"] = tags["aART"][0]
+    if "©alb" in tags:
+        result["album"] = tags["©alb"][0]
+
+    # Genre can have multiple values
+    if "©gen" in tags:
+        result["genre"] = list(tags["©gen"])
+
+    # Sort tags (single value)
+    if "sonm" in tags:
+        result["titlesort"] = tags["sonm"][0]
+    if "soal" in tags:
+        result["albumsort"] = tags["soal"][0]
+    # Sort tags (multi-value to match ID3 behavior)
+    if "soar" in tags:
+        result["artistsort"] = list(tags["soar"])
+    if "soaa" in tags:
+        result["albumartistsort"] = list(tags["soaa"])
+
+    # iTunes freeform tags for MusicBrainz IDs
+    # Single value tags
+    if "----:com.apple.iTunes:MusicBrainz Album Id" in tags:
+        result["musicbrainzalbumid"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:MusicBrainz Album Id"]
+        )
+    if "----:com.apple.iTunes:MusicBrainz Release Group Id" in tags:
+        result["musicbrainzreleasegroupid"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:MusicBrainz Release Group Id"]
+        )
+    if "----:com.apple.iTunes:MusicBrainz Track Id" in tags:
+        result["musicbrainztrackid"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:MusicBrainz Track Id"]
+        )
+    if "----:com.apple.iTunes:MusicBrainz Recording Id" in tags:
+        result["musicbrainzrecordingid"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:MusicBrainz Recording Id"]
+        )
+
+    # Multi-value tags (return as list to match ID3 behavior)
+    if "----:com.apple.iTunes:MusicBrainz Artist Id" in tags:
+        result["musicbrainzartistid"] = _decode_mp4_freeform_list(
+            tags["----:com.apple.iTunes:MusicBrainz Artist Id"]
+        )
+    if "----:com.apple.iTunes:MusicBrainz Album Artist Id" in tags:
+        result["musicbrainzalbumartistid"] = _decode_mp4_freeform_list(
+            tags["----:com.apple.iTunes:MusicBrainz Album Artist Id"]
+        )
+
+    # Additional freeform tags
+    if "----:com.apple.iTunes:ARTISTS" in tags:
+        result["artists"] = _decode_mp4_freeform_list(tags["----:com.apple.iTunes:ARTISTS"])
+    if "----:com.apple.iTunes:BARCODE" in tags:
+        result["barcode"] = _decode_mp4_freeform_list(tags["----:com.apple.iTunes:BARCODE"])
+    if "----:com.apple.iTunes:ISRC" in tags:
+        result["tsrc"] = _decode_mp4_freeform_list(tags["----:com.apple.iTunes:ISRC"])
+
+    # Track and disc numbers (MP4 stores as tuple: [(number, total)])
+    # NOTE: type ignores needed because MP4Tags from mutagen uses an untyped get() internally
+    if tags.get("trkn"):  # type: ignore[no-untyped-call]
+        track_info = tags["trkn"][0]
+        if track_info[0]:
+            result["track"] = str(track_info[0])
+        if len(track_info) > 1 and track_info[1]:
+            result["tracktotal"] = str(track_info[1])
+    if tags.get("disk"):  # type: ignore[no-untyped-call]
+        disc_info = tags["disk"][0]
+        if disc_info[0]:
+            result["disc"] = str(disc_info[0])
+        if len(disc_info) > 1 and disc_info[1]:
+            result["disctotal"] = str(disc_info[1])
+
+    # Date
+    if "©day" in tags:
+        result["date"] = tags["©day"][0]
+
+    # Lyrics
+    if "©lyr" in tags:
+        result["lyrics"] = tags["©lyr"][0]
+
+    # Compilation flag
+    if tags.get("cpil"):  # type: ignore[no-untyped-call]
+        result["compilation"] = "1" if tags["cpil"] else "0"
+
+    # Album type (MusicBrainz)
+    if "----:com.apple.iTunes:MusicBrainz Album Type" in tags:
+        result["musicbrainzalbumtype"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:MusicBrainz Album Type"]
+        )
+
+    # ReplayGain tags
+    if "----:com.apple.iTunes:REPLAYGAIN_TRACK_GAIN" in tags:
+        result["replaygaintrackgain"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:REPLAYGAIN_TRACK_GAIN"]
+        )
+    if "----:com.apple.iTunes:REPLAYGAIN_ALBUM_GAIN" in tags:
+        result["replaygainalbumgain"] = _decode_mp4_freeform_single(
+            tags["----:com.apple.iTunes:REPLAYGAIN_ALBUM_GAIN"]
+        )
+
+    return result
+
+
+def _parse_id3_tags(tags: dict[str, Any]) -> dict[str, Any]:
+    """Parse ID3 tags (MP3 files) from mutagen tags dict.
+
+    :param tags: Dictionary of ID3 tags from mutagen.
+    """
+    result: dict[str, Any] = {}
+
+    # Basic tags (single value)
+    if "TIT2" in tags:
+        result["title"] = tags["TIT2"].text[0]
+    if "TPE1" in tags:
+        result["artist"] = tags["TPE1"].text[0]
+    if "TPE2" in tags:
+        result["albumartist"] = tags["TPE2"].text[0]
+    if "TALB" in tags:
+        result["album"] = tags["TALB"].text[0]
+
+    # Genre (multi-value)
+    if "TCON" in tags:
+        result["genre"] = tags["TCON"].text
+
+    # Multi-value artist tag
+    if "TXXX:ARTISTS" in tags:
+        result["artists"] = tags["TXXX:ARTISTS"].text
+
+    # MusicBrainz tags (single value)
+    if "TXXX:MusicBrainz Album Id" in tags:
+        result["musicbrainzalbumid"] = tags["TXXX:MusicBrainz Album Id"].text[0]
+    if "TXXX:MusicBrainz Release Group Id" in tags:
+        result["musicbrainzreleasegroupid"] = tags["TXXX:MusicBrainz Release Group Id"].text[0]
+    if "UFID:http://musicbrainz.org" in tags:
+        result["musicbrainzrecordingid"] = tags["UFID:http://musicbrainz.org"].data.decode()
+    if "TXXX:MusicBrainz Track Id" in tags:
+        result["musicbrainztrackid"] = tags["TXXX:MusicBrainz Track Id"].text[0]
+
+    # MusicBrainz tags (multi-value)
+    if "TXXX:MusicBrainz Album Artist Id" in tags:
+        result["musicbrainzalbumartistid"] = tags["TXXX:MusicBrainz Album Artist Id"].text
+    if "TXXX:MusicBrainz Artist Id" in tags:
+        result["musicbrainzartistid"] = tags["TXXX:MusicBrainz Artist Id"].text
+
+    # Additional tags
+    if "TXXX:BARCODE" in tags:
+        result["barcode"] = tags["TXXX:BARCODE"].text
+    if "TXXX:TSRC" in tags:
+        result["tsrc"] = tags["TXXX:TSRC"].text
+
+    # Sort tags (multi-value to support multiple artists)
+    if "TSOP" in tags:
+        result["artistsort"] = tags["TSOP"].text
+    if "TSO2" in tags:
+        result["albumartistsort"] = tags["TSO2"].text
+
+    # Sort tags (single value)
+    if tags.get("TSOT"):
+        result["titlesort"] = tags["TSOT"].text[0]
+    if tags.get("TSOA"):
+        result["albumsort"] = tags["TSOA"].text[0]
+
+    return result
+
+
+def parse_tags_mutagen(input_file: str) -> dict[str, Any]:
+    """Parse tags from an audio file using Mutagen.
+
+    Supports ID3 tags (MP3) and MP4 tags (AAC/M4A/ALAC).
+
+    :param input_file: Path to the audio file.
     """
     result: dict[str, Any] = {}
     try:
-        # TODO: extend with more tags and file types!
-        # https://mutagen.readthedocs.io/en/latest/user/gettingstarted.html
-        tags = mutagen.File(input_file)  # type: ignore[attr-defined]
-        if tags is None or not tags.tags:
+        audio = mutagen.File(input_file)  # type: ignore[attr-defined]
+        if audio is None or not audio.tags:
             return result
-        tags = dict(tags.tags)
-        # ID3 tags
-        if "TIT2" in tags:
-            result["title"] = tags["TIT2"].text[0]
-        if "TPE1" in tags:
-            result["artist"] = tags["TPE1"].text[0]
-        if "TPE2" in tags:
-            result["albumartist"] = tags["TPE2"].text[0]
-        if "TALB" in tags:
-            result["album"] = tags["TALB"].text[0]
-        if "TCON" in tags:
-            result["genre"] = tags["TCON"].text
-        if "TXXX:ARTISTS" in tags:
-            result["artists"] = tags["TXXX:ARTISTS"].text
-        if "TXXX:MusicBrainz Album Id" in tags:
-            result["musicbrainzalbumid"] = tags["TXXX:MusicBrainz Album Id"].text[0]
-        if "TXXX:MusicBrainz Album Artist Id" in tags:
-            result["musicbrainzalbumartistid"] = tags["TXXX:MusicBrainz Album Artist Id"].text
-        if "TXXX:MusicBrainz Artist Id" in tags:
-            result["musicbrainzartistid"] = tags["TXXX:MusicBrainz Artist Id"].text
-        if "TXXX:MusicBrainz Release Group Id" in tags:
-            result["musicbrainzreleasegroupid"] = tags["TXXX:MusicBrainz Release Group Id"].text[0]
-        if "UFID:http://musicbrainz.org" in tags:
-            result["musicbrainzrecordingid"] = tags["UFID:http://musicbrainz.org"].data.decode()
-        if "TXXX:MusicBrainz Track Id" in tags:
-            result["musicbrainztrackid"] = tags["TXXX:MusicBrainz Track Id"].text[0]
-        if "TXXX:BARCODE" in tags:
-            result["barcode"] = tags["TXXX:BARCODE"].text
-        if "TXXX:TSRC" in tags:
-            result["tsrc"] = tags["TXXX:TSRC"].text
-        if "TSOP" in tags:
-            result["artistsort"] = tags["TSOP"].text
-        if "TSO2" in tags:
-            result["albumartistsort"] = tags["TSO2"].text
-        if tags.get("TSOT"):
-            result["titlesort"] = tags["TSOT"].text[0]
-        if tags.get("TSOA"):
-            result["albumsort"] = tags["TSOA"].text[0]
-
-        del tags
+
+        # Check if MP4/M4A/AAC file (uses MP4Tags)
+        if isinstance(audio.tags, MP4Tags):
+            result = _parse_mp4_tags(audio.tags)
+        else:
+            # ID3 tags (MP3) and other formats
+            tags_dict = dict(audio.tags)
+            result = _parse_id3_tags(tags_dict)
+
         return result
     except Exception as err:
         LOGGER.debug(f"Error parsing mutagen tags for {input_file}: {err}")
index 677201d3e2b2754f37d192013696952d3f363f73..808f56b3e48ef6ba90c2eb3790275e707f7686bd 100644 (file)
@@ -73,7 +73,7 @@ mass = "music_assistant.__main__:main"
 [tool.codespell]
 # explicit is misspelled in the iTunes API
 # "additionals" is a fixed field name from the Nicovideo API fixtures
-ignore-words-list = "provid,hass,followings,childs,explict,additionals,commitish,"
+ignore-words-list = "provid,hass,followings,childs,explict,additionals,commitish,nam,"
 skip = """*.js,*.svg,\
 music_assistant/providers/itunes_podcasts/itunes_country_codes.json,\
 """
index 663b8eb1d514146ddc29a59073bb504edf5e5903..acd0a11bcee0b1d5c8410ba4d18cb893db045e87 100644 (file)
@@ -1,4 +1,4 @@
-"""Tests for parsing ID3 tags functions."""
+"""Tests for parsing audio file tags (ID3, MP4/AAC, etc.)."""
 
 import pathlib
 
@@ -7,7 +7,8 @@ from music_assistant.helpers import tags
 
 RESOURCES_DIR = pathlib.Path(__file__).parent.parent.resolve().joinpath("fixtures")
 
-FILE_1 = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle.mp3"))
+FILE_MP3 = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle.mp3"))
+FILE_M4A = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle.m4a"))
 
 
 async def test_parse_metadata_from_id3tags() -> None:
@@ -44,6 +45,34 @@ async def test_parse_metadata_from_id3tags() -> None:
     assert _tags.year is None
 
 
+async def test_parse_metadata_from_mp4tags() -> None:
+    """Test parsing of metadata from MP4/AAC tags."""
+    filename = FILE_M4A
+    _tags = await tags.async_parse_tags(filename)
+    assert _tags.album == "MyAlbum"
+    assert _tags.title == "MyTitle"
+    assert _tags.album_artists == ("MyArtist",)
+    assert _tags.artists == ("MyArtist", "MyArtist2")
+    assert _tags.genres == ("Genre1", "Genre2")
+    assert _tags.musicbrainz_albumartistids == ("abcdefg",)
+    assert _tags.musicbrainz_artistids == ("abcdefg",)
+    assert _tags.musicbrainz_releasegroupid == "abcdefg"
+    assert _tags.musicbrainz_recordingid == "abcdefg"
+    # test track/disc from MP4 tuples
+    assert _tags.track == 5
+    assert _tags.disc == 1
+    # test total track/disc
+    assert _tags.tags.get("tracktotal") == "12"
+    assert _tags.tags.get("disctotal") == "2"
+    # test year
+    assert _tags.year == 2022
+    # test sort tags (artistsort/albumartistsort returned as lists to match ID3 behavior)
+    assert _tags.tags.get("titlesort") == "MyTitle Sort"
+    assert _tags.tags.get("artistsort") == ["MyArtist Sort"]  # type: ignore[comparison-overlap]
+    assert _tags.tags.get("albumsort") == "MyAlbum Sort"
+    assert _tags.tags.get("albumartistsort") == ["MyAlbumArtist Sort"]  # type: ignore[comparison-overlap]
+
+
 async def test_parse_metadata_from_filename() -> None:
     """Test parsing of parsing metadata from filename."""
     filename = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle without Tags.mp3"))
diff --git a/tests/fixtures/MyArtist - MyTitle.m4a b/tests/fixtures/MyArtist - MyTitle.m4a
new file mode 100644 (file)
index 0000000..52a6758
Binary files /dev/null and b/tests/fixtures/MyArtist - MyTitle.m4a differ