return clean_tuple((org_str,))
+# Artist splitting logic:
+# When not using the multi-artist tag (ARTISTS), the artist string may contain
+# multiple artists in freeform. Featuring artists may also be included in this
+# string. We parse and separate them based on common splitter patterns.
+#
+# We use the MusicBrainz Artist ID count as a guide for how many artists to extract:
+# - 0 IDs: only split on "featuring" splitters (to capture feat. artists in DB)
+# - 1 ID: don't split at all (single artist confirmed)
+# - 2+ IDs: split on featuring first, then extra splitters until we reach the target count
+#
+# TODO: If a MusicBrainz mirror/local database was available, artist names could be
+# looked up directly using the MB Artist IDs from the tags, eliminating the need for
+# ARTISTS tag parsing or ARTIST tag splitting entirely.
+#
+# Featuring splitters - always split on these to capture featuring artists in the database
+FEATURING_SPLITTERS = [
+ " featuring ",
+ " Featuring ",
+ " feat. ",
+ " Feat. ",
+ " feat ",
+ " Feat ",
+ " duet with ",
+ " Duet With ",
+ " ft. ",
+ " Ft. ",
+ " vs. ",
+ " Vs. ",
+]
+
+# Extra splitters - only use these when we have MB ID evidence of multiple artists
+EXTRA_SPLITTERS = [" & ", ", ", " + ", " with ", " With "]
+
+
+def _split_on_featuring(item: str) -> list[str]:
+ """Split a string on featuring splitters, returns list of parts."""
+ for splitter in FEATURING_SPLITTERS:
+ if splitter in item:
+ parts = []
+ for subitem in item.split(splitter):
+ clean_item = subitem.strip()
+ if clean_item:
+ # Recursively process each part for nested featuring splitters
+ parts.extend(_split_on_featuring(clean_item))
+ return parts
+ return [item]
+
+
+def _split_to_target_count(
+ artists: list[str],
+ expected_count: int,
+ org_artists: str | tuple[str, ...],
+) -> list[str]:
+ """Split artists on extra splitters to reach expected count.
+
+ :param artists: List of artists after featuring splits.
+ :param expected_count: Target number of artists.
+ :param org_artists: Original input for logging.
+ """
+ current_artists = list(artists)
+ stopped_early = False
+
+ # Keep iterating until we reach the target or can't split anymore
+ while len(current_artists) < expected_count:
+ made_progress = False
+
+ for i, item in enumerate(current_artists):
+ if len(current_artists) >= expected_count:
+ break
+
+ for splitter in EXTRA_SPLITTERS:
+ if splitter not in item:
+ continue
+
+ parts = [p.strip() for p in item.split(splitter) if p.strip()]
+ if len(parts) <= 1:
+ continue
+
+ potential_count = len(current_artists) - 1 + len(parts)
+
+ if potential_count <= expected_count:
+ # Safe to split fully - replace item with its parts
+ current_artists = current_artists[:i] + parts + current_artists[i + 1 :]
+ made_progress = True
+ else:
+ # Splitting would exceed target - do partial split
+ needed = expected_count - len(current_artists) + 1
+ if needed >= 2:
+ new_parts = [*parts[: needed - 1], splitter.join(parts[needed - 1 :])]
+ current_artists = current_artists[:i] + new_parts + current_artists[i + 1 :]
+ made_progress = True
+ stopped_early = True
+ break # Only use first matching splitter for this item
+
+ if made_progress:
+ break # Restart the outer loop with updated list
+
+ if not made_progress:
+ break # No more splitting possible
+
+ # Remove duplicates while preserving order
+ seen: set[str] = set()
+ final_artists = []
+ for artist in current_artists:
+ if artist and artist not in seen:
+ seen.add(artist)
+ final_artists.append(artist)
+
+ if stopped_early:
+ LOGGER.warning(
+ "Artist splitting stopped early to match expected count %d: '%s'",
+ expected_count,
+ org_artists,
+ )
+ elif len(final_artists) < expected_count:
+ LOGGER.warning(
+ "Could not split artist string to match expected count %d (got %d): '%s'",
+ expected_count,
+ len(final_artists),
+ org_artists,
+ )
+
+ return final_artists
+
+
def split_artists(
- org_artists: str | tuple[str, ...], allow_extra_splitters: bool = False
+ org_artists: str | tuple[str, ...],
+ expected_count: int | None = None,
) -> tuple[str, ...]:
- """Parse all artists from a string."""
- final_artists: list[str] = []
- # when not using the multi artist tag, the artist string may contain
- # multiple artists in freeform, even featuring artists may be included in this
- # string. Try to parse the featuring artists and separate them.
- splitters = [
- " featuring ",
- " feat. ",
- " feat ",
- " duet with ",
- " with ",
- " ft. ",
- " vs. ",
- ]
- splitters += [x.title() for x in splitters]
- if allow_extra_splitters:
- splitters += [" & ", ", ", " + "]
+ """Parse artists from a string, guided by expected artist count.
+
+ :param org_artists: The artist string or tuple of strings to parse.
+ :param expected_count: Expected number of artists (typically from MB artist IDs).
+ If None or 0: only split on "featuring" splitters, no extra splitting.
+ If 1: return as-is without any splitting.
+ If > 1: split on featuring splitters first, then extra splitters to reach target.
+ """
artists = split_items(org_artists, allow_unsafe_splitters=False)
- for item in artists:
- for splitter in splitters:
- if splitter not in item:
- continue
- for subitem in item.split(splitter):
- clean_item = subitem.strip()
- if clean_item and clean_item not in final_artists:
- final_artists.append(subitem.strip())
- if not final_artists:
- # none of the extra splitters was found
+
+ # If expected_count is 1, return as-is without any splitting
+ if expected_count == 1:
return artists
- return tuple(final_artists)
+
+ # Step 1: Always split on featuring splitters
+ final_artists: list[str] = []
+ for item in artists:
+ for part in _split_on_featuring(item):
+ if part and part not in final_artists:
+ final_artists.append(part)
+
+ # Step 2: If no expected_count or already at/above target, we're done
+ if not expected_count or expected_count <= 1 or len(final_artists) >= expected_count:
+ return tuple(final_artists) if final_artists else artists
+
+ # Step 3: Need more artists - split on extra splitters to reach expected_count
+ final_artists = _split_to_target_count(final_artists, expected_count, org_artists)
+
+ return tuple(final_artists) if final_artists else artists
@dataclass
@property
def artists(self) -> tuple[str, ...]:
"""Return track artists."""
- # prefer multi-artist tag
+ # prefer multi-artist tag (ARTISTS plural)
if tag := self.tags.get("artists"):
- return split_items(tag)
+ artists = split_items(tag)
+ # Warn if ARTISTS tag count doesn't match MB Artist ID count
+ mb_id_count = len(self.musicbrainz_artistids)
+ if mb_id_count and mb_id_count != len(artists):
+ LOGGER.warning(
+ "ARTISTS tag count (%d) does not match MusicBrainz Artist ID count (%d): %s",
+ len(artists),
+ mb_id_count,
+ tag,
+ )
+ return artists
# fallback to regular artist string
if tag := self.tags.get("artist"):
if TAG_SPLITTER in tag:
return split_items(tag)
- if len(self.musicbrainz_artistids) > 1:
- # special case: artist noted as 2 artists with ampersand or other splitter
- # but with 2 mb ids so they should be treated as 2 artists
- # example: John Travolta & Olivia Newton John on the Grease album
- return split_artists(tag, allow_extra_splitters=True)
-
- # Check if we have evidence of a SINGLE artist (should NOT split)
- has_single_mb_id = len(self.musicbrainz_artistids) == 1
- artists_plural = self.tags.get("artists", "")
- has_single_in_artists_tag = artists_plural and TAG_SPLITTER not in artists_plural
-
- if has_single_mb_id or has_single_in_artists_tag:
- # Single artist confirmed by either single MB ID or ARTISTS tag without semicolons
- # Return as-is without splitting to avoid incorrectly splitting artist names
- # containing "with", "featuring", etc.
- # Example: "Jerk With a Bomb" should not be split into "Jerk" and "a Bomb"
- return (tag,)
-
- # No evidence of single artist, proceed with splitting
- return split_artists(tag)
+ # Use MB artist ID count to guide splitting
+ # - 0 IDs: only split on "feat." etc., not on "&" or ","
+ # - 1 ID: don't split at all
+ # - 2+ IDs: split to match the expected count
+ mb_id_count = len(self.musicbrainz_artistids)
+ return split_artists(tag, expected_count=mb_id_count if mb_id_count else None)
# fallback to parsing from filename
title = self.filename.rsplit(os.sep, 1)[-1].split(".")[0]
if " - " in title:
title_parts = title.split(" - ")
if len(title_parts) >= 2:
- return split_artists(title_parts[0])
+ # No MB IDs from filename, only split on featuring splitters
+ return split_artists(title_parts[0], expected_count=None)
return (UNKNOWN_ARTIST,)
@property
if tag := self.tags.get("writer"):
if TAG_SPLITTER in tag:
return split_items(tag)
- return split_artists(tag)
+ # No MB IDs for writers, only split on featuring splitters
+ return split_artists(tag, expected_count=None)
return ()
@property
def album_artists(self) -> tuple[str, ...]:
"""Return (all) album artists (if any)."""
- # prefer multi-artist tag
+ # prefer multi-artist tag (ALBUMARTISTS plural)
if tag := self.tags.get("albumartists"):
- return split_items(tag)
- # fallback to regular artist string
+ artists = split_items(tag)
+ # Warn if ALBUMARTISTS tag count doesn't match MB Album Artist ID count
+ mb_id_count = len(self.musicbrainz_albumartistids)
+ if mb_id_count and mb_id_count != len(artists):
+ LOGGER.warning(
+ "ALBUMARTISTS tag count (%d) does not match MusicBrainz Album Artist ID "
+ "count (%d): %s",
+ len(artists),
+ mb_id_count,
+ tag,
+ )
+ return artists
+ # fallback to regular album artist string
if tag := self.tags.get("albumartist"):
if TAG_SPLITTER in tag:
return split_items(tag)
- if len(self.musicbrainz_albumartistids) > 1:
- # special case: album artist noted as 2 artists with ampersand or other splitter
- # but with 2 mb ids so they should be treated as 2 artists
- # example: John Travolta & Olivia Newton John on the Grease album
- return split_artists(tag, allow_extra_splitters=True)
-
- # Check if we have evidence of a SINGLE album artist (should NOT split)
- has_single_mb_id = len(self.musicbrainz_albumartistids) == 1
- albumartists_plural = self.tags.get("albumartists", "")
- has_single_in_albumartists_tag = (
- albumartists_plural and TAG_SPLITTER not in albumartists_plural
- )
-
- if has_single_mb_id or has_single_in_albumartists_tag:
- # Single album artist confirmed by either single MB ID or ALBUMARTISTS tag
- # without semicolons. Return as-is without splitting.
- return (tag,)
-
- # No evidence of single artist, proceed with splitting
- return split_artists(tag)
+ # Use MB album artist ID count to guide splitting
+ mb_id_count = len(self.musicbrainz_albumartistids)
+ return split_artists(tag, expected_count=mb_id_count if mb_id_count else None)
return ()
@property
from music_assistant.constants import UNKNOWN_ARTIST
from music_assistant.helpers import tags
+from music_assistant.helpers.tags import split_artists
RESOURCES_DIR = pathlib.Path(__file__).parent.parent.resolve().joinpath("fixtures")
assert _tags.musicbrainz_artistids == ()
assert _tags.musicbrainz_releasegroupid is None
assert _tags.musicbrainz_recordingid is None
+
+
+def test_split_artists_with_expected_count() -> None:
+ """Test splitting artists guided by expected count (from MB IDs)."""
+ # With expected_count=3, should split on extra splitters to reach target
+ result = split_artists("Shabson, Krgovich & Harris", expected_count=3)
+ assert result == ("Shabson", "Krgovich", "Harris")
+
+ # With expected_count=3, ampersands should split
+ result = split_artists("Shabson & Krgovich & Harris", expected_count=3)
+ assert result == ("Shabson", "Krgovich", "Harris")
+
+ # With expected_count=3, commas should split
+ result = split_artists("Shabson, Krgovich, Harris", expected_count=3)
+ assert result == ("Shabson", "Krgovich", "Harris")
+
+ # With expected_count=1, should NOT split at all
+ result = split_artists("Shabson & Krgovich", expected_count=1)
+ assert result == ("Shabson & Krgovich",)
+
+ # With expected_count=None (no MB IDs), should NOT split on extra splitters
+ result = split_artists("Shabson & Krgovich", expected_count=None)
+ assert result == ("Shabson & Krgovich",)
+
+ # With expected_count=0 (no MB IDs), should NOT split on extra splitters
+ result = split_artists("Shabson & Krgovich", expected_count=0)
+ assert result == ("Shabson & Krgovich",)
+
+
+def test_split_artists_featuring() -> None:
+ """Test that featuring splitters always work regardless of expected_count."""
+ # "feat." should always split, even with no expected_count
+ result = split_artists("John Lennon feat. Yoko Ono", expected_count=None)
+ assert result == ("John Lennon", "Yoko Ono")
+
+ # "feat." should split even with expected_count=1 (featuring overrides)
+ # Actually, expected_count=1 means single artist, so we return as-is
+ result = split_artists("John Lennon feat. Yoko Ono", expected_count=1)
+ assert result == ("John Lennon feat. Yoko Ono",)
+
+ # "featuring" should work
+ result = split_artists("Artist A featuring Artist B", expected_count=None)
+ assert result == ("Artist A", "Artist B")
+
+ # "ft." should work
+ result = split_artists("Artist A ft. Artist B", expected_count=None)
+ assert result == ("Artist A", "Artist B")
+
+
+def test_split_artists_no_oversplit() -> None:
+ """Test that split_artists stops at expected_count and doesn't over-split."""
+ # Hall & Oates is a duo, with 2 MB IDs we should split on feat. first
+ # and get exactly 2 artists
+ result = split_artists("Hall & Oates feat. David Ruffin", expected_count=2)
+ assert result == ("Hall & Oates", "David Ruffin")
+
+ # With 3 MB IDs, we should split further
+ result = split_artists("Hall & Oates feat. David Ruffin", expected_count=3)
+ assert result == ("Hall", "Oates", "David Ruffin")
+
+ # Simon & Garfunkel with 1 MB ID (the duo) should stay as one
+ result = split_artists("Simon & Garfunkel", expected_count=1)
+ assert result == ("Simon & Garfunkel",)
+
+ # Simon & Garfunkel with 2 MB IDs (Paul + Art) should split
+ result = split_artists("Simon & Garfunkel", expected_count=2)
+ assert result == ("Simon", "Garfunkel")
+
+
+def test_split_artists_with_not_split() -> None:
+ """Test that 'with' is only split when we have MB ID evidence."""
+ # "with" should NOT split without expected_count (could be artist name)
+ result = split_artists("Jerk With a Bomb", expected_count=None)
+ assert result == ("Jerk With a Bomb",)
+
+ # "with" should NOT split with expected_count=1
+ result = split_artists("Jerk With a Bomb", expected_count=1)
+ assert result == ("Jerk With a Bomb",)
+
+ # "with" SHOULD split when expected_count=2 indicates multiple artists
+ result = split_artists("Artist A with Artist B", expected_count=2)
+ assert result == ("Artist A", "Artist B")