From: Marcel van der Veldt Date: Mon, 25 Jul 2022 12:04:55 +0000 (+0200) Subject: Better handling of missing or malformed ID3 tags (#434) X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=31f0258c588b5779d603b1f3d37eefb3151210cc;p=music-assistant-server.git Better handling of missing or malformed ID3 tags (#434) * Better handling of missing or malformed ID3 tags * Add tests for parser logic * Enforce Various Artists name to be consistent --- diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9eca9d5f..80301834 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,6 +24,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + apt-get update + apt-get install ffmpeg python -m pip install --upgrade pip pip install -r requirements_dev.txt pre-commit install-hooks diff --git a/music_assistant/constants.py b/music_assistant/constants.py index 72168bb2..aa55eef2 100755 --- a/music_assistant/constants.py +++ b/music_assistant/constants.py @@ -1,3 +1,7 @@ """All constants for Music Assistant.""" ROOT_LOGGER_NAME = "music_assistant" + +UNKNOWN_ARTIST = "Unknown Artist" +VARIOUS_ARTISTS = "Various Artists" +VARIOUS_ARTISTS_ID = "89ad4ac3-39f7-470e-963a-56509c546377" diff --git a/music_assistant/controllers/music/albums.py b/music_assistant/controllers/music/albums.py index 8bb1aa25..76575cdd 100644 --- a/music_assistant/controllers/music/albums.py +++ b/music_assistant/controllers/music/albums.py @@ -4,10 +4,10 @@ from __future__ import annotations import asyncio from typing import List, Optional, Union +from music_assistant.constants import VARIOUS_ARTISTS from music_assistant.helpers.compare import compare_album, loose_compare_strings from music_assistant.helpers.database import TABLE_ALBUMS, TABLE_TRACKS from music_assistant.helpers.json import json_serializer -from music_assistant.helpers.tags import FALLBACK_ARTIST from music_assistant.models.enums import EventType, MusicProviderFeature, ProviderType from music_assistant.models.errors import MediaNotFoundError from music_assistant.models.event import MassEvent @@ -352,7 +352,7 @@ class AlbumsController(MediaControllerBase[Album]): # use intermediate set to prevent duplicates # filter various artists if multiple artists if len(album_artists) > 1: - album_artists = {x for x in album_artists if x.name != FALLBACK_ARTIST} + album_artists = {x for x in album_artists if (x.name != VARIOUS_ARTISTS)} return list(album_artists) async def _get_artist_mapping( diff --git a/music_assistant/controllers/music/artists.py b/music_assistant/controllers/music/artists.py index 489a3d2d..b973ac90 100644 --- a/music_assistant/controllers/music/artists.py +++ b/music_assistant/controllers/music/artists.py @@ -5,6 +5,8 @@ import itertools from time import time from typing import Any, Dict, List, Optional +from music_assistant.constants import VARIOUS_ARTISTS, VARIOUS_ARTISTS_ID +from music_assistant.helpers.compare import compare_strings from music_assistant.helpers.database import TABLE_ALBUMS, TABLE_ARTISTS, TABLE_TRACKS from music_assistant.helpers.json import json_serializer from music_assistant.models.enums import EventType, MusicProviderFeature, ProviderType @@ -223,6 +225,12 @@ class ArtistsController(MediaControllerBase[Artist]): """Add a new item record to the database.""" assert isinstance(item, Artist), "Not a full Artist object" assert item.provider_ids, "Artist is missing provider id(s)" + # enforce various artists name + id + if compare_strings(item.name, VARIOUS_ARTISTS): + item.musicbrainz_id = VARIOUS_ARTISTS_ID + if item.musicbrainz_id == VARIOUS_ARTISTS_ID: + item.name = VARIOUS_ARTISTS + async with self._db_add_lock: # always try to grab existing item by musicbrainz_id cur_item = None @@ -274,6 +282,12 @@ class ArtistsController(MediaControllerBase[Artist]): metadata = cur_item.metadata.update(item.metadata, item.provider.is_file()) provider_ids = {*cur_item.provider_ids, *item.provider_ids} + # enforce various artists name + id + if compare_strings(item.name, VARIOUS_ARTISTS): + item.musicbrainz_id = VARIOUS_ARTISTS_ID + if item.musicbrainz_id == VARIOUS_ARTISTS_ID: + item.name = VARIOUS_ARTISTS + await self.mass.database.update( self.db_table, {"item_id": item_id}, diff --git a/music_assistant/controllers/music/playlists.py b/music_assistant/controllers/music/playlists.py index 018578e3..2656d570 100644 --- a/music_assistant/controllers/music/playlists.py +++ b/music_assistant/controllers/music/playlists.py @@ -259,6 +259,7 @@ class PlaylistController(MediaControllerBase[Playlist]): self.db_table, {"item_id": item_id}, { + # always prefer name/owner from updated item here "name": item.name, "sort_name": item.sort_name, "owner": item.owner, diff --git a/music_assistant/controllers/music/radio.py b/music_assistant/controllers/music/radio.py index 0791c6a0..a64aa2fc 100644 --- a/music_assistant/controllers/music/radio.py +++ b/music_assistant/controllers/music/radio.py @@ -105,6 +105,7 @@ class RadioController(MediaControllerBase[Radio]): self.db_table, match, { + # always prefer name from updated item here "name": item.name, "sort_name": item.sort_name, "metadata": json_serializer(metadata), diff --git a/music_assistant/helpers/tags.py b/music_assistant/helpers/tags.py index d3ce7da9..3dbdfda1 100644 --- a/music_assistant/helpers/tags.py +++ b/music_assistant/helpers/tags.py @@ -4,31 +4,43 @@ from __future__ import annotations import json import os from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple, Union from requests import JSONDecodeError +from music_assistant.constants import UNKNOWN_ARTIST from music_assistant.helpers.process import AsyncProcess +from music_assistant.helpers.util import try_parse_int from music_assistant.models.errors import InvalidDataError -FALLBACK_ARTIST = "Various Artists" - -# allowed splitters for titles and artists strings -# NOTE: do not use '&' or '/' as splitter here as it will cause issues with artists -# actually having that in the name -SPLITTERS = (";", ",", "Featuring", " Feat. ", " Feat ", "feat.") +# the only multi-item splitter we accept is the semicolon, +# which is also the default in Musicbrainz Picard. +# the slash is also a common splitter but causes colissions with +# artists actually containing a slash in the name, such as ACDC +TAG_SPLITTER = ";" def split_items(org_str: str) -> Tuple[str]: """Split up a tags string by common splitter.""" - if isinstance(org_str, list): - return org_str if not org_str: return tuple() - for splitter in SPLITTERS: - if splitter in org_str: - return tuple((x.strip() for x in org_str.split(splitter))) - return (org_str,) + if isinstance(org_str, list): + return org_str + return tuple(x.strip() for x in org_str.split(TAG_SPLITTER)) + + +def split_artists(org_artists: Union[str, Tuple[str]]) -> Tuple[str]: + """Parse all artists from a string.""" + final_artists = set() + # when not using the multi artist tag, the artist string may contain + # multiple artistsin freeform, even featuring artists may be included in this + # string. Try to parse the featuring artists and seperate them. + splitters = ("featuring", " feat. ", " feat ", "feat.") + for item in split_items(org_artists): + for splitter in splitters: + for subitem in item.split(splitter): + final_artists.add(subitem.strip()) + return tuple(final_artists) @dataclass @@ -46,18 +58,6 @@ class AudioTags: has_cover_image: bool filename: str - @property - def artist(self) -> str: - """Return artist tag (as-is).""" - if tag := self.tags.get("artist"): - return tag - # fallback to parsing from filename - title = self.filename.rsplit(os.sep, 1)[-1].split(".")[0] - title_parts = title.split(" - ") - if len(title_parts) >= 2: - return title_parts[0].strip() - return FALLBACK_ARTIST - @property def title(self) -> str: """Return title tag (as-is).""" @@ -65,9 +65,10 @@ class AudioTags: return tag # fallback to parsing from filename title = self.filename.rsplit(os.sep, 1)[-1].split(".")[0] - title_parts = title.split(" - ") - if len(title_parts) >= 2: - return title_parts[1].strip() + if " - " in title: + title_parts = title.split(" - ") + if len(title_parts) >= 2: + return title_parts[1].strip() return title @property @@ -78,41 +79,63 @@ class AudioTags: @property def artists(self) -> Tuple[str]: """Return track artists.""" - return split_items(self.artist) + # prefer multi-artist tag + if tag := self.tags.get("artists"): + return split_items(tag) + # fallback to regular artist string + if tag := self.tags.get("artist"): + if ";" in tag: + return split_items(tag) + return split_artists(tag) + # fallback to parsing from filename + title = self.filename.rsplit(os.sep, 1)[-1].split(".")[0] + if " - " in title: + title_parts = title.split(" - ") + if len(title_parts) >= 2: + return split_artists(title_parts[0]) + return (UNKNOWN_ARTIST,) @property def album_artists(self) -> Tuple[str]: """Return (all) album artists (if any).""" - return split_items(self.tags.get("albumartist")) + # prefer multi-artist tag + if tag := self.tags.get("albumartists"): + return split_items(tag) + # fallback to regular artist string + if tag := self.tags.get("albumartist"): + if ";" in tag: + return split_items(tag) + return split_artists(tag) + return tuple() @property def genres(self) -> Tuple[str]: """Return (all) genres, if any.""" - return split_items(self.tags.get("genre", "")) + return split_items(self.tags.get("genre")) @property def disc(self) -> int | None: """Return disc tag if present.""" if tag := self.tags.get("disc"): - return int(tag.split("/")[0]) + return try_parse_int(tag.split("/")[0], None) return None @property def track(self) -> int | None: """Return track tag if present.""" if tag := self.tags.get("track"): - return int(tag.split("/")[0]) + return try_parse_int(tag.split("/")[0], None) return None @property def year(self) -> int | None: """Return album's year if present, parsed from date.""" if tag := self.tags.get("originalyear"): - return int(tag.split("-")[0]) - if tag := self.tags.get("otiginaldate"): - return int(tag.split("-")[0]) + return try_parse_int(tag.split("-")[0], None) + if tag := self.tags.get("originaldate"): + return try_parse_int(tag.split("-")[0], None) if tag := self.tags.get("date"): - return int(tag.split("-")[0]) + return try_parse_int(tag.split("-")[0], None) return None @property diff --git a/music_assistant/helpers/util.py b/music_assistant/helpers/util.py index 70931153..3b18a9f3 100755 --- a/music_assistant/helpers/util.py +++ b/music_assistant/helpers/util.py @@ -19,38 +19,31 @@ CALLBACK_TYPE = Callable[[], None] # pylint: enable=invalid-name -def filename_from_string(string): +def filename_from_string(string: str) -> str: """Create filename from unsafe string.""" keepcharacters = (" ", ".", "_") return "".join(c for c in string if c.isalnum() or c in keepcharacters).rstrip() -def try_parse_int(possible_int): +def try_parse_int(possible_int: Any, default: Optional[int] = 0) -> Optional[int]: """Try to parse an int.""" try: return int(possible_int) except (TypeError, ValueError): - return 0 + return default -async def iter_items(items): - """Fake async iterator for compatability reasons.""" - if not isinstance(items, list): - yield items - else: - for item in items: - yield item - - -def try_parse_float(possible_float): +def try_parse_float( + possible_float: Any, default: Optional[float] = 0.0 +) -> Optional[float]: """Try to parse a float.""" try: return float(possible_float) except (TypeError, ValueError): - return 0.0 + return default -def try_parse_bool(possible_bool): +def try_parse_bool(possible_bool: Any) -> str: """Try to parse a bool.""" if isinstance(possible_bool, bool): return possible_bool diff --git a/music_assistant/music_providers/filesystem.py b/music_assistant/music_providers/filesystem.py index 78c5f7b4..590cf782 100644 --- a/music_assistant/music_providers/filesystem.py +++ b/music_assistant/music_providers/filesystem.py @@ -15,8 +15,9 @@ import xmltodict from aiofiles.os import wrap from aiofiles.threadpool.binary import AsyncFileIO +from music_assistant.constants import VARIOUS_ARTISTS, VARIOUS_ARTISTS_ID from music_assistant.helpers.compare import compare_strings -from music_assistant.helpers.tags import FALLBACK_ARTIST, parse_tags, split_items +from music_assistant.helpers.tags import parse_tags, split_items from music_assistant.helpers.util import create_safe_string, parse_title_and_version from music_assistant.models.enums import MusicProviderFeature, ProviderType from music_assistant.models.errors import MediaNotFoundError, MusicAssistantError @@ -494,9 +495,9 @@ class FileSystemProvider(MusicProvider): self.logger.warning( "%s is missing ID3 tag [albumartist], using %s as fallback", track_path, - FALLBACK_ARTIST, + VARIOUS_ARTISTS, ) - album_artists = [await self._parse_artist(name=FALLBACK_ARTIST)] + album_artists = [await self._parse_artist(name=VARIOUS_ARTISTS)] track.album = await self._parse_album( tags.album, @@ -633,6 +634,9 @@ class FileSystemProvider(MusicProvider): provider_ids={ MediaItemProviderId(artist_item_id, self.type, self.id, url=artist_path) }, + musicbrainz_id=VARIOUS_ARTISTS_ID + if compare_strings(name, VARIOUS_ARTISTS) + else None, ) if not await self.exists(artist_path): diff --git a/requirements_dev.txt b/requirements_dev.txt index c0049194..754759f5 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,7 +4,13 @@ flake8==4.0.1 mypy==0.961 pydocstyle==6.1.1 pylint==2.14.5 -pytest==7.1.2 +pytest-aiohttp==0.3.0 pytest-cov==3.0.0 +pytest-freezegun==0.4.2 +pytest-socket==0.5.1 +pytest-test-groups==1.0.3 +pytest-sugar==0.9.4 pytest-timeout==2.1.0 +pytest-xdist==2.5.0 +pytest==7.1.2 pre-commit==2.20.0 diff --git a/tests/fixtures/MyArtist - MyTitle without Tags.mp3 b/tests/fixtures/MyArtist - MyTitle without Tags.mp3 new file mode 100644 index 00000000..6be8c437 Binary files /dev/null and b/tests/fixtures/MyArtist - MyTitle without Tags.mp3 differ diff --git a/tests/fixtures/MyArtist - MyTitle.mp3 b/tests/fixtures/MyArtist - MyTitle.mp3 new file mode 100644 index 00000000..f1b5988f Binary files /dev/null and b/tests/fixtures/MyArtist - MyTitle.mp3 differ diff --git a/tests/fixtures/test.mp3 b/tests/fixtures/test.mp3 new file mode 100644 index 00000000..6be8c437 Binary files /dev/null and b/tests/fixtures/test.mp3 differ diff --git a/tests/test_tags.py b/tests/test_tags.py new file mode 100644 index 00000000..87052005 --- /dev/null +++ b/tests/test_tags.py @@ -0,0 +1,75 @@ +"""Tests for parsing ID3 tags functions.""" + +import pathlib + +from music_assistant.helpers import tags + +RESOURCES_DIR = pathlib.Path(__file__).parent.resolve().joinpath("fixtures") + +FILE_1 = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle.mp3")) + + +async def test_parse_metadata_from_id3tags(): + """Test parsing of parsing metadata from ID3 tags.""" + filename = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle.mp3")) + _tags = await tags.parse_tags(filename) + assert _tags.album == "MyAlbum" + assert _tags.title == "MyTitle" + assert _tags.duration == 1 + assert _tags.album_artists == ("MyArtist",) + assert _tags.artists == ("MyArtist", "MyArtist2") + assert _tags.genres == ("Genre1", "Genre2") + assert _tags.musicbrainz_albumartistids == ("abcdefg",) + assert _tags.musicbrainz_artistids == ("abcdefg",) + assert _tags.musicbrainz_releasegroupid == "abcdefg" + assert _tags.musicbrainz_trackid == "abcdefg" + # test parsing disc/track number + _tags.tags["disc"] = "1" + assert _tags.disc == 1 + _tags.tags["disc"] = "1/1" + assert _tags.disc == 1 + _tags.tags["disc"] = "" + assert _tags.disc is None + # test parsing album year + _tags.tags["date"] = "2022" + assert _tags.year == 2022 + _tags.tags["date"] = "2022-05-05" + assert _tags.year == 2022 + _tags.tags["date"] = "blah" + assert _tags.year is None + _tags.tags["date"] = "" + assert _tags.year is None + _tags.tags.pop("date", None) + assert _tags.year is None + + +async def test_parse_metadata_from_filename(): + """Test parsing of parsing metadata from filename.""" + filename = str(RESOURCES_DIR.joinpath("MyArtist - MyTitle without Tags.mp3")) + _tags = await tags.parse_tags(filename) + assert _tags.album is None + assert _tags.title == "MyTitle without Tags" + assert _tags.duration == 1 + assert _tags.album_artists == tuple() + assert _tags.artists == ("MyArtist",) + assert _tags.genres == tuple() + assert _tags.musicbrainz_albumartistids == tuple() + assert _tags.musicbrainz_artistids == tuple() + assert _tags.musicbrainz_releasegroupid is None + assert _tags.musicbrainz_trackid is None + + +async def test_parse_metadata_from_invalid_filename(): + """Test parsing of parsing metadata from (invalid) filename.""" + filename = str(RESOURCES_DIR.joinpath("test.mp3")) + _tags = await tags.parse_tags(filename) + assert _tags.album is None + assert _tags.title == "test" + assert _tags.duration == 1 + assert _tags.album_artists == tuple() + assert _tags.artists == (tags.UNKNOWN_ARTIST,) + assert _tags.genres == tuple() + assert _tags.musicbrainz_albumartistids == tuple() + assert _tags.musicbrainz_artistids == tuple() + assert _tags.musicbrainz_releasegroupid is None + assert _tags.musicbrainz_trackid is None