From: Marek Skrobacki Date: Wed, 20 Nov 2024 17:54:40 +0000 (+0000) Subject: Fix: Improve accuracy of matching album in directory structure for local filesystem... X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=324d360f53eafac7dfb793053734e8abc2596990;p=music-assistant-server.git Fix: Improve accuracy of matching album in directory structure for local filesystem (#1779) --- diff --git a/music_assistant/providers/filesystem_local/helpers.py b/music_assistant/providers/filesystem_local/helpers.py index 57e87b93..7e15e550 100644 --- a/music_assistant/providers/filesystem_local/helpers.py +++ b/music_assistant/providers/filesystem_local/helpers.py @@ -67,6 +67,48 @@ def get_artist_dir( return matched_dir +def tokenize(input_str: str, delimiters: str) -> list[str]: + """Tokenizes the album names or paths.""" + normalised = re.sub(delimiters, "^^^", input_str) + return [x for x in normalised.split("^^^") if x != ""] + + +def _dir_contains_album_name(id3_album_name: str, directory_name: str) -> bool: + """Check if a directory name contains an album name. + + This function tokenizes both input strings using different delimiters and + checks if the album name is a substring of the directory name. + + First iteration considers the literal dash as one of the separators. The + second pass is to catch edge cases where the literal dash is part of the + album's name, not an actual separator. For example, an album like 'Aphex + Twin - Selected Ambient Works 85-92' would be correctly handled. + + Args: + id3_album_name (str): The album name to search for. + directory_name (str): The directory name to search in. + + Returns: + bool: True if the directory name contains the album name, False otherwise. + """ + for delims in ["[-_ ]", "[_ ]"]: + tokenized_album_name = tokenize(id3_album_name, delims) + tokenized_dirname = tokenize(directory_name, delims) + + # Exact match, potentially just on the album name + # in case artist's name is not included in id3_album_name + if all(token in tokenized_dirname for token in tokenized_album_name): + return True + + if len(tokenized_album_name) <= len(tokenized_dirname) and compare_strings( + "".join(tokenized_album_name), + "".join(tokenized_dirname[0 : len(tokenized_album_name)]), + False, + ): + return True + return False + + def get_album_dir(track_dir: str, album_name: str) -> str | None: """Return album/parent directory of a track.""" parentdir = track_dir @@ -82,6 +124,20 @@ def get_album_dir(track_dir: str, album_name: str) -> str | None: if compare_strings(album_name, dirname.split(" - ")[-1].split("(")[0], False): # account for ArtistName - AlbumName (Version) format in the directory name return parentdir + + if any(sep in dirname for sep in ["-", " ", "_"]) and album_name: + album_chunks = album_name.split(" - ", 1) + album_name_includes_artist = len(album_chunks) > 1 + just_album_name = album_chunks[1] if album_name_includes_artist else None + + # attempt matching using tokenized version of path and album name + # with _dir_contains_album_name() + if just_album_name and _dir_contains_album_name(just_album_name, dirname): + return parentdir + + if _dir_contains_album_name(album_name, dirname): + return parentdir + if compare_strings(album_name.split("(")[0], dirname, False): # account for AlbumName (Version) format in the album name return parentdir diff --git a/tests/providers/filesystem/test_helpers.py b/tests/providers/filesystem/test_helpers.py index 591a36ec..5145d9d9 100644 --- a/tests/providers/filesystem/test_helpers.py +++ b/tests/providers/filesystem/test_helpers.py @@ -62,17 +62,40 @@ def test_get_artist_dir() -> None: "/home/user/Music/Aphex Twin - Selected Ambient Works 85-92 (Remastered) - WEB", "/home/user/Music/Aphex Twin - Selected Ambient Works 85-92 (Remastered) - WEB", ), + # Test tokenizer - dirname with extras + ( + "Fokus - Prewersje", + "/home/user/Fokus-Prewersje-PL-WEB-FLAC-2021-PS_INT", + "/home/user/Fokus-Prewersje-PL-WEB-FLAC-2021-PS_INT", + ), + # Test tokenizer - dirname with version and extras + ( + "Layo And Bushwacka - Night Works", + "/home/music/Layo_And_Bushwacka-Night_Works_(Reissue)-(XLCD_154X)-FLAC-2003", + "/home/music/Layo_And_Bushwacka-Night_Works_(Reissue)-(XLCD_154X)-FLAC-2003", + ), + # Test tokenizer - extras and approximate match on diacratics + ( + "Łona i Webber - Wyślij Sobie Pocztówkę", + "/usr/others/Lona-Discography-PL-FLAC-2020-INT/Lona_I_Webber-Wyslij_Sobie_Pocztowke-PL-WEB-FLAC-2014-PS", + "/usr/others/Lona-Discography-PL-FLAC-2020-INT/Lona_I_Webber-Wyslij_Sobie_Pocztowke-PL-WEB-FLAC-2014-PS", + ), + ( + "NIC", + "/nas/downloads/others/Sokol-NIC-PL-WEB-FLAC-2021", + "/nas/downloads/others/Sokol-NIC-PL-WEB-FLAC-2021", + ), # Test album (version) format ( - "Selected Ambient Works 85-92", + "Aphex Twin - Selected Ambient Works 85-92", "/home/user/Music/Aphex Twin/Selected Ambient Works 85-92 (Remastered)", "/home/user/Music/Aphex Twin/Selected Ambient Works 85-92 (Remastered)", ), # Test album name in dir ( - "Selected Ambient Works 85-92", - "/home/user/Music/RandomDirWithSelected Ambient Works 85-92InIt", - "/home/user/Music/RandomDirWithSelected Ambient Works 85-92InIt", + "Aphex Twin - Selected Ambient Works 85-92", + "/home/user/Music/RandomDirWithAphex Twin - Selected Ambient Works 85-92InIt", + "/home/user/Music/RandomDirWithAphex Twin - Selected Ambient Works 85-92InIt", ), # Test no match (