Gracefully skip files/folders with emoji names on SMB mounts (#3183)
author OzGav <gavnosp@hotmail.com>
Sat, 21 Feb 2026 23:29:56 +0000 (10:29 +1100)
committer GitHub <noreply@github.com>
Sat, 21 Feb 2026 23:29:56 +0000 (00:29 +0100)
music_assistant/providers/filesystem_local/__init__.py
music_assistant/providers/filesystem_local/helpers.py
music_assistant/providers/filesystem_smb/__init__.py

index d5d93e991a6078def1527380aa0a303cfb88b60e..c1d166c71d6e6095b1bbdc5c74cc474e7c23d61f 100644 (file)
@@ -9,7 +9,7 @@ import os
 import os.path
 import time
 import urllib.parse
-from collections.abc import AsyncGenerator, Iterator, Sequence
+from collections.abc import AsyncGenerator, Sequence
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
@@ -96,12 +96,12 @@ from .constants import (
     IsChapterFile,
 )
 from .helpers import (
-    IGNORE_DIRS,
     FileSystemItem,
     get_absolute_path,
     get_album_dir,
     get_artist_dir,
     get_relative_path,
+    recursive_iter,
     sorted_scandir,
 )
 
@@ -351,38 +351,13 @@ class LocalFileSystemProvider(MusicProvider):
 
         # NOTE: we do the entire traversing of the directory structure, including parsing tags
         # in a single executor thread to save the overhead of having to spin up tons of tasks
-        def listdir(path: str) -> Iterator[FileSystemItem]:
-            """Recursively traverse directory entries."""
-            for item in os.scandir(path):
-                # ignore invalid filenames
-                if item.name in IGNORE_DIRS or item.name.startswith((".", "_")):
-                    continue
-                if item.is_dir(follow_symlinks=False):
-                    yield from listdir(item.path)
-                elif item.is_file(follow_symlinks=False):
-                    # skip files without extension
-                    if "." not in item.name:
-                        continue
-                    ext = item.name.rsplit(".", 1)[1].lower()
-                    if ext not in SUPPORTED_EXTENSIONS:
-                        # skip unsupported file extension
-                        continue
-                    try:
-                        yield FileSystemItem.from_dir_entry(item, self.base_path)
-                    except OSError as err:
-                        # Skip files that cannot be stat'd (e.g., invalid encoding on SMB mounts)
-                        # This typically happens with emoji or special unicode characters
-                        self.logger.debug(
-                            "Skipping file %s due to stat error: %s",
-                            item.path,
-                            str(err),
-                        )
-
         def run_sync() -> None:
             """Run the actual sync (in an executor job)."""
             self.sync_running = True
             try:
-                for item in listdir(self.base_path):
+                for item in recursive_iter(
+                    self.base_path, self.base_path, SUPPORTED_EXTENSIONS, self.logger
+                ):
                     prev_checksum = file_checksums.get(item.relative_path)
                     if self._process_item(item, prev_checksum):
                         cur_filenames.add(item.relative_path)
index 30b6f6e9691b030470a6de48711aa78d13d1f2c0..eb31afc63bf12dffed2d9e14d7ebc3dfd477e4da 100644 (file)
@@ -2,12 +2,17 @@
 
 from __future__ import annotations
 
+import errno
+import logging
 import os
 import re
+from collections.abc import Iterator
 from dataclasses import dataclass
 
 from music_assistant.helpers.compare import compare_strings
 
+logger = logging.getLogger(__name__)
+
 IGNORE_DIRS = ("recycle", "Recently-Snaphot", "#recycle", "System Volume Information", "lost+found")
 
 
@@ -186,10 +191,10 @@ def get_album_dir(track_dir: str, album_name: str) -> str | None:
             if _dir_contains_album_name(album_name, dirname):
                 return parentdir
 
-        if compare_strings(album_name.split("(")[0], dirname, False):
+        if compare_strings(album_name.split("(", maxsplit=1)[0], dirname, False):
             # account for AlbumName (Version) format in the album name
             return parentdir
-        if compare_strings(album_name.split("(")[0], dirname.split(" - ")[-1], False):
+        if compare_strings(album_name.split("(", maxsplit=1)[0], dirname.split(" - ")[-1], False):
             # account for ArtistName - AlbumName (Version) format
             return parentdir
         if len(album_name) > 8 and album_name in dirname:
@@ -217,6 +222,68 @@ def get_absolute_path(base_path: str, path: str) -> str:
     return os.path.join(base_path, path)
 
 
+def recursive_iter(
+    path: str,
+    base_path: str,
+    supported_extensions: set[str],
+    log: logging.Logger,
+) -> Iterator[FileSystemItem]:
+    """Recursively traverse directory entries yielding supported files.
+
+    Entries named in IGNORE_DIRS, or whose name starts with "." or "_", are skipped.
+    Type checks use follow_symlinks=False, so symlinks are neither descended into
+    nor yielded. Files without an extension, or with an extension that is not in
+    supported_extensions, are skipped as well.
+
+    An OSError raised while opening a directory, while checking an entry's type or
+    while building its FileSystemItem is logged and the item is skipped, so a single
+    unreadable entry does not abort the whole traversal.
+
+    :param path: The directory path to scan.
+    :param base_path: The root base path for constructing relative paths.
+    :param supported_extensions: Set of file extensions to include (lowercase, no dot).
+    :param log: Logger instance to use for warnings/debug messages.
+    """
+    try:
+        scan_iter = os.scandir(path)
+    except OSError as err:
+        if err.errno == errno.EINVAL:
+            # EINVAL is treated as "name contains characters the mount cannot handle".
+            log.warning(
+                "Skipping directory '%s' - unsupported characters in path",
+                path,
+            )
+        else:
+            log.warning("Unable to scan directory %s: %s", path, err)
+        return
+    # The context manager closes the scandir handle even when the consumer stops
+    # iterating this generator early.
+    with scan_iter:
+        for item in scan_iter:
+            if item.name in IGNORE_DIRS or item.name.startswith((".", "_")):
+                continue
+            try:
+                is_dir = item.is_dir(follow_symlinks=False)
+                is_file = item.is_file(follow_symlinks=False)
+            except OSError as err:
+                if err.errno == errno.EINVAL:
+                    log.warning(
+                        "Skipping '%s' - unsupported characters in name",
+                        item.name,
+                    )
+                else:
+                    # Leave a trace for every skipped entry instead of dropping it
+                    # silently; mirrors the from_dir_entry handler below.
+                    log.debug(
+                        "Skipping %s due to OS error: %s",
+                        item.path,
+                        str(err),
+                    )
+                continue
+            if is_dir:
+                yield from recursive_iter(item.path, base_path, supported_extensions, log)
+            elif is_file:
+                # skip files without extension
+                if "." not in item.name:
+                    continue
+                ext = item.name.rsplit(".", 1)[1].lower()
+                if ext not in supported_extensions:
+                    # skip unsupported file extension
+                    continue
+                try:
+                    yield FileSystemItem.from_dir_entry(item, base_path)
+                except OSError as err:
+                    if err.errno == errno.EINVAL:
+                        log.warning(
+                            "Skipping '%s' - unsupported characters in name",
+                            item.name,
+                        )
+                    else:
+                        log.debug(
+                            "Skipping file %s due to OS error: %s",
+                            item.path,
+                            str(err),
+                        )
+
+
 def sorted_scandir(base_path: str, sub_path: str, sort: bool = False) -> list[FileSystemItem]:
     """
     Implement os.scandir that returns (optionally) sorted entries.
@@ -230,19 +297,44 @@ def sorted_scandir(base_path: str, sub_path: str, sort: bool = False) -> list[Fi
 
     if base_path not in sub_path:
         sub_path = os.path.join(base_path, sub_path)
-    items = []
-    for entry in os.scandir(sub_path):
-        # filter out invalid dirs and hidden files
-        if not (entry.is_dir(follow_symlinks=False) or entry.is_file(follow_symlinks=False)):
-            continue
-        if entry.name in IGNORE_DIRS or entry.name.startswith("."):
-            continue
-        try:
-            items.append(FileSystemItem.from_dir_entry(entry, base_path))
-        except OSError:
-            # Skip files that cannot be stat'd (e.g., invalid encoding on SMB mounts)
-            # This typically happens with emoji or special unicode characters
-            continue
+    items: list[FileSystemItem] = []
+    try:
+        entries = os.scandir(sub_path)
+    except OSError as err:
+        if err.errno == errno.EINVAL:
+            logger.warning(
+                "Skipping directory '%s' - unsupported characters in path",
+                sub_path,
+            )
+            return items
+        raise
+    with entries:
+        for entry in entries:
+            try:
+                is_dir = entry.is_dir(follow_symlinks=False)
+                is_file = entry.is_file(follow_symlinks=False)
+            except OSError as err:
+                if err.errno == errno.EINVAL:
+                    logger.warning(
+                        "Skipping '%s' - unsupported characters in name",
+                        entry.name,
+                    )
+                continue
+            if not (is_dir or is_file):
+                continue
+            if entry.name in IGNORE_DIRS or entry.name.startswith("."):
+                continue
+            try:
+                items.append(FileSystemItem.from_dir_entry(entry, base_path))
+            except OSError as err:
+                if err.errno == errno.EINVAL:
+                    logger.warning(
+                        "Skipping '%s' - unsupported characters in name",
+                        entry.name,
+                    )
+                else:
+                    logger.debug("Skipping '%s' due to OS error: %s", entry.name, err)
+                continue
 
     if sort:
         return sorted(
index cdbd033eb942c9eedc811ce4ccc9f786bf15ca31..28672f835ef81f254137d39b08b84bc589a79916 100644 (file)
@@ -321,11 +321,13 @@ class SMBFileSystemProvider(LocalFileSystemProvider):
         cache_mode = str(self.config.get_value(CONF_CACHE_MODE) or "loose")
         options.append(f"cache={cache_mode}")
 
-        # Case insensitive by default (standard for SMB) and other performance options
-        # Note: iocharset is omitted to allow CIFS native Unicode handling for emoji
-        # and other 4-byte UTF-8 characters.
+        # Case insensitive by default (standard for SMB) and other performance options.
+        # Note: emoji and other 4-byte UTF-8 characters (U+10000+) in folder/file names
+        # are NOT supported due to a Linux kernel limitation in the CIFS client's NLS layer.
+        # Items with such characters will be skipped during library sync.
         options.extend(
             [
+                "iocharset=utf8",
                 "nocase",
                 "file_mode=0755",
                 "dir_mode=0755",