From 119d7a9905f6c6e240efbe9b7e319aba4fdec34c Mon Sep 17 00:00:00 2001 From: Marcel van der Veldt Date: Tue, 10 May 2022 21:46:40 +0200 Subject: [PATCH] Improve filesystem provider (#301) * address issues in filesystem provider * fix get artist tracks * add support for embedded images * fix artist albums --- music_assistant/controllers/music/albums.py | 1 + music_assistant/helpers/cache.py | 19 +- music_assistant/helpers/database.py | 27 +- music_assistant/models/media_items.py | 1 + music_assistant/providers/filesystem.py | 283 ++++++++++++-------- 5 files changed, 180 insertions(+), 151 deletions(-) diff --git a/music_assistant/controllers/music/albums.py b/music_assistant/controllers/music/albums.py index 188a4a93..827da2e7 100644 --- a/music_assistant/controllers/music/albums.py +++ b/music_assistant/controllers/music/albums.py @@ -118,6 +118,7 @@ class AlbumsController(MediaControllerBase[Album]): return await self.update_db_item(cur_item.item_id, album) # insert new album + assert album.artist if album.artist.musicbrainz_id and album.artist.provider != "database": album_artist = await self.mass.music.artists.add_db_item(album.artist) else: diff --git a/music_assistant/helpers/cache.py b/music_assistant/helpers/cache.py index fff87ef1..53284e21 100644 --- a/music_assistant/helpers/cache.py +++ b/music_assistant/helpers/cache.py @@ -6,10 +6,9 @@ import functools import json import time +from music_assistant.helpers.database import TABLE_CACHE from music_assistant.helpers.typing import MusicAssistant -DB_TABLE = "cache" - class Cache: """Basic cache using both memory and database.""" @@ -22,12 +21,6 @@ class Cache: async def setup(self) -> None: """Async initialize of cache module.""" - # prepare database - async with self.mass.database.get_db() as _db: - await _db.execute( - f"""CREATE TABLE IF NOT EXISTS {DB_TABLE}( - key TEXT UNIQUE, expires INTEGER, data TEXT, checksum INTEGER)""" - ) self.__schedule_cleanup_task() async def get(self, cache_key, checksum="", default=None): @@ -50,7 +43,7 @@ class Cache: ): return cache_data[0] # fall back to db cache - if db_row := await self.mass.database.get_row(DB_TABLE, {"key": cache_key}): + if db_row := await self.mass.database.get_row(TABLE_CACHE, {"key": cache_key}): if ( not checksum or db_row["checksum"] == checksum @@ -80,26 +73,26 @@ class Cache: checksum = self._get_checksum(checksum) expires = int(time.time() + expiration) self._mem_cache[cache_key] = (data, checksum, expires) - if (time.time() - expires) < 3600 * 4: + if (expires - time.time()) < 3600 * 4: # do not cache items in db with short expiration return data = await asyncio.get_running_loop().run_in_executor(None, json.dumps, data) await self.mass.database.insert_or_replace( - DB_TABLE, + TABLE_CACHE, {"key": cache_key, "expires": expires, "checksum": checksum, "data": data}, ) async def delete(self, cache_key): """Delete data from cache.""" self._mem_cache.pop(cache_key, None) - await self.mass.database.delete(DB_TABLE, {"key": cache_key}) + await self.mass.database.delete(TABLE_CACHE, {"key": cache_key}) async def auto_cleanup(self): """Sceduled auto cleanup task.""" # for now we simply reset the memory cache self._mem_cache = {} cur_timestamp = int(time.time()) - for db_row in await self.mass.database.get_rows(DB_TABLE): + for db_row in await self.mass.database.get_rows(TABLE_CACHE): # clean up db cache object only if expired if db_row["expires"] < cur_timestamp: await self.delete(db_row["key"]) diff --git a/music_assistant/helpers/database.py b/music_assistant/helpers/database.py index 7c66ed72..1c730ab2 100755 --- a/music_assistant/helpers/database.py +++ b/music_assistant/helpers/database.py @@ -11,7 +11,7 @@ from music_assistant.helpers.typing import MusicAssistant # pylint: disable=invalid-name -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 7 TABLE_PROV_MAPPINGS = "provider_mappings" TABLE_TRACK_LOUDNESS = "track_loudness" @@ -185,33 +185,18 @@ class Database: # always create db tables if they don't exist to prevent errors trying to access them later await self.__create_database_tables(db) - if prev_version < 4: - # schema version 3: too many breaking changes, rebuild db + if prev_version < 7: + # refactored file provider, start clean just in case. + await db.execute("DROP TABLE IF EXISTS filesystem_mappings") await db.execute(f"DROP TABLE IF EXISTS {TABLE_ARTISTS}") await db.execute(f"DROP TABLE IF EXISTS {TABLE_ALBUMS}") await db.execute(f"DROP TABLE IF EXISTS {TABLE_TRACKS}") await db.execute(f"DROP TABLE IF EXISTS {TABLE_PLAYLISTS}") await db.execute(f"DROP TABLE IF EXISTS {TABLE_RADIOS}") await db.execute(f"DROP TABLE IF EXISTS {TABLE_PROV_MAPPINGS}") - await db.execute(f"DROP TABLE IF EXISTS {TABLE_CACHE}") # recreate missing tables await self.__create_database_tables(db) - if prev_version < 5: - # delete player_settings table: use generic settings table instead - await db.execute("DROP TABLE IF EXISTS queue_settings") - # recreate table - await self.__create_database_tables(db) - - if prev_version < 6: - # recreate radio items due to some changes - await db.execute(f"DROP TABLE IF EXISTS {TABLE_RADIOS}") - # recreate table - await self.__create_database_tables(db) - match = {"media_type": "radio"} - if await self.get_count(TABLE_PROV_MAPPINGS, match): - await self.delete(TABLE_PROV_MAPPINGS, match, db=db) - # store current schema version await self.set_setting("version", str(SCHEMA_VERSION), db=db) @@ -311,3 +296,7 @@ class Database: provider_ids json );""" ) + await db.execute( + f"""CREATE TABLE IF NOT EXISTS {TABLE_CACHE}( + key TEXT UNIQUE, expires INTEGER, data TEXT, checksum INTEGER)""" + ) diff --git a/music_assistant/models/media_items.py b/music_assistant/models/media_items.py index 2fe9b9d8..58236035 100755 --- a/music_assistant/models/media_items.py +++ b/music_assistant/models/media_items.py @@ -96,6 +96,7 @@ class ImageType(Enum): CUTOUT = "cutout" BACK = "back" CDART = "cdart" + EMBEDDED_THUMB = "embedded_thumb" OTHER = "other" diff --git a/music_assistant/providers/filesystem.py b/music_assistant/providers/filesystem.py index 9a819f55..e943a75a 100644 --- a/music_assistant/providers/filesystem.py +++ b/music_assistant/providers/filesystem.py @@ -1,11 +1,12 @@ """Filesystem musicprovider support for MusicAssistant.""" from __future__ import annotations +import base64 import os from typing import List, Optional, Tuple import aiofiles -from tinytag import TinyTag +from tinytag.tinytag import TinyTag from music_assistant.helpers.compare import compare_strings from music_assistant.helpers.util import parse_title_and_version, try_parse_int @@ -15,6 +16,8 @@ from music_assistant.models.media_items import ( AlbumType, Artist, ContentType, + ImageType, + MediaItemImage, MediaItemProviderId, MediaItemType, MediaQuality, @@ -27,17 +30,20 @@ from music_assistant.models.media_items import ( from music_assistant.models.provider import MusicProvider -def split_items(org_str: str) -> Tuple[str]: - """Split up a tag string by common splitter.""" +def split_items(org_str: str, splitters: Tuple[str] = None) -> Tuple[str]: + """Split up a tags string by common splitter.""" + if splitters is None: + splitters = ("/", ";", ",") if org_str is None: return tuple() - for splitter in ["/", ";", ","]: + for splitter in splitters: if splitter in org_str: return tuple((x.strip() for x in org_str.split(splitter))) return (org_str,) -DB_TABLE = "filesystem_mappings" +FALLBACK_ARTIST = "Various Artists" +ARTIST_SPLITTERS = (";", ",", "Featuring", " Feat. ", " Feat ", "feat.", " & ") class FileSystemProvider(MusicProvider): @@ -80,16 +86,6 @@ class FileSystemProvider(MusicProvider): raise FileNotFoundError( f"Playlist Directory {self._playlists_dir} does not exist" ) - # simple db table to keep a mapping of filename to id - async with self.mass.database.get_db() as _db: - await _db.execute( - f"""CREATE TABLE IF NOT EXISTS {DB_TABLE}( - item_id INTEGER PRIMARY KEY AUTOINCREMENT, - filename TEXT NOT NULL, - media_type TEXT NOT NULL, - UNIQUE(filename, media_type) - );""" - ) async def search( self, search_query: str, media_types=Optional[List[MediaType]], limit: int = 5 @@ -121,31 +117,47 @@ class FileSystemProvider(MusicProvider): """Retrieve all library artists.""" result = [] cur_ids = set() - for track in await self.get_library_tracks(False): - if track.album is not None and track.album.artist is not None: - if track.album.artist.item_id not in cur_ids: - result.append(track.album.artist) - cur_ids.add(track.album.artist.item_id) + # for the sake of simplicity we only iterate over the files in one location only, + # which is the library tracks where we recursively enumerate the directory structure + # library artists = unique album artists across all tracks + # the track listing is cached so this should be (pretty) fast + for track in await self.get_library_tracks(True): + if track.album is None or track.album is None: + continue + if track.album.artist.item_id in cur_ids: + continue + result.append(track.album.artist) + cur_ids.add(track.album.artist.item_id) return result async def get_library_albums(self) -> List[Album]: """Get album folders recursively.""" result = [] cur_ids = set() - for track in await self.get_library_tracks(False): - if track.album is not None: - if track.album.item_id not in cur_ids: - result.append(track.album) - cur_ids.add(track.album.item_id) + # for the sake of simplicity we only iterate over the files in one location only, + # which is the library tracks where we recurisvely enumerate the directory structure + # library albums = unique albums across all tracks + # the track listing is cached so this should be (pretty) fast + for track in await self.get_library_tracks(True): + if track.album is None: + continue + if track.album.item_id in cur_ids: + continue + result.append(track.album) + cur_ids.add(track.album.item_id) return result async def get_library_tracks(self, allow_cache=False) -> List[Track]: """Get all tracks recursively.""" # pylint: disable = arguments-differ + # we cache this listing in memory for performance and convenience reasons + # so we can easy retrieve the library artists and albums from the tracks listing + # if this may ever lead to memory issues, we can do the caching in db instead. if allow_cache and self._cached_tracks: return self._cached_tracks result = [] cur_ids = set() + # find all music files in the music directory and all subfolders for _root, _dirs, _files in os.walk(self._music_dir): for file in _files: filename = os.path.join(_root, file) @@ -207,22 +219,14 @@ class FileSystemProvider(MusicProvider): async def get_track(self, prov_track_id: str) -> Track: """Get full track details by id.""" - if os.sep in prov_track_id: - # this is already a filename - itempath = prov_track_id - else: - itempath = await self._get_filename(prov_track_id, MediaType.TRACK) + itempath = self._get_filename(prov_track_id) if not os.path.isfile(itempath): raise MediaNotFoundError(f"Track path does not exist: {itempath}") return await self._parse_track(itempath) async def get_playlist(self, prov_playlist_id: str) -> Playlist: """Get full playlist details by id.""" - if os.sep in prov_playlist_id: - # this is already a filename - itempath = prov_playlist_id - else: - itempath = await self._get_filename(prov_playlist_id, MediaType.PLAYLIST) + itempath = self._get_filename(prov_playlist_id) if not os.path.isfile(itempath): raise MediaNotFoundError(f"playlist path does not exist: {itempath}") return await self._parse_playlist(itempath) @@ -238,11 +242,7 @@ class FileSystemProvider(MusicProvider): async def get_playlist_tracks(self, prov_playlist_id: str) -> List[Track]: """Get playlist tracks for given playlist id.""" result = [] - if os.sep in prov_playlist_id: - # this is already a filename - itempath = prov_playlist_id - else: - itempath = await self._get_filename(prov_playlist_id, MediaType.PLAYLIST) + itempath = self._get_filename(prov_playlist_id) if not os.path.isfile(itempath): raise MediaNotFoundError(f"playlist path does not exist: {itempath}") index = 0 @@ -257,13 +257,20 @@ class FileSystemProvider(MusicProvider): async def get_artist_albums(self, prov_artist_id: str) -> List[Album]: """Get a list of albums for the given artist.""" - return [ - track.album - for track in await self.get_library_tracks(True) - if track.album is not None - and track.album.artist is not None - and track.album.artist.item_id == prov_artist_id - ] + result = [] + cur_ids = set() + for track in await self.get_library_tracks(True): + if track.album is None: + continue + if track.album.item_id in cur_ids: + continue + if track.album.artist is None: + continue + if track.album.artist.item_id != prov_artist_id: + continue + result.append(track.album) + cur_ids.add(track.album.item_id) + return result async def get_artist_toptracks(self, prov_artist_id: str) -> List[Track]: """Get a list of all tracks as we have no clue about preference.""" @@ -271,23 +278,19 @@ class FileSystemProvider(MusicProvider): track for track in await self.get_library_tracks(True) if track.artists is not None - and prov_artist_id in (x.item_id for x in track.provider_ids) + and prov_artist_id in (x.item_id for x in track.artists) ] async def get_stream_details(self, item_id: str) -> StreamDetails: """Return the content details for the given track when it will be streamed.""" - if os.sep in item_id: - # this is already a filename - itempath = item_id - else: - itempath = await self._get_filename(item_id, MediaType.TRACK) + itempath = self._get_filename(item_id) if not os.path.isfile(itempath): raise MediaNotFoundError(f"Track path does not exist: {itempath}") def parse_tag(): return TinyTag.get(itempath) - tag = await self.mass.loop.run_in_executor(None, parse_tag) + tags = await self.mass.loop.run_in_executor(None, parse_tag) return StreamDetails( type=StreamType.FILE, @@ -295,92 +298,143 @@ class FileSystemProvider(MusicProvider): item_id=item_id, content_type=ContentType(itempath.split(".")[-1]), path=itempath, - sample_rate=tag.samplerate or 44100, + sample_rate=tags.samplerate or 44100, bit_depth=16, # TODO: parse bitdepth ) + async def get_embedded_image(self, filename: str) -> str | None: + """Return the embedded image of an audio file as base64 string.""" + if not TinyTag.is_supported(filename): + return None + + def parse_tags(): + return TinyTag.get(filename, tags=True, image=True, ignore_errors=True) + + tags = await self.mass.loop.run_in_executor(None, parse_tags) + if image_data := tags.get_image(): + enc_image = base64.b64encode(image_data).decode() + enc_image = f"data:image/png;base64,{enc_image}" + return enc_image + async def _parse_track(self, filename: str) -> Track | None: """Try to parse a track from a filename by reading its tags.""" if not TinyTag.is_supported(filename): return None - def parse_tag(): - return TinyTag.get(filename) + def parse_tags(): + return TinyTag.get(filename, image=True, ignore_errors=True) - # TODO: Fall back to parsing base details from filename if no tags found/supported - tag = await self.mass.loop.run_in_executor(None, parse_tag) + # parse ID3 tags with TinyTag + tags = await self.mass.loop.run_in_executor(None, parse_tags) - # we need at least a title and artist - if tag.title is None or tag.artist is None: - self.logger.warning("Skipping track due to invalid ID3 tags: %s", filename) - return None + # use the relative filename as item_id + filename_base = filename.replace(self._music_dir, "") + if filename_base.startswith(os.sep): + filename_base = filename_base[1:] + prov_item_id = filename_base - prov_item_id = await self._get_item_id(filename, MediaType.TRACK) - name, version = parse_title_and_version(tag.title) + # work out if we have an artist/album/track.ext structure + filename_base = filename.replace(self._music_dir, "") + if filename_base.startswith(os.sep): + filename_base = filename_base[1:] + track_parts = filename_base.rsplit(os.sep) + if track_parts == 3: + album_artist_name = track_parts[0] + album_name = track_parts[1] + album_artist_name = tags.albumartist + album_name = tags.album + + # prefer title from tag, fallback to filename + if tags.title: + track_title = tags.title + else: + ext = filename_base.split(".")[-1] + track_title = filename_base.replace(f".{ext}", "").replace("_", " ") + self.logger.warning( + "%s is missing ID3 tags, use filename as fallback", filename_base + ) + + name, version = parse_title_and_version(track_title) track = Track( item_id=prov_item_id, provider=self.id, name=name, version=version ) - track.duration = tag.duration - # parse track artists + + # Parse track artist(s) from artist string using common splitters used in ID3 tags + # NOTE: do not use a '/' or '&' to prevent artists like AC/DC become messed up + track_artists_str = tags.artist or album_artist_name or FALLBACK_ARTIST track.artists = [ Artist( item_id=item, provider=self._attr_id, name=item, ) - for item in split_items(tag.artist) + for item in split_items(track_artists_str, ARTIST_SPLITTERS) ] - # parse album - if tag.album is not None: + # Check if track has embedded metadata + if tags.get_image(): + # we do not actually embed the image in the metadata because that would consume too + # much space and bandwidth. Instead we set the filename as value so the image can + # be retrieved later in realtime. + track.metadata.images = {MediaItemImage(ImageType.EMBEDDED_THUMB, filename)} + + # Parse album (only if we have album + album artist tags) + if album_name and album_artist_name: + album_id = album_name + album_name, album_version = parse_title_and_version(album_name) track.album = Album( - item_id=tag.album, + item_id=album_id, provider=self._attr_id, - name=tag.album, - year=try_parse_int(tag.year), - ) - if tag.albumartist is not None: - track.album.artist = Artist( - item_id=tag.albumartist, + name=album_name, + version=album_version, + year=try_parse_int(tags.year) if tags.year else None, + artist=Artist( + item_id=album_artist_name, provider=self._attr_id, - name=tag.albumartist, - ) - if tag.title.lower().startswith(tag.album.lower()): + name=album_artist_name, + ), + ) + track.album.metadata.images = track.metadata.images + + # try to guess the album type + if name.lower() == album_name.lower(): track.album.album_type = AlbumType.SINGLE - elif tag.albumartist not in split_items(tag.artist): + elif album_artist_name not in (x.name for x in track.artists): track.album.album_type = AlbumType.COMPILATION else: track.album.album_type = AlbumType.ALBUM + # parse other info - track.metadata.genres = set(split_items(tag.genre)) - track.disc_number = try_parse_int(tag.disc) - track.track_number = try_parse_int(tag.track) - track.isrc = tag.extra.get("isrc", "") - if "copyright" in tag.extra: - track.metadata.copyright = tag.extra["copyright"] - if "lyrics" in tag.extra: - track.metadata.lyrics = tag.extra["lyrics"] + track.duration = tags.duration + track.metadata.genres = set(split_items(tags.genre)) + track.disc_number = try_parse_int(tags.disc) + track.track_number = try_parse_int(tags.track) + track.isrc = tags.extra.get("isrc", "") + if "copyright" in tags.extra: + track.metadata.copyright = tags.extra["copyright"] + if "lyrics" in tags.extra: + track.metadata.lyrics = tags.extra["lyrics"] quality_details = "" if filename.endswith(".flac"): # TODO: get bit depth quality = MediaQuality.FLAC_LOSSLESS - if tag.samplerate > 192000: + if tags.samplerate > 192000: quality = MediaQuality.FLAC_LOSSLESS_HI_RES_4 - elif tag.samplerate > 96000: + elif tags.samplerate > 96000: quality = MediaQuality.FLAC_LOSSLESS_HI_RES_3 - elif tag.samplerate > 48000: + elif tags.samplerate > 48000: quality = MediaQuality.FLAC_LOSSLESS_HI_RES_2 - quality_details = f"{tag.samplerate / 1000} Khz" + quality_details = f"{tags.samplerate / 1000} Khz" elif filename.endswith(".ogg"): quality = MediaQuality.LOSSY_OGG - quality_details = f"{tag.bitrate} kbps" + quality_details = f"{tags.bitrate} kbps" elif filename.endswith(".m4a"): quality = MediaQuality.LOSSY_AAC - quality_details = f"{tag.bitrate} kbps" + quality_details = f"{tags.bitrate} kbps" else: quality = MediaQuality.LOSSY_MP3 - quality_details = f"{tag.bitrate} kbps" + quality_details = f"{tags.bitrate} kbps" track.add_provider_id( MediaItemProviderId( provider=self.id, @@ -394,8 +448,14 @@ class FileSystemProvider(MusicProvider): async def _parse_playlist(self, filename: str) -> Playlist | None: """Parse playlist from file.""" + # use the relative filename as item_id + filename_base = filename.replace(self._music_dir, "") + if filename_base.startswith(os.sep): + filename_base = filename_base[1:] + prov_item_id = filename_base + name = filename.split(os.sep)[-1].replace(".m3u", "") - prov_item_id = await self._get_item_id(filename, MediaType.PLAYLIST) + playlist = Playlist(prov_item_id, provider=self.id, name=name) playlist.is_editable = True playlist.add_provider_id( @@ -422,25 +482,10 @@ class FileSystemProvider(MusicProvider): except MediaNotFoundError: return None - async def _get_item_id(self, filename: str, media_type: MediaType) -> str: - """Get/create item ID for given filename.""" - # we store the relative path in db - filename_base = filename.replace(self._music_dir, "") - if filename_base.startswith(os.sep): - filename_base = filename_base[1:] - match = {"filename": filename_base, "media_type": media_type.value} - if db_row := await self.mass.database.get_row(DB_TABLE, match): - return str(db_row["item_id"]) - # filename not yet known in db, create new record - db_row = await self.mass.database.insert_or_replace(DB_TABLE, match) - return str(db_row["item_id"]) - - async def _get_filename(self, item_id: str, media_type: MediaType) -> str: - """Get/create ID for given filename.""" - match = {"item_id": int(item_id), "media_type": media_type.value} - db_row = await self.mass.database.get_row(DB_TABLE, match) - if not db_row: - raise MediaNotFoundError(f"Item not found: {item_id}") - if media_type == MediaType.PLAYLIST: - return os.path.join(self._playlists_dir, db_row["filename"]) - return os.path.join(self._music_dir, db_row["filename"]) + def _get_filename(self, item_id: str, playlist: bool = False) -> str: + """Get filename for item_id.""" + if self._music_dir in item_id: + return item_id + if playlist: + return os.path.join(self._playlists_dir, item_id) + return os.path.join(self._music_dir, item_id) -- 2.34.1