From ba13942f5e033f5a1809d0e9fb98f2905158b85d Mon Sep 17 00:00:00 2001 From: Marcel van der Veldt Date: Mon, 29 Apr 2024 21:22:57 +0200 Subject: [PATCH] Fix cleanup on removal of file/provider/item (#1261) --- .../server/controllers/media/albums.py | 2 + .../server/controllers/media/artists.py | 6 +- .../server/controllers/media/base.py | 24 +++++++- .../server/controllers/media/tracks.py | 2 + music_assistant/server/controllers/music.py | 26 +++++++-- music_assistant/server/helpers/database.py | 8 ++- .../server/providers/filesystem_local/base.py | 57 +++++-------------- 7 files changed, 67 insertions(+), 58 deletions(-) diff --git a/music_assistant/server/controllers/media/albums.py b/music_assistant/server/controllers/media/albums.py index 0e363907..58ac45df 100644 --- a/music_assistant/server/controllers/media/albums.py +++ b/music_assistant/server/controllers/media/albums.py @@ -268,6 +268,8 @@ class AlbumsController(MediaControllerBase[Album]): await self.mass.music.tracks.remove_item_from_library(db_track.item_id) # delete entry(s) from albumtracks table await self.mass.music.database.delete(DB_TABLE_ALBUM_TRACKS, {"album_id": db_id}) + # delete entry(s) from album artists table + await self.mass.music.database.delete(DB_TABLE_ALBUM_ARTISTS, {"album_id": db_id}) # delete the album itself from db await super().remove_item_from_library(item_id) diff --git a/music_assistant/server/controllers/media/artists.py b/music_assistant/server/controllers/media/artists.py index e130d277..d7f67cd9 100644 --- a/music_assistant/server/controllers/media/artists.py +++ b/music_assistant/server/controllers/media/artists.py @@ -277,15 +277,15 @@ class ArtistsController(MediaControllerBase[Artist]): limit=5000, ): with contextlib.suppress(MediaNotFoundError): - await self.mass.music.albums.remove_item_from_library(db_row["item_id"]) + await self.mass.music.albums.remove_item_from_library(db_row["album_id"]) # recursively also remove artist tracks for db_row in await self.mass.music.database.get_rows_from_query( - f"SELECT track_id FROM {DB_TABLE_TRACKS} WHERE artist_id = {db_id}", + f"SELECT track_id FROM {DB_TABLE_TRACK_ARTISTS} WHERE artist_id = {db_id}", limit=5000, ): with contextlib.suppress(MediaNotFoundError): - await self.mass.music.tracks.remove_item_from_library(db_row["item_id"]) + await self.mass.music.tracks.remove_item_from_library(db_row["track_id"]) # delete the artist itself from db await super().remove_item_from_library(db_id) diff --git a/music_assistant/server/controllers/media/base.py b/music_assistant/server/controllers/media/base.py index 1ec25feb..0cf80e20 100644 --- a/music_assistant/server/controllers/media/base.py +++ b/music_assistant/server/controllers/media/base.py @@ -75,6 +75,11 @@ class MediaControllerBase(Generic[ItemCls], metaclass=ABCMeta): self.db_table, {"item_id": db_id}, ) + # update provider_mappings table + await self.mass.music.database.delete( + DB_TABLE_PROVIDER_MAPPINGS, + {"media_type": self.media_type.value, "item_id": db_id}, + ) # NOTE: this does not delete any references to this item in other records, # this is handled/overridden in the mediatype specific controllers self.mass.signal_event(EventType.MEDIA_ITEM_DELETED, library_item.uri, library_item) @@ -497,7 +502,13 @@ class MediaControllerBase(Generic[ItemCls], metaclass=ABCMeta): }, ) if library_item.provider_mappings: - await self._set_provider_mappings(db_id, library_item.provider_mappings) + # we (temporary?) duplicate the provider mappings in a separate column of the media + # item's table, because the json_group_array query is superslow + await self.mass.music.database.update( + self.db_table, + {"item_id": db_id}, + {"provider_mappings": serialize_to_json(library_item.provider_mappings)}, + ) self.logger.debug( "removed provider_mapping %s/%s from item id %s", provider_instance_id, @@ -517,7 +528,7 @@ class MediaControllerBase(Generic[ItemCls], metaclass=ABCMeta): library_item = await self.get_library_item(db_id) except MediaNotFoundError: # edge case: already deleted / race condition - return + library_item = None # update provider_mappings table await self.mass.music.database.delete( DB_TABLE_PROVIDER_MAPPINGS, @@ -527,11 +538,20 @@ class MediaControllerBase(Generic[ItemCls], metaclass=ABCMeta): "provider_instance": provider_instance_id, }, ) + if library_item is None: + return # update the item's provider mappings (and check if we still have any) library_item.provider_mappings = { x for x in library_item.provider_mappings if x.provider_instance != provider_instance_id } if library_item.provider_mappings: + # we (temporary?) duplicate the provider mappings in a separate column of the media + # item's table, because the json_group_array query is superslow + await self.mass.music.database.update( + self.db_table, + {"item_id": db_id}, + {"provider_mappings": serialize_to_json(library_item.provider_mappings)}, + ) self.logger.debug( "removed all provider mappings for provider %s from item id %s", provider_instance_id, diff --git a/music_assistant/server/controllers/media/tracks.py b/music_assistant/server/controllers/media/tracks.py index 93ede994..f7737550 100644 --- a/music_assistant/server/controllers/media/tracks.py +++ b/music_assistant/server/controllers/media/tracks.py @@ -339,6 +339,8 @@ class TracksController(MediaControllerBase[Track]): db_id = int(item_id) # ensure integer # delete entry(s) from albumtracks table await self.mass.music.database.delete(DB_TABLE_ALBUM_TRACKS, {"track_id": db_id}) + # delete entry(s) from trackartists table + await self.mass.music.database.delete(DB_TABLE_TRACK_ARTISTS, {"track_id": db_id}) # delete the track itself from db await super().remove_item_from_library(db_id) diff --git a/music_assistant/server/controllers/music.py b/music_assistant/server/controllers/music.py index 7110a833..433a3431 100644 --- a/music_assistant/server/controllers/music.py +++ b/music_assistant/server/controllers/music.py @@ -700,24 +700,38 @@ class MusicController(CoreController): self.mass.music.tracks, self.mass.music.albums, self.mass.music.artists, + # run main controllers twice to rule out relations + self.mass.music.tracks, + self.mass.music.albums, + self.mass.music.artists, ): - prov_items = await ctrl.get_library_items_by_prov_id( - provider_instance=provider_instance + query = ( + f"SELECT item_id FROM {DB_TABLE_PROVIDER_MAPPINGS} " + f"WHERE media_type = '{ctrl.media_type}' " + f"AND provider_instance = '{provider_instance}'" ) - for item in prov_items: + for db_row in await self.database.get_rows_from_query(query, limit=100000): try: - await ctrl.remove_provider_mappings(item.item_id, provider_instance) + await ctrl.remove_provider_mappings(db_row["item_id"], provider_instance) except Exception as err: # we dont want the whole removal process to stall on one item # so in case of an unexpected error, we log and move on. self.logger.warning( "Error while removing %s: %s", - item.uri, + db_row["item_id"], str(err), exc_info=err if self.logger.isEnabledFor(logging.DEBUG) else None, ) errors += 1 + # remove all orphaned items (not in provider mappings table anymore) + query = ( + f"SELECT item_id FROM {DB_TABLE_PROVIDER_MAPPINGS} " + f"WHERE provider_instance = '{provider_instance}'" + ) + if remaining_items_count := await self.database.get_count_from_query(query): + errors += remaining_items_count + if errors == 0: # cleanup successful, remove from the deleted_providers setting self.logger.info("Provider %s removed from library", provider_instance) @@ -955,7 +969,7 @@ class MusicController(CoreController): [available] BOOLEAN DEFAULT 1, [url] text, [audio_format] json, - [details] json, + [details] TEXT, UNIQUE(media_type, provider_instance, provider_item_id) );""" ) diff --git a/music_assistant/server/helpers/database.py b/music_assistant/server/helpers/database.py index 6414522d..3dc3ee04 100644 --- a/music_assistant/server/helpers/database.py +++ b/music_assistant/server/helpers/database.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.database") -ENABLE_DEBUG = bool(os.environ.get("PYTHONDEVMODE", "0")) +ENABLE_DEBUG = os.environ.get("PYTHONDEVMODE") == "1" @asynccontextmanager @@ -96,7 +96,8 @@ class DatabaseConnection: sql_query += " WHERE " + " AND ".join(f"{x} = :{x}" for x in match) if order_by is not None: sql_query += f" ORDER BY {order_by}" - sql_query += f" LIMIT {limit} OFFSET {offset}" + if limit: + sql_query += f" LIMIT {limit} OFFSET {offset}" async with debug_query(sql_query): return await self._db.execute_fetchall(sql_query, match) @@ -108,7 +109,8 @@ class DatabaseConnection: offset: int = 0, ) -> list[Mapping]: """Get all rows for given custom query.""" - query = f"{query} LIMIT {limit} OFFSET {offset}" + if limit: + query += f" LIMIT {limit} OFFSET {offset}" _query, _params = query_params(query, params) async with debug_query(_query): return await self._db.execute_fetchall(_query, _params) diff --git a/music_assistant/server/providers/filesystem_local/base.py b/music_assistant/server/providers/filesystem_local/base.py index f4542b5c..d90e4dde 100644 --- a/music_assistant/server/providers/filesystem_local/base.py +++ b/music_assistant/server/providers/filesystem_local/base.py @@ -10,11 +10,9 @@ from abc import abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING -import aiofiles import cchardet import xmltodict -from music_assistant.common.helpers.json import JSON_DECODE_EXCEPTIONS, json_dumps, json_loads from music_assistant.common.helpers.util import parse_title_and_version from music_assistant.common.models.config_entries import ( ConfigEntry, @@ -44,7 +42,7 @@ from music_assistant.common.models.media_items import ( Track, ) from music_assistant.common.models.streamdetails import StreamDetails -from music_assistant.constants import VARIOUS_ARTISTS_NAME +from music_assistant.constants import DB_TABLE_PROVIDER_MAPPINGS, VARIOUS_ARTISTS_NAME from music_assistant.server.controllers.cache import use_cache from music_assistant.server.controllers.music import DB_SCHEMA_VERSION from music_assistant.server.helpers.compare import compare_strings @@ -152,8 +150,6 @@ class FileSystemProviderBase(MusicProvider): """ write_access: bool = False - checksums_file: str - file_checksums: dict[str, int] @property def supported_features(self) -> tuple[ProviderFeature, ...]: @@ -166,24 +162,6 @@ class FileSystemProviderBase(MusicProvider): ) return SUPPORTED_FEATURES - async def loaded_in_mass(self) -> None: - """Call after the provider has been loaded.""" - # load the checksums from disk and store in memory - self.checksums_file = os.path.join(self.mass.storage_path, f"{self.instance_id}.json") - self.file_checksums = {} - if await asyncio.to_thread(os.path.isfile, self.checksums_file): - try: - async with aiofiles.open(self.checksums_file, "r", encoding="utf-8") as _file: - self.file_checksums = json_loads(await _file.read()) - self.logger.debug("Loaded persistent checksums from %s", self.checksums_file) - return - except FileNotFoundError: - pass - except JSON_DECODE_EXCEPTIONS: # pylint: disable=catching-non-exception - self.logger.exception( - "Error while reading persistent checksums file %s", self.checksums_file - ) - @abstractmethod async def listdir( self, path: str, recursive: bool = False @@ -346,10 +324,18 @@ class FileSystemProviderBase(MusicProvider): async def sync_library(self, media_types: tuple[MediaType, ...]) -> None: """Run library sync for this provider.""" + file_checksums: dict[str, str] = {} + query = ( + f"SELECT provider_item_id, details FROM {DB_TABLE_PROVIDER_MAPPINGS} " + f"WHERE provider_instance = '{self.instance_id}' " + "AND media_type in ('track', 'playlist')" + ) + for db_row in await self.mass.music.database.get_rows_from_query(query, limit=0): + file_checksums[db_row["provider_item_id"]] = str(db_row["details"]) # find all music files in the music directory and all subfolders # we work bottom up, as-in we derive all info from the tracks cur_filenames = set() - prev_filenames = set(self.file_checksums.keys()) + prev_filenames = set(file_checksums.keys()) async for item in self.listdir("", recursive=True): if "." not in item.filename or not item.ext: # skip system files and files without extension @@ -362,7 +348,7 @@ class FileSystemProviderBase(MusicProvider): cur_filenames.add(item.path) try: # continue if the item did not change (checksum still the same) - if item.checksum == self.file_checksums.get(item.path): + if item.checksum == file_checksums.get(item.path): continue self.logger.debug("Processing: %s", item.path) if item.ext in TRACK_EXTENSIONS: @@ -390,13 +376,7 @@ class FileSystemProviderBase(MusicProvider): str(err), exc_info=err if self.logger.isEnabledFor(logging.DEBUG) else None, ) - else: - self.file_checksums[item.path] = item.checksum - # save the checksums every 500 items to speed up scan restarts - if len(cur_filenames) % 500 == 0: - await self._async_save_checksums() - await self._async_save_checksums() # work out deletions deleted_files = prev_filenames - cur_filenames await self._process_deletions(deleted_files) @@ -492,6 +472,7 @@ class FileSystemProviderBase(MusicProvider): item_id=file_item.path, provider_domain=self.domain, provider_instance=self.instance_id, + details=file_item.checksum, ) }, ) @@ -714,6 +695,7 @@ class FileSystemProviderBase(MusicProvider): channels=tags.channels, bit_rate=tags.bit_rate, ), + details=file_item.checksum, ) }, disc_number=tags.disc, @@ -1066,16 +1048,3 @@ class FileSystemProviderBase(MusicProvider): ) break return images - - async def _async_save_checksums(self) -> None: - """Save persistent checksums data to disk.""" - filename_backup = f"{self.checksums_file}.backup" - # make backup before we write a new file - if await asyncio.to_thread(os.path.isfile, self.checksums_file): - if await asyncio.to_thread(os.path.isfile, filename_backup): - await asyncio.to_thread(os.remove, filename_backup) - await asyncio.to_thread(os.rename, self.checksums_file, filename_backup) - - async with aiofiles.open(self.checksums_file, "w", encoding="utf-8") as _file: - await _file.write(json_dumps(self.file_checksums, indent=True)) - self.logger.debug("Saved data to persistent storage") -- 2.34.1