From: Marcel van der Veldt <m.vanderveldt@outlook.com>
Date: Sun, 5 Oct 2025 15:05:26 +0000 (+0200)
Subject: Rework audio cache/buffering (#2483)
X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=3fcff72a3a7730e8b207c8620a6999d2114fe0de;p=music-assistant-server.git

Rework audio cache/buffering (#2483)
---

diff --git a/music_assistant/controllers/player_queues.py b/music_assistant/controllers/player_queues.py
index 4ccc2014..4900aa4b 100644
--- a/music_assistant/controllers/player_queues.py
+++ b/music_assistant/controllers/player_queues.py
@@ -59,7 +59,6 @@ from music_assistant_models.queue_item import QueueItem
 from music_assistant.constants import (
     ATTR_ANNOUNCEMENT_IN_PROGRESS,
     CONF_FLOW_MODE,
-    CONF_SMART_FADES_MODE,
     MASS_LOGO_ONLINE,
     VERBOSE_LOG_LEVEL,
 )
@@ -69,7 +68,6 @@ from music_assistant.helpers.throttle_retry import BYPASS_THROTTLER
 from music_assistant.helpers.util import get_changed_keys, percentage
 from music_assistant.models.core_controller import CoreController
 from music_assistant.models.player import Player, PlayerMedia
-from music_assistant.models.smart_fades import SmartFadesMode
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -853,9 +851,12 @@ class PlayerQueuesController(CoreController):
                 # all attempts to find a playable item failed
                 raise MediaNotFoundError("No playable item found to start playback")
 
+            flow_mode = queue.flow_mode
+            if queue_item.media_type in (MediaType.RADIO, MediaType.PLUGIN_SOURCE):
+                flow_mode = False
             await self.mass.players.play_media(
                 player_id=queue_id,
-                media=await self.player_media_from_queue_item(queue_item, queue.flow_mode),
+                media=await self.player_media_from_queue_item(queue_item, flow_mode),
             )
             await asyncio.sleep(2)
             self._transitioning_players.discard(queue_id)
@@ -1157,14 +1158,6 @@ class PlayerQueuesController(CoreController):
             fade_in=fade_in,
             prefer_album_loudness=playing_album_tracks,
         )
-        # allow stripping silence from the begin/end of the track if crossfade is enabled
-        # this will allow for (much) smoother crossfades
-        if (
-            await self.mass.config.get_player_config_value(queue_id, CONF_SMART_FADES_MODE)
-            != SmartFadesMode.DISABLED
-        ):
-            queue_item.streamdetails.strip_silence_end = True
-            queue_item.streamdetails.strip_silence_begin = not is_start
 
     def track_loaded_in_buffer(self, queue_id: str, item_id: str) -> None:
         """Call when a player has (started) loading a track in the buffer."""
@@ -1570,7 +1563,6 @@ class PlayerQueuesController(CoreController):
         Preload the streamdetails for the next item in the queue/buffer.
 
         This basically ensures the item is playable and fetches the stream details.
-        If caching is enabled, this will also start filling the stream cache.
         If an error occurs, the item will be skipped and the next item will be loaded.
         """
         queue = self._queues[queue_id]
diff --git a/music_assistant/controllers/streams.py b/music_assistant/controllers/streams.py
index 273fc3f2..f4413515 100644
--- a/music_assistant/controllers/streams.py
+++ b/music_assistant/controllers/streams.py
@@ -221,28 +221,6 @@ class StreamsController(CoreController):
                 category="advanced",
                 required=False,
             ),
-            # ConfigEntry(
-            #     key=CONF_ALLOW_AUDIO_CACHE,
-            #     type=ConfigEntryType.STRING,
-            #     default_value=self.allow_cache_default,
-            #     options=[
-            #         ConfigValueOption("Always", "always"),
-            #         ConfigValueOption("Disabled", "disabled"),
-            #         ConfigValueOption("Auto", "auto"),
-            #     ],
-            #     label="Allow caching of remote/cloudbased audio streams",
-            #     description="To ensure smooth(er) playback as well as fast seeking, "
-            #     "Music Assistant can cache audio streams on disk. \n"
-            #     "On systems with limited diskspace, this can be disabled, "
-            #     "but may result in less smooth playback or slower seeking.\n\n"
-            #     "**Always:** Enforce caching of audio streams at all times "
-            #     "(as long as there is enough free space).\n"
-            #     "**Disabled:** Never cache audio streams.\n"
-            #     "**Auto:** Let Music Assistant decide if caching "
-            #     "should be used on a per-item base.",
-            #     category="advanced",
-            #     required=True,
-            # ),
         )
 
     async def setup(self, config: CoreConfig) -> None:
@@ -413,20 +391,16 @@ class StreamsController(CoreController):
         if request.method != "GET":
             return resp
 
-        # work out pcm format based on output format
-        pcm_format = AudioFormat(
-            content_type=DEFAULT_PCM_FORMAT.content_type,
-            sample_rate=output_format.sample_rate,
-            # always use f32 internally for extra headroom for filters etc
-            bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
-            channels=2,
-        )
-        smart_fades_mode = await self.mass.config.get_player_config_value(
-            queue.queue_id, CONF_SMART_FADES_MODE
-        )
-        standard_crossfade_duration = self.mass.config.get_raw_player_config_value(
-            queue.queue_id, CONF_CROSSFADE_DURATION, 10
-        )
+        if queue_item.media_type != MediaType.TRACK:
+            smart_fades_mode = SmartFadesMode.DISABLED  # no crossfade on non-tracks
+            standard_crossfade_duration = 0  # always bound, same as the flow stream
+        else:
+            smart_fades_mode = await self.mass.config.get_player_config_value(
+                queue.queue_id, CONF_SMART_FADES_MODE
+            )
+            standard_crossfade_duration = self.mass.config.get_raw_player_config_value(
+                queue.queue_id, CONF_CROSSFADE_DURATION, 10
+            )
         if (
             smart_fades_mode != SmartFadesMode.DISABLED
             and PlayerFeature.GAPLESS_PLAYBACK not in queue_player.supported_features
@@ -442,28 +416,43 @@ class StreamsController(CoreController):
             # crossfade is enabled, use special crossfaded single item stream
             # where the crossfade of the next track is present in the stream of
             # a single track. This only works if the player supports gapless playback.
-            audio_input = self.get_queue_item_stream_with_smartfade(
-                queue_item=queue_item,
-                pcm_format=pcm_format,
-                session_id=session_id,
-                smart_fades_mode=smart_fades_mode,
-                standard_crossfade_duration=standard_crossfade_duration,
+            # work out pcm format based on output format
+            pcm_format = AudioFormat(
+                content_type=DEFAULT_PCM_FORMAT.content_type,
+                sample_rate=output_format.sample_rate,
+                # always use f32 internally for extra headroom for filters etc
+                bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
+                channels=2,
+            )
+            audio_input = get_ffmpeg_stream(
+                audio_input=self.get_queue_item_stream_with_smartfade(
+                    queue_item=queue_item,
+                    pcm_format=pcm_format,
+                    session_id=session_id,
+                    smart_fades_mode=smart_fades_mode,
+                    standard_crossfade_duration=standard_crossfade_duration,
+                ),
+                input_format=pcm_format,
+                output_format=output_format,
+                filter_params=get_player_filter_params(
+                    self.mass, queue_player.player_id, pcm_format, output_format
+                ),
             )
         else:
+            # no crossfade, just a regular single item stream
+            # no need to convert to pcm first, request output format directly
             audio_input = self.get_queue_item_stream(
                 queue_item=queue_item,
-                pcm_format=pcm_format,
+                output_format=output_format,
+                filter_params=get_player_filter_params(
+                    self.mass,
+                    queue_player.player_id,
+                    queue_item.streamdetails.audio_format,
+                    output_format,
+                ),
             )
 
-        async for chunk in get_ffmpeg_stream(
-            audio_input=audio_input,
-            input_format=pcm_format,
-            output_format=output_format,
-            filter_params=get_player_filter_params(
-                self.mass, queue_player.player_id, pcm_format, output_format
-            ),
-            chunk_size=get_chunksize(output_format),
-        ):
+        async for chunk in audio_input:
             try:
                 await resp.write(chunk)
             except (BrokenPipeError, ConnectionResetError, ConnectionError):
@@ -788,12 +777,19 @@ class StreamsController(CoreController):
         pcm_sample_size = int(
             pcm_format.sample_rate * (pcm_format.bit_depth / 8) * pcm_format.channels
         )
-        smart_fades_mode = await self.mass.config.get_player_config_value(
-            queue.queue_id, CONF_SMART_FADES_MODE
-        )
-        standard_crossfade_duration = self.mass.config.get_raw_player_config_value(
-            queue.queue_id, CONF_CROSSFADE_DURATION, 10
-        )
+        if start_queue_item.media_type != MediaType.TRACK:
+            # no crossfade on non-tracks
+            # NOTE that we shouldn't be using flow mode for non-tracks at all,
+            # but just to be sure, we specifically disable crossfade here
+            smart_fades_mode = SmartFadesMode.DISABLED
+            standard_crossfade_duration = 0
+        else:
+            smart_fades_mode = await self.mass.config.get_player_config_value(
+                queue.queue_id, CONF_SMART_FADES_MODE
+            )
+            standard_crossfade_duration = self.mass.config.get_raw_player_config_value(
+                queue.queue_id, CONF_CROSSFADE_DURATION, 10
+            )
         self.logger.info(
             "Start Queue Flow stream for Queue %s - crossfade: %s %s",
             queue.display_name,
@@ -840,7 +836,7 @@ class StreamsController(CoreController):
             # handle incoming audio chunks
             async for chunk in self.get_queue_item_stream(
                 queue_track,
-                pcm_format=pcm_format,
+                output_format=pcm_format,
             ):
                 # buffer size needs to be big enough to include the crossfade part
                 req_buffer_size = (
@@ -1040,13 +1036,14 @@ class StreamsController(CoreController):
     async def get_queue_item_stream(
         self,
         queue_item: QueueItem,
-        pcm_format: AudioFormat,
+        output_format: AudioFormat,
+        filter_params: list[str] | None = None,
     ) -> AsyncGenerator[bytes, None]:
-        """Get the audio stream for a single queue item as raw PCM audio."""
+        """Get the audio stream for a single queue item."""
         # collect all arguments for ffmpeg
         streamdetails = queue_item.streamdetails
         assert streamdetails
-        filter_params = []
+        filter_params = filter_params or []
 
         # handle volume normalization
         gain_correct: float | None = None
@@ -1078,21 +1075,11 @@ class StreamsController(CoreController):
             filter_params.append(f"volume={gain_correct}dB")
         streamdetails.volume_normalization_gain_correct = gain_correct
 
-        # if streamdetails.media_type == MediaType.RADIO or not streamdetails.duration:
-        #     # pad some silence before the radio/live stream starts to create some headroom
-        #     # for radio stations (or other live streams) that do not provide any look ahead buffer
-        #     # without this, some radio streams jitter a lot,
-        #     # especially with dynamic normalization,
-        #     # if the stream does not provide a look ahead buffer
-        #     async for silence in get_silence(4, pcm_format):
-        #         yield silence
-        #         del silence
-
         first_chunk_received = False
         async for chunk in get_media_stream(
             self.mass,
             streamdetails=streamdetails,
-            pcm_format=pcm_format,
+            output_format=output_format,
             filter_params=filter_params,
         ):
             if not first_chunk_received:
diff --git a/music_assistant/helpers/audio.py b/music_assistant/helpers/audio.py
index 5b41c5bb..35fd2e49 100644
--- a/music_assistant/helpers/audio.py
+++ b/music_assistant/helpers/audio.py
@@ -32,9 +32,9 @@ from music_assistant_models.errors import (
     ProviderUnavailableError,
 )
 from music_assistant_models.media_items import AudioFormat
+from music_assistant_models.streamdetails import MultiPartPath
 
 from music_assistant.constants import (
-    CONF_ALLOW_AUDIO_CACHE,
     CONF_ENTRY_OUTPUT_LIMITER,
     CONF_OUTPUT_CHANNELS,
     CONF_VOLUME_NORMALIZATION,
@@ -54,7 +54,7 @@ from .dsp import filter_to_ffmpeg_params
 from .ffmpeg import FFMpeg, get_ffmpeg_stream
 from .playlists import IsHLSPlaylist, PlaylistItem, fetch_playlist, parse_m3u
 from .process import AsyncProcess, communicate
-from .util import detect_charset, has_enough_space
+from .util import detect_charset
 
 if TYPE_CHECKING:
     from music_assistant_models.config_entries import CoreConfig, PlayerConfig
@@ -74,240 +74,9 @@ HTTP_HEADERS_ICY = {**HTTP_HEADERS, "Icy-MetaData": "1"}
 
 SLOW_PROVIDERS = ("tidal", "ytmusic", "apple_music")
 
-CACHE_CATEGORY_AUDIO_CACHE: Final[int] = 99
 CACHE_CATEGORY_RESOLVED_RADIO_URL: Final[int] = 100
 CACHE_PROVIDER: Final[str] = "audio"
-CACHE_FILES_IN_USE: set[str] = set()
-
-
-class StreamCache:
-    """
-    StreamCache.
-
-    Basic class to handle caching of audio streams to a (semi) temporary file.
-    Useful in case of slow or unreliable network connections, faster seeking,
-    or when the audio stream is slow itself.
-    """
-
-    def __init__(self, mass: MusicAssistant, streamdetails: StreamDetails) -> None:
-        """Initialize the StreamCache."""
-        self.mass = mass
-        self.streamdetails = streamdetails
-        self.logger = LOGGER.getChild("cache")
-        self._cache_file: str | None = None
-        self._fetch_task: asyncio.Task[None] | None = None
-        self._subscribers: int = 0
-        self._first_part_received = asyncio.Event()
-        self._all_data_written: bool = False
-        self._stream_error: str | None = None
-        self.org_path: str | None = streamdetails.path
-        self.org_stream_type: StreamType | None = streamdetails.stream_type
-        self.org_extra_input_args: list[str] | None = streamdetails.extra_input_args
-        self.org_audio_format = streamdetails.audio_format
-        streamdetails.audio_format = AudioFormat(
-            content_type=ContentType.NUT,
-            codec_type=streamdetails.audio_format.codec_type,
-            sample_rate=streamdetails.audio_format.sample_rate,
-            bit_depth=streamdetails.audio_format.bit_depth,
-            channels=streamdetails.audio_format.channels,
-        )
-        streamdetails.path = "-"
-        streamdetails.stream_type = StreamType.CACHE
-        streamdetails.can_seek = True
-        streamdetails.allow_seek = True
-        streamdetails.extra_input_args = []
-
-    async def create(self) -> None:
-        """Create the cache file (if needed)."""
-        if self._cache_file is None:
-            if cached_cache_path := await self.mass.cache.get(
-                key=self.streamdetails.uri,
-                provider=CACHE_PROVIDER,
-                category=CACHE_CATEGORY_AUDIO_CACHE,
-            ):
-                # we have a mapping stored for this uri, prefer that
-                self._cache_file = cached_cache_path
-                assert self._cache_file is not None  # for type checking
-                if await asyncio.to_thread(os.path.exists, self._cache_file):
-                    # cache file already exists from a previous session,
-                    # we can simply use that, there is nothing to create
-                    CACHE_FILES_IN_USE.add(self._cache_file)
-                    self._all_data_written = True
-                    return
-            else:
-                # create new cache file
-                cache_id = shortuuid.random(30)
-                self._cache_file = cache_file = os.path.join(
-                    self.mass.streams.audio_cache_dir, cache_id
-                )
-                await self.mass.cache.set(
-                    key=self.streamdetails.uri,
-                    data=cache_file,
-                    provider=CACHE_PROVIDER,
-                    category=CACHE_CATEGORY_AUDIO_CACHE,
-                )
-        # mark file as in-use to prevent it being deleted
-        CACHE_FILES_IN_USE.add(self._cache_file)
-        # start fetch task if its not already running
-        if self._fetch_task is None:
-            self._fetch_task = self.mass.create_task(self._create_cache_file())
-        # wait until the first part of the file is received
-        await self._first_part_received.wait()
-        if self._stream_error:
-            # an error occurred while creating the cache file
-            # remove the cache file and raise an error
-            raise AudioError(self._stream_error)
-
-    def release(self) -> None:
-        """Release the cache file."""
-        self._subscribers -= 1
-        if self._subscribers <= 0:
-            assert self._cache_file is not None  # for type checking
-            CACHE_FILES_IN_USE.discard(self._cache_file)
-
-    async def get_audio_stream(self) -> str | AsyncGenerator[bytes, None]:
-        """
-        Get the cached audio stream.
-
-        Returns a string with the path of the cachefile if the file is ready.
-        If the file is not yet ready, it will return an async generator that will
-        stream the (intermediate) audio data from the cache file.
-        """
-        self._subscribers += 1
-        assert self._cache_file is not None  # type guard
-        # mark file as in-use to prevent it being deleted
-        CACHE_FILES_IN_USE.add(self._cache_file)
-
-        async def _stream_from_cache() -> AsyncGenerator[bytes, None]:
-            chunksize = get_chunksize(self.streamdetails.audio_format, 1)
-            wait_loops = 0
-            assert self._cache_file is not None  # type guard
-            async with aiofiles.open(self._cache_file, "rb") as file:
-                while wait_loops < 2000:
-                    chunk = await file.read(chunksize)
-                    if chunk:
-                        yield chunk
-                        await asyncio.sleep(0)  # yield to eventloop
-                        del chunk
-                    elif self._all_data_written:
-                        # reached EOF
-                        break
-                    else:
-                        # data is not yet available, wait a bit
-                        await asyncio.sleep(0.05)
-                        # prevent an infinite loop in case of an error
-                        wait_loops += 1
-
-        if self._all_data_written:
-            # cache file is ready
-            return self._cache_file
-
-        # cache file does not exist at all (or is still being written)
-        await self.create()
-        return _stream_from_cache()
-
-    async def _create_cache_file(self) -> None:
-        time_start = time.time()
-        self.logger.debug("Creating audio cache for %s", self.streamdetails.uri)
-        assert self._cache_file is not None  # for type checking
-        CACHE_FILES_IN_USE.add(self._cache_file)
-        self._first_part_received.clear()
-        self._all_data_written = False
-        extra_input_args = ["-y", *(self.org_extra_input_args or [])]
-        audio_source: AsyncGenerator[bytes, None] | str | int
-        if self.org_stream_type == StreamType.CUSTOM:
-            provider = self.mass.get_provider(self.streamdetails.provider)
-            if TYPE_CHECKING:  # avoid circular import
-                assert isinstance(provider, MusicProvider)
-            audio_source = provider.get_audio_stream(
-                self.streamdetails,
-            )
-        elif self.org_stream_type == StreamType.ICY:
-            raise NotImplementedError("Caching of this streamtype is not supported!")
-        elif self.org_stream_type == StreamType.HLS:
-            if self.streamdetails.media_type == MediaType.RADIO:
-                raise NotImplementedError("Caching of this streamtype is not supported!")
-            assert self.org_path is not None  # for type checking
-            substream = await get_hls_substream(self.mass, self.org_path)
-            audio_source = substream.path
-        elif self.org_stream_type == StreamType.ENCRYPTED_HTTP:
-            assert self.org_path is not None  # for type checking
-            assert self.streamdetails.decryption_key is not None  # for type checking
-            audio_source = self.org_path
-            extra_input_args += ["-decryption_key", self.streamdetails.decryption_key]
-        elif self.org_stream_type == StreamType.MULTI_FILE:
-            audio_source = get_multi_file_stream(self.mass, self.streamdetails)
-        else:
-            assert self.org_path is not None  # for type checking
-            audio_source = self.org_path
-
-        # we always use ffmpeg to fetch the original audio source
-        # this may feel a bit redundant, but it's the most reliable way to fetch the audio
-        # because ffmpeg has all logic to handle different audio formats, codecs, etc.
-        # and it also accounts for complicated cases such as encrypted streams or
-        # m4a/mp4 streams with the moov atom at the end of the file.
-        # ffmpeg will produce a lossless copy of the original codec.
-        ffmpeg_proc = FFMpeg(
-            audio_input=audio_source,
-            input_format=self.org_audio_format,
-            output_format=self.streamdetails.audio_format,
-            extra_input_args=extra_input_args,
-            audio_output=self._cache_file,
-            collect_log_history=True,
-        )
-        try:
-            await ffmpeg_proc.start()
-            # wait until the first data is written to the cache file
-            while ffmpeg_proc.returncode is None:
-                await asyncio.sleep(0.1)
-                if not await asyncio.to_thread(os.path.exists, self._cache_file):
-                    continue
-                if await asyncio.to_thread(os.path.getsize, self._cache_file) > 64000:
-                    break
-
-            # set 'first part received' event to signal that the first part of the file is ready
-            # this is useful for the get_audio_stream method to know when it can start streaming
-            # we do guard for the returncode here, because if ffmpeg exited abnormally, we should
-            # not signal that the first part is ready
-            if ffmpeg_proc.returncode in (None, 0):
-                self._first_part_received.set()
-                self.logger.debug(
-                    "First part received for %s after %.2fs",
-                    self.streamdetails.uri,
-                    time.time() - time_start,
-                )
-            # wait until ffmpeg is done
-            await ffmpeg_proc.wait()
-
-            # raise an error if ffmpeg exited with a non-zero code
-            if ffmpeg_proc.returncode != 0:
-                ffmpeg_proc.logger.warning("\n".join(ffmpeg_proc.log_history))
-                raise AudioError(f"FFMpeg error {ffmpeg_proc.returncode}")
-
-            # set 'all data written' event to signal that the entire file is ready
-            self._all_data_written = True
-            self.logger.debug(
-                "Writing all data for %s done in %.2fs",
-                self.streamdetails.uri,
-                time.time() - time_start,
-            )
-        except BaseException as err:
-            self.logger.error("Error while creating cache for %s: %s", self.streamdetails.uri, err)
-            # make sure that the (corrupted/incomplete) cache file is removed
-            await self._remove_cache_file()
-            # unblock the waiting tasks by setting the event
-            # this will allow the tasks to continue and handle the error
-            self._stream_error = str(err) or err.__qualname__  # type: ignore [attr-defined]
-            self._first_part_received.set()
-        finally:
-            await ffmpeg_proc.close()
-
-    async def _remove_cache_file(self) -> None:
-        self._first_part_received.clear()
-        self._all_data_written = False
-        self._fetch_task = None
-        assert self._cache_file is not None  # for type checking
-        await remove_file(self._cache_file)
+STREAMDETAILS_EXPIRATION: Final[int] = 60 * 15  # 15 minutes
 
 
 async def crossfade_pcm_parts(
@@ -552,7 +321,6 @@ async def get_stream_details(
     This is called just-in-time when a PlayerQueue wants a MediaItem to be played.
     Do not try to request streamdetails too much in advance as this is expiring data.
     """
-    BYPASS_THROTTLER.set(True)
     time_start = time.time()
     LOGGER.debug("Getting streamdetails for %s", queue_item.uri)
     if seek_position and (queue_item.media_type == MediaType.RADIO or not queue_item.duration):
@@ -565,9 +333,12 @@ async def get_stream_details(
         raise MediaNotFoundError(
             f"Unable to retrieve streamdetails for {queue_item.name} ({queue_item.uri})"
         )
-    if queue_item.streamdetails and (utc() - queue_item.streamdetails.created_at).seconds < 1800:
+    if (
+        queue_item.streamdetails
+        and (utc() - queue_item.streamdetails.created_at).total_seconds() < STREAMDETAILS_EXPIRATION
+    ):
         # already got a fresh/unused (or cached) streamdetails
-        # we assume that the streamdetails are valid for max 30 minutes
+        # we assume that the streamdetails are valid for max STREAMDETAILS_EXPIRATION seconds
         streamdetails = queue_item.streamdetails
     else:
         # retrieve streamdetails from provider
@@ -589,6 +360,7 @@ async def get_stream_details(
                 continue  # provider not available ?
             # get streamdetails from provider
             try:
+                BYPASS_THROTTLER.set(True)
                 streamdetails = await music_prov.get_stream_details(
                     prov_media.item_id, media_item.media_type
                 )
@@ -596,6 +368,8 @@ async def get_stream_details(
                 LOGGER.warning(str(err))
             else:
                 break
+            finally:
+                BYPASS_THROTTLER.set(False)
         else:
             msg = f"Unable to retrieve streamdetails for {queue_item.name} ({queue_item.uri})"
             raise MediaNotFoundError(msg)
@@ -605,7 +379,7 @@ async def get_stream_details(
             streamdetails.stream_type in (StreamType.ICY, StreamType.HLS, StreamType.HTTP)
             and streamdetails.media_type == MediaType.RADIO
         ):
-            assert streamdetails.path is not None  # for type checking
+            assert isinstance(streamdetails.path, str)  # for type checking
             resolved_url, stream_type = await resolve_radio_stream(mass, streamdetails.path)
             streamdetails.path = resolved_url
             streamdetails.stream_type = stream_type
@@ -644,121 +418,43 @@ async def get_stream_details(
         queue_item.uri,
         int((time.time() - time_start) * 1000),
     )
-
-    # determine if we may use caching for the audio stream
-    if streamdetails.enable_cache is None:
-        streamdetails.enable_cache = await _is_cache_allowed(mass, streamdetails)
-
-    # handle temporary cache support of audio stream
-    if streamdetails.enable_cache:
-        if streamdetails.cache is None:
-            streamdetails.cache = StreamCache(mass, streamdetails)
-        else:
-            streamdetails.cache = cast("StreamCache", streamdetails.cache)
-        # create cache (if needed) and wait until the cache is available
-        await streamdetails.cache.create()
-        LOGGER.debug(
-            "streamdetails cache ready for %s in %s milliseconds",
-            queue_item.uri,
-            int((time.time() - time_start) * 1000),
-        )
-
     return streamdetails
 
 
-async def _is_cache_allowed(mass: MusicAssistant, streamdetails: StreamDetails) -> bool:
-    """Check if caching is allowed for the given streamdetails."""
-    if streamdetails.media_type not in (
-        MediaType.TRACK,
-        MediaType.AUDIOBOOK,
-        MediaType.PODCAST_EPISODE,
-    ):
-        return False
-    if streamdetails.stream_type in (StreamType.ICY, StreamType.LOCAL_FILE, StreamType.UNKNOWN):
-        return False
-    if streamdetails.stream_type == StreamType.LOCAL_FILE:
-        # no need to cache local files
-        return False
-    allow_cache = mass.config.get_raw_core_config_value(
-        "streams", CONF_ALLOW_AUDIO_CACHE, mass.streams.allow_cache_default
-    )
-    if allow_cache == "disabled":
-        return False
-    if not await has_enough_space(mass.streams.audio_cache_dir, 5):
-        return False
-    if allow_cache == "always":
-        return True
-    # auto mode
-    if streamdetails.stream_type == StreamType.ENCRYPTED_HTTP:
-        # always prefer cache for encrypted streams
-        return True
-    if not streamdetails.duration:
-        # we can't determine filesize without duration so play it safe and dont allow cache
-        return False
-    estimated_filesize = get_chunksize(streamdetails.audio_format, streamdetails.duration)
-    if streamdetails.stream_type == StreamType.MULTI_FILE:
-        # prefer cache to speedup multi-file streams
-        # (if total filesize smaller than 2GB)
-        max_filesize = 2 * 1024 * 1024 * 1024
-    elif streamdetails.stream_type == StreamType.CUSTOM:
-        # prefer cache for custom streams (to speedup seeking)
-        max_filesize = 250 * 1024 * 1024  # 250MB
-    elif streamdetails.stream_type == StreamType.HLS:
-        # prefer cache for HLS streams (to speedup seeking)
-        max_filesize = 250 * 1024 * 1024  # 250MB
-    elif streamdetails.media_type in (
-        MediaType.AUDIOBOOK,
-        MediaType.PODCAST_EPISODE,
-    ):
-        # prefer cache for audiobooks and episodes (to speedup seeking)
-        max_filesize = 2 * 1024 * 1024 * 1024  # 2GB
-    elif streamdetails.provider in SLOW_PROVIDERS:
-        # prefer cache for slow providers
-        max_filesize = 2 * 1024 * 1024 * 1024  # 2GB
-    else:
-        max_filesize = 50 * 1024 * 1024
-
-    return estimated_filesize < max_filesize
-
-
 async def get_media_stream(
     mass: MusicAssistant,
     streamdetails: StreamDetails,
-    pcm_format: AudioFormat,
+    output_format: AudioFormat,
     filter_params: list[str] | None = None,
 ) -> AsyncGenerator[bytes, None]:
-    """Get PCM audio stream for given media details."""
+    """Get audio stream for given media details."""
     logger = LOGGER.getChild("media_stream")
     logger.log(VERBOSE_LOG_LEVEL, "Starting media stream for %s", streamdetails.uri)
     extra_input_args = streamdetails.extra_input_args or []
-    strip_silence_begin = streamdetails.strip_silence_begin
-    strip_silence_end = streamdetails.strip_silence_end
     if filter_params is None:
         filter_params = []
     if streamdetails.fade_in:
         filter_params.append("afade=type=in:start_time=0:duration=3")
-        strip_silence_begin = False
 
+    seek_position = streamdetails.seek_position
     # work out audio source for these streamdetails
+    audio_source: str | AsyncGenerator[bytes, None]
     stream_type = streamdetails.stream_type
-    if stream_type == StreamType.CACHE:
-        cache = cast("StreamCache", streamdetails.cache)
-        audio_source = await cache.get_audio_stream()
-    elif stream_type == StreamType.MULTI_FILE:
-        audio_source = get_multi_file_stream(mass, streamdetails)
-    elif stream_type == StreamType.CUSTOM:
+    if stream_type == StreamType.CUSTOM:
         music_prov = mass.get_provider(streamdetails.provider)
         if TYPE_CHECKING:  # avoid circular import
             assert isinstance(music_prov, MusicProvider)
         audio_source = music_prov.get_audio_stream(
             streamdetails,
-            seek_position=streamdetails.seek_position if streamdetails.can_seek else 0,
+            seek_position=seek_position if streamdetails.can_seek else 0,
         )
+        seek_position = 0 if streamdetails.can_seek else seek_position
     elif stream_type == StreamType.ICY:
-        assert streamdetails.path is not None  # for type checking
+        assert isinstance(streamdetails.path, str)  # for type checking
         audio_source = get_icy_radio_stream(mass, streamdetails.path, streamdetails)
+        seek_position = 0  # seeking not possible on radio streams
     elif stream_type == StreamType.HLS:
-        assert streamdetails.path is not None  # for type checking
+        assert isinstance(streamdetails.path, str)  # for type checking
         substream = await get_hls_substream(mass, streamdetails.path)
         audio_source = substream.path
         if streamdetails.media_type == MediaType.RADIO:
@@ -766,35 +462,32 @@ async def get_media_stream(
             # with ffmpeg, where they just stop after some minutes,
             # so we tell ffmpeg to loop around in this case.
             extra_input_args += ["-stream_loop", "-1", "-re"]
-    elif stream_type == StreamType.ENCRYPTED_HTTP:
-        assert streamdetails.path is not None  # for type checking
-        assert streamdetails.decryption_key is not None  # for type checking
-        audio_source = streamdetails.path
-        extra_input_args += ["-decryption_key", streamdetails.decryption_key]
     else:
-        assert streamdetails.path is not None  # for type checking
-        audio_source = streamdetails.path
+        # all other stream types (HTTP, FILE, etc)
+        if stream_type == StreamType.ENCRYPTED_HTTP:
+            assert streamdetails.decryption_key is not None  # for type checking
+            extra_input_args += ["-decryption_key", streamdetails.decryption_key]
+        if isinstance(streamdetails.path, list):
+            # multi part stream
+            audio_source = get_multi_file_stream(mass, streamdetails, seek_position)
+            seek_position = 0  # handled by get_multi_file_stream
+        else:
+            # regular single file/url stream
+            assert isinstance(streamdetails.path, str)  # for type checking
+            audio_source = streamdetails.path
 
     # handle seek support
-    if (
-        streamdetails.seek_position
-        and streamdetails.duration
-        and streamdetails.allow_seek
-        # allow seeking for custom streams,
-        # but only for custom streams that can't seek theirselves
-        and not (stream_type == StreamType.CUSTOM and streamdetails.can_seek)
-    ):
-        extra_input_args += ["-ss", str(int(streamdetails.seek_position))]
+    if seek_position and streamdetails.duration and streamdetails.allow_seek:
+        extra_input_args += ["-ss", str(int(seek_position))]
 
     bytes_sent = 0
-    chunk_number = 0
-    buffer: bytes = b""
     finished = False
     cancelled = False
+    first_chunk_received = False
     ffmpeg_proc = FFMpeg(
         audio_input=audio_source,
         input_format=streamdetails.audio_format,
-        output_format=pcm_format,
+        output_format=output_format,
         filter_params=filter_params,
         extra_input_args=extra_input_args,
         collect_log_history=True,
@@ -813,77 +506,27 @@ async def get_media_stream(
             streamdetails.uri,
             streamdetails.stream_type,
             streamdetails.volume_normalization_mode,
-            pcm_format.content_type.value,
+            output_format.content_type.value,
             ffmpeg_proc.proc.pid,
         )
-        # use 1 second chunks
-        chunk_size = pcm_format.pcm_sample_size
+        stream_start = mass.loop.time()
+
+        chunk_size = get_chunksize(output_format, 1)
         async for chunk in ffmpeg_proc.iter_chunked(chunk_size):
-            if chunk_number == 1:
+            if not first_chunk_received:
                 # At this point ffmpeg has started and should now know the codec used
                 # for encoding the audio.
+                first_chunk_received = True
                 streamdetails.audio_format.codec_type = ffmpeg_proc.input_format.codec_type
-
-            # for non-tracks we just yield all chunks directly
-            if streamdetails.media_type != MediaType.TRACK:
-                yield chunk
-                bytes_sent += len(chunk)
-                continue
-
-            chunk_number += 1
-            # determine buffer size dynamically
-            if chunk_number < 5 and strip_silence_begin:
-                req_buffer_size = int(pcm_format.pcm_sample_size * 5)
-            elif chunk_number > 240 and strip_silence_end:
-                req_buffer_size = int(pcm_format.pcm_sample_size * 10)
-            elif chunk_number > 120 and strip_silence_end:
-                req_buffer_size = int(pcm_format.pcm_sample_size * 8)
-            elif chunk_number > 60:
-                req_buffer_size = int(pcm_format.pcm_sample_size * 6)
-            elif chunk_number > 20 and strip_silence_end:
-                req_buffer_size = int(pcm_format.pcm_sample_size * 4)
-            else:
-                req_buffer_size = pcm_format.pcm_sample_size * 2
-
-            # always append to buffer
-            buffer += chunk
-            del chunk
-
-            if len(buffer) < req_buffer_size:
-                # buffer is not full enough, move on
-                continue
-
-            if strip_silence_begin:
-                # strip silence from begin of audio
-                strip_silence_begin = False
-                chunk = await strip_silence(  # noqa: PLW2901
-                    mass, buffer, pcm_format=pcm_format
+                logger.debug(
+                    "First chunk received after %s seconds",
+                    mass.loop.time() - stream_start,
                 )
-                bytes_sent += len(chunk)
-                yield chunk
-                buffer = b""
-                continue
-
-            #### OTHER: enough data in buffer, feed to output
-            while len(buffer) > req_buffer_size:
-                yield buffer[: pcm_format.pcm_sample_size]
-                bytes_sent += pcm_format.pcm_sample_size
-                buffer = buffer[pcm_format.pcm_sample_size :]
+            yield chunk
+            bytes_sent += len(chunk)
 
         # end of audio/track reached
         logger.log(VERBOSE_LOG_LEVEL, "End of stream reached.")
-        if strip_silence_end and buffer:
-            # strip silence from end of audio
-            buffer = await strip_silence(
-                mass,
-                buffer,
-                pcm_format=pcm_format,
-                reverse=True,
-            )
-        # send remaining bytes in buffer
-        bytes_sent += len(buffer)
-        yield buffer
-        del buffer
         # wait until stderr also completed reading
         await ffmpeg_proc.wait_with_timeout(5)
         if bytes_sent == 0:
@@ -905,7 +548,16 @@ async def get_media_stream(
         # always ensure close is called which also handles all cleanup
         await ffmpeg_proc.close()
         # try to determine how many seconds we've streamed
-        seconds_streamed = bytes_sent / pcm_format.pcm_sample_size if bytes_sent else 0
+        if output_format.content_type.is_pcm():
+            # for pcm output we can calculate this easily
+            seconds_streamed = bytes_sent / output_format.pcm_sample_size if bytes_sent else 0
+            streamdetails.seconds_streamed = seconds_streamed
+            # store accurate duration
+            if finished and not streamdetails.seek_position and seconds_streamed:
+                streamdetails.duration = int(seconds_streamed)
+        else:
+            # this is a less accurate estimate for compressed audio
+            seconds_streamed = bytes_sent / get_chunksize(output_format, 1)
         logger.debug(
             "stream %s (with code %s) for %s - seconds streamed: %s",
             "cancelled" if cancelled else "finished" if finished else "aborted",
@@ -913,15 +565,6 @@ async def get_media_stream(
             streamdetails.uri,
             seconds_streamed,
         )
-        streamdetails.seconds_streamed = seconds_streamed
-        # store accurate duration
-        if finished and not streamdetails.seek_position and seconds_streamed:
-            streamdetails.duration = int(seconds_streamed)
-
-        # release cache if needed
-        if cache := streamdetails.cache:
-            cache = cast("StreamCache", streamdetails.cache)
-            cache.release()
 
         # parse loudnorm data if we have that collected (and enabled)
         if (
@@ -948,14 +591,15 @@ async def get_media_stream(
                         media_type=streamdetails.media_type,
                     )
                 )
-        elif (
+        # schedule loudness analysis if needed
+        if (
             streamdetails.loudness is None
             and streamdetails.volume_normalization_mode
             not in (
                 VolumeNormalizationMode.DISABLED,
                 VolumeNormalizationMode.FIXED_GAIN,
             )
-            and (finished or (seconds_streamed >= 30))
+            and (finished or (seconds_streamed >= 300))
         ):
             # dynamic mode not allowed and no measurement known, we need to analyze the audio
             # add background task to start analyzing the audio
@@ -1040,7 +684,9 @@ async def resolve_radio_stream(mass: MusicAssistant, url: str) -> tuple[str, Str
     if cache := await mass.cache.get(
         key=url, provider=CACHE_PROVIDER, category=CACHE_CATEGORY_RESOLVED_RADIO_URL
     ):
-        return cast("tuple[str, StreamType]", cache)
+        if TYPE_CHECKING:  # for type checking
+            cache = cast("tuple[str, str]", cache)
+        return (cache[0], StreamType(cache[1]))
     stream_type = StreamType.HTTP
     resolved_url = url
     timeout = ClientTimeout(total=0, connect=10, sock_read=5)
@@ -1309,7 +955,20 @@ async def get_multi_file_stream(
     Arguments:
     seek_position: The position to seek to in seconds
     """
-    files_list: list[str] = streamdetails.data
+    files_list: list[str] = []
+    if not isinstance(streamdetails.path, list):
+        raise InvalidDataError("Multi-file streamdetails requires a list of MultiPartPath")
+    skipped_duration = 0.0
+    for part in streamdetails.path:
+        if not isinstance(part, MultiPartPath):
+            raise InvalidDataError("Multi-file streamdetails requires a list of MultiPartPath")
+        if seek_position and part.duration and (skipped_duration + part.duration) < seek_position:
+            skipped_duration += part.duration
+            continue
+        files_list.append(part.path)
+    if seek_position:
+        seek_position -= int(skipped_duration)
+
     # concat input files
     temp_file = f"/tmp/{shortuuid.random(20)}.txt"  # noqa: S108
     async with aiofiles.open(temp_file, "w") as f:
@@ -1355,18 +1014,11 @@ async def get_preview_stream(
     if TYPE_CHECKING:  # avoid circular import
         assert isinstance(music_prov, MusicProvider)
     streamdetails = await music_prov.get_stream_details(item_id, media_type)
-
-    audio_input: AsyncGenerator[bytes, None] | str
-    if streamdetails.stream_type == StreamType.CUSTOM:
-        audio_input = music_prov.get_audio_stream(streamdetails, 30)
-    else:
-        assert streamdetails.path is not None  # for type checking
-        audio_input = streamdetails.path
-    async for chunk in get_ffmpeg_stream(
-        audio_input=audio_input,
-        input_format=streamdetails.audio_format,
+    streamdetails.extra_input_args += ["-t", "30"]  # cut after 30 seconds
+    async for chunk in get_media_stream(
+        mass=mass,
+        streamdetails=streamdetails,
         output_format=AudioFormat(content_type=ContentType.AAC),
-        extra_input_args=["-to", "30"],
     ):
         yield chunk
 
@@ -1435,7 +1087,7 @@ async def resample_pcm_audio(
 
 def get_chunksize(
     fmt: AudioFormat,
-    seconds: int = 1,
+    seconds: float = 1,
 ) -> int:
     """Get a default chunk/file size for given contenttype in bytes."""
     pcm_size = int(fmt.sample_rate * (fmt.bit_depth / 8) * fmt.channels * seconds)
@@ -1614,30 +1266,33 @@ async def analyze_loudness(
         "-t",
         "600",
     ]
-    if streamdetails.stream_type == StreamType.CACHE:
-        cache = cast("StreamCache", streamdetails.cache)
-        audio_source = await cache.get_audio_stream()
-    elif streamdetails.stream_type == StreamType.MULTI_FILE:
-        audio_source = get_multi_file_stream(mass, streamdetails)
-    elif streamdetails.stream_type == StreamType.CUSTOM:
+    # work out audio source for these streamdetails
+    stream_type = streamdetails.stream_type
+    audio_source: str | AsyncGenerator[bytes, None]
+    if stream_type == StreamType.CUSTOM:
         music_prov = mass.get_provider(streamdetails.provider)
         if TYPE_CHECKING:  # avoid circular import
             assert isinstance(music_prov, MusicProvider)
-        audio_source = music_prov.get_audio_stream(
-            streamdetails,
-        )
-    elif streamdetails.stream_type == StreamType.HLS:
-        assert streamdetails.path is not None  # for type checking
+        audio_source = music_prov.get_audio_stream(streamdetails)
+    elif stream_type == StreamType.ICY:
+        assert isinstance(streamdetails.path, str)  # for type checking
+        audio_source = get_icy_radio_stream(mass, streamdetails.path, streamdetails)
+    elif stream_type == StreamType.HLS:
+        assert isinstance(streamdetails.path, str)  # for type checking
         substream = await get_hls_substream(mass, streamdetails.path)
         audio_source = substream.path
-    elif streamdetails.stream_type == StreamType.ENCRYPTED_HTTP:
-        assert streamdetails.path is not None  # for type checking
-        assert streamdetails.decryption_key is not None  # for type checking
-        audio_source = streamdetails.path
-        extra_input_args += ["-decryption_key", streamdetails.decryption_key]
     else:
-        assert streamdetails.path is not None  # for type checking
-        audio_source = streamdetails.path
+        # all other stream types (HTTP, FILE, etc)
+        if stream_type == StreamType.ENCRYPTED_HTTP:
+            assert streamdetails.decryption_key is not None  # for type checking
+            extra_input_args += ["-decryption_key", streamdetails.decryption_key]
+        if isinstance(streamdetails.path, list):
+            # multi part stream - measure one part only (NOTE: [1] assumes >= 2 parts)
+            audio_source = streamdetails.path[1].path
+        else:
+            # regular single file/url stream
+            assert isinstance(streamdetails.path, str)  # for type checking
+            audio_source = streamdetails.path
 
     # calculate BS.1770 R128 integrated loudness with ffmpeg
     async with FFMpeg(
@@ -1677,11 +1332,6 @@ async def analyze_loudness(
                 streamdetails.uri,
                 loudness,
             )
-        finally:
-            # release cache if needed
-            if cache := streamdetails.cache:
-                cache = cast("StreamCache", streamdetails.cache)
-                cache.release()
 
 
 def _get_normalization_mode(
diff --git a/music_assistant/helpers/ffmpeg.py b/music_assistant/helpers/ffmpeg.py
index d8c7e34d..cfc4ffcc 100644
--- a/music_assistant/helpers/ffmpeg.py
+++ b/music_assistant/helpers/ffmpeg.py
@@ -255,6 +255,10 @@ def get_ffmpeg_args(  # noqa: PLR0915
         "-ignore_unknown",
         "-protocol_whitelist",
         "file,hls,http,https,tcp,tls,crypto,pipe,data,fd,rtp,udp,concat",
+        "-probesize",
+        "8096",
+        "-analyzeduration",
+        "500000",  # 0.5 seconds should be enough to detect the format
     ]
     # collect input args
     if "-f" in extra_input_args:
diff --git a/music_assistant/helpers/playlists.py b/music_assistant/helpers/playlists.py
index 837d6f47..1fa645c6 100644
--- a/music_assistant/helpers/playlists.py
+++ b/music_assistant/helpers/playlists.py
@@ -116,7 +116,7 @@ def parse_pls(pls_data: str) -> list[PlaylistItem]:
     except configparser.Error as err:
         raise InvalidDataError("Can't parse playlist") from err
 
-    if "playlist" not in pls_parser or pls_parser["playlist"].getint("Version") != 2:
+    if "playlist" not in pls_parser:
         raise InvalidDataError("Invalid playlist")
 
     try:
diff --git a/music_assistant/helpers/smart_fades.py b/music_assistant/helpers/smart_fades.py
index 13ae8ed1..32b76dfa 100644
--- a/music_assistant/helpers/smart_fades.py
+++ b/music_assistant/helpers/smart_fades.py
@@ -18,7 +18,7 @@ import numpy.typing as npt
 import shortuuid
 
 from music_assistant.constants import VERBOSE_LOG_LEVEL
-from music_assistant.helpers.audio import crossfade_pcm_parts
+from music_assistant.helpers.audio import crossfade_pcm_parts, strip_silence
 from music_assistant.helpers.process import communicate
 from music_assistant.helpers.util import remove_file
 from music_assistant.models.smart_fades import (
@@ -258,6 +258,21 @@ class SmartFadesMixer:
             # Note that this should not happen since we check this before calling mix()
             # but just to be sure...
             return fade_out_part + fade_in_part
+
+        # strip silence from the end of the fade_out_part audio
+        fade_out_part = await strip_silence(
+            self.mass,
+            fade_out_part,
+            pcm_format=pcm_format,
+            reverse=True,
+        )
+        # strip silence from the beginning of the fade_in_part audio
+        fade_in_part = await strip_silence(
+            self.mass,
+            fade_in_part,
+            pcm_format=pcm_format,
+            reverse=False,
+        )
         if mode == SmartFadesMode.STANDARD_CROSSFADE:
             # crossfade with standard crossfade
             return await self._default_crossfade(
diff --git a/music_assistant/providers/audiobookshelf/__init__.py b/music_assistant/providers/audiobookshelf/__init__.py
index 0a9bd084..5a67e908 100644
--- a/music_assistant/providers/audiobookshelf/__init__.py
+++ b/music_assistant/providers/audiobookshelf/__init__.py
@@ -47,7 +47,7 @@ from music_assistant_models.enums import (
     ProviderFeature,
     StreamType,
 )
-from music_assistant_models.errors import AudioError, LoginFailed, MediaNotFoundError
+from music_assistant_models.errors import LoginFailed, MediaNotFoundError
 from music_assistant_models.media_items import (
     Audiobook,
     AudioFormat,
@@ -58,10 +58,9 @@ from music_assistant_models.media_items import (
     UniqueList,
 )
 from music_assistant_models.media_items.media_item import RecommendationFolder
-from music_assistant_models.streamdetails import StreamDetails
+from music_assistant_models.streamdetails import MultiPartPath, StreamDetails
 
 from music_assistant.controllers.cache import use_cache
-from music_assistant.helpers.audio import get_multi_file_stream
 from music_assistant.models.music_provider import MusicProvider
 from music_assistant.providers.audiobookshelf.parsers import (
     parse_audiobook,
@@ -88,7 +87,6 @@ from .constants import (
 from .helpers import LibrariesHelper, LibraryHelper, ProgressGuard
 
 if TYPE_CHECKING:
-    from aioaudiobookshelf.schema.audio import AudioTrack as AbsAudioTrack
     from aioaudiobookshelf.schema.events_socket import LibraryItemRemoved
     from aioaudiobookshelf.schema.media_progress import MediaProgress
     from aioaudiobookshelf.schema.user import User
@@ -543,6 +541,8 @@ for more details.
 
     async def get_stream_details(self, item_id: str, media_type: MediaType) -> StreamDetails:
         """Get stream of item."""
+        # ensure we have a valid token
+        await self.reauthenticate()
         if media_type == MediaType.PODCAST_EPISODE:
             return await self._get_stream_details_episode(item_id)
         elif media_type == MediaType.AUDIOBOOK:
@@ -566,116 +566,24 @@ for more details.
         if abs_audiobook.media.tracks[0].metadata is not None:
             content_type = ContentType.try_parse(abs_audiobook.media.tracks[0].metadata.ext)
 
+        file_parts: list[MultiPartPath] = []
+        base_url = str(self.config.get_value(CONF_URL))
+        for track in tracks:
+            stream_url = f"{base_url}{track.content_url}?token={self._client.token}"
+            file_parts.append(MultiPartPath(path=stream_url, duration=track.duration))
+
         return StreamDetails(
             provider=self.lookup_key,
             item_id=abs_audiobook.id_,
             audio_format=AudioFormat(content_type=content_type),
             media_type=MediaType.AUDIOBOOK,
-            stream_type=StreamType.CUSTOM,
+            stream_type=StreamType.HTTP,
             duration=int(abs_audiobook.media.duration),
             data=tracks,
             can_seek=True,
             allow_seek=True,
         )
 
-    def _get_track_from_position(
-        self, tracks: list[AbsAudioTrack], seek_position: int
-    ) -> tuple[list[AbsAudioTrack] | None, int]:
-        """Get the remaining tracks list from a timestamp.
-
-        Arguments:
-        tracks: The list of Audiobookshelf tracks
-        seek_position: The seeking position in seconds of the tracklist
-
-        Returns:
-            In a tuple, A list of audiobookshelf tracks, starting with the one at the requested seek
-        position and the position in seconds to seek to in the first track.
-            A tuple of None and 0 if the track wasn't found
-        """
-        for i, track in enumerate(tracks):
-            offset = int(track.start_offset)
-            duration = int(track.duration)
-            if offset + duration < seek_position:
-                continue
-
-            position = int(seek_position) - offset
-
-            # Seeking in some tracks is inaccurate, making the seek to a chapter land on the end of
-            # the previous track. If we're within 2 second of the end, skip the current track
-            if position + 2 >= duration:
-                self.logger.debug(
-                    f"Skipping {track.title} due to seek position being at the end: {position}"
-                )
-                continue
-
-            position = max(position, 0)
-
-            return tracks[i:], position
-        return None, 0
-
-    async def get_audio_stream(
-        self, streamdetails: StreamDetails, seek_position: int = 0
-    ) -> AsyncGenerator[bytes, None]:
-        """Retrieve the audio track at the requested position.
-
-        Arguments:
-        streamdetails: The stream to be used
-        seek_position: The seeking position in seconds
-        """
-
-        async def _get_audio_stream() -> AsyncGenerator[bytes, None]:
-            tracks, position = self._get_track_from_position(streamdetails.data, seek_position)
-            if not tracks:
-                raise MediaNotFoundError(f"Track not found at seek position {seek_position}.")
-
-            self.logger.debug(
-                f"Skipped {len(streamdetails.data) - len(tracks)} tracks"
-                " while seeking to position {seek_position}."
-            )
-            base_url = str(self.config.get_value(CONF_URL))
-            track_urls = []
-            for track in tracks:
-                stream_url = f"{base_url}{track.content_url}?token={self._client.token}"
-                track_urls.append(stream_url)
-
-            async for chunk in get_multi_file_stream(
-                mass=self.mass,
-                streamdetails=StreamDetails(
-                    provider=self.lookup_key,
-                    item_id=streamdetails.item_id,
-                    audio_format=streamdetails.audio_format,
-                    media_type=MediaType.AUDIOBOOK,
-                    stream_type=StreamType.MULTI_FILE,
-                    duration=streamdetails.duration,
-                    data=track_urls,
-                    can_seek=True,
-                    allow_seek=True,
-                ),
-                seek_position=position,
-                raise_ffmpeg_exception=True,
-            ):
-                yield chunk
-
-        # Should our token expire, we try to refresh them and continue streaming once.
-        _refreshed = False
-        while True:
-            try:
-                async for chunk in _get_audio_stream():
-                    _refreshed = False
-                    yield chunk
-                break
-            except AudioError as err:
-                if not _refreshed:
-                    self.logger.debug("FFmpeg raised an error. Trying to refresh token.")
-                    try:
-                        await self._client.session_config.refresh()
-                    except RefreshTokenExpiredError:
-                        await self.reauthenticate()
-                    _refreshed = True
-                else:
-                    self.logger.error(err)
-                    break
-
     async def _get_stream_details_episode(self, podcast_id: str) -> StreamDetails:
         """Streamdetails of a podcast episode.
 
@@ -695,6 +603,8 @@ for more details.
         content_type = ContentType.UNKNOWN
         if abs_episode.audio_track.metadata is not None:
             content_type = ContentType.try_parse(abs_episode.audio_track.metadata.ext)
+        base_url = str(self.config.get_value(CONF_URL))
+        stream_url = f"{base_url}{abs_episode.audio_track.content_url}?token={self._client.token}"
         return StreamDetails(
             provider=self.lookup_key,
             item_id=podcast_id,
@@ -702,10 +612,10 @@ for more details.
                 content_type=content_type,
             ),
             media_type=MediaType.PODCAST_EPISODE,
-            stream_type=StreamType.CUSTOM,
+            stream_type=StreamType.HTTP,
             can_seek=True,
             allow_seek=True,
-            data=[abs_episode.audio_track],
+            path=stream_url,
         )
 
     @handle_refresh_token
diff --git a/music_assistant/providers/builtin_player/player.py b/music_assistant/providers/builtin_player/player.py
index ee35ff18..92548e7e 100644
--- a/music_assistant/providers/builtin_player/player.py
+++ b/music_assistant/providers/builtin_player/player.py
@@ -278,7 +278,6 @@ class BuiltinPlayer(Player):
             bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
             channels=DEFAULT_PCM_FORMAT.channels,
         )
-
         async for chunk in get_ffmpeg_stream(
             audio_input=self.mass.streams.get_queue_flow_stream(
                 queue=queue,
diff --git a/music_assistant/providers/filesystem_local/__init__.py b/music_assistant/providers/filesystem_local/__init__.py
index b116c8e7..557ce348 100644
--- a/music_assistant/providers/filesystem_local/__init__.py
+++ b/music_assistant/providers/filesystem_local/__init__.py
@@ -45,7 +45,7 @@ from music_assistant_models.media_items import (
     UniqueList,
     is_track,
 )
-from music_assistant_models.streamdetails import StreamDetails
+from music_assistant_models.streamdetails import MultiPartPath, StreamDetails
 
 from music_assistant.constants import (
     CONF_PATH,
@@ -1705,9 +1705,12 @@ class LocalFileSystemProvider(MusicProvider):
                 item_id=item_id,
                 audio_format=prov_mapping.audio_format,
                 media_type=MediaType.AUDIOBOOK,
-                stream_type=StreamType.MULTI_FILE,
+                stream_type=StreamType.LOCAL_FILE,
                 duration=duration,
-                data=[self.get_absolute_path(x[0]) for x in file_based_chapters],
+                path=[
+                    MultiPartPath(path=self.get_absolute_path(path), duration=duration)
+                    for path, duration in file_based_chapters
+                ],
                 allow_seek=True,
             )
 
diff --git a/music_assistant/providers/opensubsonic/sonic_provider.py b/music_assistant/providers/opensubsonic/sonic_provider.py
index 3f2e293c..56a699d0 100644
--- a/music_assistant/providers/opensubsonic/sonic_provider.py
+++ b/music_assistant/providers/opensubsonic/sonic_provider.py
@@ -619,7 +619,12 @@ class OpenSonicProvider(MusicProvider):
             allow_seek=True,
             can_seek=self._seek_support,
             media_type=media_type,
-            audio_format=AudioFormat(content_type=ContentType.try_parse(mime_type)),
+            audio_format=AudioFormat(
+                content_type=ContentType.try_parse(mime_type),
+                sample_rate=item.sampling_rate if item.sampling_rate else 44100,
+                bit_depth=item.bit_depth if item.bit_depth else 16,
+                channels=item.channel_count if item.channel_count else 2,
+            ),
             stream_type=StreamType.CUSTOM,
             duration=item.duration if item.duration else 0,
         )
diff --git a/pyproject.toml b/pyproject.toml
index 9ec43933..f1d73c73 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ dependencies = [
   "ifaddr==0.2.0",
   "mashumaro==3.16",
   "music-assistant-frontend==2.16.5",
-  "music-assistant-models==1.1.60",
+  "music-assistant-models==1.1.61",
   "mutagen==1.47.0",
   "orjson==3.11.3",
   "pillow==11.3.0",
diff --git a/requirements_all.txt b/requirements_all.txt
index 85793f19..b600cfcb 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -35,7 +35,7 @@ llvmlite==0.44.0
 lyricsgenius==3.7.2
 mashumaro==3.16
 music-assistant-frontend==2.16.5
-music-assistant-models==1.1.60
+music-assistant-models==1.1.61
 mutagen==1.47.0
 numpy==2.2.6
 orjson==3.11.3