Fix various issues with streaming (e.g. on Sonos devices) (#2536)

author Marcel van der Veldt <m.vanderveldt@outlook.com>

Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)

committer GitHub <noreply@github.com>

Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)
author Marcel van der Veldt <m.vanderveldt@outlook.com>
Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)
committer GitHub <noreply@github.com>
Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)
diff --git a/music_assistant/controllers/streams.py b/music_assistant/controllers/streams.py

index db9c82a0d50e41121403f7dc23892828f5ca5b05..4633d2bb52f6ee4e9e451f1b6dbe880641b9b0c8 100644 (file)
--- a/music_assistant/controllers/streams.py
+++ b/music_assistant/controllers/streams.py
@@ -412,47 +412,48 @@ class StreamsController(CoreController):
              )
              smart_fades_mode = SmartFadesMode.DISABLED
  
+        # work out pcm format based on output format
+        pcm_format = AudioFormat(
+            sample_rate=output_format.sample_rate,
+            # always use f32 internally for extra headroom for filters etc
+            content_type=ContentType.PCM_F32LE,
+            bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
+            channels=2,
+        )
          if smart_fades_mode != SmartFadesMode.DISABLED:
              # crossfade is enabled, use special crossfaded single item stream
              # where the crossfade of the next track is present in the stream of
              # a single track. This only works if the player supports gapless playback.
-            # work out pcm format based on output format
-            pcm_format = AudioFormat(
-                content_type=DEFAULT_PCM_FORMAT.content_type,
-                sample_rate=output_format.sample_rate,
-                # always use f32 internally for extra headroom for filters etc
-                bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
-                channels=2,
-            )
-            audio_input = get_ffmpeg_stream(
-                audio_input=self.get_queue_item_stream_with_smartfade(
-                    queue_item=queue_item,
-                    pcm_format=pcm_format,
-                    session_id=session_id,
-                    smart_fades_mode=smart_fades_mode,
-                    standard_crossfade_duration=standard_crossfade_duration,
-                ),
-                input_format=pcm_format,
-                output_format=output_format,
-                filter_params=get_player_filter_params(
-                    self.mass, queue_player.player_id, pcm_format, output_format
-                ),
+
+            audio_input = self.get_queue_item_stream_with_smartfade(
+                queue_item=queue_item,
+                pcm_format=pcm_format,
+                session_id=session_id,
+                smart_fades_mode=smart_fades_mode,
+                standard_crossfade_duration=standard_crossfade_duration,
              )
          else:
              # no crossfade, just a regular single item stream
-            # no need to convert to pcm first, request output format directly
              audio_input = self.get_queue_item_stream(
                  queue_item=queue_item,
-                output_format=output_format,
-                filter_params=get_player_filter_params(
-                    self.mass,
-                    queue_player.player_id,
-                    queue_item.streamdetails.audio_format,
-                    output_format,
-                ),
+                pcm_format=pcm_format,
              )
-
-        async for chunk in audio_input:
+        # stream the audio
+        # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into
+        # the desired output format for the player including any player specific filter params
+        # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats
+        async for chunk in get_ffmpeg_stream(
+            audio_input=audio_input,
+            input_format=pcm_format,
+            output_format=output_format,
+            filter_params=get_player_filter_params(
+                self.mass,
+                player_id=queue_player.player_id,
+                input_format=pcm_format,
+                output_format=output_format,
+            ),
+            chunk_size=get_chunksize(output_format),
+        ):
              try:
                  await resp.write(chunk)
              except (BrokenPipeError, ConnectionResetError, ConnectionError):
@@ -541,6 +542,9 @@ class StreamsController(CoreController):
              return resp
  
          # all checks passed, start streaming!
+        # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into
+        # the desired output format for the player including any player specific filter params
+        # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats
          self.logger.debug("Start serving Queue flow audio stream for %s", queue.display_name)
  
          async for chunk in get_ffmpeg_stream(
@@ -836,7 +840,7 @@ class StreamsController(CoreController):
              # handle incoming audio chunks
              async for chunk in self.get_queue_item_stream(
                  queue_track,
-                output_format=pcm_format,
+                pcm_format=pcm_format,
              ):
                  # buffer size needs to be big enough to include the crossfade part
                  req_buffer_size = (
@@ -1032,14 +1036,13 @@ class StreamsController(CoreController):
      async def get_queue_item_stream(
          self,
          queue_item: QueueItem,
-        output_format: AudioFormat,
-        filter_params: list[str] | None = None,
+        pcm_format: AudioFormat,
      ) -> AsyncGenerator[bytes, None]:
-        """Get the audio stream for a single queue item."""
+        """Get the (PCM) audio stream for a single queue item."""
          # collect all arguments for ffmpeg
          streamdetails = queue_item.streamdetails
          assert streamdetails
-        filter_params = filter_params or []
+        filter_params: list[str] = []
  
          # handle volume normalization
          gain_correct: float | None = None
@@ -1075,7 +1078,7 @@ class StreamsController(CoreController):
          async for chunk in get_media_stream(
              self.mass,
              streamdetails=streamdetails,
-            output_format=output_format,
+            pcm_format=pcm_format,
              filter_params=filter_params,
          ):
              if not first_chunk_received:
@@ -1279,6 +1282,9 @@ class StreamsController(CoreController):
              # no point in having a higher bit depth for lossy formats
              output_bit_depth = 16
              output_sample_rate = min(48000, output_sample_rate)
+        if content_type == ContentType.WAV and output_bit_depth > 16:
+            # WAV 24bit is not widely supported, fallback to 16bit
+            output_bit_depth = 16
          if output_format_str == "pcm":
              content_type = ContentType.from_bit_depth(output_bit_depth)
          return AudioFormat(
diff --git a/music_assistant/helpers/audio.py b/music_assistant/helpers/audio.py

index f9e082fbffb9fde6d4dc0ad92ace9870d5b4a921..9c1446c7bf6ef8fd9962c56d0f341d3734d023dc 100644 (file)
--- a/music_assistant/helpers/audio.py
+++ b/music_assistant/helpers/audio.py
@@ -424,10 +424,10 @@ async def get_stream_details(
  async def get_media_stream(
      mass: MusicAssistant,
      streamdetails: StreamDetails,
-    output_format: AudioFormat,
+    pcm_format: AudioFormat,
      filter_params: list[str] | None = None,
  ) -> AsyncGenerator[bytes, None]:
-    """Get audio stream for given media details."""
+    """Get audio stream for given media details as raw PCM."""
      logger = LOGGER.getChild("media_stream")
      logger.log(VERBOSE_LOG_LEVEL, "Starting media stream for %s", streamdetails.uri)
      extra_input_args = streamdetails.extra_input_args or []
@@ -487,7 +487,7 @@ async def get_media_stream(
      ffmpeg_proc = FFMpeg(
          audio_input=audio_source,
          input_format=streamdetails.audio_format,
-        output_format=output_format,
+        output_format=pcm_format,
          filter_params=filter_params,
          extra_input_args=extra_input_args,
          collect_log_history=True,
@@ -506,12 +506,12 @@ async def get_media_stream(
              streamdetails.uri,
              streamdetails.stream_type,
              streamdetails.volume_normalization_mode,
-            output_format.content_type.value,
+            pcm_format.content_type.value,
              ffmpeg_proc.proc.pid,
          )
          stream_start = mass.loop.time()
  
-        chunk_size = get_chunksize(output_format, 1)
+        chunk_size = get_chunksize(pcm_format, 1)
          async for chunk in ffmpeg_proc.iter_chunked(chunk_size):
              if not first_chunk_received:
                  # At this point ffmpeg has started and should now know the codec used
@@ -547,17 +547,14 @@ async def get_media_stream(
      finally:
          # always ensure close is called which also handles all cleanup
          await ffmpeg_proc.close()
-        # try to determine how many seconds we've streamed
-        if output_format.content_type.is_pcm():
-            # for pcm output we can calculate this easily
-            seconds_streamed = bytes_sent / output_format.pcm_sample_size if bytes_sent else 0
-            streamdetails.seconds_streamed = seconds_streamed
-            # store accurate duration
-            if finished and not streamdetails.seek_position and seconds_streamed:
-                streamdetails.duration = int(seconds_streamed)
-        else:
-            # this is a less accurate estimate for compressed audio
-            seconds_streamed = bytes_sent / get_chunksize(output_format, 1)
+        # determine how many seconds we've streamed
+        # for pcm output we can calculate this easily
+        seconds_streamed = bytes_sent / pcm_format.pcm_sample_size if bytes_sent else 0
+        streamdetails.seconds_streamed = seconds_streamed
+        # store accurate duration
+        if finished and not streamdetails.seek_position and seconds_streamed:
+            streamdetails.duration = int(seconds_streamed)
+
          logger.debug(
              "stream %s (with code %s) for %s - seconds streamed: %s",
              "cancelled" if cancelled else "finished" if finished else "aborted",
@@ -574,7 +571,7 @@ async def get_media_stream(
          ):
              # if dynamic volume normalization is enabled and the entire track is streamed
              # the loudnorm filter will output the measurement in the log,
-            # so we can use those directly instead of analyzing the audio
+            # so we can use that directly instead of analyzing the audio
              logger.log(VERBOSE_LOG_LEVEL, "Collecting loudness measurement...")
              if loudness_details := parse_loudnorm(" ".join(ffmpeg_proc.log_history)):
                  logger.debug(
@@ -1047,11 +1044,21 @@ async def get_preview_stream(
      if TYPE_CHECKING:  # avoid circular import
          assert isinstance(music_prov, MusicProvider)
      streamdetails = await music_prov.get_stream_details(item_id, media_type)
-    streamdetails.extra_input_args += ["-t", "30"]  # cut after 30 seconds
-    async for chunk in get_media_stream(
-        mass=mass,
-        streamdetails=streamdetails,
+    pcm_format = AudioFormat(
+        content_type=ContentType.from_bit_depth(streamdetails.audio_format.bit_depth),
+        sample_rate=streamdetails.audio_format.sample_rate,
+        bit_depth=streamdetails.audio_format.bit_depth,
+        channels=streamdetails.audio_format.channels,
+    )
+    async for chunk in get_ffmpeg_stream(
+        audio_input=get_media_stream(
+            mass=mass,
+            streamdetails=streamdetails,
+            pcm_format=pcm_format,
+        ),
+        input_format=pcm_format,
          output_format=AudioFormat(content_type=ContentType.AAC),
+        extra_input_args=["-t", "30"],  # cut after 30 seconds
      ):
          yield chunk
author	Marcel van der Veldt <m.vanderveldt@outlook.com>
	Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)
committer	GitHub <noreply@github.com>
	Wed, 22 Oct 2025 18:21:24 +0000 (20:21 +0200)
music_assistant/controllers/streams.py		patch \| blob \| history
music_assistant/helpers/audio.py		patch \| blob \| history