From: Marvin Schenkel Date: Tue, 18 Nov 2025 13:42:01 +0000 (+0100) Subject: Refactor streams controller + smart fades (#2613) X-Git-Url: https://git.kitaultman.com/?a=commitdiff_plain;h=882eb55900de4d49e989b61b791e332717a776a9;p=music-assistant-server.git Refactor streams controller + smart fades (#2613) * Refactor streamscontroller + smartfades * Adjust log levels and add mixer property * Define separate smart fades logger in the streamcontroller * Define separate smart fades logger in the streamcontroller * Remove leftover * Rewrite loggers as instance attributes --- diff --git a/music_assistant/controllers/music.py b/music_assistant/controllers/music.py index 3119a619..25f6d3f9 100644 --- a/music_assistant/controllers/music.py +++ b/music_assistant/controllers/music.py @@ -60,12 +60,12 @@ from music_assistant.constants import ( DB_TABLE_TRACKS, PROVIDERS_WITH_SHAREABLE_URLS, ) +from music_assistant.controllers.streams.smart_fades.fades import SMART_CROSSFADE_DURATION from music_assistant.helpers.api import api_command from music_assistant.helpers.compare import compare_strings, compare_version, create_safe_string from music_assistant.helpers.database import DatabaseConnection from music_assistant.helpers.datetime import utc_timestamp from music_assistant.helpers.json import json_dumps, json_loads, serialize_to_json -from music_assistant.helpers.smart_fades import SMART_CROSSFADE_DURATION from music_assistant.helpers.tags import split_artists from music_assistant.helpers.uri import parse_uri from music_assistant.helpers.util import TaskManager, parse_title_and_version diff --git a/music_assistant/controllers/streams.py b/music_assistant/controllers/streams.py deleted file mode 100644 index 9b3f99d8..00000000 --- a/music_assistant/controllers/streams.py +++ /dev/null @@ -1,1916 +0,0 @@ -""" -Controller to stream audio to players. 
- -The streams controller hosts a basic, unprotected HTTP-only webserver -purely to stream audio packets to players and some control endpoints such as -the upnp callbacks and json rpc api for slimproto clients. -""" - -from __future__ import annotations - -import asyncio -import gc -import logging -import os -import urllib.parse -from collections.abc import AsyncGenerator -from dataclasses import dataclass -from typing import TYPE_CHECKING, Final, cast - -from aiofiles.os import wrap -from aiohttp import web -from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption, ConfigValueType -from music_assistant_models.enums import ( - ConfigEntryType, - ContentType, - MediaType, - PlayerFeature, - StreamType, - VolumeNormalizationMode, -) -from music_assistant_models.errors import ( - AudioError, - InvalidDataError, - ProviderUnavailableError, - QueueEmpty, -) -from music_assistant_models.media_items import AudioFormat, Track -from music_assistant_models.player_queue import PlayLogEntry - -from music_assistant.constants import ( - ANNOUNCE_ALERT_FILE, - CONF_BIND_IP, - CONF_BIND_PORT, - CONF_CROSSFADE_DURATION, - CONF_ENTRY_ENABLE_ICY_METADATA, - CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES, - CONF_HTTP_PROFILE, - CONF_OUTPUT_CHANNELS, - CONF_OUTPUT_CODEC, - CONF_PUBLISH_IP, - CONF_SAMPLE_RATES, - CONF_SMART_FADES_MODE, - CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO, - CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS, - CONF_VOLUME_NORMALIZATION_RADIO, - CONF_VOLUME_NORMALIZATION_TRACKS, - DEFAULT_STREAM_HEADERS, - ICY_HEADERS, - INTERNAL_PCM_FORMAT, - SILENCE_FILE, - VERBOSE_LOG_LEVEL, -) -from music_assistant.controllers.players.player_controller import AnnounceData -from music_assistant.helpers.audio import LOGGER as AUDIO_LOGGER -from music_assistant.helpers.audio import ( - get_buffered_media_stream, - get_chunksize, - get_media_stream, - get_player_filter_params, - get_stream_details, - resample_pcm_audio, -) -from 
music_assistant.helpers.buffered_generator import buffered, use_buffer -from music_assistant.helpers.ffmpeg import LOGGER as FFMPEG_LOGGER -from music_assistant.helpers.ffmpeg import check_ffmpeg_version, get_ffmpeg_stream -from music_assistant.helpers.smart_fades import ( - SMART_CROSSFADE_DURATION, - SmartFadesMixer, -) -from music_assistant.helpers.util import ( - divide_chunks, - get_ip_addresses, - get_total_system_memory, - select_free_port, -) -from music_assistant.helpers.webserver import Webserver -from music_assistant.models.core_controller import CoreController -from music_assistant.models.music_provider import MusicProvider -from music_assistant.models.plugin import PluginProvider, PluginSource -from music_assistant.models.smart_fades import SmartFadesMode -from music_assistant.providers.universal_group.constants import UGP_PREFIX -from music_assistant.providers.universal_group.player import UniversalGroupPlayer - -if TYPE_CHECKING: - from music_assistant_models.config_entries import CoreConfig - from music_assistant_models.player import PlayerMedia - from music_assistant_models.player_queue import PlayerQueue - from music_assistant_models.queue_item import QueueItem - from music_assistant_models.streamdetails import StreamDetails - - from music_assistant.mass import MusicAssistant - from music_assistant.models.player import Player - - -isfile = wrap(os.path.isfile) - -CONF_ALLOW_BUFFER: Final[str] = "allow_buffering" -CONF_ALLOW_CROSSFADE_SAME_ALBUM: Final[str] = "allow_crossfade_same_album" - -# Calculate total system memory once at module load time -TOTAL_SYSTEM_MEMORY_GB: Final[float] = get_total_system_memory() -CONF_ALLOW_BUFFER_DEFAULT = TOTAL_SYSTEM_MEMORY_GB >= 8.0 - - -def parse_pcm_info(content_type: str) -> tuple[int, int, int]: - """Parse PCM info from a codec/content_type string.""" - params = ( - dict(urllib.parse.parse_qsl(content_type.replace(";", "&"))) if ";" in content_type else {} - ) - sample_rate = int(params.get("rate", 44100)) - 
sample_size = int(params.get("bitrate", 16)) - channels = int(params.get("channels", 2)) - return (sample_rate, sample_size, channels) - - -@dataclass -class CrossfadeData: - """Data class to hold crossfade data.""" - - data: bytes - fade_in_size: int - pcm_format: AudioFormat # Format of the 'data' bytes (current/previous track's format) - fade_in_pcm_format: AudioFormat # Format for 'fade_in_size' (next track's format) - queue_item_id: str - - -class StreamsController(CoreController): - """Webserver Controller to stream audio to players.""" - - domain: str = "streams" - - def __init__(self, mass: MusicAssistant) -> None: - """Initialize instance.""" - super().__init__(mass) - self._server = Webserver(self.logger, enable_dynamic_routes=True) - self.register_dynamic_route = self._server.register_dynamic_route - self.unregister_dynamic_route = self._server.unregister_dynamic_route - self.manifest.name = "Streamserver" - self.manifest.description = ( - "Music Assistant's core controller that is responsible for " - "streaming audio to players on the local network." 
- ) - self.manifest.icon = "cast-audio" - self.announcements: dict[str, AnnounceData] = {} - self._crossfade_data: dict[str, CrossfadeData] = {} - self._bind_ip: str = "0.0.0.0" - self._smart_fades_mixer = SmartFadesMixer(self.mass) - - @property - def base_url(self) -> str: - """Return the base_url for the streamserver.""" - return self._server.base_url - - @property - def bind_ip(self) -> str: - """Return the IP address this streamserver is bound to.""" - return self._bind_ip - - async def get_config_entries( - self, - action: str | None = None, - values: dict[str, ConfigValueType] | None = None, - ) -> tuple[ConfigEntry, ...]: - """Return all Config Entries for this core module (if any).""" - ip_addresses = await get_ip_addresses() - default_port = await select_free_port(8097, 9200) - return ( - ConfigEntry( - key=CONF_PUBLISH_IP, - type=ConfigEntryType.STRING, - default_value=ip_addresses[0], - label="Published IP address", - description="This IP address is communicated to players where to find this server." - "\nMake sure that this IP can be reached by players on the local network, " - "otherwise audio streaming will not work.", - required=False, - ), - ConfigEntry( - key=CONF_BIND_PORT, - type=ConfigEntryType.INTEGER, - default_value=default_port, - label="TCP Port", - description="The TCP port to run the server. " - "Make sure that this server can be reached " - "on the given IP and TCP port by players on the local network.", - ), - ConfigEntry( - key=CONF_ALLOW_BUFFER, - type=ConfigEntryType.BOOLEAN, - default_value=CONF_ALLOW_BUFFER_DEFAULT, - label="Allow (in-memory) buffering of (track) audio", - description="By default, Music Assistant tries to be as resource " - "efficient as possible when streaming audio, especially considering " - "low-end devices such as Raspberry Pi's. This means that audio " - "buffering is disabled by default to reduce memory usage. 
\n\n" - "Enabling this option allows for in-memory buffering of audio, " - "which (massively) improves playback (and seeking) performance but it comes " - "at the cost of increased memory usage. " - "If you run Music Assistant on a capable device with enough memory, " - "enabling this option is strongly recommended.", - required=False, - category="audio", - ), - ConfigEntry( - key=CONF_VOLUME_NORMALIZATION_RADIO, - type=ConfigEntryType.STRING, - default_value=VolumeNormalizationMode.FALLBACK_FIXED_GAIN, - label="Volume normalization method for radio streams", - options=[ - ConfigValueOption(x.value.replace("_", " ").title(), x.value) - for x in VolumeNormalizationMode - ], - category="audio", - ), - ConfigEntry( - key=CONF_VOLUME_NORMALIZATION_TRACKS, - type=ConfigEntryType.STRING, - default_value=VolumeNormalizationMode.FALLBACK_DYNAMIC, - label="Volume normalization method for tracks", - options=[ - ConfigValueOption(x.value.replace("_", " ").title(), x.value) - for x in VolumeNormalizationMode - ], - category="audio", - ), - ConfigEntry( - key=CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO, - type=ConfigEntryType.FLOAT, - range=(-20, 10), - default_value=-6, - label="Fixed/fallback gain adjustment for radio streams", - category="audio", - ), - ConfigEntry( - key=CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS, - type=ConfigEntryType.FLOAT, - range=(-20, 10), - default_value=-6, - label="Fixed/fallback gain adjustment for tracks", - category="audio", - ), - ConfigEntry( - key=CONF_ALLOW_CROSSFADE_SAME_ALBUM, - type=ConfigEntryType.BOOLEAN, - default_value=False, - label="Allow crossfade between tracks from the same album", - description="Enabling this option allows for crossfading between tracks " - "that are part of the same album.", - category="audio", - ), - ConfigEntry( - key=CONF_BIND_IP, - type=ConfigEntryType.STRING, - default_value="0.0.0.0", - options=[ConfigValueOption(x, x) for x in {"0.0.0.0", *ip_addresses}], - label="Bind to IP/interface", - 
description="Start the stream server on this specific interface. \n" - "Use 0.0.0.0 to bind to all interfaces, which is the default. \n" - "This is an advanced setting that should normally " - "not be adjusted in regular setups.", - category="advanced", - required=False, - ), - ) - - async def setup(self, config: CoreConfig) -> None: - """Async initialize of module.""" - # copy log level to audio/ffmpeg loggers - AUDIO_LOGGER.setLevel(self.logger.level) - FFMPEG_LOGGER.setLevel(self.logger.level) - # perform check for ffmpeg version - await check_ffmpeg_version() - # start the webserver - self.publish_port = config.get_value(CONF_BIND_PORT) - self.publish_ip = config.get_value(CONF_PUBLISH_IP) - self._bind_ip = bind_ip = str(config.get_value(CONF_BIND_IP)) - # print a big fat message in the log where the streamserver is running - # because this is a common source of issues for people with more complex setups - self.logger.log( - logging.INFO if self.mass.config.onboard_done else logging.WARNING, - "\n\n################################################################################\n" - "Starting streamserver on %s:%s\n" - "This is the IP address that is communicated to players.\n" - "If this is incorrect, audio will not play!\n" - "See the documentation how to configure the publish IP for the Streamserver\n" - "in Settings --> Core modules --> Streamserver\n" - "################################################################################\n", - self.publish_ip, - self.publish_port, - ) - await self._server.setup( - bind_ip=bind_ip, - bind_port=cast("int", self.publish_port), - base_url=f"http://{self.publish_ip}:{self.publish_port}", - static_routes=[ - ( - "*", - "/flow/{session_id}/{queue_id}/{queue_item_id}.{fmt}", - self.serve_queue_flow_stream, - ), - ( - "*", - "/single/{session_id}/{queue_id}/{queue_item_id}.{fmt}", - self.serve_queue_item_stream, - ), - ( - "*", - "/command/{queue_id}/{command}.mp3", - self.serve_command_request, - ), - ( - "*", - 
"/announcement/{player_id}.{fmt}", - self.serve_announcement_stream, - ), - ( - "*", - "/pluginsource/{plugin_source}/{player_id}.{fmt}", - self.serve_plugin_source_stream, - ), - ], - ) - # Start periodic garbage collection task - # This ensures memory from audio buffers and streams is cleaned up regularly - self.mass.call_later(900, self._periodic_garbage_collection) # 15 minutes - - async def close(self) -> None: - """Cleanup on exit.""" - await self._server.close() - - async def resolve_stream_url( - self, - session_id: str, - queue_item: QueueItem, - flow_mode: bool = False, - player_id: str | None = None, - ) -> str: - """Resolve the stream URL for the given QueueItem.""" - if not player_id: - player_id = queue_item.queue_id - conf_output_codec = await self.mass.config.get_player_config_value( - player_id, CONF_OUTPUT_CODEC, default="flac", return_type=str - ) - output_codec = ContentType.try_parse(conf_output_codec or "flac") - fmt = output_codec.value - # handle raw pcm without exact format specifiers - if output_codec.is_pcm() and ";" not in fmt: - fmt += f";codec=pcm;rate={44100};bitrate={16};channels={2}" - base_path = "flow" if flow_mode else "single" - return f"{self._server.base_url}/{base_path}/{session_id}/{queue_item.queue_id}/{queue_item.queue_item_id}.{fmt}" # noqa: E501 - - async def get_plugin_source_url( - self, - plugin_source: PluginSource, - player_id: str, - ) -> str: - """Get the url for the Plugin Source stream/proxy.""" - if plugin_source.audio_format.content_type.is_pcm(): - fmt = ContentType.WAV.value - else: - fmt = plugin_source.audio_format.content_type.value - return f"{self._server.base_url}/pluginsource/{plugin_source.id}/{player_id}.{fmt}" - - async def serve_queue_item_stream(self, request: web.Request) -> web.StreamResponse: - """Stream single queueitem audio to a player.""" - self._log_request(request) - queue_id = request.match_info["queue_id"] - queue = self.mass.player_queues.get(queue_id) - if not queue: - raise 
web.HTTPNotFound(reason=f"Unknown Queue: {queue_id}") - session_id = request.match_info["session_id"] - if queue.session_id and session_id != queue.session_id: - raise web.HTTPNotFound(reason=f"Unknown (or invalid) session: {session_id}") - queue_player = self.mass.players.get(queue_id) - queue_item_id = request.match_info["queue_item_id"] - queue_item = self.mass.player_queues.get_item(queue_id, queue_item_id) - if not queue_item: - raise web.HTTPNotFound(reason=f"Unknown Queue item: {queue_item_id}") - if not queue_item.streamdetails: - try: - queue_item.streamdetails = await get_stream_details( - mass=self.mass, queue_item=queue_item - ) - except Exception as e: - self.logger.error( - "Failed to get streamdetails for QueueItem %s: %s", queue_item_id, e - ) - queue_item.available = False - raise web.HTTPNotFound(reason=f"No streamdetails for Queue item: {queue_item_id}") - - # pick output format based on the streamdetails and player capabilities - if not queue_player: - raise web.HTTPNotFound(reason=f"Unknown Player: {queue_id}") - - output_format = await self.get_output_format( - output_format_str=request.match_info["fmt"], - player=queue_player, - content_sample_rate=queue_item.streamdetails.audio_format.sample_rate, - # always use f32 internally for extra headroom for filters etc - content_bit_depth=INTERNAL_PCM_FORMAT.bit_depth, - ) - - # prepare request, add some DLNA/UPNP compatible headers - headers = { - **DEFAULT_STREAM_HEADERS, - "icy-name": queue_item.name, - "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01500000000000000000000000000000", # noqa: E501 - "Accept-Ranges": "none", - "Content-Type": f"audio/{output_format.output_format_str}", - } - resp = web.StreamResponse( - status=200, - reason="OK", - headers=headers, - ) - resp.content_type = f"audio/{output_format.output_format_str}" - http_profile = await self.mass.config.get_player_config_value( - queue_id, CONF_HTTP_PROFILE, default="default", return_type=str - ) - if http_profile == 
"forced_content_length" and not queue_item.duration: - # just set an insane high content length to make sure the player keeps playing - resp.content_length = get_chunksize(output_format, 12 * 3600) - elif http_profile == "forced_content_length" and queue_item.duration: - # guess content length based on duration - resp.content_length = get_chunksize(output_format, queue_item.duration) - elif http_profile == "chunked": - resp.enable_chunked_encoding() - - await resp.prepare(request) - - # return early if this is not a GET request - if request.method != "GET": - return resp - - if queue_item.media_type != MediaType.TRACK: - # no crossfade on non-tracks - smart_fades_mode = SmartFadesMode.DISABLED - else: - smart_fades_mode = await self.mass.config.get_player_config_value( - queue.queue_id, CONF_SMART_FADES_MODE, return_type=SmartFadesMode - ) - standard_crossfade_duration = self.mass.config.get_raw_player_config_value( - queue.queue_id, CONF_CROSSFADE_DURATION, 10 - ) - if ( - smart_fades_mode != SmartFadesMode.DISABLED - and PlayerFeature.GAPLESS_PLAYBACK not in queue_player.supported_features - ): - # crossfade is not supported on this player due to missing gapless playback - self.logger.warning( - "Crossfade disabled: Player %s does not support gapless playback, " - "consider enabling flow mode to enable crossfade on this player.", - queue_player.display_name if queue_player else "Unknown Player", - ) - smart_fades_mode = SmartFadesMode.DISABLED - - # work out pcm format based on streamdetails - pcm_format = AudioFormat( - sample_rate=queue_item.streamdetails.audio_format.sample_rate, - # always use f32 internally for extra headroom for filters etc - content_type=INTERNAL_PCM_FORMAT.content_type, - bit_depth=INTERNAL_PCM_FORMAT.bit_depth, - channels=queue_item.streamdetails.audio_format.channels, - ) - if smart_fades_mode != SmartFadesMode.DISABLED: - # crossfade is enabled, use special crossfaded single item stream - # where the crossfade of the next track is 
present in the stream of - # a single track. This only works if the player supports gapless playback! - audio_input = self.get_queue_item_stream_with_smartfade( - queue_item=queue_item, - pcm_format=pcm_format, - smart_fades_mode=smart_fades_mode, - standard_crossfade_duration=standard_crossfade_duration, - ) - else: - # no crossfade, just a regular single item stream - audio_input = self.get_queue_item_stream( - queue_item=queue_item, - pcm_format=pcm_format, - seek_position=queue_item.streamdetails.seek_position, - ) - # stream the audio - # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into - # the desired output format for the player including any player specific filter params - # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats - - # readrate filter input args to control buffering - # we need to slowly feed the music to avoid the player stopping and later - # restarting (or completely failing) the audio stream by keeping the buffer short. - # this is reported to be an issue especially with Chromecast players. - # see for example: https://github.com/music-assistant/support/issues/3717 - user_agent = request.headers.get("User-Agent", "") - if queue_item.media_type == MediaType.RADIO: - # keep very short buffer for radio streams - # to keep them (more or less) realtime and prevent time outs - read_rate_input_args = ["-readrate", "1.0", "-readrate_initial_burst", "2"] - elif "Network_Module" in user_agent or "transferMode.dlna.org" in request.headers: - # and ofcourse we have an exception of the exception. Where most players actually NEED - # the readrate filter to avoid disconnecting, some other players (DLNA/MusicCast) - # actually fail when the filter is used. So we disable it completely for those players. 
- read_rate_input_args = None # disable readrate for DLNA players - else: - # allow buffer ahead of 10 seconds and read 1.5x faster than realtime - read_rate_input_args = ["-readrate", "1.5", "-readrate_initial_burst", "10"] - - first_chunk_received = False - bytes_sent = 0 - async for chunk in get_ffmpeg_stream( - audio_input=audio_input, - input_format=pcm_format, - output_format=output_format, - filter_params=get_player_filter_params( - self.mass, - player_id=queue_player.player_id, - input_format=pcm_format, - output_format=output_format, - ), - extra_input_args=read_rate_input_args, - ): - try: - await resp.write(chunk) - bytes_sent += len(chunk) - if not first_chunk_received: - first_chunk_received = True - # inform the queue that the track is now loaded in the buffer - # so for example the next track can be enqueued - self.mass.player_queues.track_loaded_in_buffer( - queue_item.queue_id, queue_item.queue_item_id - ) - except (BrokenPipeError, ConnectionResetError, ConnectionError) as err: - if first_chunk_received and not queue_player.stop_called: - # Player disconnected (unexpected) after receiving at least some data - # This could indicate buffering issues, network problems, - # or player-specific issues - bytes_expected = get_chunksize(output_format, queue_item.duration or 3600) - self.logger.warning( - "Player %s disconnected prematurely from stream for %s (%s) - " - "error: %s, sent %d bytes, expected (approx) bytes=%d", - queue.display_name, - queue_item.name, - queue_item.uri, - err.__class__.__name__, - bytes_sent, - bytes_expected, - ) - break - if queue_item.streamdetails.stream_error: - self.logger.error( - "Error streaming QueueItem %s (%s) to %s - will try to skip to next item", - queue_item.name, - queue_item.uri, - queue.display_name, - ) - # try to skip to the next item in the queue after a short delay - self.mass.call_later(5, self.mass.player_queues.next(queue_id)) - return resp - - async def serve_queue_flow_stream(self, request: 
web.Request) -> web.StreamResponse: - """Stream Queue Flow audio to player.""" - self._log_request(request) - queue_id = request.match_info["queue_id"] - queue = self.mass.player_queues.get(queue_id) - if not queue: - raise web.HTTPNotFound(reason=f"Unknown Queue: {queue_id}") - if not (queue_player := self.mass.players.get(queue_id)): - raise web.HTTPNotFound(reason=f"Unknown Player: {queue_id}") - start_queue_item_id = request.match_info["queue_item_id"] - start_queue_item = self.mass.player_queues.get_item(queue_id, start_queue_item_id) - if not start_queue_item: - raise web.HTTPNotFound(reason=f"Unknown Queue item: {start_queue_item_id}") - - # select the highest possible PCM settings for this player - flow_pcm_format = await self._select_flow_format(queue_player) - - # work out output format/details - output_format = await self.get_output_format( - output_format_str=request.match_info["fmt"], - player=queue_player, - content_sample_rate=flow_pcm_format.sample_rate, - content_bit_depth=flow_pcm_format.bit_depth, - ) - # work out ICY metadata support - icy_preference = self.mass.config.get_raw_player_config_value( - queue_id, - CONF_ENTRY_ENABLE_ICY_METADATA.key, - CONF_ENTRY_ENABLE_ICY_METADATA.default_value, - ) - enable_icy = request.headers.get("Icy-MetaData", "") == "1" and icy_preference != "disabled" - icy_meta_interval = 256000 if icy_preference == "full" else 16384 - - # prepare request, add some DLNA/UPNP compatible headers - headers = { - **DEFAULT_STREAM_HEADERS, - **ICY_HEADERS, - "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01700000000000000000000000000000", # noqa: E501 - "Accept-Ranges": "none", - "Content-Type": f"audio/{output_format.output_format_str}", - } - if enable_icy: - headers["icy-metaint"] = str(icy_meta_interval) - - resp = web.StreamResponse( - status=200, - reason="OK", - headers=headers, - ) - http_profile = await self.mass.config.get_player_config_value( - queue_id, CONF_HTTP_PROFILE, default="default", 
return_type=str - ) - if http_profile == "forced_content_length": - # just set an insane high content length to make sure the player keeps playing - resp.content_length = get_chunksize(output_format, 12 * 3600) - elif http_profile == "chunked": - resp.enable_chunked_encoding() - - await resp.prepare(request) - - # return early if this is not a GET request - if request.method != "GET": - return resp - - # all checks passed, start streaming! - # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into - # the desired output format for the player including any player specific filter params - # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats - self.logger.debug("Start serving Queue flow audio stream for %s", queue.display_name) - - async for chunk in get_ffmpeg_stream( - audio_input=self.get_queue_flow_stream( - queue=queue, - start_queue_item=start_queue_item, - pcm_format=flow_pcm_format, - ), - input_format=flow_pcm_format, - output_format=output_format, - filter_params=get_player_filter_params( - self.mass, queue_player.player_id, flow_pcm_format, output_format - ), - # we need to slowly feed the music to avoid the player stopping and later - # restarting (or completely failing) the audio stream by keeping the buffer short. - # this is reported to be an issue especially with Chromecast players. 
- # see for example: https://github.com/music-assistant/support/issues/3717 - # allow buffer ahead of 8 seconds and read slightly faster than realtime - extra_input_args=["-readrate", "1.01", "-readrate_initial_burst", "8"], - chunk_size=icy_meta_interval if enable_icy else get_chunksize(output_format), - ): - try: - await resp.write(chunk) - except (BrokenPipeError, ConnectionResetError, ConnectionError): - # race condition - break - - if not enable_icy: - continue - - # if icy metadata is enabled, send the icy metadata after the chunk - if ( - # use current item here and not buffered item, otherwise - # the icy metadata will be too much ahead - (current_item := queue.current_item) - and current_item.streamdetails - and current_item.streamdetails.stream_title - ): - title = current_item.streamdetails.stream_title - elif queue and current_item and current_item.name: - title = current_item.name - else: - title = "Music Assistant" - metadata = f"StreamTitle='{title}';".encode() - if icy_preference == "full" and current_item and current_item.image: - metadata += f"StreamURL='{current_item.image.path}'".encode() - while len(metadata) % 16 != 0: - metadata += b"\x00" - length = len(metadata) - length_b = chr(int(length / 16)).encode() - await resp.write(length_b + metadata) - - return resp - - async def serve_command_request(self, request: web.Request) -> web.FileResponse: - """Handle special 'command' request for a player.""" - self._log_request(request) - queue_id = request.match_info["queue_id"] - command = request.match_info["command"] - if command == "next": - self.mass.create_task(self.mass.player_queues.next(queue_id)) - return web.FileResponse(SILENCE_FILE, headers={"icy-name": "Music Assistant"}) - - async def serve_announcement_stream(self, request: web.Request) -> web.StreamResponse: - """Stream announcement audio to a player.""" - self._log_request(request) - player_id = request.match_info["player_id"] - player = self.mass.player_queues.get(player_id) - if 
not player: - raise web.HTTPNotFound(reason=f"Unknown Player: {player_id}") - if not (announce_data := self.announcements.get(player_id)): - raise web.HTTPNotFound(reason=f"No pending announcements for Player: {player_id}") - - # work out output format/details - fmt = request.match_info["fmt"] - audio_format = AudioFormat(content_type=ContentType.try_parse(fmt)) - - http_profile = await self.mass.config.get_player_config_value( - player_id, CONF_HTTP_PROFILE, default="default", return_type=str - ) - if http_profile == "forced_content_length": - # given the fact that an announcement is just a short audio clip, - # just send it over completely at once so we have a fixed content length - data = b"" - async for chunk in self.get_announcement_stream( - announcement_url=announce_data["announcement_url"], - output_format=audio_format, - pre_announce=announce_data["pre_announce"], - pre_announce_url=announce_data["pre_announce_url"], - ): - data += chunk - return web.Response( - body=data, - content_type=f"audio/{audio_format.output_format_str}", - headers=DEFAULT_STREAM_HEADERS, - ) - - resp = web.StreamResponse( - status=200, - reason="OK", - headers=DEFAULT_STREAM_HEADERS, - ) - resp.content_type = f"audio/{audio_format.output_format_str}" - if http_profile == "chunked": - resp.enable_chunked_encoding() - - await resp.prepare(request) - - # return early if this is not a GET request - if request.method != "GET": - return resp - - # all checks passed, start streaming! 
- self.logger.debug( - "Start serving audio stream for Announcement %s to %s", - announce_data["announcement_url"], - player.display_name, - ) - async for chunk in self.get_announcement_stream( - announcement_url=announce_data["announcement_url"], - output_format=audio_format, - pre_announce=announce_data["pre_announce"], - pre_announce_url=announce_data["pre_announce_url"], - ): - try: - await resp.write(chunk) - except (BrokenPipeError, ConnectionResetError): - break - - self.logger.debug( - "Finished serving audio stream for Announcement %s to %s", - announce_data["announcement_url"], - player.display_name, - ) - - return resp - - async def serve_plugin_source_stream(self, request: web.Request) -> web.StreamResponse: - """Stream PluginSource audio to a player.""" - self._log_request(request) - plugin_source_id = request.match_info["plugin_source"] - provider = cast("PluginProvider", self.mass.get_provider(plugin_source_id)) - if not provider: - raise ProviderUnavailableError(f"Unknown PluginSource: {plugin_source_id}") - # work out output format/details - player_id = request.match_info["player_id"] - player = self.mass.players.get(player_id) - if not player: - raise web.HTTPNotFound(reason=f"Unknown Player: {player_id}") - plugin_source = provider.get_source() - output_format = await self.get_output_format( - output_format_str=request.match_info["fmt"], - player=player, - content_sample_rate=plugin_source.audio_format.sample_rate, - content_bit_depth=plugin_source.audio_format.bit_depth, - ) - headers = { - **DEFAULT_STREAM_HEADERS, - "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01700000000000000000000000000000", # noqa: E501 - "icy-name": plugin_source.name, - "Accept-Ranges": "none", - "Content-Type": f"audio/{output_format.output_format_str}", - } - - resp = web.StreamResponse( - status=200, - reason="OK", - headers=headers, - ) - resp.content_type = f"audio/{output_format.output_format_str}" - http_profile = await 
self.mass.config.get_player_config_value( - player_id, CONF_HTTP_PROFILE, default="default", return_type=str - ) - if http_profile == "forced_content_length": - # just set an insanely high content length to make sure the player keeps playing - resp.content_length = get_chunksize(output_format, 12 * 3600) - elif http_profile == "chunked": - resp.enable_chunked_encoding() - - await resp.prepare(request) - - # return early if this is not a GET request - if request.method != "GET": - return resp - - # all checks passed, start streaming! - if not plugin_source.audio_format: - raise InvalidDataError(f"No audio format for plugin source {plugin_source_id}") - async for chunk in self.get_plugin_source_stream( - plugin_source_id=plugin_source_id, - output_format=output_format, - player_id=player_id, - player_filter_params=get_player_filter_params( - self.mass, player_id, plugin_source.audio_format, output_format - ), - ): - try: - await resp.write(chunk) - except (BrokenPipeError, ConnectionResetError, ConnectionError): - break - return resp - - def get_command_url(self, player_or_queue_id: str, command: str) -> str: - """Get the url for the special command stream.""" - return f"{self.base_url}/command/{player_or_queue_id}/{command}.mp3" - - def get_announcement_url( - self, - player_id: str, - announce_data: AnnounceData, - content_type: ContentType = ContentType.MP3, - ) -> str: - """Get the url for the special announcement stream.""" - self.announcements[player_id] = announce_data - # use stream server to host announcement on local network - # this ensures playback on all players, including ones that do not - # like https hosts and it also offers the pre-announce 'bell' - return f"{self.base_url}/announcement/{player_id}.{content_type.value}" - - def get_stream( - self, media: PlayerMedia, pcm_format: AudioFormat - ) -> AsyncGenerator[bytes, None]: - """ - Get a stream of the given media as raw PCM audio. 
        This is used as helper for player providers that can consume the raw PCM
        audio stream directly (e.g. AirPlay) and not rely on HTTP transport.
        """
        # select audio source based on the media type / origin of the request
        if media.media_type == MediaType.ANNOUNCEMENT:
            # special case: stream announcement
            assert media.custom_data
            audio_source = self.get_announcement_stream(
                media.custom_data["announcement_url"],
                output_format=pcm_format,
                pre_announce=media.custom_data["pre_announce"],
                pre_announce_url=media.custom_data["pre_announce_url"],
            )
        elif media.media_type == MediaType.PLUGIN_SOURCE:
            # special case: plugin source stream
            assert media.custom_data
            audio_source = self.get_plugin_source_stream(
                plugin_source_id=media.custom_data["source_id"],
                output_format=pcm_format,
                # need to pass player_id from the PlayerMedia object
                # because this could have been a group
                player_id=media.custom_data["player_id"],
            )
        elif (
            media.media_type == MediaType.FLOW_STREAM
            and media.source_id
            and media.source_id.startswith(UGP_PREFIX)
            and media.uri
            and "/ugp/" in media.uri
        ):
            # special case: member player accessing UGP stream
            # Check URI to distinguish from the UGP accessing its own stream
            ugp_player = cast("UniversalGroupPlayer", self.mass.players.get(media.source_id))
            ugp_stream = ugp_player.stream
            assert ugp_stream is not None  # for type checker
            if ugp_stream.base_pcm_format == pcm_format:
                # no conversion needed
                audio_source = ugp_stream.subscribe_raw()
            else:
                audio_source = ugp_stream.get_stream(output_format=pcm_format)
        elif media.source_id and media.queue_item_id and media.media_type == MediaType.FLOW_STREAM:
            # regular queue (flow) stream request
            queue = self.mass.player_queues.get(media.source_id)
            assert queue
            start_queue_item = self.mass.player_queues.get_item(
                media.source_id, media.queue_item_id
            )
            assert start_queue_item
            audio_source = self.mass.streams.get_queue_flow_stream(
                queue=queue,
                start_queue_item=start_queue_item,
                pcm_format=pcm_format,
            )
        elif media.source_id and media.queue_item_id:
            # single item stream (e.g. radio)
            queue_item = self.mass.player_queues.get_item(media.source_id, media.queue_item_id)
            assert queue_item
            audio_source = buffered(
                self.get_queue_item_stream(
                    queue_item=queue_item,
                    pcm_format=pcm_format,
                ),
                buffer_size=10,
                min_buffer_before_yield=2,
            )
        else:
            # assume url or some other direct path
            # NOTE: this will fail if its an uri not playable by ffmpeg
            audio_source = get_ffmpeg_stream(
                audio_input=media.uri,
                input_format=AudioFormat(content_type=ContentType.try_parse(media.uri)),
                output_format=pcm_format,
            )
        return audio_source

    @use_buffer(buffer_size=30, min_buffer_before_yield=2)
    async def get_queue_flow_stream(
        self,
        queue: PlayerQueue,
        start_queue_item: QueueItem,
        pcm_format: AudioFormat,
    ) -> AsyncGenerator[bytes, None]:
        """
        Get a flow stream of all tracks in the queue as raw PCM audio.

        yields chunks of exactly 1 second of audio in the given pcm_format.
- """ - # ruff: noqa: PLR0915 - assert pcm_format.content_type.is_pcm() - queue_track = None - last_fadeout_part: bytes = b"" - last_streamdetails: StreamDetails | None = None - last_play_log_entry: PlayLogEntry | None = None - queue.flow_mode = True - if not start_queue_item: - # this can happen in some (edge case) race conditions - return - pcm_sample_size = pcm_format.pcm_sample_size - if start_queue_item.media_type != MediaType.TRACK: - # no crossfade on non-tracks - smart_fades_mode = SmartFadesMode.DISABLED - standard_crossfade_duration = 0 - else: - smart_fades_mode = await self.mass.config.get_player_config_value( - queue.queue_id, CONF_SMART_FADES_MODE, return_type=SmartFadesMode - ) - standard_crossfade_duration = self.mass.config.get_raw_player_config_value( - queue.queue_id, CONF_CROSSFADE_DURATION, 10 - ) - self.logger.info( - "Start Queue Flow stream for Queue %s - crossfade: %s %s", - queue.display_name, - smart_fades_mode, - f"({standard_crossfade_duration}s)" - if smart_fades_mode == SmartFadesMode.STANDARD_CROSSFADE - else "", - ) - total_bytes_sent = 0 - total_chunks_received = 0 - - while True: - # get (next) queue item to stream - if queue_track is None: - queue_track = start_queue_item - else: - try: - queue_track = await self.mass.player_queues.load_next_queue_item( - queue.queue_id, queue_track.queue_item_id - ) - except QueueEmpty: - break - - if queue_track.streamdetails is None: - raise InvalidDataError( - "No Streamdetails known for queue item %s", - queue_track.queue_item_id, - ) - - self.logger.debug( - "Start Streaming queue track: %s (%s) for queue %s", - queue_track.streamdetails.uri, - queue_track.name, - queue.display_name, - ) - # append to play log so the queue controller can work out which track is playing - play_log_entry = PlayLogEntry(queue_track.queue_item_id) - queue.flow_mode_stream_log.append(play_log_entry) - # calculate crossfade buffer size - crossfade_buffer_duration = ( - SMART_CROSSFADE_DURATION - if 
smart_fades_mode == SmartFadesMode.SMART_CROSSFADE - else standard_crossfade_duration - ) - crossfade_buffer_duration = min( - crossfade_buffer_duration, - int(queue_track.streamdetails.duration / 2) - if queue_track.streamdetails.duration - else crossfade_buffer_duration, - ) - # Ensure crossfade buffer size is aligned to frame boundaries - # Frame size = bytes_per_sample * channels - bytes_per_sample = pcm_format.bit_depth // 8 - frame_size = bytes_per_sample * pcm_format.channels - crossfade_buffer_size = int(pcm_format.pcm_sample_size * crossfade_buffer_duration) - # Round down to nearest frame boundary - crossfade_buffer_size = (crossfade_buffer_size // frame_size) * frame_size - - bytes_written = 0 - buffer = b"" - # handle incoming audio chunks - first_chunk_received = False - # buffer size needs to be big enough to include the crossfade part - - async for chunk in self.get_queue_item_stream( - queue_track, - pcm_format=pcm_format, - seek_position=queue_track.streamdetails.seek_position, - raise_on_error=False, - ): - total_chunks_received += 1 - if not first_chunk_received: - first_chunk_received = True - # inform the queue that the track is now loaded in the buffer - # so the next track can be preloaded - self.mass.player_queues.track_loaded_in_buffer( - queue.queue_id, queue_track.queue_item_id - ) - if total_chunks_received < 10 and smart_fades_mode != SmartFadesMode.DISABLED: - # we want a stream to start as quickly as possible - # so for the first 10 chunks we keep a very short buffer - req_buffer_size = pcm_format.pcm_sample_size - else: - req_buffer_size = ( - pcm_sample_size - if smart_fades_mode == SmartFadesMode.DISABLED - else crossfade_buffer_size - ) - - # ALWAYS APPEND CHUNK TO BUFFER - buffer += chunk - del chunk - if len(buffer) < req_buffer_size: - # buffer is not full enough, move on - # yield control to event loop with 10ms delay - await asyncio.sleep(0.01) - continue - - #### HANDLE CROSSFADE OF PREVIOUS TRACK AND NEW TRACK - if 
last_fadeout_part and last_streamdetails: - # perform crossfade - fadein_part = buffer[:crossfade_buffer_size] - remaining_bytes = buffer[crossfade_buffer_size:] - # Use the mixer to handle all crossfade logic - crossfade_part = await self._smart_fades_mixer.mix( - fade_in_part=fadein_part, - fade_out_part=last_fadeout_part, - fade_in_streamdetails=queue_track.streamdetails, - fade_out_streamdetails=last_streamdetails, - pcm_format=pcm_format, - standard_crossfade_duration=standard_crossfade_duration, - mode=smart_fades_mode, - ) - # because the crossfade exists of both the fadein and fadeout part - # we need to correct the bytes_written accordingly so the duration - # calculations at the end of the track are correct - crossfade_part_len = len(crossfade_part) - bytes_written += int(crossfade_part_len / 2) - if last_play_log_entry: - assert last_play_log_entry.seconds_streamed is not None - last_play_log_entry.seconds_streamed += ( - crossfade_part_len / 2 / pcm_sample_size - ) - # yield crossfade_part (in pcm_sample_size chunks) - for _chunk in divide_chunks(crossfade_part, pcm_sample_size): - yield _chunk - del _chunk - del crossfade_part - # also write the leftover bytes from the crossfade action - if remaining_bytes: - yield remaining_bytes - bytes_written += len(remaining_bytes) - del remaining_bytes - # clear vars - last_fadeout_part = b"" - last_streamdetails = None - buffer = b"" - - #### OTHER: enough data in buffer, feed to output - while len(buffer) > req_buffer_size: - yield buffer[:pcm_sample_size] - bytes_written += pcm_sample_size - buffer = buffer[pcm_sample_size:] - - #### HANDLE END OF TRACK - if last_fadeout_part: - # edge case: we did not get enough data to make the crossfade - for _chunk in divide_chunks(last_fadeout_part, pcm_sample_size): - yield _chunk - del _chunk - bytes_written += len(last_fadeout_part) - last_fadeout_part = b"" - if self._crossfade_allowed( - queue_track, smart_fades_mode=smart_fades_mode, flow_mode=True - ): - # if 
crossfade is enabled, save fadeout part to pickup for next track - last_fadeout_part = buffer[-crossfade_buffer_size:] - last_streamdetails = queue_track.streamdetails - last_play_log_entry = play_log_entry - remaining_bytes = buffer[:-crossfade_buffer_size] - if remaining_bytes: - yield remaining_bytes - bytes_written += len(remaining_bytes) - del remaining_bytes - elif buffer: - # no crossfade enabled, just yield the buffer last part - bytes_written += len(buffer) - for _chunk in divide_chunks(buffer, pcm_sample_size): - yield _chunk - del _chunk - # make sure the buffer gets cleaned up - del buffer - - # update duration details based on the actual pcm data we sent - # this also accounts for crossfade and silence stripping - seconds_streamed = bytes_written / pcm_sample_size - queue_track.streamdetails.seconds_streamed = seconds_streamed - queue_track.streamdetails.duration = int( - queue_track.streamdetails.seek_position + seconds_streamed - ) - play_log_entry.seconds_streamed = seconds_streamed - play_log_entry.duration = queue_track.streamdetails.duration - total_bytes_sent += bytes_written - self.logger.debug( - "Finished Streaming queue track: %s (%s) on queue %s", - queue_track.streamdetails.uri, - queue_track.name, - queue.display_name, - ) - #### HANDLE END OF QUEUE FLOW STREAM - # end of queue flow: make sure we yield the last_fadeout_part - if last_fadeout_part: - for _chunk in divide_chunks(last_fadeout_part, pcm_sample_size): - yield _chunk - del _chunk - # correct seconds streamed/duration - last_part_seconds = len(last_fadeout_part) / pcm_sample_size - streamdetails = queue_track.streamdetails - assert streamdetails is not None - streamdetails.seconds_streamed = ( - streamdetails.seconds_streamed or 0 - ) + last_part_seconds - streamdetails.duration = int((streamdetails.duration or 0) + last_part_seconds) - last_fadeout_part = b"" - total_bytes_sent += bytes_written - self.logger.info("Finished Queue Flow stream for Queue %s", queue.display_name) - 
    async def get_announcement_stream(
        self,
        announcement_url: str,
        output_format: AudioFormat,
        pre_announce: bool | str = False,
        pre_announce_url: str = ANNOUNCE_ALERT_FILE,
    ) -> AsyncGenerator[bytes, None]:
        """
        Get the special announcement stream.

        Optionally plays a pre-announce 'bell' first, then the announcement audio,
        both loudness-normalized and transcoded to output_format.
        """
        filter_params = ["loudnorm=I=-10:LRA=11:TP=-2"]

        if pre_announce:
            # Note: TTS URLs might take a while to load cause the actual data are often generated
            # asynchronously by the TTS provider. If we ask ffmpeg to mix the pre-announce, it will
            # wait until it reads the TTS data, so the whole stream will be delayed. It is much
            # faster to first play the pre-announce using a separate ffmpeg stream, and only
            # afterwards play the TTS itself.
            #
            # For this to be effective the player itself needs to be able to start playback fast.
            # Finally, if the output_format is non-PCM, raw concatenation can be problematic.
            # So far players seem to tolerate this, but it might break some player in the future.

            async for chunk in get_ffmpeg_stream(
                audio_input=pre_announce_url,
                input_format=AudioFormat(content_type=ContentType.try_parse(pre_announce_url)),
                output_format=output_format,
                filter_params=filter_params,
                chunk_size=get_chunksize(output_format, 1),
            ):
                yield chunk

        # work out output format/details (guess the content type from the url extension)
        fmt = announcement_url.rsplit(".")[-1]
        audio_format = AudioFormat(content_type=ContentType.try_parse(fmt))
        async for chunk in get_ffmpeg_stream(
            audio_input=announcement_url,
            input_format=audio_format,
            output_format=output_format,
            filter_params=filter_params,
            chunk_size=get_chunksize(output_format, 1),
        ):
            yield chunk

    async def get_plugin_source_stream(
        self,
        plugin_source_id: str,
        output_format: AudioFormat,
        player_id: str,
        player_filter_params: list[str] | None = None,
    ) -> AsyncGenerator[bytes, None]:
        """
        Get the special plugin source stream.

        Streams until another player takes over the source (in_use_by changes)
        or the underlying stream ends; releases in_use_by on exit.
        """
        plugin_prov = cast("PluginProvider", self.mass.get_provider(plugin_source_id))
        if not plugin_prov:
            raise ProviderUnavailableError(f"Unknown PluginSource: {plugin_source_id}")

        plugin_source = plugin_prov.get_source()
        self.logger.debug(
            "Start streaming PluginSource %s to %s using output format %s",
            plugin_source_id,
            player_id,
            output_format,
        )
        # this should already be set by the player controller, but just to be sure
        plugin_source.in_use_by = player_id

        try:
            async for chunk in get_ffmpeg_stream(
                audio_input=cast(
                    "str | AsyncGenerator[bytes, None]",
                    plugin_prov.get_audio_stream(player_id)
                    if plugin_source.stream_type == StreamType.CUSTOM
                    else plugin_source.path,
                ),
                input_format=plugin_source.audio_format,
                output_format=output_format,
                filter_params=player_filter_params,
                extra_input_args=["-y", "-re"],
            ):
                if plugin_source.in_use_by != player_id:
                    # another player took over or the stream ended, stop streaming
                    break
                yield chunk
        finally:
            self.logger.debug(
                "Finished streaming PluginSource %s to %s", plugin_source_id, player_id
            )
            await asyncio.sleep(1)  # prevent race conditions when selecting source
            if plugin_source.in_use_by == player_id:
                # release control
                plugin_source.in_use_by = None

    async def get_queue_item_stream(
        self,
        queue_item: QueueItem,
        pcm_format: AudioFormat,
        seek_position: int = 0,
        raise_on_error: bool = True,
    ) -> AsyncGenerator[bytes, None]:
        """Get the (PCM) audio stream for a single queue item."""
        # collect all arguments for ffmpeg
        streamdetails = queue_item.streamdetails
        assert streamdetails
        filter_params: list[str] = []

        # handle volume normalization
        gain_correct: float | None = None
        if streamdetails.volume_normalization_mode == VolumeNormalizationMode.DYNAMIC:
            # volume normalization using loudnorm filter (in dynamic mode)
            # which also collects the measurement on the fly during playback
            # more info: https://k.ylo.ph/2016/04/04/loudnorm.html
            filter_rule = f"loudnorm=I={streamdetails.target_loudness}:TP=-2.0:LRA=10.0:offset=0.0"
            filter_rule += ":print_format=json"
            filter_params.append(filter_rule)
        elif streamdetails.volume_normalization_mode == VolumeNormalizationMode.FIXED_GAIN:
            # apply user defined fixed volume/gain correction
            config_key = (
                CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS
                if streamdetails.media_type == MediaType.TRACK
                else CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO
            )
            gain_value = await self.mass.config.get_core_config_value(
                self.domain, config_key, default=0.0, return_type=float
            )
            gain_correct = round(gain_value, 2)
            filter_params.append(f"volume={gain_correct}dB")
        elif streamdetails.volume_normalization_mode == VolumeNormalizationMode.MEASUREMENT_ONLY:
            # volume normalization with known loudness measurement
            # apply volume/gain correction
            target_loudness = (
                float(streamdetails.target_loudness)
                if streamdetails.target_loudness is not None
                else 0.0
            )
            if streamdetails.prefer_album_loudness and streamdetails.loudness_album is not None:
                gain_correct = target_loudness - float(streamdetails.loudness_album)
            elif streamdetails.loudness is not None:
                gain_correct = target_loudness - float(streamdetails.loudness)
            else:
                gain_correct = 0.0
            gain_correct = round(gain_correct, 2)
            filter_params.append(f"volume={gain_correct}dB")
        streamdetails.volume_normalization_gain_correct = gain_correct

        # buffering requires a known duration
        allow_buffer = bool(
            self.mass.config.get_raw_core_config_value(
                self.domain, CONF_ALLOW_BUFFER, CONF_ALLOW_BUFFER_DEFAULT
            )
            and streamdetails.duration
        )

        self.logger.debug(
            "Starting queue item stream for %s (%s)"
            " - using buffer: %s"
            " - using fade-in: %s"
            " - using volume normalization: %s",
            queue_item.name,
            streamdetails.uri,
            allow_buffer,
            streamdetails.fade_in,
            streamdetails.volume_normalization_mode,
        )
        if allow_buffer:
            media_stream_gen = get_buffered_media_stream(
                self.mass,
                streamdetails=streamdetails,
                pcm_format=pcm_format,
                seek_position=int(seek_position),
                filter_params=filter_params,
            )
        else:
            media_stream_gen = get_media_stream(
                self.mass,
                streamdetails=streamdetails,
                pcm_format=pcm_format,
                seek_position=int(seek_position),
                filter_params=filter_params,
            )

        first_chunk_received = False
        fade_in_buffer = b""
        bytes_received = 0
        finished = False
        stream_started_at = asyncio.get_event_loop().time()
        try:
            async for chunk in media_stream_gen:
                bytes_received += len(chunk)
                if not first_chunk_received:
                    first_chunk_received = True
                    self.logger.debug(
                        "First audio chunk received for %s (%s) after %.2f seconds",
                        queue_item.name,
                        streamdetails.uri,
                        asyncio.get_event_loop().time() - stream_started_at,
                    )
                # handle optional fade-in: collect ~4 seconds of audio, then run it
                # through an ffmpeg afade filter before yielding
                if streamdetails.fade_in:
                    if len(fade_in_buffer) < pcm_format.pcm_sample_size * 4:
                        fade_in_buffer += chunk
                    elif fade_in_buffer:
                        async for fade_chunk in get_ffmpeg_stream(
                            # NOTE: get_ffmpeg_stream signature says str | AsyncGenerator
                            # but FFMpeg class actually accepts bytes too. This works at
                            # runtime but needs type: ignore for mypy.
                            audio_input=fade_in_buffer + chunk,  # type: ignore[arg-type]
                            input_format=pcm_format,
                            output_format=pcm_format,
                            filter_params=["afade=type=in:start_time=0:duration=3"],
                        ):
                            yield fade_chunk
                        fade_in_buffer = b""
                        streamdetails.fade_in = False
                else:
                    yield chunk
                # help garbage collection by explicitly deleting chunk
                del chunk
            finished = True
        except AudioError as err:
            streamdetails.stream_error = True
            queue_item.available = False
            if raise_on_error:
                raise
            # yes, we swallow the error here after logging it
            # so the outer stream can handle it gracefully
            self.logger.error(
                "AudioError while streaming queue item %s (%s): %s",
                queue_item.name,
                streamdetails.uri,
                err,
            )
        finally:
            # determine how many seconds we've streamed
            # for pcm output we can calculate this easily
            seconds_streamed = bytes_received / pcm_format.pcm_sample_size
            streamdetails.seconds_streamed = seconds_streamed
            self.logger.debug(
                "stream %s for %s in %.2f seconds - seconds streamed/buffered: %.2f",
                "aborted" if not finished else "finished",
                streamdetails.uri,
                asyncio.get_event_loop().time() - stream_started_at,
                seconds_streamed,
            )
            # report stream to provider (only when meaningfully played)
            if (finished or seconds_streamed >= 90) and (
                music_prov := self.mass.get_provider(streamdetails.provider)
            ):
                if TYPE_CHECKING:  # avoid circular import
                    assert isinstance(music_prov, MusicProvider)
                self.mass.create_task(music_prov.on_streamed(streamdetails))

    @use_buffer(buffer_size=30, min_buffer_before_yield=2)
    async def get_queue_item_stream_with_smartfade(
        self,
        queue_item: QueueItem,
        pcm_format: AudioFormat,
        smart_fades_mode: SmartFadesMode = SmartFadesMode.SMART_CROSSFADE,
        standard_crossfade_duration: int = 10,
    ) -> AsyncGenerator[bytes, None]:
        """Get the audio stream for a single queue item with (smart) crossfade to the next item."""
        queue = self.mass.player_queues.get(queue_item.queue_id)
        if not queue:
            raise RuntimeError(f"Queue {queue_item.queue_id} not found")

        streamdetails = queue_item.streamdetails
        assert streamdetails
        # pick up the second half of a crossfade prepared by the previous track's stream
        crossfade_data = self._crossfade_data.pop(queue.queue_id, None)

        if crossfade_data and streamdetails.seek_position > 0:
            # don't do crossfade when seeking into track
            crossfade_data = None
        if crossfade_data and (crossfade_data.queue_item_id != queue_item.queue_item_id):
            # edge case alert: the next item changed just while we were preloading/crossfading
            self.logger.warning(
                "Skipping crossfade data for queue %s - next item changed!", queue.display_name
            )
            crossfade_data = None

        self.logger.debug(
            "Start Streaming queue track: %s (%s) for queue %s "
            "- crossfade mode: %s "
            "- crossfading from previous track: %s ",
            queue_item.streamdetails.uri if queue_item.streamdetails else "Unknown URI",
            queue_item.name,
            queue.display_name,
            smart_fades_mode,
            "true" if crossfade_data else "false",
        )

        buffer = b""
        bytes_written = 0
        # calculate crossfade buffer size
        crossfade_buffer_duration = (
            SMART_CROSSFADE_DURATION
            if smart_fades_mode == SmartFadesMode.SMART_CROSSFADE
            else standard_crossfade_duration
        )
        crossfade_buffer_duration = min(
            crossfade_buffer_duration,
            int(streamdetails.duration / 2)
            if streamdetails.duration
            else crossfade_buffer_duration,
        )
        # Ensure crossfade buffer size is aligned to frame boundaries
        # Frame size = bytes_per_sample * channels
        bytes_per_sample = pcm_format.bit_depth // 8
        frame_size = bytes_per_sample * pcm_format.channels
        crossfade_buffer_size = int(pcm_format.pcm_sample_size * crossfade_buffer_duration)
        # Round down to nearest frame boundary
        crossfade_buffer_size = (crossfade_buffer_size // frame_size) * frame_size
        fade_out_data: bytes | None = None

        if crossfade_data:
            # Calculate discard amount in seconds (format-independent)
            # Use fade_in_pcm_format because fade_in_size is in the next track's original format
            fade_in_duration_seconds = (
                crossfade_data.fade_in_size / crossfade_data.fade_in_pcm_format.pcm_sample_size
            )
            # seek past the portion already consumed by the crossfade, minus 1s of margin
            discard_seconds = int(fade_in_duration_seconds) - 1
            # Calculate discard amounts in CURRENT track's format
            discard_bytes = int(discard_seconds * pcm_format.pcm_sample_size)
            # Convert fade_in_size to current track's format for correct leftover calculation
            fade_in_size_in_current_format = int(
                fade_in_duration_seconds * pcm_format.pcm_sample_size
            )
            discard_leftover = fade_in_size_in_current_format - discard_bytes
        else:
            discard_seconds = streamdetails.seek_position
            discard_leftover = 0
        total_chunks_received = 0
        req_buffer_size = crossfade_buffer_size
        async for chunk in self.get_queue_item_stream(
            queue_item, pcm_format, seek_position=discard_seconds
        ):
            total_chunks_received += 1
            if discard_leftover:
                # discard leftover bytes from crossfade data
                chunk = chunk[discard_leftover:]  # noqa: PLW2901
                discard_leftover = 0

            if total_chunks_received < 10:
                # we want a stream to start as quickly as possible
                # so for the first 10 chunks we keep a very short buffer
                req_buffer_size = pcm_format.pcm_sample_size
            else:
                req_buffer_size = crossfade_buffer_size

            # ALWAYS APPEND CHUNK TO BUFFER
            buffer += chunk
            del chunk
            if len(buffer) < req_buffer_size:
                # buffer is not full enough, move on
                continue

            #### HANDLE CROSSFADE DATA FROM PREVIOUS TRACK
            if crossfade_data:
                # send the (second half of the) crossfade data
                if crossfade_data.pcm_format != pcm_format:
                    # edge case: pcm format mismatch, we need to resample
                    self.logger.debug(
                        "Resampling crossfade data from %s to %s for queue %s",
                        crossfade_data.pcm_format.sample_rate,
                        pcm_format.sample_rate,
                        queue.display_name,
                    )
                    resampled_data = await resample_pcm_audio(
                        crossfade_data.data,
                        crossfade_data.pcm_format,
                        pcm_format,
                    )
                    if resampled_data:
                        for _chunk in divide_chunks(resampled_data, pcm_format.pcm_sample_size):
                            yield _chunk
                        bytes_written += len(resampled_data)
                    else:
                        # Resampling failed, error already logged in resample_pcm_audio
                        # Skip crossfade data entirely - stream continues without it
                        self.logger.warning(
                            "Skipping crossfade data for queue %s due to resampling failure",
                            queue.display_name,
                        )
                else:
                    for _chunk in divide_chunks(crossfade_data.data, pcm_format.pcm_sample_size):
                        yield _chunk
                    bytes_written += len(crossfade_data.data)
                # clear vars
                crossfade_data = None

            #### OTHER: enough data in buffer, feed to output
            while len(buffer) > req_buffer_size:
                yield buffer[: pcm_format.pcm_sample_size]
                bytes_written += pcm_format.pcm_sample_size
                buffer = buffer[pcm_format.pcm_sample_size :]

        #### HANDLE END OF TRACK

        if crossfade_data:
            # edge case: we did not get enough data to send the crossfade data
            # send the (second half of the) crossfade data
            if crossfade_data.pcm_format != pcm_format:
                # (yet another) edge case: pcm format mismatch, we need to resample
                self.logger.debug(
                    "Resampling remaining crossfade data from %s to %s for queue %s",
                    crossfade_data.pcm_format.sample_rate,
                    pcm_format.sample_rate,
                    queue.display_name,
                )
                resampled_crossfade_data = await resample_pcm_audio(
                    crossfade_data.data,
                    crossfade_data.pcm_format,
                    pcm_format,
                )
                if resampled_crossfade_data:
                    crossfade_data.data = resampled_crossfade_data
                else:
                    # Resampling failed, error already logged in resample_pcm_audio
                    # Skip the crossfade data entirely
                    self.logger.warning(
                        "Skipping remaining crossfade data for queue %s due to resampling failure",
                        queue.display_name,
                    )
                    crossfade_data = None
            if crossfade_data:
                for _chunk in divide_chunks(crossfade_data.data, pcm_format.pcm_sample_size):
                    yield _chunk
                bytes_written += len(crossfade_data.data)
                crossfade_data = None
        next_queue_item: QueueItem | None = None
        if not self._crossfade_allowed(
            queue_item, smart_fades_mode=smart_fades_mode, flow_mode=False
        ):
            # no crossfade enabled/allowed, just yield the buffer last part
            bytes_written += len(buffer)
            for _chunk in divide_chunks(buffer, pcm_format.pcm_sample_size):
                yield _chunk
        else:
            # if crossfade is enabled, save fadeout part in buffer to pickup for next track
            fade_out_data = buffer
            buffer = b""
            # get next track for crossfade
            self.logger.debug(
                "Preloading NEXT track for crossfade for queue %s",
                queue.display_name,
            )
            try:
                next_queue_item = await self.mass.player_queues.load_next_queue_item(
                    queue.queue_id, queue_item.queue_item_id
                )
                # set index_in_buffer to prevent our next track is overwritten while preloading
                if next_queue_item.streamdetails is None:
                    raise InvalidDataError(
                        f"No streamdetails for next queue item {next_queue_item.queue_item_id}"
                    )
                queue.index_in_buffer = self.mass.player_queues.index_by_id(
                    queue.queue_id, next_queue_item.queue_item_id
                )
                next_queue_item_pcm_format = AudioFormat(
                    content_type=INTERNAL_PCM_FORMAT.content_type,
                    bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
                    sample_rate=next_queue_item.streamdetails.audio_format.sample_rate,
                    channels=next_queue_item.streamdetails.audio_format.channels,
                )
                async for chunk in self.get_queue_item_stream(
                    next_queue_item, next_queue_item_pcm_format
                ):
                    # append to buffer until we reach crossfade size
                    # we only need the first X seconds of the NEXT track so we can
                    # perform the crossfade.
                    # the crossfaded audio of the previous and next track will be
                    # sent in two equal parts: first half now, second half
                    # when the next track starts. We use CrossfadeData to store
                    # the second half to be picked up by the next track's stream generator.
                    # Note that we more or less expect the user to have enabled the in-memory
                    # buffer so we can keep the next track's audio data in memory.
                    buffer += chunk
                    del chunk
                    if len(buffer) >= crossfade_buffer_size:
                        break
                #### HANDLE CROSSFADE OF PREVIOUS TRACK AND NEW TRACK
                # Store original buffer size before any resampling for fade_in_size calculation
                # This size is in the next track's original format which is what we need
                original_buffer_size = len(buffer)
                if next_queue_item_pcm_format != pcm_format:
                    # edge case: pcm format mismatch, we need to resample the next track's
                    # beginning part before crossfading
                    self.logger.debug(
                        "Resampling next track's crossfade from %s to %s for queue %s",
                        next_queue_item_pcm_format.sample_rate,
                        pcm_format.sample_rate,
                        queue.display_name,
                    )
                    resampled_buffer = await resample_pcm_audio(
                        buffer,
                        next_queue_item_pcm_format,
                        pcm_format,
                    )
                    if resampled_buffer:
                        buffer = resampled_buffer
                    else:
                        # Resampling failed, error already logged in resample_pcm_audio
                        # Cannot crossfade safely - yield fade_out_data and raise error
                        self.logger.error(
                            "Failed to resample next track for crossfade in queue %s - "
                            "skipping crossfade",
                            queue.display_name,
                        )
                        yield fade_out_data
                        bytes_written += len(fade_out_data)
                        # NOTE(review): this AudioError is raised inside the outer try
                        # and is caught by `except (QueueEmpty, AudioError)` below,
                        # which yields fade_out_data a second time — looks like a
                        # double-yield of the fadeout audio; confirm.
                        raise AudioError("Failed to resample next track for crossfade")
                try:
                    crossfade_bytes = await self._smart_fades_mixer.mix(
                        fade_in_part=buffer,
                        fade_out_part=fade_out_data,
                        fade_in_streamdetails=next_queue_item.streamdetails,
                        fade_out_streamdetails=streamdetails,
                        pcm_format=pcm_format,
                        standard_crossfade_duration=standard_crossfade_duration,
                        mode=smart_fades_mode,
                    )
                    # send half of the crossfade_part (= approx the fadeout part)
                    split_point = (len(crossfade_bytes) + 1) // 2
                    crossfade_first = crossfade_bytes[:split_point]
                    crossfade_second = crossfade_bytes[split_point:]
                    del crossfade_bytes
                    bytes_written += len(crossfade_first)
                    for _chunk in divide_chunks(crossfade_first, pcm_format.pcm_sample_size):
                        yield _chunk
                    # store the other half for the next track
                    # IMPORTANT: crossfade_second data is in CURRENT track's format (pcm_format)
                    # because it was created from the resampled buffer used for mixing.
                    # BUT fade_in_size represents bytes in NEXT track's original format
                    # (next_queue_item_pcm_format) because that's how much of the next track
                    # was consumed during the crossfade. We need both formats to correctly
                    # handle the crossfade data when the next track starts.
                    self._crossfade_data[queue_item.queue_id] = CrossfadeData(
                        data=crossfade_second,
                        fade_in_size=original_buffer_size,
                        pcm_format=pcm_format,  # Format of the data (current track)
                        fade_in_pcm_format=next_queue_item_pcm_format,  # Format for fade_in_size
                        queue_item_id=next_queue_item.queue_item_id,
                    )
                except Exception as err:
                    self.logger.error(
                        "Failed to create crossfade for queue %s: %s - "
                        "falling back to no crossfade",
                        queue.display_name,
                        err,
                    )
                    # Fallback: just yield the fade_out_data without crossfade
                    yield fade_out_data
                    bytes_written += len(fade_out_data)
                    next_queue_item = None
            except (QueueEmpty, AudioError):
                # end of queue reached, next item skipped or crossfade failed
                # no crossfade possible, just yield the fade_out_data
                next_queue_item = None
                yield fade_out_data
                bytes_written += len(fade_out_data)
            del fade_out_data
        # make sure the buffer gets cleaned up
        del buffer
        # update duration details based on the actual pcm data we sent
        # this also accounts for crossfade and silence stripping
        seconds_streamed = bytes_written / pcm_format.pcm_sample_size
        streamdetails.seconds_streamed = seconds_streamed
        streamdetails.duration = int(streamdetails.seek_position + seconds_streamed)
        self.logger.debug(
            "Finished Streaming queue track: %s (%s) on queue %s "
            "- crossfade data prepared for next track: %s",
            streamdetails.uri,
            queue_item.name,
            queue.display_name,
            next_queue_item.name if next_queue_item else "N/A",
        )

    def _log_request(self, request: web.Request) -> None:
        """Log an incoming HTTP request (verbose: include headers)."""
        if self.logger.isEnabledFor(VERBOSE_LOG_LEVEL):
            self.logger.log(
                VERBOSE_LOG_LEVEL,
                "Got %s request to %s from %s\nheaders: %s\n",
                request.method,
                request.path,
                request.remote,
                request.headers,
            )
        else:
            self.logger.debug(
                "Got %s request to %s from %s",
                request.method,
                request.path,
                request.remote,
            )

    async def get_output_format(
        self,
        output_format_str: str,
        player: Player,
        content_sample_rate: int,
        content_bit_depth: int,
    ) -> AudioFormat:
        """Parse (player specific) output format details for given format string."""
        content_type: ContentType = ContentType.try_parse(output_format_str)
        # player config lists supported (sample_rate, bit_depth) pairs as strings
        supported_rates_conf = cast(
            "list[tuple[str, str]]",
            await self.mass.config.get_player_config_value(
                player.player_id, CONF_SAMPLE_RATES, unpack_splitted_values=True
            ),
        )
        output_channels_str = self.mass.config.get_raw_player_config_value(
            player.player_id, CONF_OUTPUT_CHANNELS, "stereo"
        )
        supported_sample_rates = tuple(int(x[0]) for x in supported_rates_conf)
        supported_bit_depths = tuple(int(x[1]) for x in supported_rates_conf)

        player_max_bit_depth = max(supported_bit_depths)
        output_bit_depth = min(content_bit_depth, player_max_bit_depth)
        if content_sample_rate in supported_sample_rates:
            output_sample_rate = content_sample_rate
        else:
            output_sample_rate = max(supported_sample_rates)

        if not content_type.is_lossless():
            # no point in having a higher bit depth for lossy formats
            output_bit_depth = 16
            output_sample_rate = min(48000, output_sample_rate)
        if content_type == ContentType.WAV and output_bit_depth > 16:
            # WAV 24bit is not widely supported, fallback to 16bit
            output_bit_depth = 16
        if output_format_str == "pcm":
            # generic 'pcm' request: derive concrete pcm type from the bit depth
            content_type = ContentType.from_bit_depth(output_bit_depth)
        return AudioFormat(
            content_type=content_type,
            sample_rate=output_sample_rate,
            bit_depth=output_bit_depth,
            channels=1 if output_channels_str != "stereo" else 2,
        )

    async def _select_flow_format(
        self,
- player: Player, - ) -> AudioFormat: - """Parse (player specific) flow stream PCM format.""" - supported_rates_conf = cast( - "list[tuple[str, str]]", - await self.mass.config.get_player_config_value( - player.player_id, CONF_SAMPLE_RATES, unpack_splitted_values=True - ), - ) - supported_sample_rates = tuple(int(x[0]) for x in supported_rates_conf) - output_sample_rate = INTERNAL_PCM_FORMAT.sample_rate - for sample_rate in (192000, 96000, 48000, 44100): - if sample_rate in supported_sample_rates: - output_sample_rate = sample_rate - break - return AudioFormat( - content_type=INTERNAL_PCM_FORMAT.content_type, - sample_rate=output_sample_rate, - bit_depth=INTERNAL_PCM_FORMAT.bit_depth, - channels=2, - ) - - def _crossfade_allowed( - self, queue_item: QueueItem, smart_fades_mode: SmartFadesMode, flow_mode: bool = False - ) -> bool: - """Get the crossfade config for a queue item.""" - if smart_fades_mode == SmartFadesMode.DISABLED: - return False - if not (queue_player := self.mass.players.get(queue_item.queue_id)): - return False # just a guard - if queue_item.media_type != MediaType.TRACK: - self.logger.debug("Skipping crossfade: current item is not a track") - return False - # check if the next item is part of the same album - next_item = self.mass.player_queues.get_next_item( - queue_item.queue_id, queue_item.queue_item_id - ) - if not next_item: - # there is no next item! 
- return False - # check if next item is a track - if next_item.media_type != MediaType.TRACK: - self.logger.debug("Skipping crossfade: next item is not a track") - return False - if ( - isinstance(queue_item.media_item, Track) - and isinstance(next_item.media_item, Track) - and queue_item.media_item.album - and next_item.media_item.album - and queue_item.media_item.album == next_item.media_item.album - and not self.mass.config.get_raw_core_config_value( - self.domain, CONF_ALLOW_CROSSFADE_SAME_ALBUM, False - ) - ): - # in general, crossfade is not desired for tracks of the same (gapless) album - # because we have no accurate way to determine if the album is gapless or not, - # for now we just never crossfade between tracks of the same album - self.logger.debug("Skipping crossfade: next item is part of the same album") - return False - - # check if next item sample rate matches - if ( - not flow_mode - and next_item.streamdetails - and queue_item.streamdetails - and next_item.streamdetails.audio_format - and queue_item.streamdetails.audio_format - and ( - queue_item.streamdetails.audio_format.sample_rate - != next_item.streamdetails.audio_format.sample_rate - ) - and (queue_player := self.mass.players.get(queue_item.queue_id)) - and not ( - PlayerFeature.GAPLESS_DIFFERENT_SAMPLERATE in queue_player.supported_features - or self.mass.config.get_raw_player_config_value( - queue_player.player_id, - CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES.key, - CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES.default_value, - ) - ) - ): - self.logger.debug("Skipping crossfade: sample rate mismatch") - return False - return True - - async def _periodic_garbage_collection(self) -> None: - """Periodic garbage collection to free up memory from audio buffers and streams.""" - self.logger.log( - VERBOSE_LOG_LEVEL, - "Running periodic garbage collection...", - ) - # Run garbage collection in executor to avoid blocking the event loop - # Since this runs periodically (not in 
response to subprocess cleanup), - # it's safe to run in a thread without causing thread-safety issues - loop = asyncio.get_running_loop() - collected = await loop.run_in_executor(None, gc.collect) - self.logger.log( - VERBOSE_LOG_LEVEL, - "Garbage collection completed, collected %d objects", - collected, - ) - # Schedule next run in 15 minutes - self.mass.call_later(900, self._periodic_garbage_collection) diff --git a/music_assistant/controllers/streams/__init__.py b/music_assistant/controllers/streams/__init__.py new file mode 100644 index 00000000..7ca53fcc --- /dev/null +++ b/music_assistant/controllers/streams/__init__.py @@ -0,0 +1,12 @@ +""" +MusicAssistant StreamsController. + +Handles all logic related to audio streams, such as creating and managing +streams for playback by players as well as fades between songs. +""" + +from __future__ import annotations + +from .streams_controller import StreamsController + +__all__ = ["StreamsController"] diff --git a/music_assistant/controllers/streams/smart_fades/__init__.py b/music_assistant/controllers/streams/smart_fades/__init__.py new file mode 100644 index 00000000..b234507f --- /dev/null +++ b/music_assistant/controllers/streams/smart_fades/__init__.py @@ -0,0 +1,8 @@ +"""Smart Fades - Audio analyzer and mixer.""" + +from __future__ import annotations + +from .analyzer import SmartFadesAnalyzer +from .mixer import SmartFadesMixer + +__all__ = ["SmartFadesAnalyzer", "SmartFadesMixer"] diff --git a/music_assistant/controllers/streams/smart_fades/analyzer.py b/music_assistant/controllers/streams/smart_fades/analyzer.py new file mode 100644 index 00000000..19bf7a8f --- /dev/null +++ b/music_assistant/controllers/streams/smart_fades/analyzer.py @@ -0,0 +1,257 @@ +"""Smart Fades Analyzer - Performs audio analysis for smart fades.""" + +from __future__ import annotations + +import asyncio +import time +import warnings +from typing import TYPE_CHECKING + +import librosa +import numpy as np +import numpy.typing as npt + 
+from music_assistant.constants import VERBOSE_LOG_LEVEL +from music_assistant.helpers.audio import ( + align_audio_to_frame_boundary, +) +from music_assistant.models.smart_fades import ( + SmartFadesAnalysis, + SmartFadesAnalysisFragment, +) + +if TYPE_CHECKING: + from music_assistant_models.media_items import AudioFormat + + from music_assistant.controllers.streams.streams_controller import StreamsController + +ANALYSIS_FPS = 100 + + +class SmartFadesAnalyzer: + """Smart fades analyzer that performs audio analysis.""" + + def __init__(self, streams: StreamsController) -> None: + """Initialize smart fades analyzer.""" + self.streams = streams + self.logger = streams.logger.getChild("smart_fades_analyzer") + + async def analyze( + self, + item_id: str, + provider_instance_id_or_domain: str, + fragment: SmartFadesAnalysisFragment, + audio_data: bytes, + pcm_format: AudioFormat, + ) -> SmartFadesAnalysis | None: + """Analyze a track's beats for BPM matching smart fade.""" + stream_details_name = f"{provider_instance_id_or_domain}://{item_id}" + start_time = time.perf_counter() + self.logger.log( + VERBOSE_LOG_LEVEL, + "Starting %s beat analysis for track : %s", + fragment.name, + stream_details_name, + ) + + # Validate input audio data is frame-aligned + audio_data = align_audio_to_frame_boundary(audio_data, pcm_format) + + fragment_duration = len(audio_data) / (pcm_format.pcm_sample_size) + try: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Audio data: %.2fs, %d bytes", + fragment_duration, + len(audio_data), + ) + # Convert PCM bytes to numpy array and then to mono for analysis + audio_array = np.frombuffer(audio_data, dtype=np.float32) + if pcm_format.channels > 1: + # Ensure array size is divisible by channel count + samples_per_channel = len(audio_array) // pcm_format.channels + valid_samples = samples_per_channel * pcm_format.channels + if valid_samples != len(audio_array): + self.logger.warning( + "Audio buffer size (%d) not divisible by channels (%d), " + 
"truncating %d samples", + len(audio_array), + pcm_format.channels, + len(audio_array) - valid_samples, + ) + audio_array = audio_array[:valid_samples] + + # Reshape to separate channels and take average for mono conversion + audio_array = audio_array.reshape(-1, pcm_format.channels) + mono_audio = np.asarray(np.mean(audio_array, axis=1, dtype=np.float32)) + else: + # Single channel - ensure consistent array type + mono_audio = np.asarray(audio_array, dtype=np.float32) + + # Validate that the audio is finite (no NaN or Inf values) + if not np.all(np.isfinite(mono_audio)): + self.logger.error( + "Audio buffer contains non-finite values (NaN/Inf) for %s, cannot analyze", + stream_details_name, + ) + return None + + analysis = await self._analyze_track_beats(mono_audio, fragment, pcm_format.sample_rate) + + total_time = time.perf_counter() - start_time + if not analysis: + self.logger.debug( + "No analysis results found after analyzing audio for: %s (took %.2fs).", + stream_details_name, + total_time, + ) + return None + self.logger.debug( + "Smart fades %s analysis completed for %s: BPM=%.1f, %d beats, " + "%d downbeats, confidence=%.2f (took %.2fs)", + fragment.name, + stream_details_name, + analysis.bpm, + len(analysis.beats), + len(analysis.downbeats), + analysis.confidence, + total_time, + ) + self.streams.mass.create_task( + self.streams.mass.music.set_smart_fades_analysis( + item_id, provider_instance_id_or_domain, analysis + ) + ) + return analysis + except Exception as e: + total_time = time.perf_counter() - start_time + self.logger.exception( + "Beat analysis error for %s: %s (took %.2fs)", + stream_details_name, + e, + total_time, + ) + return None + + def _librosa_beat_analysis( + self, + audio_array: npt.NDArray[np.float32], + fragment: SmartFadesAnalysisFragment, + sample_rate: int, + ) -> SmartFadesAnalysis | None: + """Perform beat analysis using librosa.""" + try: + # Suppress librosa UserWarnings about empty mel filters + # These warnings are 
harmless and occur with certain audio characteristics + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Empty filters detected in mel frequency basis", + category=UserWarning, + ) + tempo, beats_array = librosa.beat.beat_track( + y=audio_array, + sr=sample_rate, + units="time", + ) + # librosa returns np.float64 arrays when units="time" + + if len(beats_array) < 2: + self.logger.warning("Insufficient beats detected: %d", len(beats_array)) + return None + + bpm = float(tempo.item()) if hasattr(tempo, "item") else float(tempo) + + # Calculate confidence based on consistency of intervals + if len(beats_array) > 2: + intervals = np.diff(beats_array) + interval_std = np.std(intervals) + interval_mean = np.mean(intervals) + # Lower coefficient of variation = higher confidence + cv = interval_std / interval_mean if interval_mean > 0 else 1.0 + confidence = max(0.1, 1.0 - cv) + else: + confidence = 0.5 # Low confidence with few beats + + downbeats = self._estimate_musical_downbeats(beats_array, bpm) + + # Store complete fragment analysis + fragment_duration = len(audio_array) / sample_rate + + return SmartFadesAnalysis( + fragment=fragment, + bpm=float(bpm), + beats=beats_array, + downbeats=downbeats, + confidence=float(confidence), + duration=fragment_duration, + ) + + except Exception as e: + self.logger.exception("Librosa beat analysis failed: %s", e) + return None + + def _estimate_musical_downbeats( + self, beats_array: npt.NDArray[np.float64], bpm: float + ) -> npt.NDArray[np.float64]: + """Estimate downbeats using musical logic and beat consistency.""" + if len(beats_array) < 4: + return beats_array[:1] if len(beats_array) > 0 else np.array([]) + + # Calculate expected beat interval from BPM + expected_beat_interval = 60.0 / bpm + + # Look for the most likely starting downbeat by analyzing beat intervals + # In 4/4 time, downbeats should be every 4 beats + best_offset = 0 + best_consistency = 0.0 + + # Try different starting offsets 
(0, 1, 2, 3) to find most consistent downbeat pattern + for offset in range(min(4, len(beats_array))): + downbeat_candidates = beats_array[offset::4] + + if len(downbeat_candidates) < 2: + continue + + # Calculate consistency score based on interval regularity + intervals = np.diff(downbeat_candidates) + expected_downbeat_interval = 4 * expected_beat_interval + + # Score based on how close intervals are to expected 4-beat interval + interval_errors = ( + np.abs(intervals - expected_downbeat_interval) / expected_downbeat_interval + ) + consistency = 1.0 - np.mean(interval_errors) + + if consistency > best_consistency: + best_consistency = float(consistency) + best_offset = offset + + # Use the best offset to generate final downbeats + downbeats = beats_array[best_offset::4] + + self.logger.log( + VERBOSE_LOG_LEVEL, + "Downbeat estimation: offset=%d, consistency=%.2f, %d downbeats from %d beats", + best_offset, + best_consistency, + len(downbeats), + len(beats_array), + ) + + return downbeats + + async def _analyze_track_beats( + self, + audio_data: npt.NDArray[np.float32], + fragment: SmartFadesAnalysisFragment, + sample_rate: int, + ) -> SmartFadesAnalysis | None: + """Analyze track for beat tracking using librosa.""" + try: + return await asyncio.to_thread( + self._librosa_beat_analysis, audio_data, fragment, sample_rate + ) + except Exception as e: + self.logger.exception("Beat tracking analysis failed: %s", e) + return None diff --git a/music_assistant/controllers/streams/smart_fades/fades.py b/music_assistant/controllers/streams/smart_fades/fades.py new file mode 100644 index 00000000..9714c346 --- /dev/null +++ b/music_assistant/controllers/streams/smart_fades/fades.py @@ -0,0 +1,598 @@ +"""Smart Fades - Audio fade implementations.""" + +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +import aiofiles +import numpy as np +import numpy.typing as npt +import shortuuid + +from 
music_assistant.constants import VERBOSE_LOG_LEVEL +from music_assistant.controllers.streams.smart_fades.filters import ( + CrossfadeFilter, + Filter, + FrequencySweepFilter, + TimeStretchFilter, + TrimFilter, +) +from music_assistant.helpers.process import communicate +from music_assistant.helpers.util import remove_file +from music_assistant.models.smart_fades import ( + SmartFadesAnalysis, +) + +if TYPE_CHECKING: + from music_assistant_models.media_items import AudioFormat + +SMART_CROSSFADE_DURATION = 45 + + +class SmartFade(ABC): + """Abstract base class for Smart Fades.""" + + filters: list[Filter] + + def __init__(self, logger: logging.Logger) -> None: + """Initialize SmartFade base class.""" + self.filters = [] + self.logger = logger + + @abstractmethod + def _build(self) -> None: + """Build the smart fades filter chain.""" + ... + + def _get_ffmpeg_filters( + self, + input_fadein_label: str = "[1]", + input_fadeout_label: str = "[0]", + ) -> list[str]: + """Get FFmpeg filters for smart fades.""" + if not self.filters: + self._build() + filters = [] + _cur_fadein_label = input_fadein_label + _cur_fadeout_label = input_fadeout_label + for audio_filter in self.filters: + filter_strings = audio_filter.apply(_cur_fadein_label, _cur_fadeout_label) + filters.extend(filter_strings) + _cur_fadein_label = f"[{audio_filter.output_fadein_label}]" + _cur_fadeout_label = f"[{audio_filter.output_fadeout_label}]" + return filters + + async def apply( + self, + fade_out_part: bytes, + fade_in_part: bytes, + pcm_format: AudioFormat, + ) -> bytes: + """Apply the smart fade to the given PCM audio parts.""" + # Write the fade_out_part to a temporary file + fadeout_filename = f"/tmp/{shortuuid.random(20)}.pcm" # noqa: S108 + async with aiofiles.open(fadeout_filename, "wb") as outfile: + await outfile.write(fade_out_part) + args = [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + # Input 1: fadeout part (as file) + "-acodec", + pcm_format.content_type.name.lower(), # 
e.g., "pcm_f32le" not just "f32le" + "-ac", + str(pcm_format.channels), + "-ar", + str(pcm_format.sample_rate), + "-channel_layout", + "mono" if pcm_format.channels == 1 else "stereo", + "-f", + pcm_format.content_type.value, + "-i", + fadeout_filename, + # Input 2: fade_in part (stdin) + "-acodec", + pcm_format.content_type.name.lower(), + "-ac", + str(pcm_format.channels), + "-ar", + str(pcm_format.sample_rate), + "-channel_layout", + "mono" if pcm_format.channels == 1 else "stereo", + "-f", + pcm_format.content_type.value, + "-i", + "-", + ] + smart_fade_filters = self._get_ffmpeg_filters() + self.logger.debug( + "Applying smartfade: %s", + self, + ) + args.extend( + [ + "-filter_complex", + ";".join(smart_fade_filters), + # Output format specification - must match input codec format + "-acodec", + pcm_format.content_type.name.lower(), + "-ac", + str(pcm_format.channels), + "-ar", + str(pcm_format.sample_rate), + "-channel_layout", + "mono" if pcm_format.channels == 1 else "stereo", + "-f", + pcm_format.content_type.value, + "-", + ] + ) + self.logger.log(VERBOSE_LOG_LEVEL, "FFmpeg command args: %s", " ".join(args)) + + # Execute the enhanced smart fade with full buffer + _, raw_crossfade_output, stderr = await communicate(args, fade_in_part) + await remove_file(fadeout_filename) + + if raw_crossfade_output: + return raw_crossfade_output + else: + stderr_msg = stderr.decode() if stderr else "(no stderr output)" + raise RuntimeError(f"Smart crossfade failed. 
FFmpeg stderr: {stderr_msg}") + + def __repr__(self) -> str: + """Return string representation of SmartFade showing the filter chain.""" + if not self.filters: + return f"<{self.__class__.__name__}: 0 filters>" + + chain = " → ".join(repr(f) for f in self.filters) + return f"<{self.__class__.__name__}: {len(self.filters)} filters> {chain}" + + +class SmartCrossFade(SmartFade): + """Smart fades class that implements a Smart Fade mode.""" + + # Only apply time stretching if BPM difference is < this % + time_stretch_bpm_percentage_threshold: float = 5.0 + + def __init__( + self, + logger: logging.Logger, + fade_out_analysis: SmartFadesAnalysis, + fade_in_analysis: SmartFadesAnalysis, + ) -> None: + """Initialize SmartFades with analysis data. + + Args: + fade_out_analysis: Analysis data for the outgoing track + fade_in_analysis: Analysis data for the incoming track + logger: Optional logger for debug output + """ + self.fade_out_analysis = fade_out_analysis + self.fade_in_analysis = fade_in_analysis + super().__init__(logger) + + def _build(self) -> None: + """Build the smart fades filter chain.""" + # Calculate tempo factor for time stretching + bpm_ratio = self.fade_in_analysis.bpm / self.fade_out_analysis.bpm + bpm_diff_percent = abs(1.0 - bpm_ratio) * 100 + + # Extrapolate downbeats for better bar calculation + self.extrapolated_fadeout_downbeats = extrapolate_downbeats( + self.fade_out_analysis.downbeats, + tempo_factor=1.0, + bpm=self.fade_out_analysis.bpm, + ) + + # Calculate optimal crossfade bars that fit in available buffer + crossfade_bars = self._calculate_optimal_crossfade_bars() + + # Calculate beat positions for the selected bar count + fadein_start_pos = self._calculate_optimal_fade_timing(crossfade_bars) + + # Calculate initial crossfade duration (may be adjusted later for downbeat alignment) + crossfade_duration = self._calculate_crossfade_duration(crossfade_bars=crossfade_bars) + + # Add time stretch filter if needed + if ( + 0.1 < bpm_diff_percent 
<= self.time_stretch_bpm_percentage_threshold + and crossfade_bars > 4 + ): + self.filters.append(TimeStretchFilter(logger=self.logger, stretch_ratio=bpm_ratio)) + # Re-extrapolate downbeats with actual tempo factor for time-stretched audio + self.extrapolated_fadeout_downbeats = extrapolate_downbeats( + self.fade_out_analysis.downbeats, + tempo_factor=bpm_ratio, + bpm=self.fade_out_analysis.bpm, + ) + + # Check if we would have enough audio after beat alignment for the crossfade + if fadein_start_pos and fadein_start_pos + crossfade_duration <= SMART_CROSSFADE_DURATION: + self.filters.append(TrimFilter(logger=self.logger, fadein_start_pos=fadein_start_pos)) + else: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Skipping beat alignment: not enough audio after trim (%.1fs + %.1fs > %.1fs)", + fadein_start_pos, + crossfade_duration, + SMART_CROSSFADE_DURATION, + ) + + # Adjust crossfade duration to align with outgoing track's downbeats + crossfade_duration = self._adjust_crossfade_to_downbeats( + crossfade_duration=crossfade_duration, + fadein_start_pos=fadein_start_pos, + ) + + # 90 BPM -> 1500Hz, 140 BPM -> 2500Hz + avg_bpm = (self.fade_out_analysis.bpm + self.fade_in_analysis.bpm) / 2 + crossover_freq = int(np.clip(1500 + (avg_bpm - 90) * 20, 1500, 2500)) + + # Adjust for BPM mismatch + if abs(bpm_ratio - 1.0) > 0.3: + crossover_freq = int(crossover_freq * 0.85) + + # For shorter fades, use exp/exp curves to avoid abruptness + if crossfade_bars < 8: + fadeout_curve = "exponential" + fadein_curve = "exponential" + # For long fades, use log/linear curves + else: + # Use logarithmic curve to give the next track more space + fadeout_curve = "logarithmic" + # Use linear curve for transition, predictable and not too abrupt + fadein_curve = "linear" + + # Create lowpass filter on the outgoing track (unfiltered → low-pass) + # Extended lowpass effect to gradually remove bass frequencies + fadeout_eq_duration = min(max(crossfade_duration * 2.5, 8.0), SMART_CROSSFADE_DURATION) 
+ # The crossfade always happens at the END of the buffer + fadeout_eq_start = max(0, SMART_CROSSFADE_DURATION - fadeout_eq_duration) + fadeout_sweep = FrequencySweepFilter( + logger=self.logger, + sweep_type="lowpass", + target_freq=crossover_freq, + duration=fadeout_eq_duration, + start_time=fadeout_eq_start, + sweep_direction="fade_in", + poles=1, + curve_type=fadeout_curve, + stream_type="fadeout", + ) + self.filters.append(fadeout_sweep) + + # Create high pass filter on the incoming track (high-pass → unfiltered) + # Quicker highpass removal to avoid lingering vocals after crossfade + fadein_eq_duration = crossfade_duration / 1.5 + fadein_sweep = FrequencySweepFilter( + logger=self.logger, + sweep_type="highpass", + target_freq=crossover_freq, + duration=fadein_eq_duration, + start_time=0, + sweep_direction="fade_out", + poles=1, + curve_type=fadein_curve, + stream_type="fadein", + ) + self.filters.append(fadein_sweep) + + # Add final crossfade filter + crossfade_filter = CrossfadeFilter( + logger=self.logger, crossfade_duration=crossfade_duration + ) + self.filters.append(crossfade_filter) + + def _calculate_crossfade_duration(self, crossfade_bars: int) -> float: + """Calculate final crossfade duration based on musical bars and BPM.""" + # Calculate crossfade duration based on incoming track's BPM + beats_per_bar = 4 + seconds_per_beat = 60.0 / self.fade_in_analysis.bpm + musical_duration = crossfade_bars * beats_per_bar * seconds_per_beat + + # Apply buffer constraint + actual_duration = min(musical_duration, SMART_CROSSFADE_DURATION) + + # Log if we had to constrain the duration + if musical_duration > SMART_CROSSFADE_DURATION: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Constraining crossfade duration from %.1fs to %.1fs (buffer limit)", + musical_duration, + actual_duration, + ) + + return actual_duration + + def _calculate_optimal_crossfade_bars(self) -> int: + """Calculate optimal crossfade bars that fit in available buffer.""" + bpm_in = 
self.fade_in_analysis.bpm + bpm_out = self.fade_out_analysis.bpm + bpm_diff_percent = abs(1.0 - bpm_in / bpm_out) * 100 + + # Calculate ideal bars based on BPM compatibility + ideal_bars = 10 if bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold else 6 + + # Reduce bars until it fits in the fadein buffer + for bars in [ideal_bars, 8, 6, 4, 2, 1]: + if bars > ideal_bars: + continue + + fadein_start_pos = self._calculate_optimal_fade_timing(bars) + if fadein_start_pos is None: + continue + + # Calculate what the duration would be + test_duration = self._calculate_crossfade_duration(crossfade_bars=bars) + + # Check if it fits in fadein buffer + fadein_buffer = SMART_CROSSFADE_DURATION - fadein_start_pos + if test_duration <= fadein_buffer: + if bars < ideal_bars: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Reduced crossfade from %d to %d bars (fadein buffer=%.1fs, needed=%.1fs)", + ideal_bars, + bars, + fadein_buffer, + test_duration, + ) + return bars + + # Fall back to 1 bar if nothing else fits + return 1 + + def _calculate_optimal_fade_timing(self, crossfade_bars: int) -> float | None: + """Calculate beat positions for alignment.""" + beats_per_bar = 4 + + def calculate_beat_positions( + fade_out_beats: npt.NDArray[np.float64], + fade_in_beats: npt.NDArray[np.float64], + num_beats: int, + ) -> float | None: + """Calculate start positions from beat arrays.""" + if len(fade_out_beats) < num_beats or len(fade_in_beats) < num_beats: + return None + + fade_in_slice = fade_in_beats[:num_beats] + return float(fade_in_slice[0]) + + # Try downbeats first for most musical timing + downbeat_positions = calculate_beat_positions( + self.extrapolated_fadeout_downbeats, self.fade_in_analysis.downbeats, crossfade_bars + ) + if downbeat_positions: + return downbeat_positions + + # Try regular beats if downbeats insufficient + required_beats = crossfade_bars * beats_per_bar + beat_positions = calculate_beat_positions( + self.fade_out_analysis.beats, 
self.fade_in_analysis.beats, required_beats + ) + if beat_positions: + return beat_positions + + # Fallback: No beat alignment possible + self.logger.log(VERBOSE_LOG_LEVEL, "No beat alignment possible (insufficient beats)") + return None + + def _adjust_crossfade_to_downbeats( + self, + crossfade_duration: float, + fadein_start_pos: float | None, + ) -> float: + """Adjust crossfade duration to align with outgoing track's downbeats.""" + # If we don't have downbeats or beat alignment is disabled, return original duration + if len(self.extrapolated_fadeout_downbeats) == 0 or fadein_start_pos is None: + return crossfade_duration + + # Calculate where the crossfade would start in the buffer + ideal_start_pos = SMART_CROSSFADE_DURATION - crossfade_duration + + # Debug logging + self.logger.log( + VERBOSE_LOG_LEVEL, + "Downbeat adjustment - ideal_start=%.2fs (buffer=%.1fs - crossfade=%.2fs), " + "fadein_start=%.2fs", + ideal_start_pos, + SMART_CROSSFADE_DURATION, + crossfade_duration, + fadein_start_pos, + ) + + # Find the closest downbeats (earlier and later) + earlier_downbeat = None + later_downbeat = None + + for downbeat in self.extrapolated_fadeout_downbeats: + if downbeat <= ideal_start_pos: + earlier_downbeat = downbeat + elif downbeat > ideal_start_pos and later_downbeat is None: + later_downbeat = downbeat + break + + # Try earlier downbeat first (longer crossfade) + if earlier_downbeat is not None: + adjusted_duration = float(SMART_CROSSFADE_DURATION - earlier_downbeat) + if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: + if abs(adjusted_duration - crossfade_duration) > 0.1: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Adjusted crossfade duration from %.2fs to %.2fs to align with " + "downbeat at %.2fs (earlier)", + crossfade_duration, + adjusted_duration, + earlier_downbeat, + ) + return adjusted_duration + + # Try later downbeat (shorter crossfade) + if later_downbeat is not None: + adjusted_duration = float(SMART_CROSSFADE_DURATION - 
later_downbeat) + if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: + if abs(adjusted_duration - crossfade_duration) > 0.1: + self.logger.log( + VERBOSE_LOG_LEVEL, + "Adjusted crossfade duration from %.2fs to %.2fs to align with " + "downbeat at %.2fs (later)", + crossfade_duration, + adjusted_duration, + later_downbeat, + ) + return adjusted_duration + + # If no suitable downbeat found, return original duration + self.logger.log( + VERBOSE_LOG_LEVEL, + "Could not adjust crossfade duration to downbeats, using original %.2fs", + crossfade_duration, + ) + return crossfade_duration + + +class StandardCrossFade(SmartFade): + """Standard crossfade class that implements a standard crossfade mode.""" + + def __init__(self, logger: logging.Logger, crossfade_duration: float = 10.0) -> None: + """Initialize StandardCrossFade with crossfade duration.""" + self.crossfade_duration = crossfade_duration + super().__init__(logger) + + def _build(self) -> None: + """Build the standard crossfade filter chain.""" + self.filters = [ + CrossfadeFilter(logger=self.logger, crossfade_duration=self.crossfade_duration), + ] + + async def apply( + self, fade_out_part: bytes, fade_in_part: bytes, pcm_format: AudioFormat + ) -> bytes: + """Apply the standard crossfade to the given PCM audio parts.""" + # We need to override the default apply here, since standard crossfade only needs to be + # applied to the overlapping parts, not the full buffers. 
+ crossfade_size = int(pcm_format.pcm_sample_size * self.crossfade_duration) + # Pre-crossfade: outgoing track minus the crossfaded portion + pre_crossfade = fade_out_part[:-crossfade_size] + # Post-crossfade: incoming track minus the crossfaded portion + post_crossfade = fade_in_part[crossfade_size:] + # Adjust portions to exact crossfade size + adjusted_fade_in_part = fade_in_part[:crossfade_size] + adjusted_fade_out_part = fade_out_part[-crossfade_size:] + # Adjust the duration to match actual sizes + self.crossfade_duration = min( + len(adjusted_fade_in_part) / pcm_format.pcm_sample_size, + len(adjusted_fade_out_part) / pcm_format.pcm_sample_size, + ) + # Crossfaded portion: user's configured duration + crossfaded_section = await super().apply( + adjusted_fade_out_part, adjusted_fade_in_part, pcm_format + ) + # Full result: everything concatenated + return pre_crossfade + crossfaded_section + post_crossfade + + +# HELPER METHODS +def get_bpm_diff_percentage(bpm1: float, bpm2: float) -> float: + """Calculate BPM difference percentage between two BPM values.""" + return abs(1.0 - bpm1 / bpm2) * 100 + + +def extrapolate_downbeats( + downbeats: npt.NDArray[np.float64], + tempo_factor: float, + buffer_size: float = SMART_CROSSFADE_DURATION, + bpm: float | None = None, +) -> npt.NDArray[np.float64]: + """Extrapolate downbeats based on actual intervals when detection is incomplete. + + This is needed when we want to perform beat alignment in an 'atmospheric' outro + that does not have any detected downbeats. 
+ + Args: + downbeats: Array of detected downbeat positions in seconds + tempo_factor: Tempo adjustment factor for time stretching + buffer_size: Maximum buffer size in seconds + bpm: Optional BPM for validation when extrapolating with only 2 downbeats + """ + # Handle case with exactly 2 downbeats (with BPM validation) + if len(downbeats) == 2 and bpm is not None: + interval = float(downbeats[1] - downbeats[0]) + + # Expected interval for this BPM (assuming 4/4 time signature) + expected_interval = (60.0 / bpm) * 4 + + # Only extrapolate if interval matches BPM within 15% tolerance + if abs(interval - expected_interval) / expected_interval < 0.15: + # Adjust detected downbeats for time stretching first + adjusted_downbeats = downbeats / tempo_factor + last_downbeat = adjusted_downbeats[-1] + + # If the last downbeat is close to the buffer end, no extrapolation needed + if last_downbeat >= buffer_size - 5: + return adjusted_downbeats + + # Adjust the interval for time stretching + adjusted_interval = interval / tempo_factor + + # Extrapolate forward from last adjusted downbeat using adjusted interval + extrapolated = [] + current_pos = last_downbeat + adjusted_interval + max_extrapolation_distance = 125.0 # Don't extrapolate more than 25s + + while ( + current_pos < buffer_size + and (current_pos - last_downbeat) <= max_extrapolation_distance + ): + extrapolated.append(current_pos) + current_pos += adjusted_interval + + if extrapolated: + # Combine adjusted detected downbeats and extrapolated downbeats + return np.concatenate([adjusted_downbeats, np.array(extrapolated)]) + + return adjusted_downbeats + # else: interval doesn't match BPM, fall through to return original + + if len(downbeats) < 2: + # Need at least 2 downbeats to extrapolate + return downbeats / tempo_factor + + # Adjust detected downbeats for time stretching first + adjusted_downbeats = downbeats / tempo_factor + last_downbeat = adjusted_downbeats[-1] + + # If the last downbeat is close to the 
"""Smart Fades - Audio filter implementations."""

import logging
from abc import ABC, abstractmethod


class Filter(ABC):
    """Abstract base class for audio filters.

    Each filter consumes two labelled FFmpeg streams (the outgoing "fadeout"
    track and the incoming "fadein" track) and emits filtergraph fragments
    that write to its own output labels, so filters can be chained.
    """

    # FFmpeg pad labels the next filter in the chain reads from.
    output_fadeout_label: str
    output_fadein_label: str

    def __init__(self, logger: logging.Logger) -> None:
        """Initialize filter base class."""
        self.logger = logger

    @abstractmethod
    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Apply the filter and return the FFmpeg filter strings."""


class TimeStretchFilter(Filter):
    """Filter that applies time stretching to match BPM using rubberband."""

    output_fadeout_label: str = "fadeout_stretched"
    output_fadein_label: str = "fadein_unchanged"

    def __init__(
        self,
        logger: logging.Logger,
        stretch_ratio: float,
    ):
        """Initialize time stretch filter.

        Args:
            logger: Logger instance for this filter.
            stretch_ratio: Tempo ratio passed to FFmpeg's rubberband filter.
        """
        self.stretch_ratio = stretch_ratio
        super().__init__(logger)

    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Create FFmpeg filters to gradually adjust tempo from original BPM to target BPM."""
        return [
            # Only the outgoing track is stretched; the incoming one passes through.
            f"{input_fadeout_label}rubberband=tempo={self.stretch_ratio:.6f}:transients=mixed:detector=soft:pitchq=quality"
            f"[{self.output_fadeout_label}]",
            f"{input_fadein_label}anull[{self.output_fadein_label}]",  # codespell:ignore anull
        ]

    def __repr__(self) -> str:
        """Return string representation of TimeStretchFilter."""
        return f"TimeStretch(ratio={self.stretch_ratio:.2f})"


class TrimFilter(Filter):
    """Filter that trims incoming track to align with downbeats."""

    output_fadeout_label: str = "fadeout_beatalign"
    output_fadein_label: str = "fadein_beatalign"

    def __init__(self, logger: logging.Logger, fadein_start_pos: float):
        """Initialize beat align filter.

        Args:
            logger: Logger instance for this filter.
            fadein_start_pos: Position in seconds to trim the incoming track to
        """
        self.fadein_start_pos = fadein_start_pos
        super().__init__(logger)

    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Trim the incoming track to align with downbeats."""
        return [
            f"{input_fadeout_label}anull[{self.output_fadeout_label}]",  # codespell:ignore anull
            # asetpts=PTS-STARTPTS rebases timestamps so the trimmed audio starts at t=0.
            f"{input_fadein_label}atrim=start={self.fadein_start_pos},asetpts=PTS-STARTPTS[{self.output_fadein_label}]",
        ]

    def __repr__(self) -> str:
        """Return string representation of TrimFilter."""
        return f"Trim(trim={self.fadein_start_pos:.2f}s)"


class FrequencySweepFilter(Filter):
    """Filter that creates frequency sweep effects (lowpass/highpass transitions)."""

    output_fadeout_label: str = "frequency_sweep"
    output_fadein_label: str = "frequency_sweep"

    def __init__(
        self,
        logger: logging.Logger,
        sweep_type: str,
        target_freq: int,
        duration: float,
        start_time: float,
        sweep_direction: str,
        poles: int,
        curve_type: str,
        stream_type: str = "fadeout",
    ):
        """Initialize frequency sweep filter.

        Args:
            logger: Logger instance for this filter.
            sweep_type: 'lowpass' or 'highpass'
            target_freq: Target frequency for the filter
            duration: Duration of the sweep in seconds
            start_time: When to start the sweep
            sweep_direction: 'fade_in' (unfiltered->filtered) or 'fade_out' (filtered->unfiltered)
            poles: Number of poles for the filter
            curve_type: 'linear', 'exponential', or 'logarithmic'
            stream_type: 'fadeout' or 'fadein' - which stream to process
        """
        self.sweep_type = sweep_type
        self.target_freq = target_freq
        self.duration = duration
        self.start_time = start_time
        self.sweep_direction = sweep_direction
        self.poles = poles
        self.curve_type = curve_type
        self.stream_type = stream_type

        # Set output labels based on stream type: the processed stream gets a
        # sweep-specific label, the other stream is passed through untouched.
        if stream_type == "fadeout":
            self.output_fadeout_label = f"fadeout_{sweep_type}"
            self.output_fadein_label = "fadein_passthrough"
        else:
            self.output_fadeout_label = "fadeout_passthrough"
            self.output_fadein_label = f"fadein_{sweep_type}"

        super().__init__(logger)

    def _generate_volume_expr(self, start: float, dur: float, direction: str, curve: str) -> str:
        """Build a time-varying FFmpeg volume expression for the sweep ramp.

        Args:
            start: Start time of the ramp in seconds.
            dur: Duration of the ramp in seconds.
            direction: 'up' (0 -> 1) or 'down' (1 -> 0).
            curve: 'linear', 'exponential', or 'logarithmic'.

        Returns:
            A volume option string including ':eval=frame' so FFmpeg
            re-evaluates the expression every frame.
        """
        t_expr = f"t-{start}"  # Time relative to start
        norm_t = f"min(max({t_expr},0),{dur})/{dur}"  # Normalized 0-1

        # Shape the normalized ramp; any unknown curve falls back to linear.
        if curve == "exponential":
            # Exponential curve for smoother transitions
            ramp = f"pow({norm_t},2)"
        elif curve == "logarithmic":
            # Logarithmic curve for more aggressive initial change
            ramp = f"sqrt({norm_t})"
        else:
            ramp = norm_t

        if direction == "up":
            return f"'{ramp}':eval=frame"
        return f"'1-{ramp}':eval=frame"

    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Generate FFmpeg filters for frequency sweep effect."""
        # Select the correct input based on stream type
        if self.stream_type == "fadeout":
            input_label = input_fadeout_label
            output_label = self.output_fadeout_label
            passthrough_label = self.output_fadein_label
            passthrough_input = input_fadein_label
        else:
            input_label = input_fadein_label
            output_label = self.output_fadein_label
            passthrough_label = self.output_fadeout_label
            passthrough_input = input_fadeout_label

        orig_label = f"{output_label}_orig"
        filter_label = f"{output_label}_to{self.sweep_type[:2]}"
        filtered_label = f"{output_label}_filtered"
        orig_faded_label = f"{output_label}_orig_faded"
        filtered_faded_label = f"{output_label}_filtered_faded"

        # Determine volume ramp directions based on sweep direction
        if self.sweep_direction == "fade_in":
            # Fade from dry to wet (unfiltered to filtered)
            orig_direction = "down"
            filter_direction = "up"
        else:  # fade_out
            # Fade from wet to dry (filtered to unfiltered)
            orig_direction = "up"
            filter_direction = "down"

        # Build filter chain
        orig_volume_expr = self._generate_volume_expr(
            self.start_time, self.duration, orig_direction, self.curve_type
        )
        filtered_volume_expr = self._generate_volume_expr(
            self.start_time, self.duration, filter_direction, self.curve_type
        )

        return [
            # Pass through the other stream unchanged
            f"{passthrough_input}anull[{passthrough_label}]",  # codespell:ignore anull
            # Split input into two paths
            f"{input_label}asplit=2[{orig_label}][{filter_label}]",
            # Apply frequency filter to one path
            f"[{filter_label}]{self.sweep_type}=f={self.target_freq}:poles={self.poles}[{filtered_label}]",
            # Apply time-varying volume to original path
            f"[{orig_label}]volume={orig_volume_expr}[{orig_faded_label}]",
            # Apply time-varying volume to filtered path
            f"[{filtered_label}]volume={filtered_volume_expr}[{filtered_faded_label}]",
            # Mix the two paths together
            f"[{orig_faded_label}][{filtered_faded_label}]amix=inputs=2:duration=longest:normalize=0[{output_label}]",
        ]

    def __repr__(self) -> str:
        """Return string representation of FrequencySweepFilter."""
        return f"FreqSweep({self.sweep_type}@{self.target_freq}Hz)"


class CrossfadeFilter(Filter):
    """Filter that applies the final crossfade between fadeout and fadein streams."""

    output_fadeout_label: str = "crossfade"
    output_fadein_label: str = "crossfade"

    def __init__(self, logger: logging.Logger, crossfade_duration: float):
        """Initialize crossfade filter."""
        self.crossfade_duration = crossfade_duration
        super().__init__(logger)

    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Apply the acrossfade filter."""
        return [f"{input_fadeout_label}{input_fadein_label}acrossfade=d={self.crossfade_duration}"]

    def __repr__(self) -> str:
        """Return string representation of CrossfadeFilter."""
        return f"Crossfade(d={self.crossfade_duration:.1f}s)"
self.streams = streams + self.logger = streams.logger.getChild("smart_fades_mixer") + + async def mix( + self, + fade_in_part: bytes, + fade_out_part: bytes, + fade_in_streamdetails: StreamDetails, + fade_out_streamdetails: StreamDetails, + pcm_format: AudioFormat, + standard_crossfade_duration: int = 10, + mode: SmartFadesMode = SmartFadesMode.SMART_CROSSFADE, + ) -> bytes: + """Apply crossfade with internal state management and smart/standard fallback logic.""" + if mode == SmartFadesMode.DISABLED: + # No crossfade, just concatenate + # Note that this should not happen since we check this before calling mix() + # but just to be sure... + return fade_out_part + fade_in_part + + # strip silence from end of audio of fade_out_part + fade_out_part = await strip_silence( + self.streams.mass, + fade_out_part, + pcm_format=pcm_format, + reverse=True, + ) + # Ensure frame alignment after silence stripping + fade_out_part = align_audio_to_frame_boundary(fade_out_part, pcm_format) + + # strip silence from begin of audio of fade_in_part + fade_in_part = await strip_silence( + self.streams.mass, + fade_in_part, + pcm_format=pcm_format, + reverse=False, + ) + # Ensure frame alignment after silence stripping + fade_in_part = align_audio_to_frame_boundary(fade_in_part, pcm_format) + if mode == SmartFadesMode.STANDARD_CROSSFADE: + smart_fade: SmartFade = StandardCrossFade( + logger=self.logger, + crossfade_duration=standard_crossfade_duration, + ) + return await smart_fade.apply( + fade_out_part, + fade_in_part, + pcm_format, + ) + # Attempt smart crossfade with analysis data + fade_out_analysis: SmartFadesAnalysis | None + if stored_analysis := await self.streams.mass.music.get_smart_fades_analysis( + fade_out_streamdetails.item_id, + fade_out_streamdetails.provider, + SmartFadesAnalysisFragment.OUTRO, + ): + fade_out_analysis = stored_analysis + else: + fade_out_analysis = await self.streams.mass.streams.smart_fades_analyzer.analyze( + fade_out_streamdetails.item_id, + 
fade_out_streamdetails.provider, + SmartFadesAnalysisFragment.OUTRO, + fade_out_part, + pcm_format, + ) + + fade_in_analysis: SmartFadesAnalysis | None + if stored_analysis := await self.streams.mass.music.get_smart_fades_analysis( + fade_in_streamdetails.item_id, + fade_in_streamdetails.provider, + SmartFadesAnalysisFragment.INTRO, + ): + fade_in_analysis = stored_analysis + else: + fade_in_analysis = await self.streams.mass.streams.smart_fades_analyzer.analyze( + fade_in_streamdetails.item_id, + fade_in_streamdetails.provider, + SmartFadesAnalysisFragment.INTRO, + fade_in_part, + pcm_format, + ) + if ( + fade_out_analysis + and fade_in_analysis + and fade_out_analysis.confidence > 0.3 + and fade_in_analysis.confidence > 0.3 + and mode == SmartFadesMode.SMART_CROSSFADE + ): + try: + smart_fade = SmartCrossFade( + logger=self.logger, + fade_out_analysis=fade_out_analysis, + fade_in_analysis=fade_in_analysis, + ) + return await smart_fade.apply( + fade_out_part, + fade_in_part, + pcm_format, + ) + except Exception as e: + self.logger.warning( + "Smart crossfade failed: %s, falling back to standard crossfade", e + ) + + # Always fallback to Standard Crossfade in case something goes wrong + smart_fade = StandardCrossFade( + logger=self.logger, + crossfade_duration=standard_crossfade_duration, + ) + return await smart_fade.apply( + fade_out_part, + fade_in_part, + pcm_format, + ) diff --git a/music_assistant/controllers/streams/streams_controller.py b/music_assistant/controllers/streams/streams_controller.py new file mode 100644 index 00000000..8805788d --- /dev/null +++ b/music_assistant/controllers/streams/streams_controller.py @@ -0,0 +1,1950 @@ +""" +Controller to stream audio to players. + +The streams controller hosts a basic, unprotected HTTP-only webserver +purely to stream audio packets to players and some control endpoints such as +the upnp callbacks and json rpc api for slimproto clients. 
+""" + +from __future__ import annotations + +import asyncio +import gc +import logging +import os +import urllib.parse +from collections.abc import AsyncGenerator +from dataclasses import dataclass +from typing import TYPE_CHECKING, Final, cast + +from aiofiles.os import wrap +from aiohttp import web +from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption, ConfigValueType +from music_assistant_models.enums import ( + ConfigEntryType, + ContentType, + MediaType, + PlayerFeature, + StreamType, + VolumeNormalizationMode, +) +from music_assistant_models.errors import ( + AudioError, + InvalidDataError, + ProviderUnavailableError, + QueueEmpty, +) +from music_assistant_models.media_items import AudioFormat, Track +from music_assistant_models.player_queue import PlayLogEntry + +from music_assistant.constants import ( + ANNOUNCE_ALERT_FILE, + CONF_BIND_IP, + CONF_BIND_PORT, + CONF_CROSSFADE_DURATION, + CONF_ENTRY_ENABLE_ICY_METADATA, + CONF_ENTRY_LOG_LEVEL, + CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES, + CONF_HTTP_PROFILE, + CONF_OUTPUT_CHANNELS, + CONF_OUTPUT_CODEC, + CONF_PUBLISH_IP, + CONF_SAMPLE_RATES, + CONF_SMART_FADES_MODE, + CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO, + CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS, + CONF_VOLUME_NORMALIZATION_RADIO, + CONF_VOLUME_NORMALIZATION_TRACKS, + DEFAULT_STREAM_HEADERS, + ICY_HEADERS, + INTERNAL_PCM_FORMAT, + SILENCE_FILE, + VERBOSE_LOG_LEVEL, +) +from music_assistant.controllers.players.player_controller import AnnounceData +from music_assistant.controllers.streams.smart_fades import ( + SmartFadesMixer, +) +from music_assistant.controllers.streams.smart_fades.analyzer import SmartFadesAnalyzer +from music_assistant.controllers.streams.smart_fades.fades import SMART_CROSSFADE_DURATION +from music_assistant.helpers.audio import LOGGER as AUDIO_LOGGER +from music_assistant.helpers.audio import ( + get_buffered_media_stream, + get_chunksize, + get_media_stream, + get_player_filter_params, + 
def parse_pcm_info(content_type: str) -> tuple[int, int, int]:
    """Parse PCM info from a codec/content_type string.

    Returns a (sample_rate, bit_depth, channels) tuple, falling back to
    44100/16/2 for any parameter not present in the string.
    """
    # Codec strings use ";"-separated key=value parameters
    # (e.g. "pcm;codec=pcm;rate=44100;bitrate=16;channels=2");
    # convert to a query string so urllib can parse them.
    if ";" in content_type:
        params = dict(urllib.parse.parse_qsl(content_type.replace(";", "&")))
    else:
        params = {}
    sample_rate = int(params.get("rate", 44100))
    bit_depth = int(params.get("bitrate", 16))
    channel_count = int(params.get("channels", 2))
    return (sample_rate, bit_depth, channel_count)


@dataclass
class CrossfadeData:
    """Data class to hold crossfade data."""

    data: bytes
    fade_in_size: int
    pcm_format: AudioFormat  # format of the 'data' bytes (current/previous track's format)
    fade_in_pcm_format: AudioFormat  # format for 'fade_in_size' (next track's format)
    queue_item_id: str
+ ) + self.manifest.icon = "cast-audio" + self.announcements: dict[str, AnnounceData] = {} + self._crossfade_data: dict[str, CrossfadeData] = {} + self._bind_ip: str = "0.0.0.0" + self._smart_fades_mixer = SmartFadesMixer(self) + self._smart_fades_analyzer = SmartFadesAnalyzer(self) + + @property + def base_url(self) -> str: + """Return the base_url for the streamserver.""" + return self._server.base_url + + @property + def bind_ip(self) -> str: + """Return the IP address this streamserver is bound to.""" + return self._bind_ip + + @property + def smart_fades_mixer(self) -> SmartFadesMixer: + """Return the SmartFadesMixer instance.""" + return self._smart_fades_mixer + + @property + def smart_fades_analyzer(self) -> SmartFadesAnalyzer: + """Return the SmartFadesAnalyzer instance.""" + return self._smart_fades_analyzer + + async def get_config_entries( + self, + action: str | None = None, + values: dict[str, ConfigValueType] | None = None, + ) -> tuple[ConfigEntry, ...]: + """Return all Config Entries for this core module (if any).""" + ip_addresses = await get_ip_addresses() + default_port = await select_free_port(8097, 9200) + return ( + ConfigEntry( + key=CONF_PUBLISH_IP, + type=ConfigEntryType.STRING, + default_value=ip_addresses[0], + label="Published IP address", + description="This IP address is communicated to players where to find this server." + "\nMake sure that this IP can be reached by players on the local network, " + "otherwise audio streaming will not work.", + required=False, + ), + ConfigEntry( + key=CONF_BIND_PORT, + type=ConfigEntryType.INTEGER, + default_value=default_port, + label="TCP Port", + description="The TCP port to run the server. 
" + "Make sure that this server can be reached " + "on the given IP and TCP port by players on the local network.", + ), + ConfigEntry( + key=CONF_ALLOW_BUFFER, + type=ConfigEntryType.BOOLEAN, + default_value=CONF_ALLOW_BUFFER_DEFAULT, + label="Allow (in-memory) buffering of (track) audio", + description="By default, Music Assistant tries to be as resource " + "efficient as possible when streaming audio, especially considering " + "low-end devices such as Raspberry Pi's. This means that audio " + "buffering is disabled by default to reduce memory usage. \n\n" + "Enabling this option allows for in-memory buffering of audio, " + "which (massively) improves playback (and seeking) performance but it comes " + "at the cost of increased memory usage. " + "If you run Music Assistant on a capable device with enough memory, " + "enabling this option is strongly recommended.", + required=False, + category="audio", + ), + ConfigEntry( + key=CONF_VOLUME_NORMALIZATION_RADIO, + type=ConfigEntryType.STRING, + default_value=VolumeNormalizationMode.FALLBACK_FIXED_GAIN, + label="Volume normalization method for radio streams", + options=[ + ConfigValueOption(x.value.replace("_", " ").title(), x.value) + for x in VolumeNormalizationMode + ], + category="audio", + ), + ConfigEntry( + key=CONF_VOLUME_NORMALIZATION_TRACKS, + type=ConfigEntryType.STRING, + default_value=VolumeNormalizationMode.FALLBACK_DYNAMIC, + label="Volume normalization method for tracks", + options=[ + ConfigValueOption(x.value.replace("_", " ").title(), x.value) + for x in VolumeNormalizationMode + ], + category="audio", + ), + ConfigEntry( + key=CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO, + type=ConfigEntryType.FLOAT, + range=(-20, 10), + default_value=-6, + label="Fixed/fallback gain adjustment for radio streams", + category="audio", + ), + ConfigEntry( + key=CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS, + type=ConfigEntryType.FLOAT, + range=(-20, 10), + default_value=-6, + label="Fixed/fallback gain adjustment for 
tracks", + category="audio", + ), + ConfigEntry( + key=CONF_ALLOW_CROSSFADE_SAME_ALBUM, + type=ConfigEntryType.BOOLEAN, + default_value=False, + label="Allow crossfade between tracks from the same album", + description="Enabling this option allows for crossfading between tracks " + "that are part of the same album.", + category="audio", + ), + ConfigEntry( + key=CONF_BIND_IP, + type=ConfigEntryType.STRING, + default_value="0.0.0.0", + options=[ConfigValueOption(x, x) for x in {"0.0.0.0", *ip_addresses}], + label="Bind to IP/interface", + description="Start the stream server on this specific interface. \n" + "Use 0.0.0.0 to bind to all interfaces, which is the default. \n" + "This is an advanced setting that should normally " + "not be adjusted in regular setups.", + category="advanced", + required=False, + ), + ConfigEntry( + key=CONF_SMART_FADES_LOG_LEVEL, + type=ConfigEntryType.STRING, + label="Smart Fades Log level", + description="Log level for the Smart Fades mixer and analyzer.", + options=CONF_ENTRY_LOG_LEVEL.options, + default_value="GLOBAL", + category="advanced", + ), + ) + + async def setup(self, config: CoreConfig) -> None: + """Async initialize of module.""" + # copy log level to audio/ffmpeg loggers + AUDIO_LOGGER.setLevel(self.logger.level) + FFMPEG_LOGGER.setLevel(self.logger.level) + self._setup_smart_fades_logger(config) + # perform check for ffmpeg version + await check_ffmpeg_version() + # start the webserver + self.publish_port = config.get_value(CONF_BIND_PORT) + self.publish_ip = config.get_value(CONF_PUBLISH_IP) + self._bind_ip = bind_ip = str(config.get_value(CONF_BIND_IP)) + # print a big fat message in the log where the streamserver is running + # because this is a common source of issues for people with more complex setups + self.logger.log( + logging.INFO if self.mass.config.onboard_done else logging.WARNING, + "\n\n################################################################################\n" + "Starting streamserver on %s:%s\n" + 
"This is the IP address that is communicated to players.\n" + "If this is incorrect, audio will not play!\n" + "See the documentation how to configure the publish IP for the Streamserver\n" + "in Settings --> Core modules --> Streamserver\n" + "################################################################################\n", + self.publish_ip, + self.publish_port, + ) + await self._server.setup( + bind_ip=bind_ip, + bind_port=cast("int", self.publish_port), + base_url=f"http://{self.publish_ip}:{self.publish_port}", + static_routes=[ + ( + "*", + "/flow/{session_id}/{queue_id}/{queue_item_id}.{fmt}", + self.serve_queue_flow_stream, + ), + ( + "*", + "/single/{session_id}/{queue_id}/{queue_item_id}.{fmt}", + self.serve_queue_item_stream, + ), + ( + "*", + "/command/{queue_id}/{command}.mp3", + self.serve_command_request, + ), + ( + "*", + "/announcement/{player_id}.{fmt}", + self.serve_announcement_stream, + ), + ( + "*", + "/pluginsource/{plugin_source}/{player_id}.{fmt}", + self.serve_plugin_source_stream, + ), + ], + ) + # Start periodic garbage collection task + # This ensures memory from audio buffers and streams is cleaned up regularly + self.mass.call_later(900, self._periodic_garbage_collection) # 15 minutes + + async def close(self) -> None: + """Cleanup on exit.""" + await self._server.close() + + async def resolve_stream_url( + self, + session_id: str, + queue_item: QueueItem, + flow_mode: bool = False, + player_id: str | None = None, + ) -> str: + """Resolve the stream URL for the given QueueItem.""" + if not player_id: + player_id = queue_item.queue_id + conf_output_codec = await self.mass.config.get_player_config_value( + player_id, CONF_OUTPUT_CODEC, default="flac", return_type=str + ) + output_codec = ContentType.try_parse(conf_output_codec or "flac") + fmt = output_codec.value + # handle raw pcm without exact format specifiers + if output_codec.is_pcm() and ";" not in fmt: + fmt += f";codec=pcm;rate={44100};bitrate={16};channels={2}" + base_path 
= "flow" if flow_mode else "single" + return f"{self._server.base_url}/{base_path}/{session_id}/{queue_item.queue_id}/{queue_item.queue_item_id}.{fmt}" # noqa: E501 + + async def get_plugin_source_url( + self, + plugin_source: PluginSource, + player_id: str, + ) -> str: + """Get the url for the Plugin Source stream/proxy.""" + if plugin_source.audio_format.content_type.is_pcm(): + fmt = ContentType.WAV.value + else: + fmt = plugin_source.audio_format.content_type.value + return f"{self._server.base_url}/pluginsource/{plugin_source.id}/{player_id}.{fmt}" + + async def serve_queue_item_stream(self, request: web.Request) -> web.StreamResponse: + """Stream single queueitem audio to a player.""" + self._log_request(request) + queue_id = request.match_info["queue_id"] + queue = self.mass.player_queues.get(queue_id) + if not queue: + raise web.HTTPNotFound(reason=f"Unknown Queue: {queue_id}") + session_id = request.match_info["session_id"] + if queue.session_id and session_id != queue.session_id: + raise web.HTTPNotFound(reason=f"Unknown (or invalid) session: {session_id}") + queue_player = self.mass.players.get(queue_id) + queue_item_id = request.match_info["queue_item_id"] + queue_item = self.mass.player_queues.get_item(queue_id, queue_item_id) + if not queue_item: + raise web.HTTPNotFound(reason=f"Unknown Queue item: {queue_item_id}") + if not queue_item.streamdetails: + try: + queue_item.streamdetails = await get_stream_details( + mass=self.mass, queue_item=queue_item + ) + except Exception as e: + self.logger.error( + "Failed to get streamdetails for QueueItem %s: %s", queue_item_id, e + ) + queue_item.available = False + raise web.HTTPNotFound(reason=f"No streamdetails for Queue item: {queue_item_id}") + + # pick output format based on the streamdetails and player capabilities + if not queue_player: + raise web.HTTPNotFound(reason=f"Unknown Player: {queue_id}") + + output_format = await self.get_output_format( + output_format_str=request.match_info["fmt"], + 
player=queue_player, + content_sample_rate=queue_item.streamdetails.audio_format.sample_rate, + # always use f32 internally for extra headroom for filters etc + content_bit_depth=INTERNAL_PCM_FORMAT.bit_depth, + ) + + # prepare request, add some DLNA/UPNP compatible headers + headers = { + **DEFAULT_STREAM_HEADERS, + "icy-name": queue_item.name, + "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01500000000000000000000000000000", # noqa: E501 + "Accept-Ranges": "none", + "Content-Type": f"audio/{output_format.output_format_str}", + } + resp = web.StreamResponse( + status=200, + reason="OK", + headers=headers, + ) + resp.content_type = f"audio/{output_format.output_format_str}" + http_profile = await self.mass.config.get_player_config_value( + queue_id, CONF_HTTP_PROFILE, default="default", return_type=str + ) + if http_profile == "forced_content_length" and not queue_item.duration: + # just set an insane high content length to make sure the player keeps playing + resp.content_length = get_chunksize(output_format, 12 * 3600) + elif http_profile == "forced_content_length" and queue_item.duration: + # guess content length based on duration + resp.content_length = get_chunksize(output_format, queue_item.duration) + elif http_profile == "chunked": + resp.enable_chunked_encoding() + + await resp.prepare(request) + + # return early if this is not a GET request + if request.method != "GET": + return resp + + if queue_item.media_type != MediaType.TRACK: + # no crossfade on non-tracks + smart_fades_mode = SmartFadesMode.DISABLED + else: + smart_fades_mode = await self.mass.config.get_player_config_value( + queue.queue_id, CONF_SMART_FADES_MODE, return_type=SmartFadesMode + ) + standard_crossfade_duration = self.mass.config.get_raw_player_config_value( + queue.queue_id, CONF_CROSSFADE_DURATION, 10 + ) + if ( + smart_fades_mode != SmartFadesMode.DISABLED + and PlayerFeature.GAPLESS_PLAYBACK not in queue_player.supported_features + ): + # crossfade is not supported on 
this player due to missing gapless playback + self.logger.warning( + "Crossfade disabled: Player %s does not support gapless playback, " + "consider enabling flow mode to enable crossfade on this player.", + queue_player.display_name if queue_player else "Unknown Player", + ) + smart_fades_mode = SmartFadesMode.DISABLED + + # work out pcm format based on streamdetails + pcm_format = AudioFormat( + sample_rate=queue_item.streamdetails.audio_format.sample_rate, + # always use f32 internally for extra headroom for filters etc + content_type=INTERNAL_PCM_FORMAT.content_type, + bit_depth=INTERNAL_PCM_FORMAT.bit_depth, + channels=queue_item.streamdetails.audio_format.channels, + ) + if smart_fades_mode != SmartFadesMode.DISABLED: + # crossfade is enabled, use special crossfaded single item stream + # where the crossfade of the next track is present in the stream of + # a single track. This only works if the player supports gapless playback! + audio_input = self.get_queue_item_stream_with_smartfade( + queue_item=queue_item, + pcm_format=pcm_format, + smart_fades_mode=smart_fades_mode, + standard_crossfade_duration=standard_crossfade_duration, + ) + else: + # no crossfade, just a regular single item stream + audio_input = self.get_queue_item_stream( + queue_item=queue_item, + pcm_format=pcm_format, + seek_position=queue_item.streamdetails.seek_position, + ) + # stream the audio + # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into + # the desired output format for the player including any player specific filter params + # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats + + # readrate filter input args to control buffering + # we need to slowly feed the music to avoid the player stopping and later + # restarting (or completely failing) the audio stream by keeping the buffer short. + # this is reported to be an issue especially with Chromecast players. 
+ # see for example: https://github.com/music-assistant/support/issues/3717 + user_agent = request.headers.get("User-Agent", "") + if queue_item.media_type == MediaType.RADIO: + # keep very short buffer for radio streams + # to keep them (more or less) realtime and prevent time outs + read_rate_input_args = ["-readrate", "1.0", "-readrate_initial_burst", "2"] + elif "Network_Module" in user_agent or "transferMode.dlna.org" in request.headers: + # and ofcourse we have an exception of the exception. Where most players actually NEED + # the readrate filter to avoid disconnecting, some other players (DLNA/MusicCast) + # actually fail when the filter is used. So we disable it completely for those players. + read_rate_input_args = None # disable readrate for DLNA players + else: + # allow buffer ahead of 10 seconds and read 1.5x faster than realtime + read_rate_input_args = ["-readrate", "1.5", "-readrate_initial_burst", "10"] + + first_chunk_received = False + bytes_sent = 0 + async for chunk in get_ffmpeg_stream( + audio_input=audio_input, + input_format=pcm_format, + output_format=output_format, + filter_params=get_player_filter_params( + self.mass, + player_id=queue_player.player_id, + input_format=pcm_format, + output_format=output_format, + ), + extra_input_args=read_rate_input_args, + ): + try: + await resp.write(chunk) + bytes_sent += len(chunk) + if not first_chunk_received: + first_chunk_received = True + # inform the queue that the track is now loaded in the buffer + # so for example the next track can be enqueued + self.mass.player_queues.track_loaded_in_buffer( + queue_item.queue_id, queue_item.queue_item_id + ) + except (BrokenPipeError, ConnectionResetError, ConnectionError) as err: + if first_chunk_received and not queue_player.stop_called: + # Player disconnected (unexpected) after receiving at least some data + # This could indicate buffering issues, network problems, + # or player-specific issues + bytes_expected = get_chunksize(output_format, 
async def serve_queue_flow_stream(self, request: web.Request) -> web.StreamResponse:
    """Stream Queue Flow audio to player.

    Serves the whole queue as one continuous (gapless/crossfaded) audio stream,
    starting at the queue item referenced in the request path. Optionally
    interleaves ICY (shoutcast) metadata when the client asked for it.

    Raises HTTPNotFound when the queue, player or start item is unknown.
    """
    self._log_request(request)
    queue_id = request.match_info["queue_id"]
    queue = self.mass.player_queues.get(queue_id)
    if not queue:
        raise web.HTTPNotFound(reason=f"Unknown Queue: {queue_id}")
    if not (queue_player := self.mass.players.get(queue_id)):
        raise web.HTTPNotFound(reason=f"Unknown Player: {queue_id}")
    start_queue_item_id = request.match_info["queue_item_id"]
    start_queue_item = self.mass.player_queues.get_item(queue_id, start_queue_item_id)
    if not start_queue_item:
        raise web.HTTPNotFound(reason=f"Unknown Queue item: {start_queue_item_id}")

    # select the highest possible PCM settings for this player
    flow_pcm_format = await self._select_flow_format(queue_player)

    # work out output format/details
    output_format = await self.get_output_format(
        output_format_str=request.match_info["fmt"],
        player=queue_player,
        content_sample_rate=flow_pcm_format.sample_rate,
        content_bit_depth=flow_pcm_format.bit_depth,
    )
    # work out ICY metadata support
    # ICY is only enabled when the client sends "Icy-MetaData: 1" AND the
    # player config does not have it disabled.
    icy_preference = self.mass.config.get_raw_player_config_value(
        queue_id,
        CONF_ENTRY_ENABLE_ICY_METADATA.key,
        CONF_ENTRY_ENABLE_ICY_METADATA.default_value,
    )
    enable_icy = request.headers.get("Icy-MetaData", "") == "1" and icy_preference != "disabled"
    # "full" preference uses a larger metadata interval (also carries StreamURL)
    icy_meta_interval = 256000 if icy_preference == "full" else 16384

    # prepare request, add some DLNA/UPNP compatible headers
    headers = {
        **DEFAULT_STREAM_HEADERS,
        **ICY_HEADERS,
        "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01700000000000000000000000000000",  # noqa: E501
        "Accept-Ranges": "none",
        "Content-Type": f"audio/{output_format.output_format_str}",
    }
    if enable_icy:
        headers["icy-metaint"] = str(icy_meta_interval)

    resp = web.StreamResponse(
        status=200,
        reason="OK",
        headers=headers,
    )
    http_profile = await self.mass.config.get_player_config_value(
        queue_id, CONF_HTTP_PROFILE, default="default", return_type=str
    )
    if http_profile == "forced_content_length":
        # just set an insane high content length to make sure the player keeps playing
        # (12 hours worth of audio at the output format's bitrate)
        resp.content_length = get_chunksize(output_format, 12 * 3600)
    elif http_profile == "chunked":
        resp.enable_chunked_encoding()

    await resp.prepare(request)

    # return early if this is not a GET request (e.g. HEAD probe)
    if request.method != "GET":
        return resp

    # all checks passed, start streaming!
    # this final ffmpeg process in the chain will convert the raw, lossless PCM audio into
    # the desired output format for the player including any player specific filter params
    # such as channels mixing, DSP, resampling and, only if needed, encoding to lossy formats
    self.logger.debug("Start serving Queue flow audio stream for %s", queue.display_name)

    async for chunk in get_ffmpeg_stream(
        audio_input=self.get_queue_flow_stream(
            queue=queue,
            start_queue_item=start_queue_item,
            pcm_format=flow_pcm_format,
        ),
        input_format=flow_pcm_format,
        output_format=output_format,
        filter_params=get_player_filter_params(
            self.mass, queue_player.player_id, flow_pcm_format, output_format
        ),
        # we need to slowly feed the music to avoid the player stopping and later
        # restarting (or completely failing) the audio stream by keeping the buffer short.
        # this is reported to be an issue especially with Chromecast players.
        # see for example: https://github.com/music-assistant/support/issues/3717
        # allow buffer ahead of 8 seconds and read slightly faster than realtime
        extra_input_args=["-readrate", "1.01", "-readrate_initial_burst", "8"],
        # when ICY is enabled the chunk size must equal the metadata interval
        # so metadata can be injected exactly between audio chunks
        chunk_size=icy_meta_interval if enable_icy else get_chunksize(output_format),
    ):
        try:
            await resp.write(chunk)
        except (BrokenPipeError, ConnectionResetError, ConnectionError):
            # race condition: client disconnected while we were writing
            break

        if not enable_icy:
            continue

        # if icy metadata is enabled, send the icy metadata after the chunk
        if (
            # use current item here and not buffered item, otherwise
            # the icy metadata will be too much ahead
            (current_item := queue.current_item)
            and current_item.streamdetails
            and current_item.streamdetails.stream_title
        ):
            title = current_item.streamdetails.stream_title
        elif queue and current_item and current_item.name:
            title = current_item.name
        else:
            title = "Music Assistant"
        metadata = f"StreamTitle='{title}';".encode()
        if icy_preference == "full" and current_item and current_item.image:
            metadata += f"StreamURL='{current_item.image.path}'".encode()
        # ICY metadata blocks must be padded to a multiple of 16 bytes,
        # prefixed by a single length byte expressing size in 16-byte units
        while len(metadata) % 16 != 0:
            metadata += b"\x00"
        length = len(metadata)
        length_b = chr(int(length / 16)).encode()
        await resp.write(length_b + metadata)

    return resp
async def serve_command_request(self, request: web.Request) -> web.FileResponse:
    """Handle special 'command' request for a player.

    Always responds with a short silence file so the requesting player keeps
    playing; the actual command (currently only 'next') is scheduled as a task.
    """
    self._log_request(request)
    queue_id = request.match_info["queue_id"]
    command = request.match_info["command"]
    if command == "next":
        self.mass.create_task(self.mass.player_queues.next(queue_id))
    return web.FileResponse(SILENCE_FILE, headers={"icy-name": "Music Assistant"})

async def serve_announcement_stream(self, request: web.Request) -> web.StreamResponse:
    """Stream announcement audio to a player.

    The announcement data must have been registered before via
    get_announcement_url, otherwise a 404 is returned.
    """
    self._log_request(request)
    player_id = request.match_info["player_id"]
    # FIX: the route carries a *player* id so it must be resolved on the
    # players controller (not player_queues) - consistent with
    # serve_plugin_source_stream and required for players without an own queue.
    player = self.mass.players.get(player_id)
    if not player:
        raise web.HTTPNotFound(reason=f"Unknown Player: {player_id}")
    if not (announce_data := self.announcements.get(player_id)):
        raise web.HTTPNotFound(reason=f"No pending announcements for Player: {player_id}")

    # work out output format/details from the requested file extension
    fmt = request.match_info["fmt"]
    audio_format = AudioFormat(content_type=ContentType.try_parse(fmt))

    http_profile = await self.mass.config.get_player_config_value(
        player_id, CONF_HTTP_PROFILE, default="default", return_type=str
    )
    if http_profile == "forced_content_length":
        # given the fact that an announcement is just a short audio clip,
        # just send it over completely at once so we have a fixed content length
        data = b""
        async for chunk in self.get_announcement_stream(
            announcement_url=announce_data["announcement_url"],
            output_format=audio_format,
            pre_announce=announce_data["pre_announce"],
            pre_announce_url=announce_data["pre_announce_url"],
        ):
            data += chunk
        return web.Response(
            body=data,
            content_type=f"audio/{audio_format.output_format_str}",
            headers=DEFAULT_STREAM_HEADERS,
        )

    resp = web.StreamResponse(
        status=200,
        reason="OK",
        headers=DEFAULT_STREAM_HEADERS,
    )
    resp.content_type = f"audio/{audio_format.output_format_str}"
    if http_profile == "chunked":
        resp.enable_chunked_encoding()

    await resp.prepare(request)

    # return early if this is not a GET request (e.g. HEAD probe)
    if request.method != "GET":
        return resp

    # all checks passed, start streaming!
    self.logger.debug(
        "Start serving audio stream for Announcement %s to %s",
        announce_data["announcement_url"],
        player.display_name,
    )
    async for chunk in self.get_announcement_stream(
        announcement_url=announce_data["announcement_url"],
        output_format=audio_format,
        pre_announce=announce_data["pre_announce"],
        pre_announce_url=announce_data["pre_announce_url"],
    ):
        try:
            await resp.write(chunk)
        except (BrokenPipeError, ConnectionResetError):
            # client disconnected - stop streaming
            break

    self.logger.debug(
        "Finished serving audio stream for Announcement %s to %s",
        announce_data["announcement_url"],
        player.display_name,
    )

    return resp
async def serve_plugin_source_stream(self, request: web.Request) -> web.StreamResponse:
    """Stream PluginSource audio to a player.

    Raises ProviderUnavailableError for an unknown plugin source,
    HTTPNotFound for an unknown player and InvalidDataError when the
    plugin source has no audio format set.
    """
    self._log_request(request)
    plugin_source_id = request.match_info["plugin_source"]
    provider = cast("PluginProvider", self.mass.get_provider(plugin_source_id))
    if not provider:
        raise ProviderUnavailableError(f"Unknown PluginSource: {plugin_source_id}")
    # work out output format/details
    player_id = request.match_info["player_id"]
    player = self.mass.players.get(player_id)
    if not player:
        raise web.HTTPNotFound(reason=f"Unknown Player: {player_id}")
    plugin_source = provider.get_source()
    # FIX: validate the audio format BEFORE its first use below
    # (previously this check was performed only after resp.prepare(),
    # while plugin_source.audio_format was already dereferenced here,
    # which would raise AttributeError instead of a clear error).
    if not plugin_source.audio_format:
        raise InvalidDataError(f"No audio format for plugin source {plugin_source_id}")
    output_format = await self.get_output_format(
        output_format_str=request.match_info["fmt"],
        player=player,
        content_sample_rate=plugin_source.audio_format.sample_rate,
        content_bit_depth=plugin_source.audio_format.bit_depth,
    )
    headers = {
        **DEFAULT_STREAM_HEADERS,
        "contentFeatures.dlna.org": "DLNA.ORG_OP=01;DLNA.ORG_FLAGS=01700000000000000000000000000000",  # noqa: E501
        "icy-name": plugin_source.name,
        "Accept-Ranges": "none",
        "Content-Type": f"audio/{output_format.output_format_str}",
    }

    resp = web.StreamResponse(
        status=200,
        reason="OK",
        headers=headers,
    )
    resp.content_type = f"audio/{output_format.output_format_str}"
    http_profile = await self.mass.config.get_player_config_value(
        player_id, CONF_HTTP_PROFILE, default="default", return_type=str
    )
    if http_profile == "forced_content_length":
        # just set an insanely high content length to make sure the player keeps playing
        resp.content_length = get_chunksize(output_format, 12 * 3600)
    elif http_profile == "chunked":
        resp.enable_chunked_encoding()

    await resp.prepare(request)

    # return early if this is not a GET request (e.g. HEAD probe)
    if request.method != "GET":
        return resp

    # all checks passed, start streaming!
    async for chunk in self.get_plugin_source_stream(
        plugin_source_id=plugin_source_id,
        output_format=output_format,
        player_id=player_id,
        player_filter_params=get_player_filter_params(
            self.mass, player_id, plugin_source.audio_format, output_format
        ),
    ):
        try:
            await resp.write(chunk)
        except (BrokenPipeError, ConnectionResetError, ConnectionError):
            # client disconnected - stop streaming
            break
    return resp
def get_command_url(self, player_or_queue_id: str, command: str) -> str:
    """Return the url for the special command stream."""
    return "/".join((self.base_url, "command", player_or_queue_id, f"{command}.mp3"))

def get_announcement_url(
    self,
    player_id: str,
    announce_data: AnnounceData,
    content_type: ContentType = ContentType.MP3,
) -> str:
    """Return the url for the special announcement stream.

    Registers the pending announcement so the stream handler can pick it up.
    """
    self.announcements[player_id] = announce_data
    # use stream server to host announcement on local network
    # this ensures playback on all players, including ones that do not
    # like https hosts and it also offers the pre-announce 'bell'
    return "{}/announcement/{}.{}".format(self.base_url, player_id, content_type.value)
def get_stream(
    self, media: PlayerMedia, pcm_format: AudioFormat
) -> AsyncGenerator[bytes, None]:
    """
    Get a stream of the given media as raw PCM audio.

    This is used as helper for player providers that can consume the raw PCM
    audio stream directly (e.g. AirPlay) and not rely on HTTP transport.

    Dispatches on media.media_type (and source/queue ids) to the matching
    stream helper; the fallback hands media.uri directly to ffmpeg.
    """
    # select audio source
    if media.media_type == MediaType.ANNOUNCEMENT:
        # special case: stream announcement
        assert media.custom_data
        audio_source = self.get_announcement_stream(
            media.custom_data["announcement_url"],
            output_format=pcm_format,
            pre_announce=media.custom_data["pre_announce"],
            pre_announce_url=media.custom_data["pre_announce_url"],
        )
    elif media.media_type == MediaType.PLUGIN_SOURCE:
        # special case: plugin source stream
        assert media.custom_data
        audio_source = self.get_plugin_source_stream(
            plugin_source_id=media.custom_data["source_id"],
            output_format=pcm_format,
            # need to pass player_id from the PlayerMedia object
            # because this could have been a group
            player_id=media.custom_data["player_id"],
        )
    elif (
        media.media_type == MediaType.FLOW_STREAM
        and media.source_id
        and media.source_id.startswith(UGP_PREFIX)
        and media.uri
        and "/ugp/" in media.uri
    ):
        # special case: member player accessing UGP stream
        # Check URI to distinguish from the UGP accessing its own stream
        ugp_player = cast("UniversalGroupPlayer", self.mass.players.get(media.source_id))
        ugp_stream = ugp_player.stream
        assert ugp_stream is not None  # for type checker
        if ugp_stream.base_pcm_format == pcm_format:
            # no conversion needed, tap the raw group stream directly
            audio_source = ugp_stream.subscribe_raw()
        else:
            audio_source = ugp_stream.get_stream(output_format=pcm_format)
    elif media.source_id and media.queue_item_id and media.media_type == MediaType.FLOW_STREAM:
        # regular queue (flow) stream request
        queue = self.mass.player_queues.get(media.source_id)
        assert queue
        start_queue_item = self.mass.player_queues.get_item(
            media.source_id, media.queue_item_id
        )
        assert start_queue_item
        audio_source = self.mass.streams.get_queue_flow_stream(
            queue=queue,
            start_queue_item=start_queue_item,
            pcm_format=pcm_format,
        )
    elif media.source_id and media.queue_item_id:
        # single item stream (e.g. radio)
        queue_item = self.mass.player_queues.get_item(media.source_id, media.queue_item_id)
        assert queue_item
        # wrap in a buffered reader to smooth out source hiccups
        audio_source = buffered(
            self.get_queue_item_stream(
                queue_item=queue_item,
                pcm_format=pcm_format,
            ),
            buffer_size=10,
            min_buffer_before_yield=2,
        )
    else:
        # assume url or some other direct path
        # NOTE: this will fail if its an uri not playable by ffmpeg
        audio_source = get_ffmpeg_stream(
            audio_input=media.uri,
            input_format=AudioFormat(content_type=ContentType.try_parse(media.uri)),
            output_format=pcm_format,
        )
    return audio_source
@use_buffer(buffer_size=30, min_buffer_before_yield=2)
async def get_queue_flow_stream(
    self,
    queue: PlayerQueue,
    start_queue_item: QueueItem,
    pcm_format: AudioFormat,
) -> AsyncGenerator[bytes, None]:
    """
    Get a flow stream of all tracks in the queue as raw PCM audio.

    yields chunks of exactly 1 second of audio in the given pcm_format.

    Crossfading between tracks is handled inline: the tail of each track is
    held back (last_fadeout_part) and mixed with the head of the next track
    by the smart fades mixer, according to the player's smart fades mode.
    """
    # ruff: noqa: PLR0915
    assert pcm_format.content_type.is_pcm()
    queue_track = None
    last_fadeout_part: bytes = b""
    last_streamdetails: StreamDetails | None = None
    last_play_log_entry: PlayLogEntry | None = None
    queue.flow_mode = True
    if not start_queue_item:
        # this can happen in some (edge case) race conditions
        return
    pcm_sample_size = pcm_format.pcm_sample_size
    if start_queue_item.media_type != MediaType.TRACK:
        # no crossfade on non-tracks
        smart_fades_mode = SmartFadesMode.DISABLED
        standard_crossfade_duration = 0
    else:
        smart_fades_mode = await self.mass.config.get_player_config_value(
            queue.queue_id, CONF_SMART_FADES_MODE, return_type=SmartFadesMode
        )
        standard_crossfade_duration = self.mass.config.get_raw_player_config_value(
            queue.queue_id, CONF_CROSSFADE_DURATION, 10
        )
    self.logger.info(
        "Start Queue Flow stream for Queue %s - crossfade: %s %s",
        queue.display_name,
        smart_fades_mode,
        f"({standard_crossfade_duration}s)"
        if smart_fades_mode == SmartFadesMode.STANDARD_CROSSFADE
        else "",
    )
    total_bytes_sent = 0
    total_chunks_received = 0

    while True:
        # get (next) queue item to stream
        if queue_track is None:
            queue_track = start_queue_item
        else:
            try:
                queue_track = await self.mass.player_queues.load_next_queue_item(
                    queue.queue_id, queue_track.queue_item_id
                )
            except QueueEmpty:
                break

        if queue_track.streamdetails is None:
            # NOTE(review): InvalidDataError is given %-style args here but
            # exceptions do not interpolate them - the id will not appear in
            # the message. Consider an f-string; confirm against the
            # InvalidDataError signature before changing.
            raise InvalidDataError(
                "No Streamdetails known for queue item %s",
                queue_track.queue_item_id,
            )

        self.logger.debug(
            "Start Streaming queue track: %s (%s) for queue %s",
            queue_track.streamdetails.uri,
            queue_track.name,
            queue.display_name,
        )
        # append to play log so the queue controller can work out which track is playing
        play_log_entry = PlayLogEntry(queue_track.queue_item_id)
        queue.flow_mode_stream_log.append(play_log_entry)
        # calculate crossfade buffer size (seconds of audio to hold back),
        # capped at half the track duration when the duration is known
        crossfade_buffer_duration = (
            SMART_CROSSFADE_DURATION
            if smart_fades_mode == SmartFadesMode.SMART_CROSSFADE
            else standard_crossfade_duration
        )
        crossfade_buffer_duration = min(
            crossfade_buffer_duration,
            int(queue_track.streamdetails.duration / 2)
            if queue_track.streamdetails.duration
            else crossfade_buffer_duration,
        )
        # Ensure crossfade buffer size is aligned to frame boundaries
        # Frame size = bytes_per_sample * channels
        bytes_per_sample = pcm_format.bit_depth // 8
        frame_size = bytes_per_sample * pcm_format.channels
        crossfade_buffer_size = int(pcm_format.pcm_sample_size * crossfade_buffer_duration)
        # Round down to nearest frame boundary
        crossfade_buffer_size = (crossfade_buffer_size // frame_size) * frame_size

        bytes_written = 0
        buffer = b""
        # handle incoming audio chunks
        first_chunk_received = False
        # buffer size needs to be big enough to include the crossfade part

        async for chunk in self.get_queue_item_stream(
            queue_track,
            pcm_format=pcm_format,
            seek_position=queue_track.streamdetails.seek_position,
            raise_on_error=False,
        ):
            total_chunks_received += 1
            if not first_chunk_received:
                first_chunk_received = True
                # inform the queue that the track is now loaded in the buffer
                # so the next track can be preloaded
                self.mass.player_queues.track_loaded_in_buffer(
                    queue.queue_id, queue_track.queue_item_id
                )
            if total_chunks_received < 10 and smart_fades_mode != SmartFadesMode.DISABLED:
                # we want a stream to start as quickly as possible
                # so for the first 10 chunks we keep a very short buffer
                req_buffer_size = pcm_format.pcm_sample_size
            else:
                req_buffer_size = (
                    pcm_sample_size
                    if smart_fades_mode == SmartFadesMode.DISABLED
                    else crossfade_buffer_size
                )

            # ALWAYS APPEND CHUNK TO BUFFER
            buffer += chunk
            del chunk
            if len(buffer) < req_buffer_size:
                # buffer is not full enough, move on
                # yield control to event loop with 10ms delay
                await asyncio.sleep(0.01)
                continue

            #### HANDLE CROSSFADE OF PREVIOUS TRACK AND NEW TRACK
            if last_fadeout_part and last_streamdetails:
                # perform crossfade
                fadein_part = buffer[:crossfade_buffer_size]
                remaining_bytes = buffer[crossfade_buffer_size:]
                # Use the mixer to handle all crossfade logic
                crossfade_part = await self._smart_fades_mixer.mix(
                    fade_in_part=fadein_part,
                    fade_out_part=last_fadeout_part,
                    fade_in_streamdetails=queue_track.streamdetails,
                    fade_out_streamdetails=last_streamdetails,
                    pcm_format=pcm_format,
                    standard_crossfade_duration=standard_crossfade_duration,
                    mode=smart_fades_mode,
                )
                # because the crossfade exists of both the fadein and fadeout part
                # we need to correct the bytes_written accordingly so the duration
                # calculations at the end of the track are correct
                crossfade_part_len = len(crossfade_part)
                bytes_written += int(crossfade_part_len / 2)
                if last_play_log_entry:
                    assert last_play_log_entry.seconds_streamed is not None
                    last_play_log_entry.seconds_streamed += (
                        crossfade_part_len / 2 / pcm_sample_size
                    )
                # yield crossfade_part (in pcm_sample_size chunks)
                for _chunk in divide_chunks(crossfade_part, pcm_sample_size):
                    yield _chunk
                    del _chunk
                del crossfade_part
                # also write the leftover bytes from the crossfade action
                if remaining_bytes:
                    yield remaining_bytes
                    bytes_written += len(remaining_bytes)
                    del remaining_bytes
                # clear vars
                last_fadeout_part = b""
                last_streamdetails = None
                buffer = b""

            #### OTHER: enough data in buffer, feed to output
            while len(buffer) > req_buffer_size:
                yield buffer[:pcm_sample_size]
                bytes_written += pcm_sample_size
                buffer = buffer[pcm_sample_size:]

        #### HANDLE END OF TRACK
        if last_fadeout_part:
            # edge case: we did not get enough data to make the crossfade
            for _chunk in divide_chunks(last_fadeout_part, pcm_sample_size):
                yield _chunk
                del _chunk
            bytes_written += len(last_fadeout_part)
            last_fadeout_part = b""
        if self._crossfade_allowed(
            queue_track, smart_fades_mode=smart_fades_mode, flow_mode=True
        ):
            # if crossfade is enabled, save fadeout part to pickup for next track
            last_fadeout_part = buffer[-crossfade_buffer_size:]
            last_streamdetails = queue_track.streamdetails
            last_play_log_entry = play_log_entry
            remaining_bytes = buffer[:-crossfade_buffer_size]
            if remaining_bytes:
                yield remaining_bytes
                bytes_written += len(remaining_bytes)
                del remaining_bytes
        elif buffer:
            # no crossfade enabled, just yield the buffer last part
            bytes_written += len(buffer)
            for _chunk in divide_chunks(buffer, pcm_sample_size):
                yield _chunk
                del _chunk
        # make sure the buffer gets cleaned up
        del buffer

        # update duration details based on the actual pcm data we sent
        # this also accounts for crossfade and silence stripping
        seconds_streamed = bytes_written / pcm_sample_size
        queue_track.streamdetails.seconds_streamed = seconds_streamed
        queue_track.streamdetails.duration = int(
            queue_track.streamdetails.seek_position + seconds_streamed
        )
        play_log_entry.seconds_streamed = seconds_streamed
        play_log_entry.duration = queue_track.streamdetails.duration
        total_bytes_sent += bytes_written
        self.logger.debug(
            "Finished Streaming queue track: %s (%s) on queue %s",
            queue_track.streamdetails.uri,
            queue_track.name,
            queue.display_name,
        )
    #### HANDLE END OF QUEUE FLOW STREAM
    # end of queue flow: make sure we yield the last_fadeout_part
    if last_fadeout_part:
        for _chunk in divide_chunks(last_fadeout_part, pcm_sample_size):
            yield _chunk
            del _chunk
        # correct seconds streamed/duration
        last_part_seconds = len(last_fadeout_part) / pcm_sample_size
        streamdetails = queue_track.streamdetails
        assert streamdetails is not None
        streamdetails.seconds_streamed = (
            streamdetails.seconds_streamed or 0
        ) + last_part_seconds
        streamdetails.duration = int((streamdetails.duration or 0) + last_part_seconds)
        last_fadeout_part = b""
    # NOTE(review): bytes_written was already added to total_bytes_sent at the
    # end of each loop iteration, so this line appears to double-count the last
    # track - verify intent (total_bytes_sent is not read in the visible code).
    total_bytes_sent += bytes_written
    self.logger.info("Finished Queue Flow stream for Queue %s", queue.display_name)
async def get_announcement_stream(
    self,
    announcement_url: str,
    output_format: AudioFormat,
    pre_announce: bool | str = False,
    pre_announce_url: str = ANNOUNCE_ALERT_FILE,
) -> AsyncGenerator[bytes, None]:
    """Get the special announcement stream.

    Optionally prepends a pre-announce 'bell' clip before the actual
    announcement; both are loudness-normalized and transcoded to the
    requested output format.
    """
    # apply the same loudness normalization to bell and announcement
    loudnorm_filters = ["loudnorm=I=-10:LRA=11:TP=-2"]

    if pre_announce:
        # Note: TTS URLs might take a while to load cause the actual data are often generated
        # asynchronously by the TTS provider. If we ask ffmpeg to mix the pre-announce, it will
        # wait until it reads the TTS data, so the whole stream will be delayed. It is much
        # faster to first play the pre-announce using a separate ffmpeg stream, and only
        # afterwards play the TTS itself.
        #
        # For this to be effective the player itself needs to be able to start playback fast.
        # Finally, if the output_format is non-PCM, raw concatenation can be problematic.
        # So far players seem to tolerate this, but it might break some player in the future.
        pre_announce_format = AudioFormat(
            content_type=ContentType.try_parse(pre_announce_url)
        )
        pre_announce_stream = get_ffmpeg_stream(
            audio_input=pre_announce_url,
            input_format=pre_announce_format,
            output_format=output_format,
            filter_params=loudnorm_filters,
            chunk_size=get_chunksize(output_format, 1),
        )
        async for piece in pre_announce_stream:
            yield piece

    # work out output format/details from the announcement's file extension
    extension = announcement_url.rsplit(".")[-1]
    announcement_format = AudioFormat(content_type=ContentType.try_parse(extension))
    announcement_stream = get_ffmpeg_stream(
        audio_input=announcement_url,
        input_format=announcement_format,
        output_format=output_format,
        filter_params=loudnorm_filters,
        chunk_size=get_chunksize(output_format, 1),
    )
    async for piece in announcement_stream:
        yield piece
async def get_plugin_source_stream(
    self,
    plugin_source_id: str,
    output_format: AudioFormat,
    player_id: str,
    player_filter_params: list[str] | None = None,
) -> AsyncGenerator[bytes, None]:
    """Get the special plugin source stream.

    Claims the plugin source for the given player (in_use_by) for the duration
    of the stream and releases it again afterwards, unless another player has
    taken over in the meantime.

    Raises ProviderUnavailableError when the plugin source is unknown.
    """
    plugin_prov = cast("PluginProvider", self.mass.get_provider(plugin_source_id))
    if not plugin_prov:
        raise ProviderUnavailableError(f"Unknown PluginSource: {plugin_source_id}")

    plugin_source = plugin_prov.get_source()
    self.logger.debug(
        "Start streaming PluginSource %s to %s using output format %s",
        plugin_source_id,
        player_id,
        output_format,
    )
    # this should already be set by the player controller, but just to be sure
    plugin_source.in_use_by = player_id

    try:
        async for chunk in get_ffmpeg_stream(
            # CUSTOM stream types pull audio from the provider directly,
            # otherwise ffmpeg reads from the source's path/url
            audio_input=cast(
                "str | AsyncGenerator[bytes, None]",
                plugin_prov.get_audio_stream(player_id)
                if plugin_source.stream_type == StreamType.CUSTOM
                else plugin_source.path,
            ),
            input_format=plugin_source.audio_format,
            output_format=output_format,
            filter_params=player_filter_params,
            extra_input_args=["-y", "-re"],
        ):
            if plugin_source.in_use_by != player_id:
                # another player took over or the stream ended, stop streaming
                break
            yield chunk
    finally:
        self.logger.debug(
            "Finished streaming PluginSource %s to %s", plugin_source_id, player_id
        )
        await asyncio.sleep(1)  # prevent race conditions when selecting source
        if plugin_source.in_use_by == player_id:
            # release control (only if no other player claimed it meanwhile)
            plugin_source.in_use_by = None
async def get_queue_item_stream(
    self,
    queue_item: QueueItem,
    pcm_format: AudioFormat,
    seek_position: int = 0,
    raise_on_error: bool = True,
) -> AsyncGenerator[bytes, None]:
    """Get the (PCM) audio stream for a single queue item.

    Applies the configured volume normalization mode as an ffmpeg filter,
    optionally applies a fade-in, and reports playback to the originating
    music provider when the stream finished (or played at least 90 seconds).

    When raise_on_error is False, AudioErrors are logged and swallowed so the
    enclosing (flow) stream can continue with the next track.
    """
    # collect all arguments for ffmpeg
    streamdetails = queue_item.streamdetails
    assert streamdetails
    filter_params: list[str] = []

    # handle volume normalization
    gain_correct: float | None = None
    if streamdetails.volume_normalization_mode == VolumeNormalizationMode.DYNAMIC:
        # volume normalization using loudnorm filter (in dynamic mode)
        # which also collects the measurement on the fly during playback
        # more info: https://k.ylo.ph/2016/04/04/loudnorm.html
        filter_rule = f"loudnorm=I={streamdetails.target_loudness}:TP=-2.0:LRA=10.0:offset=0.0"
        filter_rule += ":print_format=json"
        filter_params.append(filter_rule)
    elif streamdetails.volume_normalization_mode == VolumeNormalizationMode.FIXED_GAIN:
        # apply user defined fixed volume/gain correction
        config_key = (
            CONF_VOLUME_NORMALIZATION_FIXED_GAIN_TRACKS
            if streamdetails.media_type == MediaType.TRACK
            else CONF_VOLUME_NORMALIZATION_FIXED_GAIN_RADIO
        )
        gain_value = await self.mass.config.get_core_config_value(
            self.domain, config_key, default=0.0, return_type=float
        )
        gain_correct = round(gain_value, 2)
        filter_params.append(f"volume={gain_correct}dB")
    elif streamdetails.volume_normalization_mode == VolumeNormalizationMode.MEASUREMENT_ONLY:
        # volume normalization with known loudness measurement
        # apply volume/gain correction (prefer album loudness when configured)
        target_loudness = (
            float(streamdetails.target_loudness)
            if streamdetails.target_loudness is not None
            else 0.0
        )
        if streamdetails.prefer_album_loudness and streamdetails.loudness_album is not None:
            gain_correct = target_loudness - float(streamdetails.loudness_album)
        elif streamdetails.loudness is not None:
            gain_correct = target_loudness - float(streamdetails.loudness)
        else:
            gain_correct = 0.0
        gain_correct = round(gain_correct, 2)
        filter_params.append(f"volume={gain_correct}dB")
    streamdetails.volume_normalization_gain_correct = gain_correct

    # buffered (pre-downloaded) streaming is only possible when the duration is known
    allow_buffer = bool(
        self.mass.config.get_raw_core_config_value(
            self.domain, CONF_ALLOW_BUFFER, CONF_ALLOW_BUFFER_DEFAULT
        )
        and streamdetails.duration
    )

    self.logger.debug(
        "Starting queue item stream for %s (%s)"
        " - using buffer: %s"
        " - using fade-in: %s"
        " - using volume normalization: %s",
        queue_item.name,
        streamdetails.uri,
        allow_buffer,
        streamdetails.fade_in,
        streamdetails.volume_normalization_mode,
    )
    if allow_buffer:
        media_stream_gen = get_buffered_media_stream(
            self.mass,
            streamdetails=streamdetails,
            pcm_format=pcm_format,
            seek_position=int(seek_position),
            filter_params=filter_params,
        )
    else:
        media_stream_gen = get_media_stream(
            self.mass,
            streamdetails=streamdetails,
            pcm_format=pcm_format,
            seek_position=int(seek_position),
            filter_params=filter_params,
        )

    first_chunk_received = False
    fade_in_buffer = b""
    bytes_received = 0
    finished = False
    stream_started_at = asyncio.get_event_loop().time()
    try:
        async for chunk in media_stream_gen:
            bytes_received += len(chunk)
            if not first_chunk_received:
                first_chunk_received = True
                self.logger.debug(
                    "First audio chunk received for %s (%s) after %.2f seconds",
                    queue_item.name,
                    streamdetails.uri,
                    asyncio.get_event_loop().time() - stream_started_at,
                )
            # handle optional fade-in: collect ~4 seconds of audio first,
            # then run it through an afade filter before yielding
            # NOTE(review): if the source ends before the fade-in buffer fills,
            # the buffered audio is never yielded - confirm this short-track
            # edge case is acceptable.
            if streamdetails.fade_in:
                if len(fade_in_buffer) < pcm_format.pcm_sample_size * 4:
                    fade_in_buffer += chunk
                elif fade_in_buffer:
                    async for fade_chunk in get_ffmpeg_stream(
                        # NOTE: get_ffmpeg_stream signature says str | AsyncGenerator
                        # but FFMpeg class actually accepts bytes too. This works at
                        # runtime but needs type: ignore for mypy.
                        audio_input=fade_in_buffer + chunk,  # type: ignore[arg-type]
                        input_format=pcm_format,
                        output_format=pcm_format,
                        filter_params=["afade=type=in:start_time=0:duration=3"],
                    ):
                        yield fade_chunk
                    fade_in_buffer = b""
                    streamdetails.fade_in = False
            else:
                yield chunk
            # help garbage collection by explicitly deleting chunk
            del chunk
        finished = True
    except AudioError as err:
        streamdetails.stream_error = True
        queue_item.available = False
        if raise_on_error:
            raise
        # yes, we swallow the error here after logging it
        # so the outer stream can handle it gracefully
        self.logger.error(
            "AudioError while streaming queue item %s (%s): %s",
            queue_item.name,
            streamdetails.uri,
            err,
        )
    finally:
        # determine how many seconds we've streamed
        # for pcm output we can calculate this easily
        seconds_streamed = bytes_received / pcm_format.pcm_sample_size
        streamdetails.seconds_streamed = seconds_streamed
        self.logger.debug(
            "stream %s for %s in %.2f seconds - seconds streamed/buffered: %.2f",
            "aborted" if not finished else "finished",
            streamdetails.uri,
            asyncio.get_event_loop().time() - stream_started_at,
            seconds_streamed,
        )
        # report stream to provider (counts as a play when finished or >= 90s)
        if (finished or seconds_streamed >= 90) and (
            music_prov := self.mass.get_provider(streamdetails.provider)
        ):
            if TYPE_CHECKING:  # avoid circular import
                assert isinstance(music_prov, MusicProvider)
            self.mass.create_task(music_prov.on_streamed(streamdetails))
    @use_buffer(buffer_size=30, min_buffer_before_yield=2)
    async def get_queue_item_stream_with_smartfade(
        self,
        queue_item: QueueItem,
        pcm_format: AudioFormat,
        smart_fades_mode: SmartFadesMode = SmartFadesMode.SMART_CROSSFADE,
        standard_crossfade_duration: int = 10,
    ) -> AsyncGenerator[bytes, None]:
        """Get the audio stream for a single queue item with (smart) crossfade to the next item.

        Yields raw PCM chunks in ``pcm_format``. The crossfade is produced in two halves:
        the first half is yielded at the end of THIS track's stream, the second half is
        stored in ``self._crossfade_data`` (keyed by queue id) and consumed at the start
        of the NEXT track's generator invocation.

        Args:
            queue_item: the queue item to stream.
            pcm_format: the PCM output format for this track.
            smart_fades_mode: crossfade strategy (smart crossfade, standard, or disabled).
            standard_crossfade_duration: crossfade length (seconds) for non-smart mode.

        Raises:
            RuntimeError: if the owning queue can no longer be resolved.
        """
        queue = self.mass.player_queues.get(queue_item.queue_id)
        if not queue:
            raise RuntimeError(f"Queue {queue_item.queue_id} not found")

        streamdetails = queue_item.streamdetails
        assert streamdetails
        # pick up crossfade data prepared by the PREVIOUS track's generator (if any)
        crossfade_data = self._crossfade_data.pop(queue.queue_id, None)

        if crossfade_data and streamdetails.seek_position > 0:
            # don't do crossfade when seeking into track
            crossfade_data = None
        if crossfade_data and (crossfade_data.queue_item_id != queue_item.queue_item_id):
            # edge case alert: the next item changed just while we were preloading/crossfading
            self.logger.warning(
                "Skipping crossfade data for queue %s - next item changed!", queue.display_name
            )
            crossfade_data = None

        self.logger.debug(
            "Start Streaming queue track: %s (%s) for queue %s "
            "- crossfade mode: %s "
            "- crossfading from previous track: %s ",
            queue_item.streamdetails.uri if queue_item.streamdetails else "Unknown URI",
            queue_item.name,
            queue.display_name,
            smart_fades_mode,
            "true" if crossfade_data else "false",
        )

        buffer = b""
        bytes_written = 0
        # calculate crossfade buffer size (seconds of tail audio we must hold back)
        crossfade_buffer_duration = (
            SMART_CROSSFADE_DURATION
            if smart_fades_mode == SmartFadesMode.SMART_CROSSFADE
            else standard_crossfade_duration
        )
        # never buffer more than half of the track itself
        crossfade_buffer_duration = min(
            crossfade_buffer_duration,
            int(streamdetails.duration / 2)
            if streamdetails.duration
            else crossfade_buffer_duration,
        )
        # Ensure crossfade buffer size is aligned to frame boundaries
        # Frame size = bytes_per_sample * channels
        bytes_per_sample = pcm_format.bit_depth // 8
        frame_size = bytes_per_sample * pcm_format.channels
        crossfade_buffer_size = int(pcm_format.pcm_sample_size * crossfade_buffer_duration)
        # Round down to nearest frame boundary
        crossfade_buffer_size = (crossfade_buffer_size // frame_size) * frame_size
        fade_out_data: bytes | None = None

        if crossfade_data:
            # The start of this track was already consumed (and mixed) by the previous
            # track's crossfade, so skip over it in the fresh stream.
            # Calculate discard amount in seconds (format-independent)
            # Use fade_in_pcm_format because fade_in_size is in the next track's original format
            fade_in_duration_seconds = (
                crossfade_data.fade_in_size / crossfade_data.fade_in_pcm_format.pcm_sample_size
            )
            discard_seconds = int(fade_in_duration_seconds) - 1
            # Calculate discard amounts in CURRENT track's format
            discard_bytes = int(discard_seconds * pcm_format.pcm_sample_size)
            # Convert fade_in_size to current track's format for correct leftover calculation
            fade_in_size_in_current_format = int(
                fade_in_duration_seconds * pcm_format.pcm_sample_size
            )
            discard_leftover = fade_in_size_in_current_format - discard_bytes
        else:
            discard_seconds = streamdetails.seek_position
            discard_leftover = 0
        total_chunks_received = 0
        req_buffer_size = crossfade_buffer_size
        async for chunk in self.get_queue_item_stream(
            queue_item, pcm_format, seek_position=discard_seconds
        ):
            total_chunks_received += 1
            if discard_leftover:
                # discard leftover bytes from crossfade data
                chunk = chunk[discard_leftover:]  # noqa: PLW2901
                discard_leftover = 0

            if total_chunks_received < 10:
                # we want a stream to start as quickly as possible
                # so for the first 10 chunks we keep a very short buffer
                req_buffer_size = pcm_format.pcm_sample_size
            else:
                req_buffer_size = crossfade_buffer_size

            # ALWAYS APPEND CHUNK TO BUFFER
            buffer += chunk
            del chunk
            if len(buffer) < req_buffer_size:
                # buffer is not full enough, move on
                continue

            #### HANDLE CROSSFADE DATA FROM PREVIOUS TRACK
            if crossfade_data:
                # send the (second half of the) crossfade data
                if crossfade_data.pcm_format != pcm_format:
                    # edge case: pcm format mismatch, we need to resample
                    self.logger.debug(
                        "Resampling crossfade data from %s to %s for queue %s",
                        crossfade_data.pcm_format.sample_rate,
                        pcm_format.sample_rate,
                        queue.display_name,
                    )
                    resampled_data = await resample_pcm_audio(
                        crossfade_data.data,
                        crossfade_data.pcm_format,
                        pcm_format,
                    )
                    if resampled_data:
                        for _chunk in divide_chunks(resampled_data, pcm_format.pcm_sample_size):
                            yield _chunk
                        bytes_written += len(resampled_data)
                    else:
                        # Resampling failed, error already logged in resample_pcm_audio
                        # Skip crossfade data entirely - stream continues without it
                        self.logger.warning(
                            "Skipping crossfade data for queue %s due to resampling failure",
                            queue.display_name,
                        )
                else:
                    for _chunk in divide_chunks(crossfade_data.data, pcm_format.pcm_sample_size):
                        yield _chunk
                    bytes_written += len(crossfade_data.data)
                # clear vars
                crossfade_data = None

            #### OTHER: enough data in buffer, feed to output
            while len(buffer) > req_buffer_size:
                yield buffer[: pcm_format.pcm_sample_size]
                bytes_written += pcm_format.pcm_sample_size
                buffer = buffer[pcm_format.pcm_sample_size :]

        #### HANDLE END OF TRACK

        if crossfade_data:
            # edge case: we did not get enough data to send the crossfade data
            # send the (second half of the) crossfade data
            if crossfade_data.pcm_format != pcm_format:
                # (yet another) edge case: pcm format mismatch, we need to resample
                self.logger.debug(
                    "Resampling remaining crossfade data from %s to %s for queue %s",
                    crossfade_data.pcm_format.sample_rate,
                    pcm_format.sample_rate,
                    queue.display_name,
                )
                resampled_crossfade_data = await resample_pcm_audio(
                    crossfade_data.data,
                    crossfade_data.pcm_format,
                    pcm_format,
                )
                if resampled_crossfade_data:
                    crossfade_data.data = resampled_crossfade_data
                else:
                    # Resampling failed, error already logged in resample_pcm_audio
                    # Skip the crossfade data entirely
                    self.logger.warning(
                        "Skipping remaining crossfade data for queue %s due to resampling failure",
                        queue.display_name,
                    )
                    crossfade_data = None
            if crossfade_data:
                for _chunk in divide_chunks(crossfade_data.data, pcm_format.pcm_sample_size):
                    yield _chunk
                bytes_written += len(crossfade_data.data)
                crossfade_data = None
        next_queue_item: QueueItem | None = None
        if not self._crossfade_allowed(
            queue_item, smart_fades_mode=smart_fades_mode, flow_mode=False
        ):
            # no crossfade enabled/allowed, just yield the buffer last part
            bytes_written += len(buffer)
            for _chunk in divide_chunks(buffer, pcm_format.pcm_sample_size):
                yield _chunk
        else:
            # if crossfade is enabled, save fadeout part in buffer to pickup for next track
            fade_out_data = buffer
            buffer = b""
            # get next track for crossfade
            self.logger.debug(
                "Preloading NEXT track for crossfade for queue %s",
                queue.display_name,
            )
            try:
                next_queue_item = await self.mass.player_queues.load_next_queue_item(
                    queue.queue_id, queue_item.queue_item_id
                )
                # set index_in_buffer to prevent our next track is overwritten while preloading
                if next_queue_item.streamdetails is None:
                    raise InvalidDataError(
                        f"No streamdetails for next queue item {next_queue_item.queue_item_id}"
                    )
                queue.index_in_buffer = self.mass.player_queues.index_by_id(
                    queue.queue_id, next_queue_item.queue_item_id
                )
                next_queue_item_pcm_format = AudioFormat(
                    content_type=INTERNAL_PCM_FORMAT.content_type,
                    bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
                    sample_rate=next_queue_item.streamdetails.audio_format.sample_rate,
                    channels=next_queue_item.streamdetails.audio_format.channels,
                )
                async for chunk in self.get_queue_item_stream(
                    next_queue_item, next_queue_item_pcm_format
                ):
                    # append to buffer until we reach crossfade size
                    # we only need the first X seconds of the NEXT track so we can
                    # perform the crossfade.
                    # the crossfaded audio of the previous and next track will be
                    # sent in two equal parts: first half now, second half
                    # when the next track starts. We use CrossfadeData to store
                    # the second half to be picked up by the next track's stream generator.
                    # Note that we more or less expect the user to have enabled the in-memory
                    # buffer so we can keep the next track's audio data in memory.
                    buffer += chunk
                    del chunk
                    if len(buffer) >= crossfade_buffer_size:
                        break
                #### HANDLE CROSSFADE OF PREVIOUS TRACK AND NEW TRACK
                # Store original buffer size before any resampling for fade_in_size calculation
                # This size is in the next track's original format which is what we need
                original_buffer_size = len(buffer)
                if next_queue_item_pcm_format != pcm_format:
                    # edge case: pcm format mismatch, we need to resample the next track's
                    # beginning part before crossfading
                    self.logger.debug(
                        "Resampling next track's crossfade from %s to %s for queue %s",
                        next_queue_item_pcm_format.sample_rate,
                        pcm_format.sample_rate,
                        queue.display_name,
                    )
                    resampled_buffer = await resample_pcm_audio(
                        buffer,
                        next_queue_item_pcm_format,
                        pcm_format,
                    )
                    if resampled_buffer:
                        buffer = resampled_buffer
                    else:
                        # Resampling failed, error already logged in resample_pcm_audio
                        # Cannot crossfade safely - yield fade_out_data and raise error
                        self.logger.error(
                            "Failed to resample next track for crossfade in queue %s - "
                            "skipping crossfade",
                            queue.display_name,
                        )
                        yield fade_out_data
                        bytes_written += len(fade_out_data)
                        raise AudioError("Failed to resample next track for crossfade")
                try:
                    crossfade_bytes = await self._smart_fades_mixer.mix(
                        fade_in_part=buffer,
                        fade_out_part=fade_out_data,
                        fade_in_streamdetails=next_queue_item.streamdetails,
                        fade_out_streamdetails=streamdetails,
                        pcm_format=pcm_format,
                        standard_crossfade_duration=standard_crossfade_duration,
                        mode=smart_fades_mode,
                    )
                    # send half of the crossfade_part (= approx the fadeout part)
                    split_point = (len(crossfade_bytes) + 1) // 2
                    crossfade_first = crossfade_bytes[:split_point]
                    crossfade_second = crossfade_bytes[split_point:]
                    del crossfade_bytes
                    bytes_written += len(crossfade_first)
                    for _chunk in divide_chunks(crossfade_first, pcm_format.pcm_sample_size):
                        yield _chunk
                    # store the other half for the next track
                    # IMPORTANT: crossfade_second data is in CURRENT track's format (pcm_format)
                    # because it was created from the resampled buffer used for mixing.
                    # BUT fade_in_size represents bytes in NEXT track's original format
                    # (next_queue_item_pcm_format) because that's how much of the next track
                    # was consumed during the crossfade. We need both formats to correctly
                    # handle the crossfade data when the next track starts.
                    self._crossfade_data[queue_item.queue_id] = CrossfadeData(
                        data=crossfade_second,
                        fade_in_size=original_buffer_size,
                        pcm_format=pcm_format,  # Format of the data (current track)
                        fade_in_pcm_format=next_queue_item_pcm_format,  # Format for fade_in_size
                        queue_item_id=next_queue_item.queue_item_id,
                    )
                except Exception as err:
                    self.logger.error(
                        "Failed to create crossfade for queue %s: %s - "
                        "falling back to no crossfade",
                        queue.display_name,
                        err,
                    )
                    # Fallback: just yield the fade_out_data without crossfade
                    yield fade_out_data
                    bytes_written += len(fade_out_data)
                    next_queue_item = None
            except (QueueEmpty, AudioError):
                # end of queue reached, next item skipped or crossfade failed
                # no crossfade possible, just yield the fade_out_data
                next_queue_item = None
                yield fade_out_data
                bytes_written += len(fade_out_data)
            del fade_out_data
        # make sure the buffer gets cleaned up
        del buffer
        # update duration details based on the actual pcm data we sent
        # this also accounts for crossfade and silence stripping
        seconds_streamed = bytes_written / pcm_format.pcm_sample_size
        streamdetails.seconds_streamed = seconds_streamed
        streamdetails.duration = int(streamdetails.seek_position + seconds_streamed)
        self.logger.debug(
            "Finished Streaming queue track: %s (%s) on queue %s "
            "- crossfade data prepared for next track: %s",
            streamdetails.uri,
            queue_item.name,
            queue.display_name,
            next_queue_item.name if next_queue_item else "N/A",
        )
self.logger.isEnabledFor(VERBOSE_LOG_LEVEL): + self.logger.log( + VERBOSE_LOG_LEVEL, + "Got %s request to %s from %s\nheaders: %s\n", + request.method, + request.path, + request.remote, + request.headers, + ) + else: + self.logger.debug( + "Got %s request to %s from %s", + request.method, + request.path, + request.remote, + ) + + async def get_output_format( + self, + output_format_str: str, + player: Player, + content_sample_rate: int, + content_bit_depth: int, + ) -> AudioFormat: + """Parse (player specific) output format details for given format string.""" + content_type: ContentType = ContentType.try_parse(output_format_str) + supported_rates_conf = cast( + "list[tuple[str, str]]", + await self.mass.config.get_player_config_value( + player.player_id, CONF_SAMPLE_RATES, unpack_splitted_values=True + ), + ) + output_channels_str = self.mass.config.get_raw_player_config_value( + player.player_id, CONF_OUTPUT_CHANNELS, "stereo" + ) + supported_sample_rates = tuple(int(x[0]) for x in supported_rates_conf) + supported_bit_depths = tuple(int(x[1]) for x in supported_rates_conf) + + player_max_bit_depth = max(supported_bit_depths) + output_bit_depth = min(content_bit_depth, player_max_bit_depth) + if content_sample_rate in supported_sample_rates: + output_sample_rate = content_sample_rate + else: + output_sample_rate = max(supported_sample_rates) + + if not content_type.is_lossless(): + # no point in having a higher bit depth for lossy formats + output_bit_depth = 16 + output_sample_rate = min(48000, output_sample_rate) + if content_type == ContentType.WAV and output_bit_depth > 16: + # WAV 24bit is not widely supported, fallback to 16bit + output_bit_depth = 16 + if output_format_str == "pcm": + content_type = ContentType.from_bit_depth(output_bit_depth) + return AudioFormat( + content_type=content_type, + sample_rate=output_sample_rate, + bit_depth=output_bit_depth, + channels=1 if output_channels_str != "stereo" else 2, + ) + + async def _select_flow_format( + self, 
+ player: Player, + ) -> AudioFormat: + """Parse (player specific) flow stream PCM format.""" + supported_rates_conf = cast( + "list[tuple[str, str]]", + await self.mass.config.get_player_config_value( + player.player_id, CONF_SAMPLE_RATES, unpack_splitted_values=True + ), + ) + supported_sample_rates = tuple(int(x[0]) for x in supported_rates_conf) + output_sample_rate = INTERNAL_PCM_FORMAT.sample_rate + for sample_rate in (192000, 96000, 48000, 44100): + if sample_rate in supported_sample_rates: + output_sample_rate = sample_rate + break + return AudioFormat( + content_type=INTERNAL_PCM_FORMAT.content_type, + sample_rate=output_sample_rate, + bit_depth=INTERNAL_PCM_FORMAT.bit_depth, + channels=2, + ) + + def _crossfade_allowed( + self, queue_item: QueueItem, smart_fades_mode: SmartFadesMode, flow_mode: bool = False + ) -> bool: + """Get the crossfade config for a queue item.""" + if smart_fades_mode == SmartFadesMode.DISABLED: + return False + if not (queue_player := self.mass.players.get(queue_item.queue_id)): + return False # just a guard + if queue_item.media_type != MediaType.TRACK: + self.logger.debug("Skipping crossfade: current item is not a track") + return False + # check if the next item is part of the same album + next_item = self.mass.player_queues.get_next_item( + queue_item.queue_id, queue_item.queue_item_id + ) + if not next_item: + # there is no next item! 
+ return False + # check if next item is a track + if next_item.media_type != MediaType.TRACK: + self.logger.debug("Skipping crossfade: next item is not a track") + return False + if ( + isinstance(queue_item.media_item, Track) + and isinstance(next_item.media_item, Track) + and queue_item.media_item.album + and next_item.media_item.album + and queue_item.media_item.album == next_item.media_item.album + and not self.mass.config.get_raw_core_config_value( + self.domain, CONF_ALLOW_CROSSFADE_SAME_ALBUM, False + ) + ): + # in general, crossfade is not desired for tracks of the same (gapless) album + # because we have no accurate way to determine if the album is gapless or not, + # for now we just never crossfade between tracks of the same album + self.logger.debug("Skipping crossfade: next item is part of the same album") + return False + + # check if next item sample rate matches + if ( + not flow_mode + and next_item.streamdetails + and queue_item.streamdetails + and next_item.streamdetails.audio_format + and queue_item.streamdetails.audio_format + and ( + queue_item.streamdetails.audio_format.sample_rate + != next_item.streamdetails.audio_format.sample_rate + ) + and (queue_player := self.mass.players.get(queue_item.queue_id)) + and not ( + PlayerFeature.GAPLESS_DIFFERENT_SAMPLERATE in queue_player.supported_features + or self.mass.config.get_raw_player_config_value( + queue_player.player_id, + CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES.key, + CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES.default_value, + ) + ) + ): + self.logger.debug("Skipping crossfade: sample rate mismatch") + return False + return True + + async def _periodic_garbage_collection(self) -> None: + """Periodic garbage collection to free up memory from audio buffers and streams.""" + self.logger.log( + VERBOSE_LOG_LEVEL, + "Running periodic garbage collection...", + ) + # Run garbage collection in executor to avoid blocking the event loop + # Since this runs periodically (not in 
response to subprocess cleanup), + # it's safe to run in a thread without causing thread-safety issues + loop = asyncio.get_running_loop() + collected = await loop.run_in_executor(None, gc.collect) + self.logger.log( + VERBOSE_LOG_LEVEL, + "Garbage collection completed, collected %d objects", + collected, + ) + # Schedule next run in 15 minutes + self.mass.call_later(900, self._periodic_garbage_collection) + + def _setup_smart_fades_logger(self, config: CoreConfig) -> None: + """Set up smart fades logger level.""" + log_level = str(config.get_value(CONF_SMART_FADES_LOG_LEVEL)) + if log_level == "GLOBAL": + self.smart_fades_analyzer.logger.setLevel(self.logger.level) + self.smart_fades_mixer.logger.setLevel(self.logger.level) + else: + self.smart_fades_analyzer.logger.setLevel(log_level) + self.smart_fades_mixer.logger.setLevel(log_level) diff --git a/music_assistant/helpers/smart_fades.py b/music_assistant/helpers/smart_fades.py deleted file mode 100644 index 259ea7d1..00000000 --- a/music_assistant/helpers/smart_fades.py +++ /dev/null @@ -1,1146 +0,0 @@ -"""Smart Fades - Object-oriented implementation with intelligent fades and adaptive filtering.""" - -from __future__ import annotations - -import asyncio -import logging -import time -import warnings -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -import aiofiles -import librosa -import numpy as np -import numpy.typing as npt -import shortuuid - -from music_assistant.constants import VERBOSE_LOG_LEVEL -from music_assistant.helpers.audio import ( - align_audio_to_frame_boundary, - strip_silence, -) -from music_assistant.helpers.process import communicate -from music_assistant.helpers.util import remove_file -from music_assistant.models.smart_fades import ( - SmartFadesAnalysis, - SmartFadesAnalysisFragment, - SmartFadesMode, -) - -if TYPE_CHECKING: - from music_assistant_models.media_items import AudioFormat - from music_assistant_models.streamdetails import StreamDetails - - from 
    async def analyze(
        self,
        item_id: str,
        provider_instance_id_or_domain: str,
        fragment: SmartFadesAnalysisFragment,
        audio_data: bytes,
        pcm_format: AudioFormat,
    ) -> SmartFadesAnalysis | None:
        """Analyze a track's beats for BPM matching smart fade.

        Args:
            item_id: the media item id of the track.
            provider_instance_id_or_domain: provider the track belongs to.
            fragment: which fragment of the track this audio represents.
            audio_data: raw PCM bytes to analyze (assumed float32 PCM in
                ``pcm_format`` — the np.frombuffer below reads float32).
            pcm_format: PCM format describing ``audio_data``.

        Returns:
            The analysis result, or None when no (usable) analysis could be made.
            On success the result is also persisted via the music controller
            as a fire-and-forget task.
        """
        stream_details_name = f"{provider_instance_id_or_domain}://{item_id}"
        start_time = time.perf_counter()
        self.logger.debug(
            "Starting %s beat analysis for track : %s", fragment.name, stream_details_name
        )

        # Validate input audio data is frame-aligned
        audio_data = align_audio_to_frame_boundary(audio_data, pcm_format)

        fragment_duration = len(audio_data) / (pcm_format.pcm_sample_size)
        try:
            self.logger.log(
                VERBOSE_LOG_LEVEL,
                "Audio data: %.2fs, %d bytes",
                fragment_duration,
                len(audio_data),
            )
            # Convert PCM bytes to numpy array and then to mono for analysis
            audio_array = np.frombuffer(audio_data, dtype=np.float32)
            if pcm_format.channels > 1:
                # Ensure array size is divisible by channel count
                samples_per_channel = len(audio_array) // pcm_format.channels
                valid_samples = samples_per_channel * pcm_format.channels
                if valid_samples != len(audio_array):
                    self.logger.warning(
                        "Audio buffer size (%d) not divisible by channels (%d), "
                        "truncating %d samples",
                        len(audio_array),
                        pcm_format.channels,
                        len(audio_array) - valid_samples,
                    )
                    audio_array = audio_array[:valid_samples]

                # Reshape to separate channels and take average for mono conversion
                audio_array = audio_array.reshape(-1, pcm_format.channels)
                mono_audio = np.asarray(np.mean(audio_array, axis=1, dtype=np.float32))
            else:
                # Single channel - ensure consistent array type
                mono_audio = np.asarray(audio_array, dtype=np.float32)

            # Validate that the audio is finite (no NaN or Inf values)
            if not np.all(np.isfinite(mono_audio)):
                self.logger.error(
                    "Audio buffer contains non-finite values (NaN/Inf) for %s, cannot analyze",
                    stream_details_name,
                )
                return None

            analysis = await self._analyze_track_beats(mono_audio, fragment, pcm_format.sample_rate)

            total_time = time.perf_counter() - start_time
            if not analysis:
                self.logger.debug(
                    "No analysis results found after analyzing audio for: %s (took %.2fs).",
                    stream_details_name,
                    total_time,
                )
                return None
            self.logger.debug(
                "Smart fades analysis completed for %s: BPM=%.1f, %d beats, "
                "%d downbeats, confidence=%.2f (took %.2fs)",
                stream_details_name,
                analysis.bpm,
                len(analysis.beats),
                len(analysis.downbeats),
                analysis.confidence,
                total_time,
            )
            # persist the analysis in the library (fire-and-forget)
            self.mass.create_task(
                self.mass.music.set_smart_fades_analysis(
                    item_id, provider_instance_id_or_domain, analysis
                )
            )
            return analysis
        except Exception as e:
            total_time = time.perf_counter() - start_time
            self.logger.exception(
                "Beat analysis error for %s: %s (took %.2fs)",
                stream_details_name,
                e,
                total_time,
            )
            return None
beats detected: %d", len(beats_array)) - return None - - bpm = float(tempo.item()) if hasattr(tempo, "item") else float(tempo) - - # Calculate confidence based on consistency of intervals - if len(beats_array) > 2: - intervals = np.diff(beats_array) - interval_std = np.std(intervals) - interval_mean = np.mean(intervals) - # Lower coefficient of variation = higher confidence - cv = interval_std / interval_mean if interval_mean > 0 else 1.0 - confidence = max(0.1, 1.0 - cv) - else: - confidence = 0.5 # Low confidence with few beats - - downbeats = self._estimate_musical_downbeats(beats_array, bpm) - - # Store complete fragment analysis - fragment_duration = len(audio_array) / sample_rate - - return SmartFadesAnalysis( - fragment=fragment, - bpm=float(bpm), - beats=beats_array, - downbeats=downbeats, - confidence=float(confidence), - duration=fragment_duration, - ) - - except Exception as e: - self.logger.exception("Librosa beat analysis failed: %s", e) - return None - - def _estimate_musical_downbeats( - self, beats_array: npt.NDArray[np.float64], bpm: float - ) -> npt.NDArray[np.float64]: - """Estimate downbeats using musical logic and beat consistency.""" - if len(beats_array) < 4: - return beats_array[:1] if len(beats_array) > 0 else np.array([]) - - # Calculate expected beat interval from BPM - expected_beat_interval = 60.0 / bpm - - # Look for the most likely starting downbeat by analyzing beat intervals - # In 4/4 time, downbeats should be every 4 beats - best_offset = 0 - best_consistency = 0.0 - - # Try different starting offsets (0, 1, 2, 3) to find most consistent downbeat pattern - for offset in range(min(4, len(beats_array))): - downbeat_candidates = beats_array[offset::4] - - if len(downbeat_candidates) < 2: - continue - - # Calculate consistency score based on interval regularity - intervals = np.diff(downbeat_candidates) - expected_downbeat_interval = 4 * expected_beat_interval - - # Score based on how close intervals are to expected 4-beat interval - 
interval_errors = ( - np.abs(intervals - expected_downbeat_interval) / expected_downbeat_interval - ) - consistency = 1.0 - np.mean(interval_errors) - - if consistency > best_consistency: - best_consistency = float(consistency) - best_offset = offset - - # Use the best offset to generate final downbeats - downbeats = beats_array[best_offset::4] - - self.logger.debug( - "Downbeat estimation: offset=%d, consistency=%.2f, %d downbeats from %d beats", - best_offset, - best_consistency, - len(downbeats), - len(beats_array), - ) - - return downbeats - - async def _analyze_track_beats( - self, - audio_data: npt.NDArray[np.float32], - fragment: SmartFadesAnalysisFragment, - sample_rate: int, - ) -> SmartFadesAnalysis | None: - """Analyze track for beat tracking using librosa.""" - try: - return await asyncio.to_thread( - self._librosa_beat_analysis, audio_data, fragment, sample_rate - ) - except Exception as e: - self.logger.exception("Beat tracking analysis failed: %s", e) - return None - - -############################# -# SMART FADES EQ LOGIC -############################# - - -class Filter(ABC): - """Abstract base class for audio filters.""" - - output_fadeout_label: str - output_fadein_label: str - - @abstractmethod - def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]: - """Apply the filter and return the FFmpeg filter strings.""" - - -class TimeStretchFilter(Filter): - """Filter that applies time stretching to match BPM using rubberband.""" - - output_fadeout_label: str = "fadeout_stretched" - output_fadein_label: str = "fadein_unchanged" - - def __init__( - self, - stretch_ratio: float, - ): - """Initialize time stretch filter.""" - self.stretch_ratio = stretch_ratio - - def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]: - """Create FFmpeg filters to gradually adjust tempo from original BPM to target BPM.""" - return [ - 
f"{input_fadeout_label}rubberband=tempo={self.stretch_ratio:.6f}:transients=mixed:detector=soft:pitchq=quality" - f"[{self.output_fadeout_label}]", - f"{input_fadein_label}anull[{self.output_fadein_label}]", # codespell:ignore anull - ] - - def __repr__(self) -> str: - """Return string representation of TimeStretchFilter.""" - return f"TimeStretch(ratio={self.stretch_ratio:.2f})" - - -class TrimFilter(Filter): - """Filter that trims incoming track to align with downbeats.""" - - output_fadeout_label: str = "fadeout_beatalign" - output_fadein_label: str = "fadein_beatalign" - - def __init__(self, fadein_start_pos: float): - """Initialize beat align filter. - - Args: - fadein_start_pos: Position in seconds to trim the incoming track to - """ - self.fadein_start_pos = fadein_start_pos - - def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]: - """Trim the incoming track to align with downbeats.""" - return [ - f"{input_fadeout_label}anull[{self.output_fadeout_label}]", # codespell:ignore anull - f"{input_fadein_label}atrim=start={self.fadein_start_pos},asetpts=PTS-STARTPTS[{self.output_fadein_label}]", - ] - - def __repr__(self) -> str: - """Return string representation of TrimFilter.""" - return f"Trim(trim={self.fadein_start_pos:.2f}s)" - - -class FrequencySweepFilter(Filter): - """Filter that creates frequency sweep effects (lowpass/highpass transitions).""" - - output_fadeout_label: str = "frequency_sweep" - output_fadein_label: str = "frequency_sweep" - - def __init__( - self, - sweep_type: str, - target_freq: int, - duration: float, - start_time: float, - sweep_direction: str, - poles: int, - curve_type: str, - stream_type: str = "fadeout", - ): - """Initialize frequency sweep filter. 
class FrequencySweepFilter(Filter):
    """Filter that creates frequency sweep effects (lowpass/highpass transitions)."""

    output_fadeout_label: str = "frequency_sweep"
    output_fadein_label: str = "frequency_sweep"

    def __init__(
        self,
        sweep_type: str,
        target_freq: int,
        duration: float,
        start_time: float,
        sweep_direction: str,
        poles: int,
        curve_type: str,
        stream_type: str = "fadeout",
    ):
        """Initialize frequency sweep filter.

        Args:
            sweep_type: 'lowpass' or 'highpass'
            target_freq: Target frequency for the filter
            duration: Duration of the sweep in seconds
            start_time: When to start the sweep
            sweep_direction: 'fade_in' (unfiltered->filtered) or 'fade_out' (filtered->unfiltered)
            poles: Number of poles for the filter
            curve_type: 'linear', 'exponential', or 'logarithmic'
            stream_type: 'fadeout' or 'fadein' - which stream to process
        """
        self.sweep_type = sweep_type
        self.target_freq = target_freq
        self.duration = duration
        self.start_time = start_time
        self.sweep_direction = sweep_direction
        self.poles = poles
        self.curve_type = curve_type
        self.stream_type = stream_type

        # Set output labels based on stream type:
        # the processed stream gets a descriptive label, the other a passthrough label
        if stream_type == "fadeout":
            self.output_fadeout_label = f"fadeout_{sweep_type}"
            self.output_fadein_label = "fadein_passthrough"
        else:
            self.output_fadeout_label = "fadeout_passthrough"
            self.output_fadein_label = f"fadein_{sweep_type}"

    def _generate_volume_expr(self, start: float, dur: float, direction: str, curve: str) -> str:
        """Build an FFmpeg time-varying volume expression (evaluated per frame).

        The expression ramps volume from 0->1 ('up') or 1->0 ('down') over
        ``dur`` seconds starting at ``start``, shaped by ``curve``.
        """
        t_expr = f"t-{start}"  # Time relative to start
        norm_t = f"min(max({t_expr},0),{dur})/{dur}"  # Normalized 0-1

        if curve == "exponential":
            # Exponential curve for smoother transitions
            if direction == "up":
                return f"'pow({norm_t},2)':eval=frame"
            else:
                return f"'1-pow({norm_t},2)':eval=frame"
        elif curve == "logarithmic":
            # Logarithmic curve for more aggressive initial change
            if direction == "up":
                return f"'sqrt({norm_t})':eval=frame"
            else:
                return f"'1-sqrt({norm_t})':eval=frame"
        elif direction == "up":
            # linear (default) ramps
            return f"'{norm_t}':eval=frame"
        else:
            return f"'1-{norm_t}':eval=frame"

    def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]:
        """Generate FFmpeg filters for frequency sweep effect."""
        # Select the correct input based on stream type
        if self.stream_type == "fadeout":
            input_label = input_fadeout_label
            output_label = self.output_fadeout_label
            passthrough_label = self.output_fadein_label
            passthrough_input = input_fadein_label
        else:
            input_label = input_fadein_label
            output_label = self.output_fadein_label
            passthrough_label = self.output_fadeout_label
            passthrough_input = input_fadeout_label

        # intermediate filter-graph pad labels for the wet/dry split
        orig_label = f"{output_label}_orig"
        filter_label = f"{output_label}_to{self.sweep_type[:2]}"
        filtered_label = f"{output_label}_filtered"
        orig_faded_label = f"{output_label}_orig_faded"
        filtered_faded_label = f"{output_label}_filtered_faded"

        # Determine volume ramp directions based on sweep direction
        if self.sweep_direction == "fade_in":
            # Fade from dry to wet (unfiltered to filtered)
            orig_direction = "down"
            filter_direction = "up"
        else:  # fade_out
            # Fade from wet to dry (filtered to unfiltered)
            orig_direction = "up"
            filter_direction = "down"

        # Build filter chain
        orig_volume_expr = self._generate_volume_expr(
            self.start_time, self.duration, orig_direction, self.curve_type
        )
        filtered_volume_expr = self._generate_volume_expr(
            self.start_time, self.duration, filter_direction, self.curve_type
        )

        return [
            # Pass through the other stream unchanged
            f"{passthrough_input}anull[{passthrough_label}]",  # codespell:ignore anull
            # Split input into two paths
            f"{input_label}asplit=2[{orig_label}][{filter_label}]",
            # Apply frequency filter to one path
            f"[{filter_label}]{self.sweep_type}=f={self.target_freq}:poles={self.poles}[{filtered_label}]",
            # Apply time-varying volume to original path
            f"[{orig_label}]volume={orig_volume_expr}[{orig_faded_label}]",
            # Apply time-varying volume to filtered path
            f"[{filtered_label}]volume={filtered_volume_expr}[{filtered_faded_label}]",
            # Mix the two paths together
            f"[{orig_faded_label}][{filtered_faded_label}]amix=inputs=2:duration=longest:normalize=0[{output_label}]",
        ]

    def __repr__(self) -> str:
        """Return string representation of FrequencySweepFilter."""
        return f"FreqSweep({self.sweep_type}@{self.target_freq}Hz)"
f"FreqSweep({self.sweep_type}@{self.target_freq}Hz)" - - -class CrossfadeFilter(Filter): - """Filter that applies the final crossfade between fadeout and fadein streams.""" - - output_fadeout_label: str = "crossfade" - output_fadein_label: str = "crossfade" - - def __init__(self, crossfade_duration: float): - """Initialize crossfade filter.""" - self.crossfade_duration = crossfade_duration - - def apply(self, input_fadein_label: str, input_fadeout_label: str) -> list[str]: - """Apply the acrossfade filter.""" - return [f"{input_fadeout_label}{input_fadein_label}acrossfade=d={self.crossfade_duration}"] - - def __repr__(self) -> str: - """Return string representation of CrossfadeFilter.""" - return f"Crossfade(d={self.crossfade_duration:.1f}s)" - - -class SmartFade(ABC): - """Abstract base class for Smart Fades.""" - - filters: list[Filter] - - def __init__(self) -> None: - """Initialize SmartFade base class.""" - self.logger = logging.getLogger(__name__) - self.filters = [] - - @abstractmethod - def _build(self) -> None: - """Build the smart fades filter chain.""" - ... 
- - def _get_ffmpeg_filters( - self, - input_fadein_label: str = "[1]", - input_fadeout_label: str = "[0]", - ) -> list[str]: - """Get FFmpeg filters for smart fades.""" - if not self.filters: - self._build() - filters = [] - _cur_fadein_label = input_fadein_label - _cur_fadeout_label = input_fadeout_label - for audio_filter in self.filters: - filter_strings = audio_filter.apply(_cur_fadein_label, _cur_fadeout_label) - filters.extend(filter_strings) - _cur_fadein_label = f"[{audio_filter.output_fadein_label}]" - _cur_fadeout_label = f"[{audio_filter.output_fadeout_label}]" - return filters - - async def apply( - self, - fade_out_part: bytes, - fade_in_part: bytes, - pcm_format: AudioFormat, - ) -> bytes: - """Apply the smart fade to the given PCM audio parts.""" - # Write the fade_out_part to a temporary file - fadeout_filename = f"/tmp/{shortuuid.random(20)}.pcm" # noqa: S108 - async with aiofiles.open(fadeout_filename, "wb") as outfile: - await outfile.write(fade_out_part) - args = [ - "ffmpeg", - "-hide_banner", - "-loglevel", - "error", - # Input 1: fadeout part (as file) - "-acodec", - pcm_format.content_type.name.lower(), # e.g., "pcm_f32le" not just "f32le" - "-ac", - str(pcm_format.channels), - "-ar", - str(pcm_format.sample_rate), - "-channel_layout", - "mono" if pcm_format.channels == 1 else "stereo", - "-f", - pcm_format.content_type.value, - "-i", - fadeout_filename, - # Input 2: fade_in part (stdin) - "-acodec", - pcm_format.content_type.name.lower(), - "-ac", - str(pcm_format.channels), - "-ar", - str(pcm_format.sample_rate), - "-channel_layout", - "mono" if pcm_format.channels == 1 else "stereo", - "-f", - pcm_format.content_type.value, - "-i", - "-", - ] - smart_fade_filters = self._get_ffmpeg_filters() - self.logger.debug( - "Applying smartfade: %s", - self, - ) - args.extend( - [ - "-filter_complex", - ";".join(smart_fade_filters), - # Output format specification - must match input codec format - "-acodec", - pcm_format.content_type.name.lower(), 
- "-ac", - str(pcm_format.channels), - "-ar", - str(pcm_format.sample_rate), - "-channel_layout", - "mono" if pcm_format.channels == 1 else "stereo", - "-f", - pcm_format.content_type.value, - "-", - ] - ) - self.logger.debug("FFmpeg smartfade args: %s", " ".join(args)) - self.logger.log(VERBOSE_LOG_LEVEL, "FFmpeg command args: %s", " ".join(args)) - - # Execute the enhanced smart fade with full buffer - _, raw_crossfade_output, stderr = await communicate(args, fade_in_part) - await remove_file(fadeout_filename) - - if raw_crossfade_output: - return raw_crossfade_output - else: - stderr_msg = stderr.decode() if stderr else "(no stderr output)" - raise RuntimeError(f"Smart crossfade failed. FFmpeg stderr: {stderr_msg}") - - def __repr__(self) -> str: - """Return string representation of SmartFade showing the filter chain.""" - if not self.filters: - return f"<{self.__class__.__name__}: 0 filters>" - - chain = " → ".join(repr(f) for f in self.filters) - return f"<{self.__class__.__name__}: {len(self.filters)} filters> {chain}" - - -class SmartCrossFade(SmartFade): - """Smart fades class that implements a Smart Fade mode.""" - - # Only apply time stretching if BPM difference is < this % - time_stretch_bpm_percentage_threshold: float = 5.0 - - def __init__( - self, fade_out_analysis: SmartFadesAnalysis, fade_in_analysis: SmartFadesAnalysis - ) -> None: - """Initialize SmartFades with analysis data. 
- - Args: - fade_out_analysis: Analysis data for the outgoing track - fade_in_analysis: Analysis data for the incoming track - logger: Optional logger for debug output - """ - self.fade_out_analysis = fade_out_analysis - self.fade_in_analysis = fade_in_analysis - super().__init__() - - def _build(self) -> None: - """Build the smart fades filter chain.""" - # Calculate tempo factor for time stretching - bpm_ratio = self.fade_in_analysis.bpm / self.fade_out_analysis.bpm - bpm_diff_percent = abs(1.0 - bpm_ratio) * 100 - - # Extrapolate downbeats for better bar calculation - self.extrapolated_fadeout_downbeats = extrapolate_downbeats( - self.fade_out_analysis.downbeats, - tempo_factor=1.0, - bpm=self.fade_out_analysis.bpm, - ) - - # Calculate optimal crossfade bars that fit in available buffer - crossfade_bars = self._calculate_optimal_crossfade_bars() - - # Calculate beat positions for the selected bar count - fadein_start_pos = self._calculate_optimal_fade_timing(crossfade_bars) - - # Calculate initial crossfade duration (may be adjusted later for downbeat alignment) - crossfade_duration = self._calculate_crossfade_duration(crossfade_bars=crossfade_bars) - - # Add time stretch filter if needed - if ( - 0.1 < bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold - and crossfade_bars > 4 - ): - self.filters.append(TimeStretchFilter(stretch_ratio=bpm_ratio)) - # Re-extrapolate downbeats with actual tempo factor for time-stretched audio - self.extrapolated_fadeout_downbeats = extrapolate_downbeats( - self.fade_out_analysis.downbeats, - tempo_factor=bpm_ratio, - bpm=self.fade_out_analysis.bpm, - ) - - # Check if we would have enough audio after beat alignment for the crossfade - if fadein_start_pos and fadein_start_pos + crossfade_duration <= SMART_CROSSFADE_DURATION: - self.filters.append(TrimFilter(fadein_start_pos=fadein_start_pos)) - else: - self.logger.debug( - "Skipping beat alignment: not enough audio after trim (%.1fs + %.1fs > %.1fs)", - 
fadein_start_pos, - crossfade_duration, - SMART_CROSSFADE_DURATION, - ) - - # Adjust crossfade duration to align with outgoing track's downbeats - crossfade_duration = self._adjust_crossfade_to_downbeats( - crossfade_duration=crossfade_duration, - fadein_start_pos=fadein_start_pos, - ) - - # 90 BPM -> 1500Hz, 140 BPM -> 2500Hz - avg_bpm = (self.fade_out_analysis.bpm + self.fade_in_analysis.bpm) / 2 - crossover_freq = int(np.clip(1500 + (avg_bpm - 90) * 20, 1500, 2500)) - - # Adjust for BPM mismatch - if abs(bpm_ratio - 1.0) > 0.3: - crossover_freq = int(crossover_freq * 0.85) - - # For shorter fades, use exp/exp curves to avoid abruptness - if crossfade_bars < 8: - fadeout_curve = "exponential" - fadein_curve = "exponential" - # For long fades, use log/linear curves - else: - # Use logarithmic curve to give the next track more space - fadeout_curve = "logarithmic" - # Use linear curve for transition, predictable and not too abrupt - fadein_curve = "linear" - - # Create lowpass filter on the outgoing track (unfiltered → low-pass) - # Extended lowpass effect to gradually remove bass frequencies - fadeout_eq_duration = min(max(crossfade_duration * 2.5, 8.0), SMART_CROSSFADE_DURATION) - # The crossfade always happens at the END of the buffer - fadeout_eq_start = max(0, SMART_CROSSFADE_DURATION - fadeout_eq_duration) - fadeout_sweep = FrequencySweepFilter( - sweep_type="lowpass", - target_freq=crossover_freq, - duration=fadeout_eq_duration, - start_time=fadeout_eq_start, - sweep_direction="fade_in", - poles=1, - curve_type=fadeout_curve, - stream_type="fadeout", - ) - self.filters.append(fadeout_sweep) - - # Create high pass filter on the incoming track (high-pass → unfiltered) - # Quicker highpass removal to avoid lingering vocals after crossfade - fadein_eq_duration = crossfade_duration / 1.5 - fadein_sweep = FrequencySweepFilter( - sweep_type="highpass", - target_freq=crossover_freq, - duration=fadein_eq_duration, - start_time=0, - sweep_direction="fade_out", - 
poles=1, - curve_type=fadein_curve, - stream_type="fadein", - ) - self.filters.append(fadein_sweep) - - # Add final crossfade filter - crossfade_filter = CrossfadeFilter(crossfade_duration=crossfade_duration) - self.filters.append(crossfade_filter) - - def _calculate_crossfade_duration(self, crossfade_bars: int) -> float: - """Calculate final crossfade duration based on musical bars and BPM.""" - # Calculate crossfade duration based on incoming track's BPM - beats_per_bar = 4 - seconds_per_beat = 60.0 / self.fade_in_analysis.bpm - musical_duration = crossfade_bars * beats_per_bar * seconds_per_beat - - # Apply buffer constraint - actual_duration = min(musical_duration, SMART_CROSSFADE_DURATION) - - # Log if we had to constrain the duration - if musical_duration > SMART_CROSSFADE_DURATION: - self.logger.debug( - "Constraining crossfade duration from %.1fs to %.1fs (buffer limit)", - musical_duration, - actual_duration, - ) - - return actual_duration - - def _calculate_optimal_crossfade_bars(self) -> int: - """Calculate optimal crossfade bars that fit in available buffer.""" - bpm_in = self.fade_in_analysis.bpm - bpm_out = self.fade_out_analysis.bpm - bpm_diff_percent = abs(1.0 - bpm_in / bpm_out) * 100 - - # Calculate ideal bars based on BPM compatibility - ideal_bars = 10 if bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold else 6 - - # Reduce bars until it fits in the fadein buffer - for bars in [ideal_bars, 8, 6, 4, 2, 1]: - if bars > ideal_bars: - continue - - fadein_start_pos = self._calculate_optimal_fade_timing(bars) - if fadein_start_pos is None: - continue - - # Calculate what the duration would be - test_duration = self._calculate_crossfade_duration(crossfade_bars=bars) - - # Check if it fits in fadein buffer - fadein_buffer = SMART_CROSSFADE_DURATION - fadein_start_pos - if test_duration <= fadein_buffer: - if bars < ideal_bars: - self.logger.debug( - "Reduced crossfade from %d to %d bars (fadein buffer=%.1fs, needed=%.1fs)", - ideal_bars, - 
bars, - fadein_buffer, - test_duration, - ) - return bars - - # Fall back to 1 bar if nothing else fits - return 1 - - def _calculate_optimal_fade_timing(self, crossfade_bars: int) -> float | None: - """Calculate beat positions for alignment.""" - beats_per_bar = 4 - - def calculate_beat_positions( - fade_out_beats: npt.NDArray[np.float64], - fade_in_beats: npt.NDArray[np.float64], - num_beats: int, - ) -> float | None: - """Calculate start positions from beat arrays.""" - if len(fade_out_beats) < num_beats or len(fade_in_beats) < num_beats: - return None - - fade_in_slice = fade_in_beats[:num_beats] - return float(fade_in_slice[0]) - - # Try downbeats first for most musical timing - downbeat_positions = calculate_beat_positions( - self.extrapolated_fadeout_downbeats, self.fade_in_analysis.downbeats, crossfade_bars - ) - if downbeat_positions: - return downbeat_positions - - # Try regular beats if downbeats insufficient - required_beats = crossfade_bars * beats_per_bar - beat_positions = calculate_beat_positions( - self.fade_out_analysis.beats, self.fade_in_analysis.beats, required_beats - ) - if beat_positions: - return beat_positions - - # Fallback: No beat alignment possible - self.logger.debug("No beat alignment possible (insufficient beats)") - return None - - def _adjust_crossfade_to_downbeats( - self, - crossfade_duration: float, - fadein_start_pos: float | None, - ) -> float: - """Adjust crossfade duration to align with outgoing track's downbeats.""" - # If we don't have downbeats or beat alignment is disabled, return original duration - if len(self.extrapolated_fadeout_downbeats) == 0 or fadein_start_pos is None: - return crossfade_duration - - # Calculate where the crossfade would start in the buffer - ideal_start_pos = SMART_CROSSFADE_DURATION - crossfade_duration - - # Debug logging - self.logger.debug( - "Downbeat adjustment - ideal_start=%.2fs (buffer=%.1fs - crossfade=%.2fs), " - "fadein_start=%.2fs", - ideal_start_pos, - SMART_CROSSFADE_DURATION, - 
crossfade_duration, - fadein_start_pos, - ) - - # Find the closest downbeats (earlier and later) - earlier_downbeat = None - later_downbeat = None - - for downbeat in self.extrapolated_fadeout_downbeats: - if downbeat <= ideal_start_pos: - earlier_downbeat = downbeat - elif downbeat > ideal_start_pos and later_downbeat is None: - later_downbeat = downbeat - break - - # Try earlier downbeat first (longer crossfade) - if earlier_downbeat is not None: - adjusted_duration = float(SMART_CROSSFADE_DURATION - earlier_downbeat) - if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: - if abs(adjusted_duration - crossfade_duration) > 0.1: - self.logger.debug( - "Adjusted crossfade duration from %.2fs to %.2fs to align with " - "downbeat at %.2fs (earlier)", - crossfade_duration, - adjusted_duration, - earlier_downbeat, - ) - return adjusted_duration - - # Try later downbeat (shorter crossfade) - if later_downbeat is not None: - adjusted_duration = float(SMART_CROSSFADE_DURATION - later_downbeat) - if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: - if abs(adjusted_duration - crossfade_duration) > 0.1: - self.logger.debug( - "Adjusted crossfade duration from %.2fs to %.2fs to align with " - "downbeat at %.2fs (later)", - crossfade_duration, - adjusted_duration, - later_downbeat, - ) - return adjusted_duration - - # If no suitable downbeat found, return original duration - self.logger.debug( - "Could not adjust crossfade duration to downbeats, using original %.2fs", - crossfade_duration, - ) - return crossfade_duration - - -class StandardCrossFade(SmartFade): - """Standard crossfade class that implements a standard crossfade mode.""" - - def __init__(self, crossfade_duration: float = 10.0) -> None: - """Initialize StandardCrossFade with crossfade duration.""" - self.crossfade_duration = crossfade_duration - super().__init__() - - def _build(self) -> None: - """Build the standard crossfade filter chain.""" - self.filters = [ - 
CrossfadeFilter(crossfade_duration=self.crossfade_duration), - ] - - async def apply( - self, fade_out_part: bytes, fade_in_part: bytes, pcm_format: AudioFormat - ) -> bytes: - """Apply the standard crossfade to the given PCM audio parts.""" - # We need to override the default apply here, since standard crossfade only needs to be - # applied to the overlapping parts, not the full buffers. - crossfade_size = int(pcm_format.pcm_sample_size * self.crossfade_duration) - # Pre-crossfade: outgoing track minus the crossfaded portion - pre_crossfade = fade_out_part[:-crossfade_size] - # Post-crossfade: incoming track minus the crossfaded portion - post_crossfade = fade_in_part[crossfade_size:] - # Adjust portions to exact crossfade size - adjusted_fade_in_part = fade_in_part[:crossfade_size] - adjusted_fade_out_part = fade_out_part[-crossfade_size:] - # Adjust the duration to match actual sizes - self.crossfade_duration = min( - len(adjusted_fade_in_part) / pcm_format.pcm_sample_size, - len(adjusted_fade_out_part) / pcm_format.pcm_sample_size, - ) - # Crossfaded portion: user's configured duration - crossfaded_section = await super().apply( - adjusted_fade_out_part, adjusted_fade_in_part, pcm_format - ) - # Full result: everything concatenated - return pre_crossfade + crossfaded_section + post_crossfade - - -############################# -# SMART FADES MIXER LOGIC -############################# -class SmartFadesMixer: - """Smart fades mixer class that mixes tracks based on analysis data.""" - - def __init__(self, mass: MusicAssistant) -> None: - """Initialize smart fades mixer.""" - self.mass = mass - self.logger = logging.getLogger(__name__) - # TODO: Refactor into stream (or metadata) controller after we have split the controllers - self.analyzer = SmartFadesAnalyzer(mass) - - async def mix( - self, - fade_in_part: bytes, - fade_out_part: bytes, - fade_in_streamdetails: StreamDetails, - fade_out_streamdetails: StreamDetails, - pcm_format: AudioFormat, - 
standard_crossfade_duration: int = 10, - mode: SmartFadesMode = SmartFadesMode.SMART_CROSSFADE, - ) -> bytes: - """Apply crossfade with internal state management and smart/standard fallback logic.""" - if mode == SmartFadesMode.DISABLED: - # No crossfade, just concatenate - # Note that this should not happen since we check this before calling mix() - # but just to be sure... - return fade_out_part + fade_in_part - - # strip silence from end of audio of fade_out_part - fade_out_part = await strip_silence( - self.mass, - fade_out_part, - pcm_format=pcm_format, - reverse=True, - ) - # Ensure frame alignment after silence stripping - fade_out_part = align_audio_to_frame_boundary(fade_out_part, pcm_format) - - # strip silence from begin of audio of fade_in_part - fade_in_part = await strip_silence( - self.mass, - fade_in_part, - pcm_format=pcm_format, - reverse=False, - ) - # Ensure frame alignment after silence stripping - fade_in_part = align_audio_to_frame_boundary(fade_in_part, pcm_format) - if mode == SmartFadesMode.STANDARD_CROSSFADE: - smart_fade: SmartFade = StandardCrossFade( - crossfade_duration=standard_crossfade_duration - ) - return await smart_fade.apply( - fade_out_part, - fade_in_part, - pcm_format, - ) - # Attempt smart crossfade with analysis data - fade_out_analysis: SmartFadesAnalysis | None - if stored_analysis := await self.mass.music.get_smart_fades_analysis( - fade_out_streamdetails.item_id, - fade_out_streamdetails.provider, - SmartFadesAnalysisFragment.OUTRO, - ): - fade_out_analysis = stored_analysis - else: - fade_out_analysis = await self.analyzer.analyze( - fade_out_streamdetails.item_id, - fade_out_streamdetails.provider, - SmartFadesAnalysisFragment.OUTRO, - fade_out_part, - pcm_format, - ) - - fade_in_analysis: SmartFadesAnalysis | None - if stored_analysis := await self.mass.music.get_smart_fades_analysis( - fade_in_streamdetails.item_id, - fade_in_streamdetails.provider, - SmartFadesAnalysisFragment.INTRO, - ): - fade_in_analysis = 
stored_analysis - else: - fade_in_analysis = await self.analyzer.analyze( - fade_in_streamdetails.item_id, - fade_in_streamdetails.provider, - SmartFadesAnalysisFragment.INTRO, - fade_in_part, - pcm_format, - ) - if ( - fade_out_analysis - and fade_in_analysis - and fade_out_analysis.confidence > 0.3 - and fade_in_analysis.confidence > 0.3 - and mode == SmartFadesMode.SMART_CROSSFADE - ): - try: - smart_fade = SmartCrossFade(fade_out_analysis, fade_in_analysis) - return await smart_fade.apply( - fade_out_part, - fade_in_part, - pcm_format, - ) - except Exception as e: - self.logger.warning( - "Smart crossfade failed: %s, falling back to standard crossfade", e - ) - - # Always fallback to Standard Crossfade in case something goes wrong - smart_fade = StandardCrossFade(crossfade_duration=standard_crossfade_duration) - return await smart_fade.apply( - fade_out_part, - fade_in_part, - pcm_format, - ) - - -# HELPER METHODS -def get_bpm_diff_percentage(bpm1: float, bpm2: float) -> float: - """Calculate BPM difference percentage between two BPM values.""" - return abs(1.0 - bpm1 / bpm2) * 100 - - -def extrapolate_downbeats( - downbeats: npt.NDArray[np.float64], - tempo_factor: float, - buffer_size: float = SMART_CROSSFADE_DURATION, - bpm: float | None = None, -) -> npt.NDArray[np.float64]: - """Extrapolate downbeats based on actual intervals when detection is incomplete. - - This is needed when we want to perform beat alignment in an 'atmospheric' outro - that does not have any detected downbeats. 
- - Args: - downbeats: Array of detected downbeat positions in seconds - tempo_factor: Tempo adjustment factor for time stretching - buffer_size: Maximum buffer size in seconds - bpm: Optional BPM for validation when extrapolating with only 2 downbeats - """ - # Handle case with exactly 2 downbeats (with BPM validation) - if len(downbeats) == 2 and bpm is not None: - interval = float(downbeats[1] - downbeats[0]) - - # Expected interval for this BPM (assuming 4/4 time signature) - expected_interval = (60.0 / bpm) * 4 - - # Only extrapolate if interval matches BPM within 15% tolerance - if abs(interval - expected_interval) / expected_interval < 0.15: - # Adjust detected downbeats for time stretching first - adjusted_downbeats = downbeats / tempo_factor - last_downbeat = adjusted_downbeats[-1] - - # If the last downbeat is close to the buffer end, no extrapolation needed - if last_downbeat >= buffer_size - 5: - return adjusted_downbeats - - # Adjust the interval for time stretching - adjusted_interval = interval / tempo_factor - - # Extrapolate forward from last adjusted downbeat using adjusted interval - extrapolated = [] - current_pos = last_downbeat + adjusted_interval - max_extrapolation_distance = 125.0 # Don't extrapolate more than 25s - - while ( - current_pos < buffer_size - and (current_pos - last_downbeat) <= max_extrapolation_distance - ): - extrapolated.append(current_pos) - current_pos += adjusted_interval - - if extrapolated: - # Combine adjusted detected downbeats and extrapolated downbeats - return np.concatenate([adjusted_downbeats, np.array(extrapolated)]) - - return adjusted_downbeats - # else: interval doesn't match BPM, fall through to return original - - if len(downbeats) < 2: - # Need at least 2 downbeats to extrapolate - return downbeats / tempo_factor - - # Adjust detected downbeats for time stretching first - adjusted_downbeats = downbeats / tempo_factor - last_downbeat = adjusted_downbeats[-1] - - # If the last downbeat is close to the 
buffer end, no extrapolation needed - if last_downbeat >= buffer_size - 5: - return adjusted_downbeats - - # Calculate intervals from ORIGINAL downbeats (before time stretching) - intervals = np.diff(downbeats) - median_interval = float(np.median(intervals)) - std_interval = float(np.std(intervals)) - - # Only extrapolate if intervals are consistent (low standard deviation) - if std_interval > 0.2: - return adjusted_downbeats - - # Adjust the interval for time stretching - # When slowing down (tempo_factor < 1.0), intervals get longer - adjusted_interval = median_interval / tempo_factor - - # Extrapolate forward from last adjusted downbeat using adjusted interval - extrapolated = [] - current_pos = last_downbeat + adjusted_interval - max_extrapolation_distance = 25.0 # Don't extrapolate more than 25s - - while current_pos < buffer_size and (current_pos - last_downbeat) <= max_extrapolation_distance: - extrapolated.append(current_pos) - current_pos += adjusted_interval - - if extrapolated: - # Combine adjusted detected downbeats and extrapolated downbeats - return np.concatenate([adjusted_downbeats, np.array(extrapolated)]) - - return adjusted_downbeats