Volume normalization improvements (#1657)
author Marcel van der Veldt <m.vanderveldt@outlook.com>
Wed, 11 Sep 2024 21:04:35 +0000 (23:04 +0200)
committer GitHub <noreply@github.com>
Wed, 11 Sep 2024 21:04:35 +0000 (23:04 +0200)
13 files changed:
music_assistant/common/models/config_entries.py
music_assistant/common/models/streamdetails.py
music_assistant/constants.py
music_assistant/server/controllers/music.py
music_assistant/server/controllers/player_queues.py
music_assistant/server/controllers/streams.py
music_assistant/server/helpers/audio.py
music_assistant/server/helpers/tags.py
music_assistant/server/providers/airplay/__init__.py
music_assistant/server/providers/filesystem_local/__init__.py
music_assistant/server/providers/slimproto/__init__.py
music_assistant/server/providers/snapcast/__init__.py
music_assistant/server/providers/ugp/__init__.py

index 4c2a404b32a1a3d4ff3890c1edc0f98efd09b641..2c9ebd4a704dc45e3cd8c9c03709c7aed7cc77f3 100644 (file)
@@ -631,7 +631,7 @@ CONF_ENTRY_ENABLE_ICY_METADATA = ConfigEntry(
         ConfigValueOption("Profile 2 - full info (including image)", "full"),
     ),
     depends_on=CONF_FLOW_MODE,
-    default_value="basic",
+    default_value="disabled",
     label="Try to ingest metadata into stream (ICY)",
     category="advanced",
     description="Try to ingest metadata into the stream (ICY) to show track info on the player, "
index c80a1f67bc32902a50350123e288a61365edd6ef..5dc0c6b698d9b9164aed182bffdfdba473cc9b41 100644 (file)
@@ -11,17 +11,6 @@ from music_assistant.common.models.enums import MediaType, StreamType
 from music_assistant.common.models.media_items import AudioFormat
 
 
-@dataclass(kw_only=True)
-class LoudnessMeasurement(DataClassDictMixin):
-    """Model for EBU-R128 loudness measurement details."""
-
-    integrated: float
-    true_peak: float
-    lra: float
-    threshold: float
-    target_offset: float | None = None
-
-
 @dataclass(kw_only=True)
 class StreamDetails(DataClassDictMixin):
     """Model for streamdetails."""
@@ -55,11 +44,14 @@ class StreamDetails(DataClassDictMixin):
     # the fields below will be set/controlled by the streamcontroller
     seek_position: int = 0
     fade_in: bool = False
-    loudness: LoudnessMeasurement | None = None
+    enable_volume_normalization: bool = False
+    loudness: float | None = None
+    loudness_album: float | None = None
+    prefer_album_loudness: bool = False
+    force_dynamic_volume_normalization: bool = False
     queue_id: str | None = None
     seconds_streamed: float | None = None
     target_loudness: float | None = None
-    bypass_loudness_normalization: bool = False
     strip_silence_begin: bool = False
     strip_silence_end: bool = False
     stream_error: bool | None = None
index 3f76d6c4f30eb8fbe5867b3abfa149fa03e1e388..ea3c9feb2146f8f563bc846eb2e7aeb02d27b966 100644 (file)
@@ -67,17 +67,16 @@ CONF_SAMPLE_RATES: Final[str] = "sample_rates"
 CONF_HTTP_PROFILE: Final[str] = "http_profile"
 CONF_SYNC_LEADER: Final[str] = "sync_leader"
 CONF_BYPASS_NORMALIZATION_RADIO: Final[str] = "bypass_normalization_radio"
-CONF_BYPASS_NORMALIZATION_SHORT: Final[str] = "bypass_normalization_short"
 CONF_PREVENT_SYNC_LEADER_OFF: Final[str] = "prevent_sync_leader_off"
 CONF_SYNCGROUP_DEFAULT_ON: Final[str] = "syncgroup_default_on"
 CONF_ENABLE_ICY_METADATA: Final[str] = "enable_icy_metadata"
+CONF_VOLUME_NORMALIZATION_RADIO: Final[str] = "volume_normalization_radio"
 
 # config default values
 DEFAULT_HOST: Final[str] = "0.0.0.0"
 DEFAULT_PORT: Final[int] = 8095
 
 # common db tables
-DB_TABLE_TRACK_LOUDNESS: Final[str] = "track_loudness"
 DB_TABLE_PLAYLOG: Final[str] = "playlog"
 DB_TABLE_ARTISTS: Final[str] = "artists"
 DB_TABLE_ALBUMS: Final[str] = "albums"
@@ -91,6 +90,7 @@ DB_TABLE_PROVIDER_MAPPINGS: Final[str] = "provider_mappings"
 DB_TABLE_ALBUM_TRACKS: Final[str] = "album_tracks"
 DB_TABLE_TRACK_ARTISTS: Final[str] = "track_artists"
 DB_TABLE_ALBUM_ARTISTS: Final[str] = "album_artists"
+DB_TABLE_LOUDNESS_MEASUREMENTS: Final[str] = "loudness_measurements"
 
 
 # all other
index 58a13f3353ed1b5210f0439b1bbf5c31d68db66e..e96384119b1955d7df0b05fb9048c6370e2e415c 100644 (file)
@@ -37,19 +37,18 @@ from music_assistant.common.models.media_items import (
     SearchResults,
 )
 from music_assistant.common.models.provider import SyncTask
-from music_assistant.common.models.streamdetails import LoudnessMeasurement
 from music_assistant.constants import (
     DB_TABLE_ALBUM_ARTISTS,
     DB_TABLE_ALBUM_TRACKS,
     DB_TABLE_ALBUMS,
     DB_TABLE_ARTISTS,
+    DB_TABLE_LOUDNESS_MEASUREMENTS,
     DB_TABLE_PLAYLISTS,
     DB_TABLE_PLAYLOG,
     DB_TABLE_PROVIDER_MAPPINGS,
     DB_TABLE_RADIOS,
     DB_TABLE_SETTINGS,
     DB_TABLE_TRACK_ARTISTS,
-    DB_TABLE_TRACK_LOUDNESS,
     DB_TABLE_TRACKS,
     PROVIDERS_WITH_SHAREABLE_URLS,
 )
@@ -73,7 +72,7 @@ DEFAULT_SYNC_INTERVAL = 3 * 60  # default sync interval in minutes
 CONF_SYNC_INTERVAL = "sync_interval"
 CONF_DELETED_PROVIDERS = "deleted_providers"
 CONF_ADD_LIBRARY_ON_PLAY = "add_library_on_play"
-DB_SCHEMA_VERSION: Final[int] = 8
+DB_SCHEMA_VERSION: Final[int] = 9
 
 
 class MusicController(CoreController):
@@ -662,47 +661,43 @@ class MusicController(CoreController):
         await self.mass.metadata.update_metadata(library_item, force_refresh=True)
         return library_item
 
-    async def set_track_loudness(
-        self, item_id: str, provider_instance_id_or_domain: str, loudness: LoudnessMeasurement
+    async def set_loudness(
+        self,
+        item_id: str,
+        provider_instance_id_or_domain: str,
+        loudness: float,
+        album_loudness: float | None = None,
+        media_type: MediaType = MediaType.TRACK,
     ) -> None:
-        """Store Loudness Measurement for a track in db."""
-        if provider := self.mass.get_provider(provider_instance_id_or_domain):
-            await self.database.insert(
-                DB_TABLE_TRACK_LOUDNESS,
-                {
-                    "item_id": item_id,
-                    "provider": provider.lookup_key,
-                    "integrated": round(loudness.integrated, 2),
-                    "true_peak": round(loudness.true_peak, 2),
-                    "lra": round(loudness.lra, 2),
-                    "threshold": round(loudness.threshold, 2),
-                    "target_offset": round(loudness.target_offset, 2),
-                },
-                allow_replace=True,
-            )
+        """Store (EBU-R128) Integrated Loudness Measurement for a mediaitem in db."""
+        values = {
+            "item_id": item_id,
+            "media_type": media_type.value,
+            "provider": provider_instance_id_or_domain,
+            "loudness": loudness,
+        }
+        if album_loudness is not None:
+            values["loudness_album"] = album_loudness
+        await self.database.insert_or_replace(DB_TABLE_LOUDNESS_MEASUREMENTS, values)
+
+    async def get_loudness(
+        self,
+        item_id: str,
+        provider_instance_id_or_domain: str,
+        media_type: MediaType = MediaType.TRACK,
+    ) -> tuple[float, float] | None:
+        """Get (EBU-R128) Integrated Loudness Measurement for a mediaitem in db."""
+        db_row = await self.database.get_row(
+            DB_TABLE_LOUDNESS_MEASUREMENTS,
+            {
+                "item_id": item_id,
+                "media_type": media_type.value,
+                "provider": provider_instance_id_or_domain,
+            },
+        )
+        if db_row and db_row["loudness"] != inf and db_row["loudness"] != -inf:
+            return (db_row["loudness"], db_row["loudness_album"])
 
-    async def get_track_loudness(
-        self, item_id: str, provider_instance_id_or_domain: str
-    ) -> LoudnessMeasurement | None:
-        """Get Loudness Measurement for a track in db."""
-        if provider := self.mass.get_provider(provider_instance_id_or_domain):
-            if result := await self.database.get_row(
-                DB_TABLE_TRACK_LOUDNESS,
-                {
-                    "item_id": item_id,
-                    "provider": provider.lookup_key,
-                },
-            ):
-                if result["integrated"] == inf or result["integrated"] == -inf:
-                    return None
-
-                return LoudnessMeasurement(
-                    integrated=result["integrated"],
-                    true_peak=result["true_peak"],
-                    lra=result["lra"],
-                    threshold=result["threshold"],
-                    target_offset=result["target_offset"],
-                )
         return None
 
     async def mark_item_played(
@@ -1064,7 +1059,6 @@ class MusicController(CoreController):
                 DB_TABLE_RADIOS,
                 DB_TABLE_ALBUM_TRACKS,
                 DB_TABLE_PLAYLOG,
-                DB_TABLE_TRACK_LOUDNESS,
                 DB_TABLE_PROVIDER_MAPPINGS,
             ):
                 await self.database.execute(f"DROP TABLE IF EXISTS {table}")
@@ -1098,6 +1092,24 @@ class MusicController(CoreController):
                 if "duplicate column" not in str(err):
                     raise
 
+        if prev_version <= 8:
+            # migrate track_loudness --> loudness_measurements
+            async for db_row in self.database.iter_items("track_loudness"):
+                if db_row["integrated"] == inf or db_row["integrated"] == -inf:
+                    continue
+                if db_row["provider"] in ("radiobrowser", "tunein"):
+                    continue
+                await self.database.insert_or_replace(
+                    DB_TABLE_LOUDNESS_MEASUREMENTS,
+                    {
+                        "item_id": db_row["item_id"],
+                        "media_type": "track",
+                        "provider": db_row["provider"],
+                        "loudness": db_row["integrated"],
+                    },
+                )
+            await self.database.execute("DROP TABLE IF EXISTS track_loudness")
+
         # save changes
         await self.database.commit()
 
@@ -1121,18 +1133,6 @@ class MusicController(CoreController):
                     [type] TEXT
                 );"""
         )
-        await self.database.execute(
-            f"""CREATE TABLE IF NOT EXISTS {DB_TABLE_TRACK_LOUDNESS}(
-                    [id] INTEGER PRIMARY KEY AUTOINCREMENT,
-                    [item_id] TEXT NOT NULL,
-                    [provider] TEXT NOT NULL,
-                    [integrated] REAL,
-                    [true_peak] REAL,
-                    [lra] REAL,
-                    [threshold] REAL,
-                    [target_offset] REAL,
-                    UNIQUE(item_id, provider));"""
-        )
         await self.database.execute(
             f"""CREATE TABLE IF NOT EXISTS {DB_TABLE_PLAYLOG}(
                 [id] INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -1270,6 +1270,18 @@ class MusicController(CoreController):
             UNIQUE(album_id, artist_id)
             );"""
         )
+
+        await self.database.execute(
+            f"""CREATE TABLE IF NOT EXISTS {DB_TABLE_LOUDNESS_MEASUREMENTS}(
+                    [id] INTEGER PRIMARY KEY AUTOINCREMENT,
+                    [media_type] TEXT NOT NULL,
+                    [item_id] TEXT NOT NULL,
+                    [provider] TEXT NOT NULL,
+                    [loudness] REAL,
+                    [loudness_album] REAL,
+                    UNIQUE(media_type,item_id,provider));"""
+        )
+
         await self.database.commit()
 
     async def __create_database_indexes(self) -> None:
@@ -1366,6 +1378,11 @@ class MusicController(CoreController):
             f"CREATE INDEX IF NOT EXISTS {DB_TABLE_ALBUM_ARTISTS}_artist_id_idx "
             f"on {DB_TABLE_ALBUM_ARTISTS}(artist_id);"
         )
+        # index on loudness measurements table
+        await self.database.execute(
+            f"CREATE INDEX IF NOT EXISTS {DB_TABLE_LOUDNESS_MEASUREMENTS}_idx "
+            f"on {DB_TABLE_LOUDNESS_MEASUREMENTS}(media_type,item_id,provider);"
+        )
         await self.database.commit()
 
     async def __create_database_triggers(self) -> None:
index e4584ec028b28939217a050c6e7c297107a7089d..c47425fe082426ece1b6fbf9c6f849b61cb3a3f7 100644 (file)
@@ -785,10 +785,34 @@ class PlayerQueuesController(CoreController):
         queue.flow_mode = player_needs_flow_mode and next_index is not None
         queue.stream_finished = False
         queue.end_of_track_reached = False
+
+        # work out if we are playing an album and if we should prefer album loudness
+        if (
+            next_index is not None
+            and (next_item := self.get_item(queue_id, next_index))
+            and (
+                queue_item.media_item
+                and hasattr(queue_item.media_item, "album")
+                and hasattr(next_item.media_item, "album")
+                and queue_item.media_item.album
+                and next_item.media_item
+                and next_item.media_item.album
+                and queue_item.media_item.album.item_id == next_item.media_item.album.item_id
+            )
+        ):
+            prefer_album_loudness = True
+        else:
+            prefer_album_loudness = False
+
         # get streamdetails - do this here to catch unavailable items early
         queue_item.streamdetails = await get_stream_details(
-            self.mass, queue_item, seek_position=seek_position, fade_in=fade_in
+            self.mass,
+            queue_item,
+            seek_position=seek_position,
+            fade_in=fade_in,
+            prefer_album_loudness=prefer_album_loudness,
         )
+
         # allow stripping silence from the end of the track if crossfade is enabled
         # this will allow for smoother crossfades
         if await self.mass.config.get_player_config_value(queue_id, CONF_CROSSFADE):
@@ -1054,11 +1078,30 @@ class PlayerQueuesController(CoreController):
             if next_index is None:
                 raise QueueEmpty("No more tracks left in the queue.")
             queue_item = self.get_item(queue_id, next_index)
+
+            # work out if we are playing an album and if we should prefer album loudness
+            if (
+                next_index is not None
+                and (next_item := self.get_item(queue_id, next_index))
+                and (
+                    queue_item.media_item
+                    and queue_item.media_item.album
+                    and next_item.media_item
+                    and next_item.media_item.album
+                    and queue_item.media_item.album.item_id == next_item.media_item.album.item_id
+                )
+            ):
+                prefer_album_loudness = True
+            else:
+                prefer_album_loudness = False
+
             try:
                 # Check if the QueueItem is playable. For example, YT Music returns Radio Items
                 # that are not playable which will stop playback.
                 queue_item.streamdetails = await get_stream_details(
-                    mass=self.mass, queue_item=queue_item
+                    mass=self.mass,
+                    queue_item=queue_item,
+                    prefer_album_loudness=prefer_album_loudness,
                 )
                 # Preload the full MediaItem for the QueueItem, making sure to get the
                 # maximum quality of thumbs
index 6aed2e99f81ba4bfa523a8f9e4324638307ef714..e51d7765166aee7e6eb4ca15c7272428abaaf9a5 100644 (file)
@@ -19,6 +19,7 @@ from aiohttp import web
 
 from music_assistant.common.helpers.util import get_ip, select_free_port, try_parse_bool
 from music_assistant.common.models.config_entries import (
+    CONF_ENTRY_ENABLE_ICY_METADATA,
     ConfigEntry,
     ConfigValueOption,
     ConfigValueType,
@@ -31,15 +32,14 @@ from music_assistant.constants import (
     ANNOUNCE_ALERT_FILE,
     CONF_BIND_IP,
     CONF_BIND_PORT,
-    CONF_BYPASS_NORMALIZATION_RADIO,
-    CONF_BYPASS_NORMALIZATION_SHORT,
     CONF_CROSSFADE,
     CONF_CROSSFADE_DURATION,
-    CONF_ENABLE_ICY_METADATA,
     CONF_HTTP_PROFILE,
     CONF_OUTPUT_CHANNELS,
     CONF_PUBLISH_IP,
     CONF_SAMPLE_RATES,
+    CONF_VOLUME_NORMALIZATION,
+    CONF_VOLUME_NORMALIZATION_RADIO,
     MASS_LOGO_ONLINE,
     SILENCE_FILE,
     VERBOSE_LOG_LEVEL,
@@ -172,26 +172,23 @@ class StreamsController(CoreController):
                 category="advanced",
             ),
             ConfigEntry(
-                key=CONF_BYPASS_NORMALIZATION_RADIO,
-                type=ConfigEntryType.BOOLEAN,
-                default_value=True,
-                label="Bypass volume normalization for radio streams",
-                description="Radio streams are often already normalized according "
-                "to the EBU standard, so it doesn't make a lot of sense to normalize them again "
-                "in Music Assistant unless you hear big jumps in volume during playback, "
-                "such as commercials.",
-                category="advanced",
-            ),
-            ConfigEntry(
-                key=CONF_BYPASS_NORMALIZATION_SHORT,
-                type=ConfigEntryType.BOOLEAN,
-                default_value=True,
-                label="Bypass volume normalization for effects and short sounds",
-                description="The volume normalizer of ffmpeg (used in Music Assistant), "
-                "is designed to work best with longer audio streams and can have troubles when "
-                "its applied to very short sound clips (< 30 seconds), "
-                "for example sound effects. With this option enabled, the volume normalizer "
-                "will be bypassed for all audio that has a duration of less than 60 seconds.",
+                key=CONF_VOLUME_NORMALIZATION_RADIO,
+                type=ConfigEntryType.STRING,
+                default_value="standard",
+                label="Volume normalization method to use for radio streams",
+                description="Radio streams often have varying loudness levels, especially "
+                "during announcements and commercials. \n"
+                "You can choose to enforce dynamic volume normalization to radio streams, "
+                "even if a (average) loudness measurement for the radio station exists. \n\n"
+                "Options: \n"
+                "- Disabled - do not apply volume normalization at all \n"
+                "- Force dynamic - Enforce dynamic volume levelling at all times \n"
+                "- Standard - use normalization based on previous measurement, ",
+                options=(
+                    ConfigValueOption("Disabled", "disabled"),
+                    ConfigValueOption("Force dynamic", "dynamic"),
+                    ConfigValueOption("Standard", "standard"),
+                ),
                 category="advanced",
             ),
         )
@@ -339,10 +336,19 @@ class StreamsController(CoreController):
             queue.display_name,
         )
         self.mass.player_queues.track_loaded_in_buffer(queue_id, queue_item_id)
+
+        # pick pcm format based on the streamdetails and player capabilities
+        if self.mass.config.get_raw_player_config_value(queue_id, CONF_VOLUME_NORMALIZATION, True):
+            # prefer f32 when volume normalization is enabled
+            bit_depth = 32
+            floating_point = True
+        else:
+            bit_depth = queue_item.streamdetails.audio_format.bit_depth
+            floating_point = False
         pcm_format = AudioFormat(
-            content_type=ContentType.from_bit_depth(output_format.bit_depth),
+            content_type=ContentType.from_bit_depth(bit_depth, floating_point),
             sample_rate=queue_item.streamdetails.audio_format.sample_rate,
-            bit_depth=queue_item.streamdetails.audio_format.bit_depth,
+            bit_depth=bit_depth,
             channels=2,
         )
         chunk_num = 0
@@ -397,10 +403,12 @@ class StreamsController(CoreController):
         )
         # work out ICY metadata support
         icy_preference = self.mass.config.get_raw_player_config_value(
-            queue_id, CONF_ENABLE_ICY_METADATA, "basic"
+            queue_id,
+            CONF_ENTRY_ENABLE_ICY_METADATA.key,
+            CONF_ENTRY_ENABLE_ICY_METADATA.default_value,
         )
         enable_icy = request.headers.get("Icy-MetaData", "") == "1" and icy_preference != "disabled"
-        icy_meta_interval = 16384
+        icy_meta_interval = 256000 if icy_preference == "full" else 16384
 
         # prepare request, add some DLNA/UPNP compatible headers
         http_profile: str = await self.mass.config.get_player_config_value(
@@ -652,8 +660,7 @@ class StreamsController(CoreController):
                     crossfade_part = await crossfade_pcm_parts(
                         fadein_part,
                         last_fadeout_part,
-                        pcm_format.bit_depth,
-                        pcm_format.sample_rate,
+                        pcm_format=pcm_format,
                     )
                     # send crossfade_part (as one big chunk)
                     bytes_written += len(crossfade_part)
@@ -761,37 +768,24 @@ class StreamsController(CoreController):
         # collect all arguments for ffmpeg
         filter_params = []
         extra_input_args = []
-        # add loudnorm filter: volume normalization
-        # more info: https://k.ylo.ph/2016/04/04/loudnorm.html
-        if (
-            streamdetails.target_loudness is not None
-            and not streamdetails.bypass_loudness_normalization
-        ):
-            if streamdetails.loudness:
-                # we have a measurement so we can do linear mode
-                target_loudness = streamdetails.target_loudness
-                # we must ensure that target loudness does not exceed the measured value
-                # otherwise ffmpeg falls back to dynamic again
-                # https://github.com/slhck/ffmpeg-normalize/issues/251
-                target_loudness = min(
-                    streamdetails.target_loudness,
-                    streamdetails.loudness.integrated + streamdetails.loudness.lra - 1,
-                )
-                filter_rule = f"loudnorm=I={target_loudness}:TP=-2.0:LRA=7.0:linear=true"
-                filter_rule += f":measured_I={streamdetails.loudness.integrated}"
-                filter_rule += f":measured_LRA={streamdetails.loudness.lra}"
-                filter_rule += f":measured_tp={streamdetails.loudness.true_peak}"
-                filter_rule += f":measured_thresh={streamdetails.loudness.threshold}"
-                if streamdetails.loudness.target_offset is not None:
-                    filter_rule += f":offset={streamdetails.loudness.target_offset}"
-            else:
-                # if we have no measurement, we use dynamic mode
+        # handle volume normalization
+        if streamdetails.enable_volume_normalization and streamdetails.target_loudness is not None:
+            if streamdetails.force_dynamic_volume_normalization or streamdetails.loudness is None:
+                # volume normalization with unknown loudness measurement
+                # use loudnorm filter in dynamic mode
                 # which also collects the measurement on the fly during playback
+                # more info: https://k.ylo.ph/2016/04/04/loudnorm.html
                 filter_rule = (
-                    f"loudnorm=I={streamdetails.target_loudness}:TP=-2.0:LRA=7.0:offset=0.0"
+                    f"loudnorm=I={streamdetails.target_loudness}:TP=-2.0:LRA=10.0:offset=0.0"
                 )
-            filter_rule += ":print_format=json"
-            filter_params.append(filter_rule)
+                filter_rule += ":print_format=json"
+                filter_params.append(filter_rule)
+            else:
+                # volume normalization with known loudness measurement
+                # apply fixed volume/gain correction
+                gain_correct = streamdetails.target_loudness - streamdetails.loudness
+                gain_correct = round(gain_correct, 2)
+                filter_params.append(f"volume={gain_correct}dB")
         if streamdetails.stream_type == StreamType.CUSTOM:
             audio_source = self.mass.get_provider(streamdetails.provider).get_audio_stream(
                 streamdetails,
@@ -911,9 +905,17 @@ class StreamsController(CoreController):
             if sample_rate in supported_sample_rates:
                 output_sample_rate = sample_rate
                 break
-        output_bit_depth = min(24, player_max_bit_depth)
+        if self.mass.config.get_raw_player_config_value(
+            player.player_id, CONF_VOLUME_NORMALIZATION, True
+        ):
+            # prefer f32 when volume normalization is enabled
+            output_bit_depth = 32
+            floating_point = True
+        else:
+            output_bit_depth = min(24, player_max_bit_depth)
+            floating_point = False
         return AudioFormat(
-            content_type=ContentType.from_bit_depth(output_bit_depth),
+            content_type=ContentType.from_bit_depth(output_bit_depth, floating_point),
             sample_rate=output_sample_rate,
             bit_depth=output_bit_depth,
             channels=2,
index 62099ff6ea409a66bdbedcf8daa436d92ea036a4..4809b8ee7e93efa1e271e8587069f46412461661 100644 (file)
@@ -32,15 +32,14 @@ from music_assistant.common.models.errors import (
     MusicAssistantError,
 )
 from music_assistant.common.models.media_items import AudioFormat, ContentType
-from music_assistant.common.models.streamdetails import LoudnessMeasurement, StreamDetails
+from music_assistant.common.models.streamdetails import StreamDetails
 from music_assistant.constants import (
-    CONF_BYPASS_NORMALIZATION_RADIO,
-    CONF_BYPASS_NORMALIZATION_SHORT,
     CONF_EQ_BASS,
     CONF_EQ_MID,
     CONF_EQ_TREBLE,
     CONF_OUTPUT_CHANNELS,
     CONF_VOLUME_NORMALIZATION,
+    CONF_VOLUME_NORMALIZATION_RADIO,
     CONF_VOLUME_NORMALIZATION_TARGET,
     MASS_LOGGER_NAME,
     VERBOSE_LOG_LEVEL,
@@ -209,12 +208,10 @@ class FFMpeg(AsyncProcess):
 async def crossfade_pcm_parts(
     fade_in_part: bytes,
     fade_out_part: bytes,
-    bit_depth: int,
-    sample_rate: int,
+    pcm_format: AudioFormat,
 ) -> bytes:
     """Crossfade two chunks of pcm/raw audio using ffmpeg."""
-    sample_size = int(sample_rate * (bit_depth / 8) * 2)
-    fmt = ContentType.from_bit_depth(bit_depth)
+    sample_size = pcm_format.pcm_sample_size
     # calculate the fade_length from the smallest chunk
     fade_length = min(len(fade_in_part), len(fade_out_part)) / sample_size
     fadeoutfile = create_tempfile()
@@ -228,24 +225,24 @@ async def crossfade_pcm_parts(
         "quiet",
         # fadeout part (as file)
         "-acodec",
-        fmt.name.lower(),
+        pcm_format.content_type.name.lower(),
         "-f",
-        fmt,
+        pcm_format.content_type.value,
         "-ac",
-        "2",
+        str(pcm_format.channels),
         "-ar",
-        str(sample_rate),
+        str(pcm_format.sample_rate),
         "-i",
         fadeoutfile.name,
         # fade_in part (stdin)
         "-acodec",
-        fmt.name.lower(),
+        pcm_format.content_type.name.lower(),
         "-f",
-        fmt,
+        pcm_format.content_type.value,
         "-ac",
-        "2",
+        str(pcm_format.channels),
         "-ar",
-        str(sample_rate),
+        str(pcm_format.sample_rate),
         "-i",
         "-",
         # filter args
@@ -253,7 +250,7 @@ async def crossfade_pcm_parts(
         f"[0][1]acrossfade=d={fade_length}",
         # output args
         "-f",
-        fmt,
+        pcm_format.content_type.value,
         "-",
     ]
     _returncode, crossfaded_audio, _stderr = await communicate(args, fade_in_part)
@@ -279,22 +276,20 @@ async def crossfade_pcm_parts(
 async def strip_silence(
     mass: MusicAssistant,  # noqa: ARG001
     audio_data: bytes,
-    sample_rate: int,
-    bit_depth: int,
+    pcm_format: AudioFormat,
     reverse: bool = False,
 ) -> bytes:
     """Strip silence from begin or end of pcm audio using ffmpeg."""
-    fmt = ContentType.from_bit_depth(bit_depth)
     args = ["ffmpeg", "-hide_banner", "-loglevel", "quiet"]
     args += [
         "-acodec",
-        fmt.name.lower(),
+        pcm_format.content_type.name.lower(),
         "-f",
-        fmt,
+        pcm_format.content_type.value,
         "-ac",
-        "2",
+        str(pcm_format.channels),
         "-ar",
-        str(sample_rate),
+        str(pcm_format.sample_rate),
         "-i",
         "-",
     ]
@@ -310,14 +305,13 @@ async def strip_silence(
             "atrim=start=0.2,silenceremove=start_periods=1:start_silence=0.1:start_threshold=0.02",
         ]
     # output args
-    args += ["-f", fmt, "-"]
+    args += ["-f", pcm_format.content_type.value, "-"]
     _returncode, stripped_data, _stderr = await communicate(args, audio_data)
 
     # return stripped audio
     bytes_stripped = len(audio_data) - len(stripped_data)
     if LOGGER.isEnabledFor(VERBOSE_LOG_LEVEL):
-        pcm_sample_size = int(sample_rate * (bit_depth / 8) * 2)
-        seconds_stripped = round(bytes_stripped / pcm_sample_size, 2)
+        seconds_stripped = round(bytes_stripped / pcm_format.pcm_sample_size, 2)
         location = "end" if reverse else "begin"
         LOGGER.log(
             VERBOSE_LOG_LEVEL,
@@ -334,6 +328,7 @@ async def get_stream_details(
     queue_item: QueueItem,
     seek_position: int = 0,
     fade_in: bool = False,
+    prefer_album_loudness: bool = False,
 ) -> StreamDetails:
     """Get streamdetails for the given QueueItem.
 
@@ -395,24 +390,25 @@ async def get_stream_details(
     if not streamdetails.duration:
         streamdetails.duration = queue_item.duration
     # handle volume normalization details
-    is_radio = streamdetails.media_type == MediaType.RADIO or not streamdetails.duration
-    streamdetails.bypass_loudness_normalization = (
-        is_radio
-        and await mass.config.get_core_config_value("streams", CONF_BYPASS_NORMALIZATION_RADIO)
-    ) or (
-        streamdetails.duration is not None
-        and streamdetails.duration < 30
-        and await mass.config.get_core_config_value("streams", CONF_BYPASS_NORMALIZATION_SHORT)
-    )
-    if not streamdetails.loudness:
-        streamdetails.loudness = await mass.music.get_track_loudness(
-            streamdetails.item_id, streamdetails.provider
-        )
+    if result := await mass.music.get_loudness(
+        streamdetails.item_id,
+        streamdetails.provider,
+        media_type=queue_item.media_type,
+    ):
+        streamdetails.loudness, streamdetails.loudness_album = result
+    streamdetails.prefer_album_loudness = prefer_album_loudness
     player_settings = await mass.config.get_player_config(streamdetails.queue_id)
-    if not player_settings.get_value(CONF_VOLUME_NORMALIZATION):
-        streamdetails.target_loudness = None
-    else:
-        streamdetails.target_loudness = player_settings.get_value(CONF_VOLUME_NORMALIZATION_TARGET)
+    streamdetails.enable_volume_normalization = player_settings.get_value(CONF_VOLUME_NORMALIZATION)
+    streamdetails.target_loudness = player_settings.get_value(CONF_VOLUME_NORMALIZATION_TARGET)
+
+    radio_norm_pref = await mass.config.get_core_config_value(
+        "streams", CONF_VOLUME_NORMALIZATION_RADIO
+    )
+    if streamdetails.media_type == MediaType.RADIO and radio_norm_pref == "disabled":
+        streamdetails.enable_volume_normalization = False
+    elif streamdetails.media_type == MediaType.RADIO and radio_norm_pref == "dynamic":
+        streamdetails.force_dynamic_volume_normalization = True
+
     process_time = int((time.time() - time_start) * 1000)
     LOGGER.debug("retrieved streamdetails for %s in %s milliseconds", queue_item.uri, process_time)
     return streamdetails
@@ -449,6 +445,12 @@ async def get_media_stream(
             logger=logger,
         ) as ffmpeg_proc:
             async for chunk in ffmpeg_proc.iter_chunked(pcm_format.pcm_sample_size):
+                # for radio streams we just yield all chunks directly
+                if streamdetails.media_type == MediaType.RADIO:
+                    yield chunk
+                    bytes_sent += len(chunk)
+                    continue
+
                 chunk_number += 1
                 # determine buffer size dynamically
                 if chunk_number < 5 and strip_silence_begin:
@@ -469,7 +471,7 @@ async def get_media_stream(
                 if chunk_number == 5 and strip_silence_begin:
                     # strip silence from begin of audio
                     chunk = await strip_silence(  # noqa: PLW2901
-                        mass, buffer, pcm_format.sample_rate, pcm_format.bit_depth
+                        mass, buffer, pcm_format=pcm_format
                     )
                     bytes_sent += len(chunk)
                     yield chunk
@@ -488,8 +490,7 @@ async def get_media_stream(
                 buffer = await strip_silence(
                     mass,
                     buffer,
-                    sample_rate=pcm_format.sample_rate,
-                    bit_depth=pcm_format.bit_depth,
+                    pcm_format=pcm_format,
                     reverse=True,
                 )
             # send remaining bytes in buffer
@@ -534,18 +535,22 @@ async def get_media_stream(
             streamdetails.duration = seconds_streamed
 
         # parse loudnorm data if we have that collected
-        if loudness_details := parse_loudnorm(" ".join(ffmpeg_proc.log_history)):
-            required_seconds = 600 if streamdetails.media_type == MediaType.RADIO else 120
-            if finished or (seconds_streamed >= required_seconds):
+        required_seconds = 600 if streamdetails.media_type == MediaType.RADIO else 120
+        if streamdetails.loudness is None and (finished or (seconds_streamed >= required_seconds)):
+            loudness_details = parse_loudnorm(" ".join(ffmpeg_proc.log_history))
+            if loudness_details is not None:
                 logger.debug(
-                    "Loudness measurement for %s: %s",
+                    "Loudness measurement for %s: %s dB",
                     streamdetails.uri,
                     loudness_details,
                 )
                 streamdetails.loudness = loudness_details
                 mass.create_task(
-                    mass.music.set_track_loudness(
-                        streamdetails.item_id, streamdetails.provider, loudness_details
+                    mass.music.set_loudness(
+                        streamdetails.item_id,
+                        streamdetails.provider,
+                        loudness_details,
+                        media_type=streamdetails.media_type,
                     )
                 )
         # report playback
@@ -1101,6 +1106,9 @@ def get_player_filter_params(
     elif conf_channels == "right":
         filter_params.append("pan=mono|c0=FR")
 
+    # add a peak limiter at the end of the filter chain
+    filter_params.append("alimiter=limit=-2dB:level=false:asc=true")
+
     return filter_params
 
 
@@ -1229,7 +1237,7 @@ def get_ffmpeg_args(
             resample_filter += f":osr={output_format.sample_rate}"
 
         # bit depth conversion: apply dithering when going down to 16 bits
-        if output_format.bit_depth < input_format.bit_depth:
+        if output_format.bit_depth == 16 and input_format.bit_depth > 16:
             resample_filter += ":osf=s16:dither_method=triangular_hp"
 
         filter_params.append(resample_filter)
@@ -1240,7 +1248,7 @@ def get_ffmpeg_args(
     return generic_args + input_args + extra_args + output_args
 
 
-def parse_loudnorm(raw_stderr: bytes | str) -> LoudnessMeasurement | None:
+def parse_loudnorm(raw_stderr: bytes | str) -> float | None:
     """Parse Loudness measurement from ffmpeg stderr output."""
     stderr_data = raw_stderr.decode() if isinstance(raw_stderr, bytes) else raw_stderr
     if "[Parsed_loudnorm_" not in stderr_data:
@@ -1252,10 +1260,4 @@ def parse_loudnorm(raw_stderr: bytes | str) -> LoudnessMeasurement | None:
         loudness_data = json_loads(stderr_data)
     except JSON_DECODE_EXCEPTIONS:
         return None
-    return LoudnessMeasurement(
-        integrated=float(loudness_data["input_i"]),
-        true_peak=float(loudness_data["input_tp"]),
-        lra=float(loudness_data["input_lra"]),
-        threshold=float(loudness_data["input_thresh"]),
-        target_offset=float(loudness_data["target_offset"]),
-    )
+    return float(loudness_data["input_i"])
index bea02281997a74275be1225c510c1ff095ff90fc..f0014b17362c44b6c1a4b896d3c9537939b23379 100644 (file)
@@ -329,6 +329,24 @@ class AudioTags:
                 return value
         return None
 
+    @property
+    def track_loudness(self) -> float | None:
+        """Try to read/calculate the integrated loudness from the tags."""
+        if (tag := self.tags.get("r128trackgain")) is not None:
+            return -23 - float(int(tag.split(" ")[0]) / 256)
+        if (tag := self.tags.get("replaygaintrackgain")) is not None:
+            return -18 - float(tag.split(" ")[0])
+        return None
+
+    @property
+    def track_album_loudness(self) -> float | None:
+        """Try to read/calculate the integrated loudness from the tags (album level)."""
+        if tag := self.tags.get("r128albumgain"):
+            return -23 - float(int(tag.split(" ")[0]) / 256)
+        if (tag := self.tags.get("replaygainalbumgain")) is not None:
+            return -18 - float(tag.split(" ")[0])
+        return None
+
     @classmethod
     def parse(cls, raw: dict) -> AudioTags:
         """Parse instance from raw ffmpeg info output."""
index 08e9293544aa7c1bd23a9dfb71c1373c8e7907e2..7d35ff48a760d8401135b157717d5920c02b9eee 100644 (file)
@@ -110,6 +110,11 @@ CONF_CREDENTIALS = "credentials"
 CACHE_KEY_PREV_VOLUME = "airplay_prev_volume"
 FALLBACK_VOLUME = 20
 
+AIRPLAY_FLOW_PCM_FORMAT = AudioFormat(
+    content_type=ContentType.PCM_F32LE,
+    sample_rate=44100,
+    bit_depth=32,
+)
 AIRPLAY_PCM_FORMAT = AudioFormat(
     content_type=ContentType.from_bit_depth(16), sample_rate=44100, bit_depth=16
 )
@@ -674,23 +679,15 @@ class AirplayProvider(PlayerProvider):
                 ugp_stream = ugp_provider.streams[media.queue_id]
                 input_format = ugp_stream.output_format
                 audio_source = ugp_stream.subscribe()
-            elif media.media_type == MediaType.RADIO and media.queue_id and media.queue_item_id:
-                # radio stream - consume media stream directly
-                input_format = AIRPLAY_PCM_FORMAT
-                queue_item = self.mass.player_queues.get_item(media.queue_id, media.queue_item_id)
-                audio_source = self.mass.streams.get_media_stream(
-                    streamdetails=queue_item.streamdetails,
-                    pcm_format=AIRPLAY_PCM_FORMAT,
-                )
             elif media.queue_id and media.queue_item_id:
                 # regular queue (flow) stream request
-                input_format = AIRPLAY_PCM_FORMAT
+                input_format = AIRPLAY_FLOW_PCM_FORMAT
                 audio_source = self.mass.streams.get_flow_stream(
                     queue=self.mass.player_queues.get(media.queue_id),
                     start_queue_item=self.mass.player_queues.get_item(
                         media.queue_id, media.queue_item_id
                     ),
-                    pcm_format=AIRPLAY_PCM_FORMAT,
+                    pcm_format=input_format,
                 )
             else:
                 # assume url or some other direct path
index 61e669253ba84b377da50cb115c33ec3391deee0..910199a93cc3ec916ce7097ab3b8d3fb11c5aacd 100644 (file)
@@ -784,6 +784,11 @@ class LocalFileSystemProvider(MusicProvider):
         if tags.musicbrainz_recordingid:
             track.mbid = tags.musicbrainz_recordingid
         track.metadata.chapters = UniqueList(tags.chapters)
+        # handle (optional) loudness measurement tag(s)
+        if tags.track_loudness is not None:
+            await self.mass.music.set_loudness(
+                track.item_id, self.instance_id, tags.track_loudness, tags.track_album_loudness
+            )
         return track
 
     async def _parse_artist(
index 7dcbfd53faa27ad624744b3060e15c004876c6bc..0a8a74306753fd8592f2df9924b5fc5bba61cba7 100644 (file)
@@ -371,7 +371,9 @@ class SlimprotoProvider(PlayerProvider):
 
         # this is a syncgroup, we need to handle this with a multi client stream
         master_audio_format = AudioFormat(
-            content_type=ContentType.from_bit_depth(24), sample_rate=48000, bit_depth=24
+            content_type=ContentType.PCM_F32LE,
+            sample_rate=48000,
+            bit_depth=32,
         )
         if media.media_type == MediaType.ANNOUNCEMENT:
             # special case: stream announcement
index c0b0ab15277a2344afc85f969522fff1f44bfbab..2b4930c248725bcaf4f596289d58f96d2163d9e1 100644 (file)
@@ -79,6 +79,15 @@ DEFAULT_SNAPCAST_FORMAT = AudioFormat(
     channels=2,
 )
 
+DEFAULT_SNAPCAST_PCM_FORMAT = AudioFormat(
+    # the format that is used as intermediate pcm stream,
+    # we prefer F32 here to account for volume normalization
+    content_type=ContentType.PCM_F32LE,
+    sample_rate=48000,
+    bit_depth=32,
+    channels=2,
+)
+
 
 async def setup(
     mass: MusicAssistant, manifest: ProviderManifest, config: ProviderConfig
@@ -447,7 +456,7 @@ class SnapCastProvider(PlayerProvider):
         await self._get_snapgroup(player_id).set_stream("default")
         await self.cmd_stop(player_id=player_id)
 
-    async def play_media(self, player_id: str, media: PlayerMedia) -> None:  # noqa: PLR0915
+    async def play_media(self, player_id: str, media: PlayerMedia) -> None:
         """Handle PLAY MEDIA on given player."""
         player = self.mass.players.get(player_id)
         if player.synced_to:
@@ -477,23 +486,15 @@ class SnapCastProvider(PlayerProvider):
             ugp_stream = ugp_provider.streams[media.queue_id]
             input_format = ugp_stream.output_format
             audio_source = ugp_stream.subscribe()
-        elif media.media_type == MediaType.RADIO and media.queue_id and media.queue_item_id:
-            # radio stream - consume media stream directly
-            input_format = DEFAULT_SNAPCAST_FORMAT
-            queue_item = self.mass.player_queues.get_item(media.queue_id, media.queue_item_id)
-            audio_source = self.mass.streams.get_media_stream(
-                streamdetails=queue_item.streamdetails,
-                pcm_format=DEFAULT_SNAPCAST_FORMAT,
-            )
         elif media.queue_id and media.queue_item_id:
             # regular queue (flow) stream request
-            input_format = DEFAULT_SNAPCAST_FORMAT
+            input_format = DEFAULT_SNAPCAST_PCM_FORMAT
             audio_source = self.mass.streams.get_flow_stream(
                 queue=self.mass.player_queues.get(media.queue_id),
                 start_queue_item=self.mass.player_queues.get_item(
                     media.queue_id, media.queue_item_id
                 ),
-                pcm_format=DEFAULT_SNAPCAST_FORMAT,
+                pcm_format=input_format,
             )
         else:
             # assume url or some other direct path
index 3d8f24333ad40fcd4ee16d64d01f2c93657c3176..c04dea3c5eaa4814d9f8c3879f998515dfffb550 100644 (file)
@@ -54,7 +54,9 @@ if TYPE_CHECKING:
 # ruff: noqa: ARG002
 
 UGP_FORMAT = AudioFormat(
-    content_type=ContentType.from_bit_depth(16), sample_rate=44100, bit_depth=16
+    content_type=ContentType.PCM_F32LE,
+    sample_rate=44100,
+    bit_depth=32,
 )
 UGP_PREFIX = "ugp_"