Switch from unmaintained pytube to yt-dlp. Check user is subscribed to YTM premium...
authortronikos <tronikos@users.noreply.github.com>
Mon, 24 Jun 2024 08:01:11 +0000 (01:01 -0700)
committerGitHub <noreply@github.com>
Mon, 24 Jun 2024 08:01:11 +0000 (10:01 +0200)
* Add option to use yt_dlp instead of pytube. yt_dlp is much slower but more reliable

* fix

* ruff

* Use format_selector to select bestaudio

* Add yt dlp as default stream resolver.

* Add check for YT Premium users.

* Remove unnecessary pytube log adjustment.

* Fix conflicts.

* fix return type and remove unused const

* use YTM_DOMAIN constant

* Disable quiet in yt_dlp for debug log level

* Catch DownloadError and raise UnplayableMediaError

* Check format_id that is likely to change than format

---------

Co-authored-by: Marvin Schenkel <marvinschenkel@gmail.com>
music_assistant/server/providers/ytmusic/__init__.py
music_assistant/server/providers/ytmusic/helpers.py
music_assistant/server/providers/ytmusic/manifest.json
requirements_all.txt

index c50fe60464cfe846b5b3c5b8df219878844a0a11..bc36dd396b409b92f2bad262efea793965dfe4d9 100644 (file)
@@ -4,13 +4,12 @@ from __future__ import annotations
 
 import asyncio
 import logging
-import re
 from collections.abc import AsyncGenerator
 from time import time
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote
 
-import pytube
+import yt_dlp
 from ytmusicapi.constants import SUPPORTED_LANGUAGES
 
 from music_assistant.common.models.config_entries import ConfigEntry, ConfigValueType
@@ -74,7 +73,6 @@ CONF_REFRESH_TOKEN = "refresh_token"
 CONF_TOKEN_TYPE = "token_type"
 CONF_EXPIRY_TIME = "expiry_time"
 
-YT_DOMAIN = "https://www.youtube.com"
 YTM_DOMAIN = "https://music.youtube.com"
 YTM_BASE_URL = f"{YTM_DOMAIN}/youtubei/v1/"
 VARIOUS_ARTISTS_YTM_ID = "UCUTXlgdcKU5vfzFqHOWIvkA"
@@ -96,6 +94,7 @@ YT_PERSONAL_PLAYLISTS = (
     "RDTMAK5uy_nilrsVWxrKskY0ZUpVZ3zpB0u4LwWTVJ4",  # Replay Mix
     "RDTMAK5uy_mZtXeU08kxXJOUhL0ETdAuZTh1z7aAFAo",  # Archive Mix
 )
+YTM_PREMIUM_CHECK_TRACK_ID = "dQw4w9WgXcQ"
 
 SUPPORTED_FEATURES = (
     ProviderFeature.LIBRARY_ARTISTS,
@@ -184,19 +183,17 @@ class YoutubeMusicProvider(MusicProvider):
     _headers = None
     _context = None
     _cookies = None
-    _signature_timestamp = 0
     _cipher = None
 
     async def handle_async_init(self) -> None:
         """Set up the YTMusic provider."""
-        logging.getLogger("pytube").setLevel(self.logger.level + 10)
+        logging.getLogger("yt_dlp").setLevel(self.logger.level + 10)
         if not self.config.get_value(CONF_AUTH_TOKEN):
             msg = "Invalid login credentials"
             raise LoginFailed(msg)
         await self._initialize_headers()
         await self._initialize_context()
         self._cookies = {"CONSENT": "YES+1"}
-        self._signature_timestamp = await self._get_signature_timestamp()
         # get default language (that is supported by YTM)
         mass_locale = self.mass.metadata.locale
         for lang_code in SUPPORTED_LANGUAGES:
@@ -205,6 +202,8 @@ class YoutubeMusicProvider(MusicProvider):
                 break
         else:
             self.language = "en"
+        if not await self._user_has_ytm_premium():
+            raise LoginFailed("User does not have Youtube Music Premium")
 
     @property
     def supported_features(self) -> tuple[ProviderFeature, ...]:
@@ -332,7 +331,6 @@ class YoutubeMusicProvider(MusicProvider):
         if track_obj := await get_track(
             prov_track_id=prov_track_id,
             headers=self._headers,
-            signature_timestamp=self._signature_timestamp,
             language=self.language,
         ):
             return await self._parse_track(track_obj)
@@ -519,46 +517,26 @@ class YoutubeMusicProvider(MusicProvider):
             return tracks
         return []
 
-    async def get_stream_details(self, item_id: str, retry=0) -> StreamDetails:
+    async def get_stream_details(self, item_id: str) -> StreamDetails:
         """Return the content details for the given track when it will be streamed."""
-        await self._check_oauth_token()
-        track_obj = await get_track(
-            prov_track_id=item_id,
-            headers=self._headers,
-            signature_timestamp=self._signature_timestamp,
-        )
-        if not track_obj:
-            msg = f"Item {item_id} not found"
-            raise MediaNotFoundError(msg)
-        stream_format = await self._parse_stream_format(track_obj)
-        url = await self._parse_stream_url(stream_format=stream_format, item_id=item_id)
-        if not await self._is_valid_deciphered_url(url=url):
-            if retry > 4:
-                self.logger.warning(
-                    f"Could not resolve a valid URL for item '{item_id}'. "
-                    "Are you playing music on another device using the same account?"
-                )
-                msg = f"Could not resolve a valid URL for item '{item_id}'."
-                raise UnplayableMediaError(msg)
-            self.logger.debug(
-                "Invalid playback URL encountered. Retrying with new signature timestamp."
-            )
-            self._cipher = None
-            self._signature_timestamp = await self._get_signature_timestamp()
-            return await self.get_stream_details(item_id=item_id, retry=retry + 1)
+        stream_format = await self._get_stream_format(item_id=item_id)
+        self.logger.debug("Found stream_format: %s for song %s", stream_format["format"], item_id)
         stream_details = StreamDetails(
             provider=self.instance_id,
             item_id=item_id,
             audio_format=AudioFormat(
-                content_type=ContentType.try_parse(stream_format["mimeType"]),
+                content_type=ContentType.try_parse(stream_format["audio_ext"]),
             ),
             stream_type=StreamType.HTTP,
-            path=url,
+            path=stream_format["url"],
         )
-        if stream_format.get("audioChannels") and str(stream_format.get("audioChannels")).isdigit():
-            stream_details.audio_format.channels = int(stream_format.get("audioChannels"))
-        if stream_format.get("audioSampleRate") and stream_format.get("audioSampleRate").isdigit():
-            stream_details.audio_format.sample_rate = int(stream_format.get("audioSampleRate"))
+        if (
+            stream_format.get("audio_channels")
+            and str(stream_format.get("audio_channels")).isdigit()
+        ):
+            stream_details.audio_format.channels = int(stream_format.get("audio_channels"))
+        if stream_format.get("asr"):
+            stream_details.audio_format.sample_rate = int(stream_format.get("asr"))
         return stream_details
 
     async def _post_data(self, endpoint: str, data: dict[str, str], **kwargs):
@@ -593,6 +571,7 @@ class YoutubeMusicProvider(MusicProvider):
             self.config.update({CONF_EXPIRY_TIME: time() + token["expires_in"]})
             self.config.update({CONF_TOKEN_TYPE: token["token_type"]})
             await self._initialize_headers()
+            await self._update_ytdlp_oauth_token_cache()
 
     async def _initialize_headers(self) -> dict[str, str]:
         """Return headers to include in the requests."""
@@ -603,7 +582,7 @@ class YoutubeMusicProvider(MusicProvider):
             "Accept-Language": "en-US,en;q=0.5",
             "Content-Type": "application/json",
             "X-Goog-AuthUser": "0",
-            "x-origin": "https://music.youtube.com",
+            "x-origin": YTM_DOMAIN,
             "X-Goog-Request-Time": str(int(time())),
             "Authorization": auth,
         }
@@ -686,7 +665,7 @@ class YoutubeMusicProvider(MusicProvider):
                     item_id=str(artist_id),
                     provider_domain=self.domain,
                     provider_instance=self.instance_id,
-                    url=f"https://music.youtube.com/channel/{artist_id}",
+                    url=f"{YTM_DOMAIN}/channel/{artist_id}",
                 )
             },
         )
@@ -789,70 +768,47 @@ class YoutubeMusicProvider(MusicProvider):
             track.duration = int(track_obj["duration_seconds"])
         return track
 
-    async def _get_signature_timestamp(self):
-        """Get a signature timestamp required to generate valid stream URLs."""
-        response = await self._get_data(url=YTM_DOMAIN)
-        match = re.search(r'jsUrl"\s*:\s*"([^"]+)"', response)
-        if match is None:
-            # retry with youtube domain
-            response = await self._get_data(url=YT_DOMAIN)
-            match = re.search(r'jsUrl"\s*:\s*"([^"]+)"', response)
-        if match is None:
-            msg = "Could not identify the URL for base.js player."
-            raise Exception(msg)  # pylint: disable=broad-exception-raised
-        url = YTM_DOMAIN + match.group(1)
-        response = await self._get_data(url=url)
-        match = re.search(r"signatureTimestamp[:=](\d+)", response)
-        if match is None:
-            msg = "Unable to identify the signatureTimestamp."
-            raise Exception(msg)  # pylint: disable=broad-exception-raised
-        return int(match.group(1))
-
-    async def _parse_stream_url(self, stream_format: dict, item_id: str) -> str:
-        """Figure out the stream URL to use based on the YT track object."""
-        url = None
-        if stream_format.get("signatureCipher"):
-            # Secured URL
-            cipher_parts = {}
-            for part in stream_format["signatureCipher"].split("&"):
-                key, val = part.split("=", maxsplit=1)
-                cipher_parts[key] = unquote(val)
-            signature = await self._decipher_signature(
-                ciphered_signature=cipher_parts["s"], item_id=item_id
-            )
-            url = cipher_parts["url"] + "&sig=" + signature
-        elif stream_format.get("url"):
-            # Non secured URL
-            url = stream_format.get("url")
-        else:
-            self.logger.debug(
-                f"Something went wrong. No URL found for stream format {stream_format}"
-            )
-        return url
-
-    async def _decipher_signature(self, ciphered_signature: str, item_id: str):
-        """Decipher the signature, required to build the Stream URL."""
-
-        def _decipher():
-            embed_url = f"https://www.youtube.com/embed/{item_id}"
-            embed_html = pytube.request.get(embed_url)
-            js_url = pytube.extract.js_url(embed_html)
-            ytm_js = pytube.request.get(js_url)
-            cipher = pytube.cipher.Cipher(js=ytm_js)
-            return cipher
-
-        if not self._cipher:
-            self.logger.debug("Creating a new cipher")
-            self._cipher = await asyncio.to_thread(_decipher)
-        return self._cipher.get_signature(ciphered_signature)
-
-    async def _is_valid_deciphered_url(self, url: str) -> bool:
-        """Verify whether the URL has been deciphered using a valid cipher."""
-        async with self.mass.http_session.head(url) as response:
-            # TODO: Remove after 403 issue has been verified as fixed
-            if response.status != 200:
-                self.logger.debug(f"Deciphered URL HTTP status: {response.status}")
-            return response.status != 403
+    async def _get_stream_format(self, item_id: str) -> dict[str, Any]:
+        """Figure out the stream URL to use and return the highest quality."""
+        await self._check_oauth_token()
+
+        def _extract_best_stream_url_format() -> dict[str, Any]:
+            url = f"{YTM_DOMAIN}/watch?v={item_id}"
+            ydl_opts = {
+                "quiet": self.logger.level > logging.DEBUG,
+                "username": "oauth2",
+                "password": "",
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                try:
+                    info = ydl.extract_info(url, download=False)
+                except yt_dlp.utils.DownloadError as err:
+                    raise UnplayableMediaError(err) from err
+                format_selector = ydl.build_format_selector("m4a/bestaudio")
+                stream_format = next(format_selector({"formats": info["formats"]}))
+                return stream_format
+
+        return await asyncio.to_thread(_extract_best_stream_url_format)
+
+    async def _update_ytdlp_oauth_token_cache(self) -> None:
+        """Update the ytdlp token so we can grab the best available quality audio stream."""
+
+        def _update_oauth_cache() -> None:
+            ydl_opts = {
+                "quiet": self.logger.level > logging.DEBUG,
+                "username": "oauth2",
+                "password": "",
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                token_data = {
+                    "access_token": self.config.get_value(CONF_AUTH_TOKEN),
+                    "expires": self.config.get_value(CONF_EXPIRY_TIME),
+                    "token_type": self.config.get_value(CONF_TOKEN_TYPE),
+                    "refresh_token": self.config.get_value(CONF_REFRESH_TOKEN),
+                }
+                ydl.cache.store("youtube-oauth2", "token_data", token_data)
+
+        await asyncio.to_thread(_update_oauth_cache)
 
     def _get_item_mapping(self, media_type: MediaType, key: str, name: str) -> ItemMapping:
         return ItemMapping(
@@ -868,6 +824,12 @@ class YoutubeMusicProvider(MusicProvider):
             artist_id = VARIOUS_ARTISTS_YTM_ID
         return self._get_item_mapping(MediaType.ARTIST, artist_id, artist_obj.get("name"))
 
+    async def _user_has_ytm_premium(self) -> bool:
+        """Check if the user has Youtube Music Premium."""
+        stream_format = await self._get_stream_format(YTM_PREMIUM_CHECK_TRACK_ID)
+        # Only premium users can stream the HQ stream of this song
+        return stream_format["format_id"] == "141"
+
     async def _parse_thumbnails(self, thumbnails_obj: dict) -> list[MediaItemImage]:
         """Parse and YTM thumbnails to MediaItemImage."""
         result: list[MediaItemImage] = []
@@ -894,25 +856,3 @@ class YoutubeMusicProvider(MusicProvider):
                 )
             )
         return result
-
-    @classmethod
-    async def _parse_stream_format(cls, track_obj: dict) -> dict:
-        """Grab the highest available audio stream from available streams."""
-        stream_format = {}
-        quality_mapper = {
-            "AUDIO_QUALITY_LOW": 1,
-            "AUDIO_QUALITY_MEDIUM": 2,
-            "AUDIO_QUALITY_HIGH": 3,
-        }
-        if "streamingData" not in track_obj:
-            raise MediaNotFoundError("No stream found for this track")
-        for adaptive_format in track_obj["streamingData"]["adaptiveFormats"]:
-            if adaptive_format["mimeType"].startswith("audio") and (
-                not stream_format
-                or quality_mapper.get(adaptive_format["audioQuality"], 0)
-                > quality_mapper.get(stream_format["audioQuality"], 0)
-            ):
-                stream_format = adaptive_format
-        if stream_format is None:
-            raise MediaNotFoundError("No stream found for this track")
-        return stream_format
index 7ff9486a181e568ba98c02202495c47c78f19e61..73ae6631789693ee45c3d147633e093da6542007 100644 (file)
@@ -70,13 +70,13 @@ async def get_playlist(
 
 
 async def get_track(
-    prov_track_id: str, headers: dict[str, str], signature_timestamp: str, language: str = "en"
+    prov_track_id: str, headers: dict[str, str], language: str = "en"
 ) -> dict[str, str] | None:
     """Async wrapper around the ytmusicapi get_playlist function."""
 
     def _get_song():
         ytm = ytmusicapi.YTMusic(auth=headers, language=language)
-        track_obj = ytm.get_song(videoId=prov_track_id, signatureTimestamp=signature_timestamp)
+        track_obj = ytm.get_song(videoId=prov_track_id)
         track = {}
         if "videoDetails" not in track_obj:
             # video that no longer exists
index ae37e8c06318d220814a25a21b3e0bbcf421d0ca..4ee27fa4b40ce54e7a6d5def14bec390ff92933b 100644 (file)
@@ -4,7 +4,7 @@
   "name": "YouTube Music",
   "description": "Support for the YouTube Music streaming provider in Music Assistant.",
   "codeowners": ["@MarvinSchenkel"],
-  "requirements": ["ytmusicapi==1.7.0", "git+https://github.com/MarvinSchenkel/pytube.git"],
+  "requirements": ["ytmusicapi==1.7.0", "git+https://github.com/coletdjnz/yt-dlp-youtube-oauth2.git", "yt-dlp==2024.5.27"],
   "documentation": "https://music-assistant.io/music-providers/youtube-music/",
   "multi_instance": true
 }
index f89879642287fe4773db210a94c38d3a42903de3..1a9f2bb33743ab4518d2c2df6b4f9b67ad985e64 100644 (file)
@@ -18,7 +18,7 @@ deezer-python-async==0.3.0
 defusedxml==0.7.1
 eyeD3==0.9.7
 faust-cchardet>=2.1.18
-git+https://github.com/MarvinSchenkel/pytube.git
+git+https://github.com/coletdjnz/yt-dlp-youtube-oauth2.git
 hass-client==1.1.1
 ifaddr==0.2.0
 mashumaro==3.13.1
@@ -42,5 +42,6 @@ soundcloudpy==0.1.0
 tidalapi==0.7.6
 unidecode==1.3.8
 xmltodict==0.13.0
+yt-dlp==2024.5.27
 ytmusicapi==1.7.0
 zeroconf==0.132.2