gpodder/ itunes/ rssfeed: fix bad header for certain podcasts (#2425)
author Fabian Munkes <105975993+fmunkes@users.noreply.github.com>
Fri, 26 Sep 2025 23:42:00 +0000 (01:42 +0200)
committer GitHub <noreply@github.com>
Fri, 26 Sep 2025 23:42:00 +0000 (01:42 +0200)
music_assistant/helpers/podcast_parsers.py
music_assistant/providers/gpodder/__init__.py
music_assistant/providers/itunes_podcasts/__init__.py
music_assistant/providers/podcastfeed/__init__.py

index d471cd201e43447623a970ea028623c326cca12d..0ff5a825c2a624f311cc932fc399ba6cdcf9be9c 100644 (file)
@@ -1,8 +1,11 @@
 """Podcastfeed -> Mass."""
 
 from datetime import datetime
+from io import BytesIO
 from typing import Any
 
+import aiohttp
+import podcastparser
 from music_assistant_models.enums import ContentType, ImageType, MediaType
 from music_assistant_models.media_items import (
     AudioFormat,
@@ -16,6 +19,26 @@ from music_assistant_models.media_items import (
 )
 
 
+async def get_podcastparser_dict(
+    *, session: aiohttp.ClientSession, feed_url: str, max_episodes: int = 0
+) -> dict[str, Any]:
+    """Get feed parsed by podcastparser by providing the url.
+
+    The feed is requested with a browser-like User-Agent first and, only if
+    that request fails, once more without any extra headers. The first
+    successful response is parsed and returned.
+
+    max_episodes = 0 does not limit the returned episodes.
+
+    Raises aiohttp.ClientError (the error of the last attempt) if the feed
+    could not be retrieved with any of the header variants.
+    """
+    last_error: aiohttp.ClientError | None = None
+    # without user agent, some feeds can not be retrieved
+    # https://github.com/music-assistant/support/issues/3596
+    # but reports on discord show that for other feeds the opposite is true,
+    # so fall back to a request without it instead of giving up right away
+    for headers in ({"User-Agent": "Mozilla/5.0"}, {}):
+        try:
+            # raise_for_status turns a non-2xx answer into a ClientError;
+            # the context manager releases the connection in every case
+            async with session.get(feed_url, headers=headers, raise_for_status=True) as response:
+                feed_data = await response.read()
+        except aiohttp.ClientError as err:
+            # remember the failure and try the next header variant
+            last_error = err
+            continue
+        return podcastparser.parse(feed_url, BytesIO(feed_data), max_episodes=max_episodes)  # type: ignore[no-any-return]
+    assert last_error is not None  # for type checking
+    raise last_error
+
+
 def parse_podcast(
     *,
     feed_url: str,
index 35e807abf4ad124fa59a03b99779d841b76c76f0..ce5fce6b3bb409379a0bf033a0075656fb5ab8cc 100644 (file)
@@ -16,10 +16,9 @@ from __future__ import annotations
 import asyncio
 import time
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
-import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueType, ProviderConfig
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -38,6 +37,7 @@ from music_assistant_models.media_items import AudioFormat, MediaItemType, Podca
 from music_assistant_models.streamdetails import StreamDetails
 
 from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
     get_stream_url_and_guid_from_episode,
     parse_podcast,
     parse_podcast_episode,
@@ -349,8 +349,12 @@ class GPodder(MusicProvider):
             self.logger.debug("Adding podcast with feed %s to library", feed_url)
             # parse podcast
             try:
-                parsed_podcast = await self._get_podcast(feed_url)
-            except RuntimeError:
+                parsed_podcast = await get_podcastparser_dict(
+                    session=self.mass.http_session,
+                    feed_url=feed_url,
+                    max_episodes=self.max_episodes,
+                )
+            except ClientError:
                 self.logger.warning(f"Was unable to obtain podcast with feed {feed_url}")
                 continue
             await self._cache_set_podcast(feed_url, parsed_podcast)
@@ -603,15 +607,6 @@ class GPodder(MusicProvider):
                 return stream_url
         return None
 
-    async def _get_podcast(self, feed_url: str) -> dict[str, Any]:
-        # see music-assistant/server@6aae82e
-        response = await self.mass.http_session.get(feed_url, headers={"User-Agent": "Mozilla/5.0"})
-        if response.status != 200:
-            raise RuntimeError
-        feed_data = await response.read()
-        feed_stream = BytesIO(feed_data)
-        return podcastparser.parse(feed_url, feed_stream, max_episodes=self.max_episodes)  # type: ignore[no-any-return]
-
     async def _cache_get_podcast(self, prov_podcast_id: str) -> dict[str, Any]:
         parsed_podcast = await self.mass.cache.get(
             key=prov_podcast_id,
@@ -620,7 +615,11 @@ class GPodder(MusicProvider):
             default=None,
         )
         if parsed_podcast is None:
-            parsed_podcast = await self._get_podcast(feed_url=prov_podcast_id)
+            parsed_podcast = await get_podcastparser_dict(
+                session=self.mass.http_session,
+                feed_url=prov_podcast_id,
+                max_episodes=self.max_episodes,
+            )
             await self._cache_set_podcast(feed_url=prov_podcast_id, parsed_podcast=parsed_podcast)
 
         # this is a dictionary from podcastparser
index dd345c941041afa6275e8b5159dc7fede333843c..c4e61d6bd9a7bd8a40cf4a7799ddc18e20e03f87 100644 (file)
@@ -3,13 +3,12 @@
 from __future__ import annotations
 
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 import aiofiles
 import orjson
-import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -32,7 +31,11 @@ from music_assistant_models.media_items import (
 )
 from music_assistant_models.streamdetails import StreamDetails
 
-from music_assistant.helpers.podcast_parsers import parse_podcast, parse_podcast_episode
+from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
+    parse_podcast,
+    parse_podcast_episode,
+)
 from music_assistant.helpers.throttle_retry import ThrottlerManager, throttle_with_retries
 from music_assistant.models.music_provider import MusicProvider
 from music_assistant.providers.itunes_podcasts.schema import (
@@ -331,17 +334,14 @@ class ITunesPodcastsProvider(MusicProvider):
             default=None,
         )
         if parsed_podcast is None:
-            # see music-assistant/server@6aae82e
-            response = await self.mass.http_session.get(
-                prov_podcast_id, headers={"User-Agent": "Mozilla/5.0"}
-            )
-            if response.status != 200:
-                raise MediaNotFoundError
-            feed_data = await response.read()
-            feed_stream = BytesIO(feed_data)
-            parsed_podcast = podcastparser.parse(
-                prov_podcast_id, feed_stream, max_episodes=self.max_episodes
-            )
+            try:
+                parsed_podcast = await get_podcastparser_dict(
+                    session=self.mass.http_session,
+                    feed_url=prov_podcast_id,
+                    max_episodes=self.max_episodes,
+                )
+            except ClientError as exc:
+                raise MediaNotFoundError from exc
             await self._cache_set_podcast(feed_url=prov_podcast_id, parsed_podcast=parsed_podcast)
 
         # this is a dictionary from podcastparser
index c10c76789b6d2932e4ee43143b93e7d14287b575..b40bbc910728877f49072e2b368384b3520503b5 100644 (file)
@@ -10,10 +10,10 @@ multiple instances with each one feed must exist.
 from __future__ import annotations
 
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
 import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueType
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -27,7 +27,11 @@ from music_assistant_models.media_items import AudioFormat, Podcast, PodcastEpis
 from music_assistant_models.streamdetails import StreamDetails
 
 from music_assistant.helpers.compare import create_safe_string
-from music_assistant.helpers.podcast_parsers import parse_podcast, parse_podcast_episode
+from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
+    parse_podcast,
+    parse_podcast_episode,
+)
 from music_assistant.models.music_provider import MusicProvider
 
 if TYPE_CHECKING:
@@ -94,8 +98,8 @@ class PodcastMusicprovider(MusicProvider):
 
         try:
             self.parsed_podcast: dict[str, Any] = await self._cache_get_podcast()
-        except RuntimeError as exc:
-            raise RuntimeError("Invalid URL") from exc
+        except ClientError as exc:
+            raise MediaNotFoundError("Invalid URL") from exc
 
     @property
     def is_streaming_provider(self) -> bool:
@@ -203,17 +207,8 @@ class PodcastMusicprovider(MusicProvider):
         )
 
     async def _get_podcast(self) -> dict[str, Any]:
-        # without user agent, some feeds can not be retrieved
-        # https://github.com/music-assistant/support/issues/3596
         assert self.feed_url is not None
-        response = await self.mass.http_session.get(
-            self.feed_url, headers={"User-Agent": "Mozilla/5.0"}
-        )
-        if response.status != 200:
-            raise RuntimeError
-        feed_data = await response.read()
-        feed_stream = BytesIO(feed_data)
-        return podcastparser.parse(self.feed_url, feed_stream)  # type:ignore [no-any-return]
+        return await get_podcastparser_dict(session=self.mass.http_session, feed_url=self.feed_url)
 
     async def _cache_get_podcast(self) -> dict[str, Any]:
         parsed_podcast = await self.mass.cache.get(