From 435c4e594e0581c3d7b2470cd82f66ff0312bcd9 Mon Sep 17 00:00:00 2001
From: Fabian Munkes <105975993+fmunkes@users.noreply.github.com>
Date: Sat, 27 Sep 2025 01:42:00 +0200
Subject: [PATCH] gpodder/ itunes/ rssfeed: fix bad header for certain podcasts (#2425)

---
 music_assistant/helpers/podcast_parsers.py    | 35 +++++++++++++++++++
 music_assistant/providers/gpodder/__init__.py | 27 +++++++++---------
 .../providers/itunes_podcasts/__init__.py     | 28 +++++++++----------
 .../providers/podcastfeed/__init__.py         | 23 ++++++---------
 4 files changed, 71 insertions(+), 42 deletions(-)

diff --git a/music_assistant/helpers/podcast_parsers.py b/music_assistant/helpers/podcast_parsers.py
index d471cd20..0ff5a825 100644
--- a/music_assistant/helpers/podcast_parsers.py
+++ b/music_assistant/helpers/podcast_parsers.py
@@ -1,8 +1,11 @@
 """Podcastfeed -> Mass."""
 
 from datetime import datetime
+from io import BytesIO
 from typing import Any
 
+import aiohttp
+import podcastparser
 from music_assistant_models.enums import ContentType, ImageType, MediaType
 from music_assistant_models.media_items import (
     AudioFormat,
@@ -16,6 +19,38 @@ from music_assistant_models.media_items import (
 )
 
 
+async def get_podcastparser_dict(
+    *, session: aiohttp.ClientSession, feed_url: str, max_episodes: int = 0
+) -> dict[str, Any]:
+    """Get feed parsed by podcastparser by providing the url.
+
+    The feed is requested with a browser-like user agent first and, if that
+    request fails, once more without any custom headers.
+
+    max_episodes = 0 does not limit the returned episodes.
+
+    Raises the aiohttp ClientError of the last attempt if every attempt fails.
+    """
+    last_error: aiohttp.ClientError | None = None
+    # without user agent, some feeds can not be retrieved
+    # https://github.com/music-assistant/support/issues/3596
+    # but reports on discord show that the opposite may also be true,
+    # so a failed attempt must fall through to the next set of headers
+    for headers in ({"User-Agent": "Mozilla/5.0"}, {}):
+        try:
+            # raise_for_status raises ClientResponseError (a ClientError) on status failure
+            async with session.get(feed_url, headers=headers, raise_for_status=True) as response:
+                feed_data = await response.read()
+        except aiohttp.ClientError as err:
+            last_error = err
+            continue
+        # first successful download wins, no further request is made
+        feed_stream = BytesIO(feed_data)
+        return podcastparser.parse(feed_url, feed_stream, max_episodes=max_episodes)  # type: ignore[no-any-return]
+    assert last_error is not None  # for type checking
+    raise last_error
+
+
 def parse_podcast(
     *,
     feed_url: str,
diff --git a/music_assistant/providers/gpodder/__init__.py b/music_assistant/providers/gpodder/__init__.py
index 35e807ab..ce5fce6b 100644
--- a/music_assistant/providers/gpodder/__init__.py
+++ b/music_assistant/providers/gpodder/__init__.py
@@ -16,10 +16,9 @@ from __future__ import annotations
 import asyncio
 import time
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
-import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueType, ProviderConfig
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -38,6 +37,7 @@ from music_assistant_models.media_items import AudioFormat, MediaItemType, Podca
 from music_assistant_models.streamdetails import StreamDetails
 
 from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
     get_stream_url_and_guid_from_episode,
     parse_podcast,
     parse_podcast_episode,
@@ -349,8 +349,12 @@ class GPodder(MusicProvider):
             self.logger.debug("Adding podcast with feed %s to library", feed_url)
             # parse podcast
             try:
-                parsed_podcast = await self._get_podcast(feed_url)
-            except RuntimeError:
+                parsed_podcast = await get_podcastparser_dict(
+                    session=self.mass.http_session,
+                    feed_url=feed_url,
+                    max_episodes=self.max_episodes,
+                )
+            except ClientError:
                 self.logger.warning(f"Was unable to obtain podcast with feed {feed_url}")
                 continue
             await self._cache_set_podcast(feed_url, parsed_podcast)
@@ -603,15 +607,6 @@ class GPodder(MusicProvider):
                 return stream_url
         return None
 
-    async def _get_podcast(self, feed_url: str) -> dict[str, Any]:
-        # see music-assistant/server@6aae82e
-        response = await self.mass.http_session.get(feed_url, headers={"User-Agent": "Mozilla/5.0"})
-        if response.status != 200:
-            raise RuntimeError
-        feed_data = await response.read()
-        feed_stream = BytesIO(feed_data)
-        return podcastparser.parse(feed_url, feed_stream, max_episodes=self.max_episodes)  # type: ignore[no-any-return]
-
     async def _cache_get_podcast(self, prov_podcast_id: str) -> dict[str, Any]:
         parsed_podcast = await self.mass.cache.get(
             key=prov_podcast_id,
@@ -620,7 +615,11 @@ class GPodder(MusicProvider):
             default=None,
         )
         if parsed_podcast is None:
-            parsed_podcast = await self._get_podcast(feed_url=prov_podcast_id)
+            parsed_podcast = await get_podcastparser_dict(
+                session=self.mass.http_session,
+                feed_url=prov_podcast_id,
+                max_episodes=self.max_episodes,
+            )
             await self._cache_set_podcast(feed_url=prov_podcast_id, parsed_podcast=parsed_podcast)
 
         # this is a dictionary from podcastparser
diff --git a/music_assistant/providers/itunes_podcasts/__init__.py b/music_assistant/providers/itunes_podcasts/__init__.py
index dd345c94..c4e61d6b 100644
--- a/music_assistant/providers/itunes_podcasts/__init__.py
+++ b/music_assistant/providers/itunes_podcasts/__init__.py
@@ -3,13 +3,12 @@
 from __future__ import annotations
 
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 import aiofiles
 import orjson
-import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -32,7 +31,11 @@ from music_assistant_models.media_items import (
 )
 from music_assistant_models.streamdetails import StreamDetails
 
-from music_assistant.helpers.podcast_parsers import parse_podcast, parse_podcast_episode
+from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
+    parse_podcast,
+    parse_podcast_episode,
+)
 from music_assistant.helpers.throttle_retry import ThrottlerManager, throttle_with_retries
 from music_assistant.models.music_provider import MusicProvider
 from music_assistant.providers.itunes_podcasts.schema import (
@@ -331,17 +334,14 @@ class ITunesPodcastsProvider(MusicProvider):
             default=None,
         )
         if parsed_podcast is None:
-            # see music-assistant/server@6aae82e
-            response = await self.mass.http_session.get(
-                prov_podcast_id, headers={"User-Agent": "Mozilla/5.0"}
-            )
-            if response.status != 200:
-                raise MediaNotFoundError
-            feed_data = await response.read()
-            feed_stream = BytesIO(feed_data)
-            parsed_podcast = podcastparser.parse(
-                prov_podcast_id, feed_stream, max_episodes=self.max_episodes
-            )
+            try:
+                parsed_podcast = await get_podcastparser_dict(
+                    session=self.mass.http_session,
+                    feed_url=prov_podcast_id,
+                    max_episodes=self.max_episodes,
+                )
+            except ClientError as exc:
+                raise MediaNotFoundError from exc
             await self._cache_set_podcast(feed_url=prov_podcast_id, parsed_podcast=parsed_podcast)
 
         # this is a dictionary from podcastparser
diff --git a/music_assistant/providers/podcastfeed/__init__.py b/music_assistant/providers/podcastfeed/__init__.py
index c10c7678..b40bbc91 100644
--- a/music_assistant/providers/podcastfeed/__init__.py
+++ b/music_assistant/providers/podcastfeed/__init__.py
@@ -10,10 +10,10 @@ multiple instances with each one feed must exist.
 from __future__ import annotations
 
 from collections.abc import AsyncGenerator
-from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
 import podcastparser
+from aiohttp.client_exceptions import ClientError
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueType
 from music_assistant_models.enums import (
     ConfigEntryType,
@@ -27,7 +27,11 @@ from music_assistant_models.media_items import AudioFormat, Podcast, PodcastEpis
 from music_assistant_models.streamdetails import StreamDetails
 
 from music_assistant.helpers.compare import create_safe_string
-from music_assistant.helpers.podcast_parsers import parse_podcast, parse_podcast_episode
+from music_assistant.helpers.podcast_parsers import (
+    get_podcastparser_dict,
+    parse_podcast,
+    parse_podcast_episode,
+)
 from music_assistant.models.music_provider import MusicProvider
 
 if TYPE_CHECKING:
@@ -94,8 +98,8 @@ class PodcastMusicprovider(MusicProvider):
 
         try:
             self.parsed_podcast: dict[str, Any] = await self._cache_get_podcast()
-        except RuntimeError as exc:
-            raise RuntimeError("Invalid URL") from exc
+        except ClientError as exc:
+            raise MediaNotFoundError("Invalid URL") from exc
 
     @property
     def is_streaming_provider(self) -> bool:
@@ -203,17 +207,8 @@ class PodcastMusicprovider(MusicProvider):
         )
 
     async def _get_podcast(self) -> dict[str, Any]:
-        # without user agent, some feeds can not be retrieved
-        # https://github.com/music-assistant/support/issues/3596
         assert self.feed_url is not None
-        response = await self.mass.http_session.get(
-            self.feed_url, headers={"User-Agent": "Mozilla/5.0"}
-        )
-        if response.status != 200:
-            raise RuntimeError
-        feed_data = await response.read()
-        feed_stream = BytesIO(feed_data)
-        return podcastparser.parse(self.feed_url, feed_stream)  # type:ignore [no-any-return]
+        return await get_podcastparser_dict(session=self.mass.http_session, feed_url=self.feed_url)
 
     async def _cache_get_podcast(self) -> dict[str, Any]:
         parsed_podcast = await self.mass.cache.get(
-- 
2.34.1