yt-dlp/yt_dlp/extractor/spreaker.py

import itertools

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    int_or_none,
    parse_qs,
    str_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
)
from ..utils.traversal import traverse_obj


def _extract_episode(data, episode_id=None):
    """Turn the metadata dict of one Spreaker episode into an info dict.

    ``title`` and ``download_url`` are read by subscript, as is ``episode_id``
    when no truthy ``episode_id`` argument is passed, so those keys are
    required in ``data``; every other field is looked up leniently.
    """
    title = data['title']
    download_url = data['download_url']

    show_title = try_get(data, lambda x: x['show']['title'], str)
    author_name = try_get(data, lambda x: x['author']['fullname'], str)

    # Keep each image variant that carries a usable URL, in this fixed order
    image_urls = (
        url_or_none(data.get(f'{variant}_url'))
        for variant in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': image_url} for image_url in image_urls if image_url]

    def count_of(kind):
        # Look at the flat ``<kind>s_count`` key first, then at the nested
        # ``stats`` dict
        getters = (
            lambda x: x[f'{kind}s_count'],
            lambda x: x['stats'][f'{kind}s'],
        )
        return int_or_none(try_get(data, getters))

    def scaled_duration(key):
        # scale=1000: presumably the API reports milliseconds - TODO confirm
        return float_or_none(data.get(key), scale=1000)

    video_id = str(episode_id or data['episode_id'])

    info = {
        'id': video_id,
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': author_name,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': author_name,
        # ``length`` is only consulted when ``duration`` yields nothing truthy
        'duration': scaled_duration('duration') or scaled_duration('length'),
        'view_count': count_of('play'),
        'like_count': count_of('like'),
        'comment_count': count_of('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': show_title,
        # Entries are always attributed to the single-episode extractor,
        # including those produced for show playlists
        'extractor_key': SpreakerIE.ie_key(),
    }
    return info


class SpreakerIE(InfoExtractor):
    # Single-episode extractor. Accepts API URLs (episode, download/episode,
    # v2/episodes) as well as www.spreaker.com/episode/ pages whose path ends
    # in the numeric episode id.
    _VALID_URL = [
        r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
        r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
    ]
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music | SWM',
            'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
            'creators': ['SWM'],
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }, {
        'note': 'episode',
        'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
        'info_dict': {
            'id': '60269615',
            'display_id': 'grunge-music-origins-the-raw-sound-that-',
            'ext': 'mp3',
            'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
            'description': str,
            'timestamp': 1717468905,
            'upload_date': '20240604',
            'uploader': 'Katie Brown 2',
            'uploader_id': '17733249',
            'duration': 818.83,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': '90s Grunge',
            'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
            'creators': ['Katie Brown 2'],
        },
    }, {
        'url': 'https://www.spreaker.com/episode/60269615',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode's v2 API record and convert it to an info dict."""
        episode_id = self._match_id(url)
        # The API query is derived from the ``key`` parameter of the input URL
        api_query = traverse_obj(parse_qs(url), {'key': ('key', 0)})
        api_json = self._download_json(
            f'https://api.spreaker.com/v2/episodes/{episode_id}',
            episode_id, query=api_query)
        # Both levels are subscripted on purpose: a response without them
        # cannot be turned into an episode
        episode = api_json['response']['episode']
        return _extract_episode(episode, episode_id)


class SpreakerShowIE(InfoExtractor):
    # Playlist extractor for api.spreaker.com/show/<numeric id> URLs
    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield one info dict per episode, page by page.

        Iteration ends on the first page that has no ``pager`` dict or no
        non-empty ``results`` list, or once the page number equals the
        pager's ``last_page``.
        """
        listing_url = f'https://api.spreaker.com/show/{show_id}/episodes'
        for page_num in itertools.count(1):
            page = self._download_json(
                listing_url, show_id,
                note=f'Downloading JSON page {page_num}',
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return
            for result in results:
                # Items that are not dicts are skipped
                if isinstance(result, dict):
                    yield _extract_episode(result)
            if page_num == pager.get('last_page'):
                return

    def _real_extract(self, url):
        """Return a playlist of the show's episodes, keyed by the show id."""
        show_id = self._match_id(url)
        episodes = self._entries(show_id)
        return self.playlist_result(episodes, playlist_id=show_id)


class SpreakerShowPageIE(InfoExtractor):
    # Resolves a www.spreaker.com/show/<slug> page to the numeric show id and
    # hands over to SpreakerShowIE
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric show id in the page and delegate to the API URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The id is taken from a ``show_id: <digits>`` occurrence in the page
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
        api_show_url = f'https://api.spreaker.com/show/{show_id}'
        return self.url_result(
            api_show_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`import itertools`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`float_or_none,`
			`int_or_none,`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`parse_qs,`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`str_or_none,`
			`try_get,`
			`unified_timestamp,`
			`url_or_none,`
			`)`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`from ..utils.traversal import traverse_obj`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00

			`def _extract_episode(data, episode_id=None):`
			`title = data['title']`
			`download_url = data['download_url']`

[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`series = try_get(data, lambda x: x['show']['title'], str)`
			`uploader = try_get(data, lambda x: x['author']['fullname'], str)`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00
			`thumbnails = []`
			`for image in ('image_original', 'image_medium', 'image'):`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`image_url = url_or_none(data.get(f'{image}_url'))`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`if image_url:`
			`thumbnails.append({'url': image_url})`

			`def stats(key):`
			`return int_or_none(try_get(`
			`data,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`(lambda x: x[f'{key}s_count'],`
			`lambda x: x['stats'][f'{key}s'])))`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00
			`def duration(key):`
			`return float_or_none(data.get(key), scale=1000)`

			`return {`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'id': str(episode_id or data['episode_id']),`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`'url': download_url,`
			`'display_id': data.get('permalink'),`
			`'title': title,`
			`'description': data.get('description'),`
			`'timestamp': unified_timestamp(data.get('published_at')),`
			`'uploader': uploader,`
			`'uploader_id': str_or_none(data.get('author_id')),`
			`'creator': uploader,`
			`'duration': duration('duration') or duration('length'),`
			`'view_count': stats('play'),`
			`'like_count': stats('like'),`
			`'comment_count': stats('message'),`
			`'format': 'MPEG Layer 3',`
			`'format_id': 'mp3',`
			`'container': 'mp3',`
			`'ext': 'mp3',`
			`'thumbnails': thumbnails,`
			`'series': series,`
			`'extractor_key': SpreakerIE.ie_key(),`
			`}`


			`class SpreakerIE(InfoExtractor):`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`_VALID_URL = [`
			`r'https?://api\.spreaker\.com/(?:(?:download/)?episode\|v2/episodes)/(?P<id>\d+)',`
			`r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]\|$)',`
			`]`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`_TESTS = [{`
			`'url': 'https://api.spreaker.com/episode/12534508',`
			`'info_dict': {`
			`'id': '12534508',`
			`'display_id': 'swm-ep15-how-to-market-your-music-part-2',`
			`'ext': 'mp3',`
			`'title': 'EP:15 \| Music Marketing (Likes) - Part 2',`
			`'description': 'md5:0588c43e27be46423e183076fa071177',`
			`'timestamp': 1502250336,`
			`'upload_date': '20170809',`
			`'uploader': 'SWM',`
			`'uploader_id': '9780658',`
			`'duration': 1063.42,`
			`'view_count': int,`
			`'like_count': int,`
			`'comment_count': int,`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`'series': 'Success With Music \| SWM',`
			`'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',`
			`'creators': ['SWM'],`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`},`
			`}, {`
			`'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',`
			`'only_matching': True,`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`}, {`
			`'note': 'episode',`
			`'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',`
			`'info_dict': {`
			`'id': '60269615',`
			`'display_id': 'grunge-music-origins-the-raw-sound-that-',`
			`'ext': 'mp3',`
			`'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',`
			`'description': str,`
			`'timestamp': 1717468905,`
			`'upload_date': '20240604',`
			`'uploader': 'Katie Brown 2',`
			`'uploader_id': '17733249',`
			`'duration': 818.83,`
			`'view_count': int,`
			`'like_count': int,`
			`'comment_count': int,`
			`'series': '90s Grunge',`
			`'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',`
			`'creators': ['Katie Brown 2'],`
			`},`
			`}, {`
			`'url': 'https://www.spreaker.com/episode/60269615',`
			`'only_matching': True,`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`}]`

			`def _real_extract(self, url):`
			`episode_id = self._match_id(url)`
			`data = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`f'https://api.spreaker.com/v2/episodes/{episode_id}',`
[ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc 2024-11-11 12:42:05 -05:00			`episode_id, query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`return _extract_episode(data, episode_id)`


			`class SpreakerShowIE(InfoExtractor):`
			`_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'`
			`_TESTS = [{`
[spreaker] fix SpreakerShowIE test URL 2020-11-26 13:10:40 -05:00			`'url': 'https://api.spreaker.com/show/4652058',`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`'info_dict': {`
			`'id': '4652058',`
			`},`
			`'playlist_mincount': 118,`
			`}]`

			`def _entries(self, show_id):`
			`for page_num in itertools.count(1):`
			`episodes = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`f'https://api.spreaker.com/show/{show_id}/episodes',`
			`show_id, note=f'Downloading JSON page {page_num}', query={`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`'page': page_num,`
			`'max_per_page': 100,`
			`})`
			`pager = try_get(episodes, lambda x: x['response']['pager'], dict)`
			`if not pager:`
			`break`
			`results = pager.get('results')`
			`if not results or not isinstance(results, list):`
			`break`
			`for result in results:`
			`if not isinstance(result, dict):`
			`continue`
			`yield _extract_episode(result)`
			`if page_num == pager.get('last_page'):`
			`break`

			`def _real_extract(self, url):`
			`show_id = self._match_id(url)`
			`return self.playlist_result(self._entries(show_id), playlist_id=show_id)`


			`class SpreakerShowPageIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'`
			`_TESTS = [{`
			`'url': 'https://www.spreaker.com/show/success-with-music',`
			`'only_matching': True,`
			`}]`

			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`
			`show_id = self._search_regex(`
			`r'show_id\s:\s(?P<id>\d+)', webpage, 'show id')`
			`return self.url_result(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`f'https://api.spreaker.com/show/{show_id}',`
Updated to release 2020.11.26 2020-11-26 12:27:34 -05:00			`ie=SpreakerShowIE.ie_key(), video_id=show_id)`