mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-22 10:07:43 -05:00
[ie/ondemandkorea] Overhaul extractor (#8386)
Closes #8374
Authored by: seproDev
This commit is contained in:
parent
3ff494f6f4
commit
05adfd883a
@ -1387,7 +1387,10 @@ from .oftv import (
|
|||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
from .olympics import OlympicsReplayIE
|
from .olympics import OlympicsReplayIE
|
||||||
from .on24 import On24IE
|
from .on24 import On24IE
|
||||||
from .ondemandkorea import OnDemandKoreaIE
|
from .ondemandkorea import (
|
||||||
|
OnDemandKoreaIE,
|
||||||
|
OnDemandKoreaProgramIE,
|
||||||
|
)
|
||||||
from .onefootball import OneFootballIE
|
from .onefootball import OneFootballIE
|
||||||
from .onenewsnz import OneNewsNZIE
|
from .onenewsnz import OneNewsNZIE
|
||||||
from .oneplace import OnePlacePodcastIE
|
from .oneplace import OnePlacePodcastIE
|
||||||
|
@ -1,87 +1,167 @@
|
|||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking import HEADRequest
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
OnDemandPagedList,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
parse_age_limit,
|
||||||
|
parse_qs,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class OnDemandKoreaIE(InfoExtractor):
|
class OnDemandKoreaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
|
||||||
_GEO_COUNTRIES = ['US', 'CA']
|
_GEO_COUNTRIES = ['US', 'CA']
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ondemandkorea.com/ask-us-anything-e351.html',
|
'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
|
||||||
|
'md5': 'e2ff77255d989e3135bde0c5889fbce8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ask-us-anything-e351',
|
'id': '686471',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ask Us Anything : Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won - 09/24/2022',
|
'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
|
||||||
'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'duration': 5486.955,
|
||||||
|
'release_date': '20220924',
|
||||||
|
'series': 'Ask Us Anything',
|
||||||
|
'series_id': 11790,
|
||||||
|
'episode_number': 351,
|
||||||
|
'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': 'm3u8 download'
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ondemandkorea.com/work-later-drink-now-e1.html',
|
'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
|
||||||
|
'md5': '57266c720006962be7ff415b24775caa',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'work-later-drink-now-e1',
|
'id': '1595796',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Work Later, Drink Now : E01',
|
'title': 'Breakup Probation, A Week: E08',
|
||||||
'description': 'Work Later, Drink First follows three women who find solace in a glass of liquor at the end of the day. So-hee, who gets comfort from a cup of soju af',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||||
'thumbnail': r're:^https?://.*\.png$',
|
'duration': 1586.0,
|
||||||
'subtitles': {
|
'release_date': '20231001',
|
||||||
'English': 'mincount:1',
|
'series': 'Breakup Probation, A Week',
|
||||||
},
|
'series_id': 22912,
|
||||||
|
'episode_number': 8,
|
||||||
|
'episode': 'E08',
|
||||||
},
|
},
|
||||||
'params': {
|
}, {
|
||||||
'skip_download': 'm3u8 download'
|
'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
|
||||||
}
|
'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '369531',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'release_date': '20220519',
|
||||||
|
'duration': 7267.0,
|
||||||
|
'title': 'The Outlaws: Main Movie',
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
|
||||||
|
|
||||||
if not webpage:
|
data = self._download_json(
|
||||||
# Page sometimes returns captcha page with HTTP 403
|
f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
|
||||||
raise ExtractorError(
|
headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
|
||||||
'Unable to access page. You may have been blocked.',
|
if not traverse_obj(data, ('result', {dict})):
|
||||||
expected=True)
|
msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
|
||||||
|
raise ExtractorError(msg or 'Got empty response from playback API', expected=True)
|
||||||
|
|
||||||
if 'msg_block_01.png' in webpage:
|
data = data['result']
|
||||||
self.raise_geo_restricted(
|
|
||||||
msg='This content is not available in your region',
|
|
||||||
countries=self._GEO_COUNTRIES)
|
|
||||||
|
|
||||||
if 'This video is only available to ODK PLUS members.' in webpage:
|
def try_geo_bypass(url):
|
||||||
raise ExtractorError(
|
return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
|
||||||
'This video is only available to ODK PLUS members.',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
if 'ODK PREMIUM Members Only' in webpage:
|
def try_upgrade_quality(url):
|
||||||
raise ExtractorError(
|
mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
|
||||||
'This video is only available to ODK PREMIUM members.',
|
return mod_url if mod_url != url and self._request_webpage(
|
||||||
expected=True)
|
HEADRequest(mod_url), video_id, note='Checking for higher quality format',
|
||||||
|
errnote='No higher quality format found', fatal=False) else url
|
||||||
|
|
||||||
title = self._search_regex(
|
formats = []
|
||||||
r'class=["\']episode_title["\'][^>]*>([^<]+)',
|
for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
|
||||||
webpage, 'episode_title', fatal=False) or self._og_search_title(webpage)
|
formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
|
||||||
|
|
||||||
jw_config = self._parse_json(
|
subtitles = {}
|
||||||
self._search_regex((
|
for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
|
||||||
r'(?P<options>{\s*[\'"]tracks[\'"].*?})[)\];]+$',
|
subtitles.setdefault(track.get('language', 'und'), []).append({
|
||||||
r'playlist\s*=\s*\[(?P<options>.+)];?$',
|
'url': track['url'],
|
||||||
r'odkPlayer\.init.*?(?P<options>{[^;]+}).*?;',
|
'ext': track.get('codec'),
|
||||||
), webpage, 'jw config', flags=re.MULTILINE | re.DOTALL, group='options'),
|
'name': track.get('label'),
|
||||||
video_id, transform_source=js_to_json)
|
})
|
||||||
info = self._parse_jwplayer_data(
|
|
||||||
jw_config, video_id, require_title=False, m3u8_id='hls',
|
|
||||||
base_url=url)
|
|
||||||
|
|
||||||
info.update({
|
def if_series(key=None):
|
||||||
'title': title,
|
return lambda obj: obj[key] if key and obj['kind'] == 'series' else None
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage)
|
return {
|
||||||
})
|
'id': video_id,
|
||||||
return info
|
'title': join_nonempty(
|
||||||
|
('episode', 'program', 'title'),
|
||||||
|
('episode', 'title'), from_dict=data, delim=': '),
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
|
||||||
|
'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
|
||||||
|
'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
|
||||||
|
'series': ('episode', {if_series(key='program')}, 'title'),
|
||||||
|
'series_id': ('episode', {if_series(key='program')}, 'id'),
|
||||||
|
'episode': ('episode', {if_series(key='title')}),
|
||||||
|
'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
|
||||||
|
}, get_all=False),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class OnDemandKoreaProgramIE(InfoExtractor):
    """Playlist extractor for OnDemandKorea program URLs.

    Matches ``/player/vod/<slug>`` URLs that end right after the slug (or
    continue with a ``#`` fragment). Every episode returned by the episodes
    API is emitted as a ``url_result`` pointing at the per-episode
    ``?contentId=<id>`` URL, handled by ``OnDemandKoreaIE``.
    """
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Number of episodes requested per API call; also given to OnDemandPagedList
    # as its page size.
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        """Yield one ``url_result`` per episode on a single page of the program.

        ``display_id`` is the program slug taken from the URL. ``page`` is the
        page index supplied by the paged list; it is incremented by one before
        being sent as the ``page`` query parameter (OnDemandPagedList
        presumably counts pages from 0 -- confirm).
        """
        page += 1
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query={
                'page': page,
                'page_size': self._PAGE_SIZE,
            # 404 is declared as an expected status
            # (presumably what the API answers past the last page -- confirm)
            }, note=f'Downloading page {page}', expected_status=404)
        # Only keep entries that carry an 'id': the episode URL cannot be built
        # without it, and a single malformed entry should not abort the playlist.
        for episode in traverse_obj(page_data, ('result', 'results', lambda _, v: v['id'])):
            yield self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))

    def _real_extract(self, url):
        """Return a lazily paged playlist of all episodes of the program in ``url``."""
        display_id = self._match_id(url)

        # Pages are fetched on demand through _fetch_page with display_id pre-bound
        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, display_id), self._PAGE_SIZE)

        return self.playlist_result(entries, display_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user