mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-22 18:17:17 -05:00
[ie/sproutvideo] Add extractors (#10098)
Closes #2933, Closes #8942 Authored by: bashonly, TheZ3ro Co-authored-by: thezero <io@thezero.org>
This commit is contained in:
parent
5dbac313ae
commit
d6c2c2bc84
@ -1928,6 +1928,10 @@ from .spreaker import (
|
|||||||
)
|
)
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
|
from .sproutvideo import (
|
||||||
|
SproutVideoIE,
|
||||||
|
VidsIoIE,
|
||||||
|
)
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
SRGSSRIE,
|
SRGSSRIE,
|
||||||
SRGSSRPlayIE,
|
SRGSSRPlayIE,
|
||||||
|
@ -2,6 +2,7 @@ import itertools
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .sproutvideo import VidsIoIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -12,6 +13,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -305,22 +307,27 @@ class PatreonIE(PatreonBaseIE):
|
|||||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||||
|
headers = {'referer': 'https://patreon.com/'}
|
||||||
|
|
||||||
# handle Vimeo embeds
|
# handle Vimeo embeds
|
||||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||||
v_url = urllib.parse.unquote(self._html_search_regex(
|
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||||
if url_or_none(v_url) and self._request_webpage(
|
if url_or_none(v_url) and self._request_webpage(
|
||||||
v_url, video_id, 'Checking Vimeo embed URL',
|
v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False):
|
||||||
headers={'Referer': 'https://patreon.com/'},
|
|
||||||
fatal=False, errnote=False):
|
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||||
VimeoIE, url_transparent=True))
|
VimeoIE, url_transparent=True))
|
||||||
|
|
||||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||||
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
|
if embed_url and (urlh := self._request_webpage(
|
||||||
entries.append(self.url_result(embed_url))
|
embed_url, video_id, 'Checking embed URL', headers=headers,
|
||||||
|
fatal=False, errnote=False, expected_status=403)):
|
||||||
|
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
||||||
|
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
|
||||||
|
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
||||||
|
|
||||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||||
if post_file:
|
if post_file:
|
||||||
|
198
yt_dlp/extractor/sproutvideo.py
Normal file
198
yt_dlp/extractor/sproutvideo.py
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
import base64
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
remove_start,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class SproutVideoIE(InfoExtractor):
|
||||||
|
_NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
|
||||||
|
_VALID_URL = rf'https?:{_NO_SCHEME_RE}'
|
||||||
|
_EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
|
||||||
|
'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4c9dddb01910e3c9c4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
|
||||||
|
'duration': 576,
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
|
||||||
|
'md5': 'cebae5cf558cca83271917cf4ec03f26',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a79fdcb21f1be2c62e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'HS_01_Live Stream 2023-01-14 10:00',
|
||||||
|
'duration': 703,
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# http formats 'sd' and 'hd' are available
|
||||||
|
'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
|
||||||
|
'md5': 'f368c78df07e78a749508b221528672c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '119cd6bc1a18e6cd98',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '3. Updating your Partner details',
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
'duration': 60,
|
||||||
|
},
|
||||||
|
'params': {'format': 'hd'},
|
||||||
|
}, {
|
||||||
|
# subtitles
|
||||||
|
'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
|
||||||
|
'md5': '7f6798f037d7a3e3e07e67959de68fc6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '119dd8ba121ee0cc98',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Recipients Setup - Domestic Wire Only',
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
'duration': 77,
|
||||||
|
'subtitles': {'en': 'count:1'},
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4c9dddb01910e3c9c4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
|
||||||
|
'duration': 576,
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
_M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
|
||||||
|
_QUALITIES = ('hd', 'uhd', 'source') # Exclude 'sd' to prioritize hls formats above it
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _policy_to_qs(policy, signature_key, as_string=False):
|
||||||
|
query = {}
|
||||||
|
for key, value in policy['signatures'][signature_key].items():
|
||||||
|
query[remove_start(key, 'CloudFront-')] = value
|
||||||
|
query['sessionID'] = policy['sessionID']
|
||||||
|
return urllib.parse.urlencode(query, doseq=True) if as_string else query
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_embed_urls(cls, url, webpage):
|
||||||
|
for embed_url in super()._extract_embed_urls(url, webpage):
|
||||||
|
if embed_url.startswith('//'):
|
||||||
|
embed_url = f'https:{embed_url}'
|
||||||
|
yield smuggle_url(embed_url, {'referer': url})
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
|
||||||
|
data = self._search_json(
|
||||||
|
r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
|
||||||
|
end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())
|
||||||
|
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
headers = {
|
||||||
|
'Accept': '*/*',
|
||||||
|
'Origin': 'https://videos.sproutvideo.com',
|
||||||
|
'Referer': url,
|
||||||
|
}
|
||||||
|
|
||||||
|
# HLS extraction is fatal; only attempt it if the JSON data says it's available
|
||||||
|
if traverse_obj(data, 'hls'):
|
||||||
|
manifest_query = self._policy_to_qs(data, 'm')
|
||||||
|
fragment_query = self._policy_to_qs(data, 't', as_string=True)
|
||||||
|
key_query = self._policy_to_qs(data, 'k', as_string=True)
|
||||||
|
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
|
||||||
|
m3u8_id='hls', headers=headers, query=manifest_query))
|
||||||
|
for fmt in formats:
|
||||||
|
fmt.update({
|
||||||
|
'url': update_url_query(fmt['url'], manifest_query),
|
||||||
|
'extra_param_to_segment_url': fragment_query,
|
||||||
|
'extra_param_to_key_url': key_query,
|
||||||
|
})
|
||||||
|
|
||||||
|
if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||||
|
quality = qualities(self._QUALITIES)
|
||||||
|
acodec = 'none' if data.get('has_audio') is False else None
|
||||||
|
formats.extend([{
|
||||||
|
'format_id': str(format_id),
|
||||||
|
'url': format_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'quality': quality(format_id),
|
||||||
|
'acodec': acodec,
|
||||||
|
} for format_id, format_url in downloads])
|
||||||
|
|
||||||
|
for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
|
||||||
|
subtitles.setdefault(sub_data.get('srclang', 'en'), []).append({
|
||||||
|
'url': sub_data['src'],
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'http_headers': headers,
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'thumbnail': ('posterframe_url', {url_or_none}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VidsIoIE(InfoExtractor):
|
||||||
|
IE_NAME = 'vids.io'
|
||||||
|
_VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
|
||||||
|
'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '799cd8b11c10efc1f0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How to Video: Live Streaming',
|
||||||
|
'duration': 2787,
|
||||||
|
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||||
|
webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)
|
||||||
|
|
||||||
|
if urlh.status == 403:
|
||||||
|
password = self.get_param('videopassword')
|
||||||
|
if not password:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is password-protected; use the --video-password option', expected=True)
|
||||||
|
try:
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, display_id, 'Submitting video password',
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'password': password,
|
||||||
|
**self._hidden_inputs(webpage),
|
||||||
|
}))
|
||||||
|
# Requests with user's session cookie `_sproutvideo_session` are now authorized
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||||
|
raise ExtractorError('Incorrect password', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
if embed_url := next(SproutVideoIE._extract_embed_urls(url, webpage), None):
|
||||||
|
return self.url_result(embed_url, SproutVideoIE, video_id)
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to extract any SproutVideo embed url')
|
Loading…
Reference in New Issue
Block a user