[generic] Restrict share-videos.se embeds regex to filter bogus URLs (#16115)

This commit is contained in:
Sergey M․ 2018-04-09 00:25:44 +07:00
parent 1fc37ca3f1
commit d3431dcb90
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -1974,10 +1974,10 @@ class GenericIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '83645793', 'id': '83645793',
'title': 'Lock up and get excited', 'title': 'Lock up and get excited',
'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
'ext': 'mp4' 'ext': 'mp4'
} },
} 'skip': 'TODO: fix nested playlists processing in tests',
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@@ -2973,6 +2973,13 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]
if sharevideos_urls:
return self.playlist_from_matches(
sharevideos_urls, video_id, video_title)
def merge_dicts(dict1, dict2): def merge_dicts(dict1, dict2):
merged = {} merged = {}
for k, v in dict1.items(): for k, v in dict1.items():
@@ -2988,14 +2995,6 @@ class GenericIE(InfoExtractor):
merged[k] = v merged[k] = v
return merged return merged
# Look for Share-Videos.se embeds
sharevideosse_urls = [m.group('url') for m in re.finditer(
r'<iframe[^>]+?src\s*=\s*(["\'])(?P<url>https?://embed\.share-videos\.se/auto/embed/\d+.+?)\1',
webpage)]
if sharevideosse_urls:
return self.playlist_from_matches(
sharevideosse_urls, video_id, video_title)
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries: