[extractor/common] Add validation for JSON-LD URLs

This commit is contained in:
Sergey M․ 2018-10-29 00:19:08 +07:00
parent 4c237ab787
commit bebef10909
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -69,6 +69,7 @@ from ..utils import (
update_url_query, update_url_query,
urljoin, urljoin,
url_basename, url_basename,
url_or_none,
xpath_element, xpath_element,
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
@@ -1213,10 +1214,10 @@ class InfoExtractor(object):
def extract_video_object(e): def extract_video_object(e):
assert e['@type'] == 'VideoObject' assert e['@type'] == 'VideoObject'
info.update({ info.update({
'url': e.get('contentUrl'), 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')), 'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')), 'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')), 'filesize': float_or_none(e.get('contentSize')),