Merge remote-tracking branch 'dstftw/generic-webpage-unescape'

Conflicts:
	youtube_dl/extractor/generic.py
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-03-21 22:14:24 +01:00
commit 0f2a2ba14b

View File

@ -185,7 +185,18 @@ class GenericIE(InfoExtractor):
'uploader': 'Ze Frank', 'uploader': 'Ze Frank',
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
} }
} },
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
'md5': '2baf4ddd70f697d94b1c18cf796d5107',
'info_dict': {
'id': '06e53103ca9aa',
'ext': 'flv',
'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
'description': 'No description',
},
},
] ]
def report_download_webpage(self, video_id): def report_download_webpage(self, video_id):
@ -337,6 +348,11 @@ class GenericIE(InfoExtractor):
except compat_xml_parse_error: except compat_xml_parse_error:
pass pass
# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
# Unescaping the whole page allows us to handle those cases in a generic way
webpage = compat_urllib_parse.unquote(webpage)
# it's tempting to parse this further, but you would # it's tempting to parse this further, but you would
# have to take into account all the variations like # have to take into account all the variations like
# Video Title - Site Name # Video Title - Site Name