[extractor/common] Case insensitive inputs extraction

This commit is contained in:
Sergey M․ 2015-09-11 20:43:05 +06:00
parent 1721fef28b
commit 73eb13dfc7

View File

@@ -732,7 +732,7 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
hidden_inputs = {}
for input in re.findall(r'<input([^>]+)>', html):
for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
@@ -746,7 +746,7 @@ class InfoExtractor(object):
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)