From e3a88568b0457f18a03a2a894287a03f7cc2dbbe Mon Sep 17 00:00:00 2001 From: Yasoob Date: Sun, 11 Aug 2013 22:23:05 +0500 Subject: [PATCH 1/9] Added an IE for hark.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/hark.py | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/hark.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 84c02c2ed9..ca56645779 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .hark import HarkIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .hypem import HypemIE diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py new file mode 100644 index 0000000000..ab0a69697e --- /dev/null +++ b/youtube_dl/extractor/hark.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import InfoExtractor +from ..utils import determine_ext + +class HarkIE(InfoExtractor): + _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+' + _TEST = { + u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', + u'file': u'mmbzyhkgny.mp3', + u'md5': u'6783a58491b47b92c7c1af5a77d4cbee', + u'info_dict': { + u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id) + webpage = self._download_webpage(embed_url, video_id) + + final_url = self._search_regex(r'src="(.+?).mp3"', + webpage, 'video url')+'.mp3' + title = self._html_search_regex(r'(.+?)', + webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace( + 'Sound Clip , Quote, MP3, and Ringtone - Hark','') + + return {'id': video_id, + 'url' : final_url, + 'title': title, + 'ext': determine_ext(final_url), + } From f2aeefe29c72e7c6165a35ba8153b52f96ee42af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Aug 2013 10:48:12 +0200 Subject: [PATCH 2/9] [youtube] update algo for length 84 --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 014324439f..6f1d6ef99e 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ tests = [ # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), - # 84 + # 84 - vflh9ybst 2013/08/23 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"), + "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"), # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e4987b2b39..af01c9da02 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: - return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] + return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84] elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: From d68730a56e9c26366758d66cd99f589cebad4686 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 24 Aug 2013 13:22:28 +0200 Subject: [PATCH 3/9] Add SUPER RTL NOW to RTLnow extractor --- youtube_dl/extractor/rtlnow.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index d993a990ad..67f341bcc6 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -5,8 +5,8 @@ from .common import InfoExtractor from ..utils import ExtractorError class RTLnowIE(InfoExtractor): - """Information Extractor for RTLnow, RTL2now and VOXnow""" - _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW""" + _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'file': u'90419.flv', @@ -43,6 +43,19 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + }, + { + u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', + u'file': u'99205.flv', + u'info_dict': { + u'upload_date': u'20080928', + u'title': u'Medicopter 117 - Angst!', + u'description': u'Angst!', + u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' + }, + u'params': { + u'skip_download': True, + }, }] def _real_extract(self,url): From 9460db832ce2dc61456b2ad82e3b6190465a2133 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 21:10:03 +0200 Subject: [PATCH 4/9] [ro220] Add support for 220.ro --- youtube_dl/extractor/__init__.py | 3 +++ youtube_dl/extractor/ro220.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/ro220.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b4db8f0bf7..39c530ba37 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -57,6 +57,7 @@ from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE +from .ro220 import Ro220IE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE from .sina import SinaIE @@ -116,12 +117,14 @@ _ALL_CLASSES = [ ] _ALL_CLASSES.append(GenericIE) + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ return [klass() for klass in _ALL_CLASSES] + def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name+'IE'] diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py new file mode 100644 index 0000000000..c32f64d997 --- /dev/null +++ b/youtube_dl/extractor/ro220.py @@ -0,0 +1,42 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + compat_parse_qs, +) + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)' + _TEST = { + u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/", + u'file': u'LYV6doKo7f.mp4', + u'md5': u'03af18b73a07b4088753930db7a34add', + u'info_dict': { + u"title": u"Luati-le Banii sez 4 ep 1", + u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + flashVars_str = self._search_regex( + r' Date: Sat, 24 Aug 2013 22:49:22 +0200 Subject: [PATCH 5/9] Install our own HTTPS handler as well (#1309) --- youtube_dl/utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52cfb8a6d1..ab1049cc0d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -476,7 +476,7 @@ def formatSeconds(secs): def make_HTTPS_handler(opts): if sys.version_info < (3,2): # Python's 2.x handler is very simplistic - return compat_urllib_request.HTTPSHandler() + return YoutubeDLHandlerHTTPS() else: import ssl context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) @@ -485,7 +485,7 @@ def make_HTTPS_handler(opts): context.verify_mode = (ssl.CERT_NONE if opts.no_check_certificate else ssl.CERT_REQUIRED) - return compat_urllib_request.HTTPSHandler(context=context) + return YoutubeDLHandlerHTTPS(context=context) class ExtractorError(Exception): """Error during info extraction.""" @@ -569,7 +569,8 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected -class YoutubeDLHandler(compat_urllib_request.HTTPHandler): + +class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler """Handler for HTTP requests and responses. This class, when installed with an OpenerDirector, automatically adds @@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): ret.code = code return ret - def http_request(self, req): - for h,v in std_headers.items(): + def _http_request(self, req): + for h, v in std_headers.items(): if h in req.headers: del req.headers[h] req.add_header(h, v) @@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['Youtubedl-user-agent'] return req - def http_response(self, req, resp): + def _http_response(self, req, resp): old_resp = resp # gzip if resp.headers.get('Content-encoding', '') == 'gzip': @@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp.msg = old_resp.msg return resp - https_request = http_request - https_response = http_response + +class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler): + http_request = YoutubeDLHandler_Template._http_request + http_response = YoutubeDLHandler_Template._http_response + + +class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler): + https_request = YoutubeDLHandler_Template._http_request + https_response = YoutubeDLHandler_Template._http_response + def unified_strdate(date_str): """Return a string with the date in the format YYYYMMDD""" From 0838239e8e454c55903f3d69560cf53e25ce69f4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:49:52 +0200 Subject: [PATCH 6/9] [generic] Support double slash URLs (Fixes #1309) --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee8..ccbbdd2555 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_urlparse, compat_urllib_request, ExtractorError, ) from .brightcove import BrightcoveIE + class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor): u'file': u'13601338388002.mp4', u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', u'info_dict': { - u"uploader": u"www.hodiho.fr", + u"uploader": u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" } }, @@ -161,6 +163,8 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_url = compat_urllib_parse.unquote(mobj.group(1)) + if video_url.startswith('//'): + video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: From 9585f890f8c0eff70eb874c7962dc30baea1049c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:56:37 +0200 Subject: [PATCH 7/9] [generic] add support for relative URLs (Fixes #1308) --- youtube_dl/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ccbbdd2555..8488dca052 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -165,6 +165,8 @@ class GenericIE(InfoExtractor): video_url = compat_urllib_parse.unquote(mobj.group(1)) if video_url.startswith('//'): video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url + if '://' not in video_url: + video_url = url + ('' if url.endswith('/') else '/') + video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: From 627a91a9a827b48270d3f5e288404388946f0733 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 26 Aug 2013 21:29:31 +0200 Subject: [PATCH 8/9] [generic] small typo --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8488dca052..d034a11bbc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -126,7 +126,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) self.report_extraction(video_id) - # Look for BrigthCove: + # Look for BrightCove: m_brightcove = re.search(r'', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') From 976fc7d137c9f23533c9bcf5942af72d3c861b14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2EYasoob=20Ullah=20Khalid=20=E2=98=BA?= Date: Tue, 27 Aug 2013 01:00:17 +0500 Subject: [PATCH 9/9] fixed tests for c56 and dailymotion --- youtube_dl/extractor/c56.py | 4 ++-- youtube_dl/extractor/dailymotion.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py index 4c8a8af091..dc3a8d47d1 100644 --- a/youtube_dl/extractor/c56.py +++ b/youtube_dl/extractor/c56.py @@ -12,8 +12,8 @@ class C56IE(InfoExtractor): _TEST ={ u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', - u'file': u'93440716.mp4', - u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', + u'file': u'93440716.flv', + u'md5': u'e59995ac63d0457783ea05f93f12a866', u'info_dict': { u'title': u'网事知多少 第32期:车怒', }, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index fa8c630d05..1ea449ca82 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor): u'file': u'x33vw9.mp4', u'md5': u'392c4b85a60a90dc4792da41ce3144eb', u'info_dict': { - u"uploader": u"Alex and Van .", + u"uploader": u"Amphora Alex and Van .", u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" } }