From 6a656a843a629ceef6979976a353d177c97b9527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Dec 2013 20:35:00 +0100 Subject: [PATCH 1/5] Update description value for the write_info_json test (required after 27dcce19045670fc348ff1119c0d2283aaed3ae2) --- test/test_write_info_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py index d7177611b..90426a559 100644 --- a/test/test_write_info_json.py +++ b/test/test_write_info_json.py @@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc' INFO_JSON_FILE = TEST_ID + '.info.json' DESCRIPTION_FILE = TEST_ID + '.mp4.description' EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 +test URL: https://github.com/rg3/youtube-dl/issues/1892 This is a test video for youtube-dl. From bfb9f7bc4c5c6fd9b2d3d46be133988f70534d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Dec 2013 20:36:26 +0100 Subject: [PATCH 2/5] [hotnewhiphop] Update test's title --- youtube_dl/extractor/hotnewhiphop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 3798118a7..0ee74fb38 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor): u'file': u'1435540.mp3', u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', u'info_dict': { - u"title": u"Freddie Gibbs - Lay It Down" + u"title": u'Freddie Gibbs "Lay It Down"' } } From e9bf7479d209c2623753628201ca0daffa19f3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Dec 2013 23:28:40 +0100 Subject: [PATCH 3/5] Add an extractor for theplatform.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/theplatform.py | 69 +++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 
youtube_dl/extractor/theplatform.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bd996483b..900a6f02f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -144,6 +144,7 @@ from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE from .tf1 import TF1IE +from .theplatform import ThePlatformIE from .thisav import ThisAVIE from .toutv import TouTvIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py new file mode 100644 index 000000000..d1d6a4c2c --- /dev/null +++ b/youtube_dl/extractor/theplatform.py @@ -0,0 +1,69 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + xpath_with_ns, + find_xpath_attr, +) + +_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) + + +class ThePlatformIE(InfoExtractor): + _VALID_URL = r'https?://link\.theplatform\.com/s/[^/]+/(?P<id>[^/\?]+)' + + _TEST = { + # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ + u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', + u'info_dict': { + u'id': u'e9I_cZgTgIPd', + u'ext': u'flv', + u'title': u'Blackberry\'s big, bold Z30', + u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', + u'duration': 247, + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + } + + def _get_info(self, video_id): + smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' 
+ 'format=smil&mbr=true'.format(video_id)) + meta = self._download_xml(smil_url, video_id) + info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id) + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json) + + head = meta.find(_x('smil:head')) + body = meta.find(_x('smil:body')) + base_url = head.find(_x('smil:meta')).attrib['base'] + switch = body.find(_x('smil:switch')) + formats = [] + for f in switch.findall(_x('smil:video')): + attr = f.attrib + formats.append({ + 'url': base_url, + 'play_path': 'mp4:' + attr['src'], + 'ext': 'flv', + 'width': int(attr['width']), + 'height': int(attr['height']), + 'vbr': int(attr['system-bitrate']), + }) + formats.sort(key=lambda f: (f['height'], f['width'], f['vbr'])) + + return { + 'id': video_id, + 'title': info['title'], + 'formats': formats, + 'description': info['description'], + 'thumbnail': info['defaultThumbnailUrl'], + 'duration': info['duration']//1000, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + return self._get_info(video_id) From b9a2c53833a3cebc32df908aad74f7c5a3537aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Dec 2013 23:43:50 +0100 Subject: [PATCH 4/5] [metacafe] Add support for cbs videos (fixes #1838) They use theplatform.com --- youtube_dl/extractor/metacafe.py | 29 +++++++++++++++++++++++++---- youtube_dl/extractor/theplatform.py | 2 +- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 91480ba87..e59bdd604 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor): u'age_limit': 18, }, }, + # cbs video + { + u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/', + u'info_dict': { + u'id': u'0rOxMBabDXN6', + 
u'ext': u'flv', + u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet', + u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d', + u'duration': 129, + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }, ] @@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor): video_id = mobj.group(1) - # Check if video comes from YouTube - mobj2 = re.match(r'^yt-(.*)$', video_id) - if mobj2 is not None: - return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')] + # the video may come from an external site + m_external = re.match('^(\w{2})-(.*)$', video_id) + if m_external is not None: + prefix, ext_id = m_external.groups() + # Check if video comes from YouTube + if prefix == 'yt': + return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube') + # CBS videos use theplatform.com + if prefix == 'cb': + return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') # Retrieve video webpage to extract further information req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index d1d6a4c2c..920689511 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -11,7 +11,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language class ThePlatformIE(InfoExtractor): - _VALID_URL = r'https?://link\.theplatform\.com/s/[^/]+/(?P<id>[^/\?]+)' + _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)' _TEST = { # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ From 673d1273ff6f6d3267728fbe6f79c9c801598fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 5 Dec 2013 12:41:58 +0100 Subject: [PATCH 5/5] [vevo] Support '/watch/{id}' urls --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d8bfcd155..3eedcf7dd 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -15,7 +15,7 @@ class VevoIE(InfoExtractor): Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE) """ - _VALID_URL = r'((http://www\.vevo\.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' + _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)' _TESTS = [{ u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'file': u'GB1101300280.mp4',