2014-07-25 18:05:37 -04:00
from __future__ import unicode_literals
import re
from . common import InfoExtractor
2015-08-29 13:08:55 -04:00
from . . utils import (
ExtractorError ,
js_to_json ,
2015-08-29 13:11:56 -04:00
int_or_none ,
2016-08-21 19:06:39 -04:00
parse_iso8601 ,
2015-08-29 13:08:55 -04:00
)
2014-07-25 18:05:37 -04:00
class ABCIE(InfoExtractor):
    """Extractor for media embedded in abc.net.au news articles.

    The article page is searched for an ``inline{Video,Audio,YouTube}Data.push(...)``
    call; the pushed JavaScript value lists the available media URLs.
    YouTube embeds are delegated as a playlist of URL results, while video
    and audio entries are turned into a list of formats.
    """
    IE_NAME = 'abc.net.au'
    _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
        'info_dict': {
            'id': '5868334',
            'ext': 'mp4',
            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
        },
        'skip': 'this video has expired',
    }, {
        'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
        'md5': 'db2a5369238b51f9811ad815b69dc086',
        'info_dict': {
            'id': 'NvqvPeNZsHU',
            'ext': 'mp4',
            'upload_date': '20150816',
            'uploader': 'ABC News (Australia)',
            'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
            'uploader_id': 'NewsOnABC',
            'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
        },
        'add_ie': ['Youtube'],
        'skip': 'Not accessible from Travis CI server',
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
        'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
        'info_dict': {
            'id': '6880080',
            'ext': 'mp3',
            'title': 'NAB lifts interest rates, following Westpac and CBA',
            'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
        },
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (or a playlist result) for an article URL.

        Raises ExtractorError when the page carries no inline media data;
        if the page shows an "expired" notice, its text is reported as an
        expected error.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        mobj = re.search(
            r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
            webpage)
        if mobj is None:
            # No media data: prefer the site's own expiry message, if any.
            expired = self._html_search_regex(
                r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>',
                webpage, 'expired', None)
            if expired:
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, expired), expected=True)
            raise ExtractorError('Unable to extract video urls')

        media_type = mobj.group('type')

        # The pushed value is a JavaScript literal, not strict JSON.
        urls_info = self._parse_json(
            mobj.group('json_data'), video_id, transform_source=js_to_json)

        # A single entry may be pushed as a bare object; normalise to a list.
        if not isinstance(urls_info, list):
            urls_info = [urls_info]

        if media_type == 'YouTube':
            return self.playlist_result([
                self.url_result(url_info['url']) for url_info in urls_info])

        formats = [{
            'url': url_info['url'],
            # Audio entries are flagged as having no video codec.
            'vcodec': url_info.get('codec') if media_type == 'Video' else 'none',
            'width': int_or_none(url_info.get('width')),
            'height': int_or_none(url_info.get('height')),
            'tbr': int_or_none(url_info.get('bitrate')),
            'filesize': int_or_none(url_info.get('filesize')),
        } for url_info in urls_info]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
2016-08-21 19:06:39 -04:00
class ABCIViewIE(InfoExtractor):
    """Extractor for programs hosted on iview.abc.net.au.

    Metadata is read from the ``videoParams`` JavaScript object embedded in
    the program page; formats come from the ``hds-unmetered`` URL of the
    playlist entry whose type is ``program``.
    """
    IE_NAME = 'abc.net.au:iview'
    _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'

    # ABC iview programs are normally available for 14 days only.
    _TESTS = [{
        'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
        'md5': '979d10b2939101f0d27a06b79edad536',
        'info_dict': {
            'id': 'FA1505V024S00',
            'ext': 'mp4',
            'title': 'Series 27 Ep 24',
            'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
            'upload_date': '20160820',
            'uploader_id': 'abc1',
            'timestamp': 1471719600,
        },
        'skip': 'Video gone',
    }]

    def _real_extract(self, url):
        """Return the info dict for an iview program URL.

        Raises ExtractorError when the page's playlist contains no entry
        of type 'program'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_params = self._parse_json(self._search_regex(
            r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
        title = video_params['title']

        # Pick the first 'program' playlist entry. Supplying a default keeps
        # a bare StopIteration from escaping when none is present.
        stream = next(
            (s for s in video_params['playlist'] if s.get('type') == 'program'),
            None)
        if stream is None:
            raise ExtractorError('Unable to find program stream')

        formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
        self._sort_formats(formats)

        subtitles = {}
        # 'captions' may be absent or null; treat both as "no captions".
        src_vtt = (stream.get('captions') or {}).get('src-vtt')
        if src_vtt:
            subtitles['en'] = [{
                'url': src_vtt,
                'ext': 'vtt',
            }]

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
            'duration': int_or_none(video_params.get('eventDuration')),
            # pubDate is parsed with a space as the date/time delimiter.
            'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
            'series': video_params.get('seriesTitle'),
            # Fall back to the first seven characters of the video id.
            'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
            'episode': self._html_search_meta('episode_title', webpage),
            'uploader_id': video_params.get('channel'),
            'formats': formats,
            'subtitles': subtitles,
        }