diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 967010826..fa7b5313c 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -434,6 +434,9 @@ from .cpac import (
 from .cracked import CrackedIE
 from .crackle import CrackleIE
 from .craftsy import CraftsyIE
+from .createacademy import (
+    CreateAcademyIE,
+)
 from .crooksandliars import CrooksAndLiarsIE
 from .crowdbunker import (
     CrowdBunkerChannelIE,
diff --git a/yt_dlp/extractor/createacademy.py b/yt_dlp/extractor/createacademy.py
new file mode 100644
index 000000000..705837da3
--- /dev/null
+++ b/yt_dlp/extractor/createacademy.py
@@ -0,0 +1,120 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    extract_attributes,
+)
+
+
+class CreateAcademyIE(InfoExtractor):
+    """Extractor for individual lesson pages on createacademy.com."""
+
+    _VALID_URL = r'https?://(?:www\.)?createacademy\.com/(?:[^/]+/)*lessons/(?P<id>[^/?#]+)'
+
+    _TESTS = [
+        {
+            'url': 'https://www.createacademy.com/courses/dan-pearson/lessons/meet-dan',
+            'info_dict': {
+                'id': '265',
+                'ext': 'mp4',
+                'title': 'Create Academy - s10e01 - Meet Dan',
+                'description': 'md5:48c8af37219020571a84d5f406e75d86',
+                'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/6222962662001/22f75006-c49f-4d95-8673-1b60df4223d2/45d953e0-fa58-4cb6-9217-1c7b3c80c932/1280x720/match/image.jpg',
+            },
+        },
+    ]
+
+    def _get_lesson_metadata(self, data, lesson_id):
+        """Build the display title of a lesson and locate its curriculum section.
+
+        data is the parsed data-page payload of the lesson page. Returns a dict
+        with the matching section under 'section_data' and the formatted 'title'.
+        """
+        course = data['props']['course']
+        prefix = f'Create Academy - s{course["id"]}e'
+
+        for section in course['curriculum']['sections']:
+            for lesson in section['lessons']:
+                if lesson['id'] != lesson_id:
+                    continue
+                # Zero-pad the lesson number to two digits, e.g. "s10e01"
+                number, title = lesson['number'], lesson['title'].strip()
+                return {
+                    'section_data': section,
+                    'title': f'{prefix}{number:02d} - {title}',
+                }
+
+        # The lesson is not listed in the curriculum: fall back to the lesson's
+        # own title with a placeholder section and lesson number
+        lesson_title = data['props']['lesson']['title'].strip()
+        return {
+            'section_data': {
+                'id': 0,
+                'number': 0,
+                'title': '',
+            },
+            'title': f'{prefix}00 - {lesson_title}',
+        }
+
+    def _get_policy_key(self, data, video_id):
+        """Fetch the Brightcove player script and return its embedded policy key."""
+        brightcove = data['props']['brightcove']
+        account_id, player_id = brightcove['accountId'], brightcove['playerId']
+
+        player_js = self._download_webpage(
+            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.min.js',
+            video_id, 'Retrieving policy key')
+        # _search_regex reports an extraction error instead of crashing on no match
+        return self._search_regex(
+            r'policyKey\s*:\s*"([^"]+)"', player_js, 'policy key')
+
+    def _get_manifest_url(self, data, video_id):
+        """Return the HLS master playlist URL of the lesson's Brightcove video.
+
+        Raises ExtractorError if the playback API response lists no such source.
+        """
+        host_video_id = data['props']['lesson']['video']['host_video_id']
+        account_id = data['props']['brightcove']['accountId']
+        policy_key = self._get_policy_key(data, video_id)
+
+        playback = self._download_json(
+            f'https://edge.api.brightcove.com/playback/v1/accounts/{account_id}/videos/{host_video_id}',
+            video_id, 'Retrieving manifest URL',
+            headers={'accept': f'application/json;pk={policy_key}'})
+
+        for source in playback.get('sources') or []:
+            src = source.get('src') or ''
+            if 'master.m3u8' in src:
+                return src
+
+        raise ExtractorError('Unable to find an HLS master playlist')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The page state is embedded as JSON in the data-page attribute of a div
+        page_elem = self._search_regex(
+            r'(<div[^>]+\bdata-page=[^>]+>)', webpage, 'page data')
+        data = self._parse_json(
+            extract_attributes(page_elem)['data-page'], video_id)
+        lesson = data['props']['lesson']
+        lesson_id = lesson['id']
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            self._get_manifest_url(data, video_id), str(lesson_id), 'mp4')
+
+        lesson_metadata = self._get_lesson_metadata(data, lesson_id)
+        section = lesson_metadata['section_data']
+
+        return {
+            'id': str(lesson_id),
+            'title': lesson_metadata['title'],
+            'display_id': video_id,
+            'description': lesson.get('description'),
+            'thumbnail': lesson.get('thumbnail'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'chapter': section['title'].strip(),
+            'chapter_number': section['number'],
+            'chapter_id': str(section['id']),
+        }