[ie/createacademy] extract course

2025-01-05 00:47:42 -05:00 · 2024-12-17 14:18:48 -08:00 · 2024-12-17 14:18:48 -08:00 · 6eb69a3e12
commit 6eb69a3e12
parent ed0ecfe56d
2 changed files with 49 additions and 4 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -436,6 +436,7 @@ from .crackle import CrackleIE
 from .craftsy import CraftsyIE
 from .createacademy import (
    CreateAcademyIE,
+    CreateAcademyCourseIE,
 )
 from .crooksandliars import CrooksAndLiarsIE
 from .crowdbunker import (
--- a/yt_dlp/extractor/createacademy.py
+++ b/yt_dlp/extractor/createacademy.py
@ -18,6 +18,10 @@ class CreateAcademyIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Create Academy - s10e01 - Meet Dan',
                'description': 'md5:48c8af37219020571a84d5f406e75d86',
+                'display_id': 'meet-dan',
+                'chapter': 'Introduction',
+                'chapter_id': '34',
+                'chapter_number': 1,
                'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/6222962662001/22f75006-c49f-4d95-8673-1b60df4223d2/45d953e0-fa58-4cb6-9217-1c7b3c80c932/1280x720/match/image.jpg',
            },
        },
@ -70,14 +74,17 @@ class CreateAcademyIE(InfoExtractor):
            if 'master.m3u8' in source['src']:
                return source['src']

-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    def _get_page_data(self, url, video_id):  # download the page at `url` and decode the JSON held in a <div>'s data-page attribute
        webpage = self._download_webpage(url, video_id)

-        # parse
        page_elem = self._search_regex(r'(<div[^>]+>)', webpage, 'div')
        attributes = extract_attributes(page_elem)
-        data = json.loads(attributes['data-page'])
+
+        return json.loads(attributes['data-page'])  # NOTE(review): fails if the <div> matched above carries no data-page attribute
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        data = self._get_page_data(url, video_id)
        createacademy_id = data['props']['lesson']['id']

        # get media from manifest
@ -103,3 +110,40 @@ class CreateAcademyIE(InfoExtractor):
            'chapter_number': lesson_metadata['section_data']['number'],
            'chapter_id': str(lesson_metadata['section_data']['id']),
        }
+
+
+class CreateAcademyCourseIE(CreateAcademyIE):
+    """Expose every lesson of a Create Academy course as one playlist."""
+
+    # Anchored right after the course slug: any deeper path (e.g. a lesson
+    # page) must not be claimed by this extractor.
+    _VALID_URL = r'https://www\.createacademy\.com/courses/(?P<id>[^/?#]+)/?(?:[?#]|$)'
+
+    _TESTS = [
+        {
+            'url': 'https://www.createacademy.com/courses/dan-pearson',
+            'info_dict': {
+                'id': 'dan-pearson',
+            },
+            'playlist_mincount': 1,
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return a playlist whose entries point at the course's lesson pages.
+
+        Lessons are handed to CreateAcademyIE through url_result() rather than
+        super()._real_extract(): run on this subclass, the parent method calls
+        self._match_id(), which matches against the course _VALID_URL and so
+        reports the course slug instead of the lesson slug. Delegating also
+        lets yt-dlp resolve each lesson on demand instead of all up front.
+        """
+        course_id = self._match_id(url)
+        data = self._get_page_data(url, course_id)
+        entries = [
+            self.url_result('https://www.createacademy.com' + lesson['lessonPath'], CreateAcademyIE)
+            for section in data['props']['curriculum']['sections']
+            for lesson in section['lessons']
+        ]
+        # 'playlist', not 'multi_video': the lessons are separate videos, not parts of one
+        return self.playlist_result(entries, course_id)