From bf393fe1b10ea0709d68ecf4db439976aa4f76fc Mon Sep 17 00:00:00 2001
From: Huntington Pennington
Date: Tue, 18 Jul 2023 09:17:36 -0400
Subject: [PATCH 1/2] [megatube] Add extractor

---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/megatube.py    | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 yt_dlp/extractor/megatube.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 2af99b3da..7f1dac92e 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1058,6 +1058,7 @@ from .mediastream import (
 from .mediaworksnz import MediaWorksNZVODIE
 from .medici import MediciIE
 from .megaphone import MegaphoneIE
+from .megatube import MegaTubeIE
 from .meipai import MeipaiIE
 from .melonvod import MelonVODIE
 from .meta import METAIE
diff --git a/yt_dlp/extractor/megatube.py b/yt_dlp/extractor/megatube.py
new file mode 100644
index 000000000..5843c7fd9
--- /dev/null
+++ b/yt_dlp/extractor/megatube.py
@@ -0,0 +1,51 @@
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+)
+
+
+class MegaTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?megatube\.xxx/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'https://www.megatube.xxx/videos/104245/brunette-abbie-cat-with-big-fake-tits-gets-the-fuck-she-wants/',
+        'md5': '8df46b28ce6d7ea00cb4d09adf881cd3',
+        'info_dict': {
+            'id': '104245',
+            'ext': 'mp4',
+            'title': 'Brunette Abbie Cat with big fake tits gets the fuck she wants',
+            'thumbnail': 'https://www.megatube.xxx/contents/videos_sources/104000/104245/screenshots/9.jpg',
+            'age_limit': 18,
+            'cookies': str
+        }
+    }, {
+        'url': 'https://www.megatube.xxx/videos/104245/brunette-abbie-cat-with-big-fake-tits-gets-the-fuck-she-wants/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data_json = self._search_json(
+            r'var\s?flashvars\s?=\s?', webpage, 'data', video_id,
+            transform_source=js_to_json)
+
+        video_url = data_json.get('video_url')
+        thumbnail = data_json.get('preview_url')
+
+        title = (self._og_search_title(
+            webpage, default=None) or self._html_extract_title(
+            webpage).split("(")[0]
+        ).strip()
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+            'ext': 'mp4',
+            'cookies': 'Domain=.megatube.xxx; Path=/'
+        }

From 4e1d7fa791df4b37f94d51f450b9d172016931ea Mon Sep 17 00:00:00 2001
From: Huntington Pennington
Date: Fri, 28 Jul 2023 06:59:13 -0400
Subject: [PATCH 2/2] Updated title extraction method to allow '(' in title
 but only if it is not trailing from the <title> tag

---
Notes: adds the `import re` that re.sub() needs and uses raw-string
patterns; the blob ids on the index line below were not regenerated.

 yt_dlp/extractor/megatube.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/megatube.py b/yt_dlp/extractor/megatube.py
index 5843c7fd9..095295eee 100644
--- a/yt_dlp/extractor/megatube.py
+++ b/yt_dlp/extractor/megatube.py
@@ -1,3 +1,5 @@
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     js_to_json,
@@ -36,8 +38,10 @@ class MegaTubeIE(InfoExtractor):
         thumbnail = data_json.get('preview_url')
 
+        # When og:title yields nothing, fall back to <title> minus its last
+        # "(...)" chunk; parentheses earlier in the title are preserved
         title = (self._og_search_title(
-            webpage, default=None) or self._html_extract_title(
-            webpage).split("(")[0]
+            webpage, default=None) or re.sub(r'(.*)\(.*\)', r'\1',
+            self._html_extract_title(webpage)).strip()
         ).strip()
 
         return {