From ebf2fb4d619b7d65b40ae6bacc79bd9f3d3ceab8 Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Mon, 4 Oct 2021 18:42:24 +0000
Subject: [PATCH] [Vupload] Add extractor (#1146)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/29877
Authored by: u-spec-png
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/vupload.py    | 70 ++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 yt_dlp/extractor/vupload.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 8e8d269ce..b90110c7f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1713,6 +1713,7 @@ from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
+from .vupload import VuploadIE
 from .vvvvid import (
     VVVVIDIE,
     VVVVIDShowIE,
diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py
new file mode 100644
index 000000000..9846ababc
--- /dev/null
+++ b/yt_dlp/extractor/vupload.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_filesize,
+    extract_attributes,
+    int_or_none,
+)
+
+
+class VuploadIE(InfoExtractor):
+    """Extractor for videos hosted on vupload.com."""
+
+    _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://vupload.com/v/u28d0pl2tphy',
+        'md5': '9b42a4a193cca64d80248e58527d83c8',
+        'info_dict': {
+            'id': 'u28d0pl2tphy',
+            'ext': 'mp4',
+            'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+            'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Scrape the watch page at ``url`` and return its info dict.
+
+        The title and the 60-character media token are searched for with
+        the helper's default failure handling; duration, file size and the
+        <video> tag attributes are looked up with ``fatal=False``.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        # The media URL is built from a 60-character token that appears
+        # between pipe characters in the page source.
+        video_e = self._html_search_regex(r'\|([a-z0-9]{60})\|', webpage, 'video')
+        video_url = f'https://wurize.megaupload.to/{video_e}/v.mp4'
+        # NOTE(review): the icon markup in the next two patterns was
+        # reconstructed after the tags were stripped from this patch;
+        # confirm it against a live page.
+        duration = parse_duration(self._html_search_regex(
+            r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False))
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False))
+        # If the non-fatal search comes back empty, use an empty attribute
+        # dict instead of handing an empty result to the attribute parser.
+        video_tag = self._html_search_regex(
+            r'(<video[^>]+>)', webpage, 'video_info', fatal=False)
+        extra_video_info = extract_attributes(video_tag) if video_tag else {}
+        height = int_or_none(extra_video_info.get('height'))
+        description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'width': int_or_none(extra_video_info.get('width')),
+            'height': height,
+            # Leave format_id unset rather than emitting a bare 'p' when
+            # the height is unknown.
+            'format_id': f'{height}p' if height else None,
+            'title': title,
+            'description': description,
+        }