[extractor/cda] Support premium and misc improvements (#5529)

* Fix cache for non-ASCII key
* Improve error messages
* Better UA for fingerprint bypass

Authored by: selfisekai
This commit is contained in:
lauren n. liberda 2022-12-27 20:57:26 +01:00 committed by GitHub
parent 15e9e578c0
commit da8d2de208
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 12 deletions

View File

@ -5,6 +5,7 @@ import os
import re import re
import shutil import shutil
import traceback import traceback
import urllib.parse
from .utils import expand_path, traverse_obj, version_tuple, write_json_file from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__ from .version import __version__
@ -22,11 +23,9 @@ class Cache:
return expand_path(res) return expand_path(res)
def _get_cache_fn(self, section, key, dtype): def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
'invalid section %r' % section key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
return os.path.join(
self._get_root_dir(), section, f'{key}.{dtype}')
@property @property
def enabled(self): def enabled(self):

View File

@ -4,6 +4,7 @@ import datetime
import hashlib import hashlib
import hmac import hmac
import json import json
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -27,11 +28,10 @@ class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_NETRC_MACHINE = 'cdapl' _NETRC_MACHINE = 'cdapl'
_BASE_URL = 'http://www.cda.pl/' _BASE_URL = 'https://www.cda.pl'
_BASE_API_URL = 'https://api.cda.pl' _BASE_API_URL = 'https://api.cda.pl'
_API_HEADERS = { _API_HEADERS = {
'Accept': 'application/vnd.cda.public+json', 'Accept': 'application/vnd.cda.public+json',
'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
} }
# hardcoded in the app # hardcoded in the app
_LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q' _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
@ -101,6 +101,38 @@ class CDAIE(InfoExtractor):
}, **kwargs) }, **kwargs)
def _perform_login(self, username, password): def _perform_login(self, username, password):
app_version = random.choice((
'1.2.88 build 15306',
'1.2.174 build 18469',
))
android_version = random.randrange(8, 14)
phone_model = random.choice((
# x-kom.pl top selling Android smartphones, as of 2022-12-26
# https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
'ASUS ZenFone 8',
'Motorola edge 20 5G',
'Motorola edge 30 neo 5G',
'Motorola moto g22',
'OnePlus Nord 2T 5G',
'Samsung Galaxy A32 SMA325F',
'Samsung Galaxy M13',
'Samsung Galaxy S20 FE 5G',
'Xiaomi 11T',
'Xiaomi POCO M4 Pro',
'Xiaomi Redmi 10',
'Xiaomi Redmi 10C',
'Xiaomi Redmi 9C NFC',
'Xiaomi Redmi Note 10 Pro',
'Xiaomi Redmi Note 11 Pro',
'Xiaomi Redmi Note 11',
'Xiaomi Redmi Note 11S 5G',
'Xiaomi Redmi Note 11S',
'realme 10',
'realme 9 Pro+',
'vivo Y33s',
))
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {} cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5: if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}' self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
@ -138,9 +170,6 @@ class CDAIE(InfoExtractor):
meta = self._download_json( meta = self._download_json(
f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video'] f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
if meta.get('premium') and not meta.get('premium_free'):
self.report_drm(video_id)
uploader = traverse_obj(meta, 'author', 'login') uploader = traverse_obj(meta, 'author', 'login')
formats = [{ formats = [{
@ -151,6 +180,10 @@ class CDAIE(InfoExtractor):
'filesize': quality.get('length'), 'filesize': quality.get('length'),
} for quality in meta['qualities'] if quality.get('file')] } for quality in meta['qualities'] if quality.get('file')]
if meta.get('premium') and not meta.get('premium_free') and not formats:
raise ExtractorError(
'Video requires CDA Premium - subscription needed', expected=True)
return { return {
'id': video_id, 'id': video_id,
'title': meta.get('title'), 'title': meta.get('title'),
@ -167,10 +200,10 @@ class CDAIE(InfoExtractor):
def _web_extract(self, video_id, url): def _web_extract(self, video_id, url):
self._set_cookie('cda.pl', 'cda.player', 'html5') self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage( webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id) f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage: if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True) self.raise_login_required('This video is only available for premium users')
if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage): if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
self.raise_geo_restricted() self.raise_geo_restricted()