[extractor/americastestkitchen] Fix extractor (#5343)

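Fix `_VALID_URL` and season extraction now that Cook's Country and Cook's Illustrated pages are served under `americastestkitchen.com/cookscountry/` and `americastestkitchen.com/cooksillustrated/` instead of their own domains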

Closes #5343
Authored by: bashonly
This commit is contained in:
bashonly 2022-10-24 10:16:56 +00:00 committed by GitHub
parent 2530b68d44
commit c66ed4e2e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -11,7 +11,7 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f', 'md5': 'b861c3e365ac38ad319cfd509c30577f',
@ -19,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5b400b9ee338f922cb06450c', 'id': '5b400b9ee338f922cb06450c',
'title': 'Japanese Suppers', 'title': 'Japanese Suppers',
'ext': 'mp4', 'ext': 'mp4',
'display_id': 'weeknight-japanese-suppers',
'description': 'md5:64e606bfee910627efc4b5f050de92b3', 'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://', 'timestamp': 1523304000,
'timestamp': 1523318400, 'upload_date': '20180409',
'upload_date': '20180410', 'release_date': '20180409',
'release_date': '20180410', 'series': 'America\'s Test Kitchen',
'series': "America's Test Kitchen", 'season': 'Season 18',
'season_number': 18,
'episode': 'Japanese Suppers', 'episode': 'Japanese Suppers',
'season_number': 18,
'episode_number': 15, 'episode_number': 15,
'duration': 1376,
'thumbnail': r're:^https?://',
'average_rating': 0,
'view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -40,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5fbe8c61bda2010001c6763b', 'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner', 'title': 'Simple Chicken Dinner',
'ext': 'mp4', 'ext': 'mp4',
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7', 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
'thumbnail': r're:^https?://', 'timestamp': 1610737200,
'timestamp': 1610755200, 'upload_date': '20210115',
'upload_date': '20210116', 'release_date': '20210115',
'release_date': '20210116', 'series': 'America\'s Test Kitchen',
'series': "America's Test Kitchen", 'season': 'Season 21',
'season_number': 21,
'episode': 'Simple Chicken Dinner', 'episode': 'Simple Chicken Dinner',
'season_number': 21,
'episode_number': 3, 'episode_number': 3,
'duration': 1397,
'thumbnail': r're:^https?://',
'view_count': int,
'average_rating': 0,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -57,10 +67,10 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do', 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington', 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
'only_matching': True, 'only_matching': True,
}] }]
@ -90,7 +100,7 @@ class AmericasTestKitchenIE(InfoExtractor):
class AmericasTestKitchenSeasonIE(InfoExtractor): class AmericasTestKitchenSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# ATK Season # ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1', 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@ -101,7 +111,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'playlist_count': 13, 'playlist_count': 13,
}, { }, {
# Cooks Country Season # Cooks Country Season
'url': 'https://www.cookscountry.com/episodes/browse/season_12', 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
'info_dict': { 'info_dict': {
'id': 'season_12', 'id': 'season_12',
'title': 'Season 12', 'title': 'Season 12',
@ -110,17 +120,17 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_name, season_number = self._match_valid_url(url).groups() show_path, season_number = self._match_valid_url(url).group('show', 'id')
season_number = int(season_number) season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco' slug = 'cco' if show_path == '/cookscountry' else 'atk'
season = 'Season %d' % season_number season = 'Season %d' % season_number
season_search = self._download_json( season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={ season, headers={
'Origin': 'https://www.%s.com' % show_name, 'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30', 'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={ }, query={
@ -136,12 +146,12 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
def entries(): def entries():
for episode in (season_search.get('hits') or []): for episode in (season_search.get('hits') or []):
search_url = episode.get('search_url') search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
if not search_url: if not search_url:
continue continue
yield { yield {
'_type': 'url', '_type': 'url',
'url': 'https://www.%s.com%s' % (show_name, search_url), 'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]), 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'title': episode.get('title'), 'title': episode.get('title'),
'description': episode.get('description'), 'description': episode.get('description'),