[extractor/ninegag] Extract uploader (#4597)

Closes #4587
Authored by: DjesonPV
This commit is contained in:
Djeson 2022-08-07 22:21:53 +02:00 committed by GitHub
parent a416623436
commit 298d9c0e89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -3,7 +3,7 @@ from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
try_get, traverse_obj,
unescapeHTML, unescapeHTML,
url_or_none, url_or_none,
) )
@ -11,18 +11,20 @@ from ..utils import (
class NineGagIE(InfoExtractor): class NineGagIE(InfoExtractor):
IE_NAME = '9gag' IE_NAME = '9gag'
IE_DESC = '9GAG'
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://9gag.com/gag/ae5Ag7B', 'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': { 'info_dict': {
'id': 'ae5Ag7B', 'id': 'ae5Ag7B',
'ext': 'mp4', 'ext': 'webm',
'title': 'Capybara Agility Training', 'title': 'Capybara Agility Training',
'upload_date': '20191108', 'upload_date': '20191108',
'timestamp': 1573237208, 'timestamp': 1573237208,
'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
'categories': ['Awesome'], 'categories': ['Awesome'],
'tags': ['Weimaraner', 'American Pit Bull Terrier'], 'tags': ['Awesome'],
'duration': 44, 'duration': 44,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
@ -32,6 +34,26 @@ class NineGagIE(InfoExtractor):
# HTML escaped title # HTML escaped title
'url': 'https://9gag.com/gag/av5nvyb', 'url': 'https://9gag.com/gag/av5nvyb',
'only_matching': True, 'only_matching': True,
}, {
# Non Anonymous Uploader
'url': 'https://9gag.com/gag/ajgp66G',
'info_dict': {
'id': 'ajgp66G',
'ext': 'webm',
'title': 'Master Shifu! Or Splinter! You decide:',
'upload_date': '20220806',
'timestamp': 1659803411,
'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
'categories': ['Funny'],
'tags': ['Funny'],
'duration': 26,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'uploader': 'Peter Klaus',
'uploader_id': 'peterklaus12',
'uploader_url': 'https://9gag.com/u/peterklaus12',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -46,8 +68,6 @@ class NineGagIE(InfoExtractor):
'The given url does not contain a video', 'The given url does not contain a video',
expected=True) expected=True)
title = unescapeHTML(post['title'])
duration = None duration = None
formats = [] formats = []
thumbnails = [] thumbnails = []
@ -98,7 +118,7 @@ class NineGagIE(InfoExtractor):
formats.append(common) formats.append(common)
self._sort_formats(formats) self._sort_formats(formats)
section = try_get(post, lambda x: x['postSection']['name']) section = traverse_obj(post, ('postSection', 'name'))
tags = None tags = None
post_tags = post.get('tags') post_tags = post.get('tags')
@ -110,18 +130,19 @@ class NineGagIE(InfoExtractor):
continue continue
tags.append(tag_key) tags.append(tag_key)
get_count = lambda x: int_or_none(post.get(x + 'Count'))
return { return {
'id': post_id, 'id': post_id,
'title': title, 'title': unescapeHTML(post.get('title')),
'timestamp': int_or_none(post.get('creationTs')), 'timestamp': int_or_none(post.get('creationTs')),
'duration': duration, 'duration': duration,
'uploader': traverse_obj(post, ('creator', 'fullName')),
'uploader_id': traverse_obj(post, ('creator', 'username')),
'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'like_count': get_count('upVote'), 'like_count': int_or_none(post.get('upVoteCount')),
'dislike_count': get_count('downVote'), 'dislike_count': int_or_none(post.get('downVoteCount')),
'comment_count': get_count('comments'), 'comment_count': int_or_none(post.get('commentsCount')),
'age_limit': 18 if post.get('nsfw') == 1 else None, 'age_limit': 18 if post.get('nsfw') == 1 else None,
'categories': [section] if section else None, 'categories': [section] if section else None,
'tags': tags, 'tags': tags,