From fb2d1ee6cc259d2a23ac6f20dea3fce5fcf9af1a Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Fri, 5 Nov 2021 21:31:34 +0000 Subject: [PATCH] [Instagram] Add IOS URL support (#1560) Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/instagram.py | 43 +++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9f818a12f..e984f51b5 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -588,6 +588,7 @@ from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE from .instagram import ( InstagramIE, + InstagramIOSIE, InstagramUserIE, InstagramTagIE, ) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index c4036d096..4694c9a33 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -73,6 +73,48 @@ class InstagramBaseIE(InfoExtractor): self._login() +class InstagramIOSIE(InfoExtractor): + _VALID_URL = r'instagram://media\?id=(?P<id>[\d_]+)' + _TESTS = [{ + 'url': 'instagram://media?id=482584233761418119', + 'md5': '0d2da106a9d2631273e192b372806516', + 'info_dict': { + 'id': 'aye83DjauH', + 'ext': 'mp4', + 'title': 'Video by naomipq', + 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 0, + 'timestamp': 1371748545, + 'upload_date': '20130620', + 'uploader_id': 'naomipq', + 'uploader': 'B E A U T Y F O R A S H E S', + 'like_count': int, + 'comment_count': int, + 'comments': list, + }, + 'add_ie': ['Instagram'] + }] + + def _get_id(self, id): + """Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id""" + chrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_' + media_id = int(id.split('_')[0]) + shortened_id = '' + while media_id > 0: + r = media_id % 64 + media_id = (media_id - r) // 64 + 
shortened_id = chrs[r] + shortened_id + return shortened_id + + def _real_extract(self, url): + return { + '_type': 'url_transparent', + 'url': f'http://instagram.com/tv/{self._get_id(self._match_id(url))}/', + 'ie_key': 'Instagram', + } + + class InstagramIE(InstagramBaseIE): _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))' _TESTS = [{ @@ -348,7 +390,6 @@ class InstagramIE(InstagramBaseIE): class InstagramPlaylistBaseIE(InstagramBaseIE): - _gis_tmpl = None # used to cache GIS request type def _parse_graphql(self, webpage, item_id):