From 7be9ccff0bc82c0dd48b88b4f2570e5b504da9f3 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Tue, 10 Aug 2021 03:40:40 +0530
Subject: [PATCH] [utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code
---
 yt_dlp/utils.py | 63 +++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 4d83b1fbe..0e8392fdf 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4041,15 +4041,31 @@ class LazyList(collections.abc.Sequence):
         return repr(self.exhaust())
 
 
-class PagedList(object):
+class PagedList:
     def __len__(self):
         # This is only useful for tests
         return len(self.getslice())
 
-    def getslice(self, start, end):
+    def __init__(self, pagefunc, pagesize, use_cache=True):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+        self._use_cache = use_cache
+        self._cache = {}
+
+    def getpage(self, pagenum):
+        page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
+        if self._use_cache:
+            self._cache[pagenum] = page_results
+        return page_results
+
+    def getslice(self, start=0, end=None):
+        return list(self._getslice(start, end))
+
+    def _getslice(self, start, end):
         raise NotImplementedError('This method must be implemented by subclasses')
 
     def __getitem__(self, idx):
+        # NOTE: cache must be enabled if this is used
         if not isinstance(idx, int) or idx < 0:
             raise TypeError('indices must be non-negative integers')
         entries = self.getslice(idx, idx + 1)
@@ -4057,42 +4073,26 @@ class PagedList(object):
 
 
 class OnDemandPagedList(PagedList):
-    def __init__(self, pagefunc, pagesize, use_cache=True):
-        self._pagefunc = pagefunc
-        self._pagesize = pagesize
-        self._use_cache = use_cache
-        if use_cache:
-            self._cache = {}
-
-    def getslice(self, start=0, end=None):
-        res = []
+    def _getslice(self, start, end):
         for pagenum in itertools.count(start // self._pagesize):
             firstid = pagenum * self._pagesize
             nextfirstid = pagenum * self._pagesize + self._pagesize
             if start >= nextfirstid:
                 continue
 
-            page_results = None
-            if self._use_cache:
-                page_results = self._cache.get(pagenum)
-            if page_results is None:
-                page_results = list(self._pagefunc(pagenum))
-                if self._use_cache:
-                    self._cache[pagenum] = page_results
-
             startv = (
                 start % self._pagesize
                 if firstid <= start < nextfirstid
                 else 0)
-
             endv = (
                 ((end - 1) % self._pagesize) + 1
                 if (end is not None and firstid <= end <= nextfirstid)
                 else None)
 
+            page_results = self.getpage(pagenum)
             if startv != 0 or endv is not None:
                 page_results = page_results[startv:endv]
-            res.extend(page_results)
+            yield from page_results
 
             # A little optimization - if current page is not "full", ie. does
             # not contain page_size videos then we can assume that this page
@@ -4105,36 +4105,31 @@ class OnDemandPagedList(PagedList):
             # break out early as well
             if end == nextfirstid:
                 break
-        return res
 
 
 class InAdvancePagedList(PagedList):
     def __init__(self, pagefunc, pagecount, pagesize):
-        self._pagefunc = pagefunc
         self._pagecount = pagecount
-        self._pagesize = pagesize
+        PagedList.__init__(self, pagefunc, pagesize, True)
 
-    def getslice(self, start=0, end=None):
-        res = []
+    def _getslice(self, start, end):
         start_page = start // self._pagesize
         end_page = (
             self._pagecount if end is None else (end // self._pagesize + 1))
         skip_elems = start - start_page * self._pagesize
         only_more = None if end is None else end - start
         for pagenum in range(start_page, end_page):
-            page = list(self._pagefunc(pagenum))
+            page_results = self.getpage(pagenum)
             if skip_elems:
-                page = page[skip_elems:]
+                page_results = page_results[skip_elems:]
                 skip_elems = None
             if only_more is not None:
-                if len(page) < only_more:
-                    only_more -= len(page)
+                if len(page_results) < only_more:
+                    only_more -= len(page_results)
                 else:
-                    page = page[:only_more]
-                    res.extend(page)
+                    yield from page_results[:only_more]
                     break
-            res.extend(page)
-        return res
+            yield from page_results
 
 
 def uppercase_escape(s):