mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[vimeo:likes] Support large like lists (Fixes #3847)
This commit is contained in:
		| @@ -22,7 +22,8 @@ from youtube_dl.utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     get_meta_content, | ||||
|     orderedSet, | ||||
|     PagedList, | ||||
|     OnDemandPagedList, | ||||
|     InAdvancePagedList, | ||||
|     parse_duration, | ||||
|     read_batch_urls, | ||||
|     sanitize_filename, | ||||
| @@ -246,10 +247,14 @@ class TestUtil(unittest.TestCase): | ||||
|                 for i in range(firstid, upto): | ||||
|                     yield i | ||||
|  | ||||
|             pl = PagedList(get_page, pagesize) | ||||
|             pl = OnDemandPagedList(get_page, pagesize) | ||||
|             got = pl.getslice(*sliceargs) | ||||
|             self.assertEqual(got, expected) | ||||
|  | ||||
|             iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize) | ||||
|             got = iapl.getslice(*sliceargs) | ||||
|             self.assertEqual(got, expected) | ||||
|  | ||||
|         testPL(5, 2, (), [0, 1, 2, 3, 4]) | ||||
|         testPL(5, 2, (1,), [1, 2, 3, 4]) | ||||
|         testPL(5, 2, (2,), [2, 3, 4]) | ||||
|   | ||||
| @@ -8,18 +8,19 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     compat_urlparse, | ||||
|     ExtractorError, | ||||
|     get_element_by_attribute, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     smuggle_url, | ||||
|     std_headers, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -533,32 +534,55 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): | ||||
|  | ||||
|  | ||||
| class VimeoLikesIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])' | ||||
|     _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)' | ||||
|     IE_NAME = 'vimeo:likes' | ||||
|     IE_DESC = 'Vimeo user likes' | ||||
|     _TEST = { | ||||
|         'url': 'https://vimeo.com/user20132939/likes', | ||||
|         'playlist_mincount': 4, | ||||
|         'add_ies': ['Generic'], | ||||
|         'url': 'https://vimeo.com/user755559/likes/', | ||||
|         'playlist_mincount': 293, | ||||
|         "info_dict": { | ||||
|             "description": "Videos Philipp Hagemeister likes on Vimeo.", | ||||
|             "title": "Vimeo / Philipp Hagemeister's likes", | ||||
|         }, | ||||
|         'params': { | ||||
|             'extract_flat': False, | ||||
|             "description": "See all the videos urza likes", | ||||
|             "title": 'Videos urza likes', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         user_id = self._match_id(url) | ||||
|         rss_url = '%s//vimeo.com/user%s/likes/rss' % ( | ||||
|             self.http_scheme(), user_id) | ||||
|         surl = smuggle_url(rss_url, { | ||||
|             'force_videoid': '%s_likes' % user_id, | ||||
|             'to_generic': True, | ||||
|         }) | ||||
|         webpage = self._download_webpage(url, user_id) | ||||
|         page_count = self._int( | ||||
|             self._search_regex( | ||||
|                 r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)"> | ||||
|                     .*?</a></li>\s*<li\s+class="pagination_next"> | ||||
|                 ''', webpage, 'page count'), | ||||
|             'page count', fatal=True) | ||||
|         PAGE_SIZE = 12 | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         def _get_page(idx): | ||||
|             page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( | ||||
|                 self.http_scheme(), user_id, idx + 1) | ||||
|             webpage = self._download_webpage( | ||||
|                 page_url, user_id, | ||||
|                 note='Downloading page %d/%d' % (idx + 1, page_count)) | ||||
|             video_list = self._search_regex( | ||||
|                 r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>', | ||||
|                 webpage, 'video content') | ||||
|             paths = re.findall( | ||||
|                 r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list) | ||||
|             for path in paths: | ||||
|                 yield { | ||||
|                     '_type': 'url', | ||||
|                     'url': compat_urlparse.urljoin(page_url, path), | ||||
|                 } | ||||
|  | ||||
|         pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url', | ||||
|             'url': surl, | ||||
|             '_type': 'playlist', | ||||
|             'id': 'user%s_likes' % user_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'entries': pl, | ||||
|         } | ||||
|   | ||||
| @@ -26,7 +26,7 @@ from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     PagedList, | ||||
|     OnDemandPagedList, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     orderedSet, | ||||
| @@ -1341,7 +1341,7 @@ class YoutubeUserIE(InfoExtractor): | ||||
|                     'id': video_id, | ||||
|                     'title': title, | ||||
|                 } | ||||
|         url_results = PagedList(download_page, self._GDATA_PAGE_SIZE) | ||||
|         url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE) | ||||
|  | ||||
|         return self.playlist_result(url_results, playlist_title=username) | ||||
|  | ||||
|   | ||||
| @@ -1384,14 +1384,16 @@ def check_executable(exe, args=[]): | ||||
|  | ||||
|  | ||||
| class PagedList(object): | ||||
|     def __init__(self, pagefunc, pagesize): | ||||
|         self._pagefunc = pagefunc | ||||
|         self._pagesize = pagesize | ||||
|  | ||||
|     def __len__(self): | ||||
|         # This is only useful for tests | ||||
|         return len(self.getslice()) | ||||
|  | ||||
|  | ||||
| class OnDemandPagedList(PagedList): | ||||
|     def __init__(self, pagefunc, pagesize): | ||||
|         self._pagefunc = pagefunc | ||||
|         self._pagesize = pagesize | ||||
|  | ||||
|     def getslice(self, start=0, end=None): | ||||
|         res = [] | ||||
|         for pagenum in itertools.count(start // self._pagesize): | ||||
| @@ -1430,6 +1432,35 @@ class PagedList(object): | ||||
|         return res | ||||
|  | ||||
|  | ||||
class InAdvancePagedList(PagedList):
    """Paged list whose total number of pages is known before fetching.

    ``pagefunc(pagenum)`` is called with a zero-based page number and must
    return an iterable with the entries of that page.  ``pagecount`` is the
    number of pages available and ``pagesize`` is the number of entries per
    page that is used to map entry indices to page numbers.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in ``[start, end)`` as a list.

        With ``end=None`` every page from the one containing ``start`` up to
        the last page is fetched.  Pages at or beyond ``pagecount`` are never
        requested, and an empty list is returned when ``end <= start``.
        """
        if end is not None and end <= start:
            # A non-positive remaining count would otherwise turn
            # ``page[:only_more]`` into a slice relative to the page's end
            # and leak elements into the result.
            return []

        res = []
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
        else:
            # Clamp to the known page count so that a slice reaching past
            # the end of the list does not request non-existent pages.
            end_page = min(self._pagecount, end // self._pagesize + 1)
        # Offset of ``start`` inside the first fetched page.
        skip_elems = start - start_page * self._pagesize
        # Number of entries still to be collected (None means unlimited).
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page needs its head trimmed.
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested range.
                    res.extend(page[:only_more])
                    break
            res.extend(page)
        return res
|  | ||||
|  | ||||
| def uppercase_escape(s): | ||||
|     unicode_escape = codecs.getdecoder('unicode_escape') | ||||
|     return re.sub( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister