	[vimeo:likes] Support large like lists (Fixes #3847)

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -22,7 +22,8 @@ from youtube_dl.utils import (
     fix_xml_ampersands,
     get_meta_content,
     orderedSet,
-    PagedList,
+    OnDemandPagedList,
+    InAdvancePagedList,
     parse_duration,
     read_batch_urls,
     sanitize_filename,
@@ -246,10 +247,14 @@ class TestUtil(unittest.TestCase):
                 for i in range(firstid, upto):
                     yield i
 
-            pl = PagedList(get_page, pagesize)
+            pl = OnDemandPagedList(get_page, pagesize)
             got = pl.getslice(*sliceargs)
             self.assertEqual(got, expected)
 
+            iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
+            got = iapl.getslice(*sliceargs)
+            self.assertEqual(got, expected)
+
         testPL(5, 2, (), [0, 1, 2, 3, 4])
         testPL(5, 2, (1,), [1, 2, 3, 4])
         testPL(5, 2, (2,), [2, 3, 4])
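The testPL helper is only partially visible in the hunk above. As a rough sketch reconstructed from the visible context (not the verbatim test code), it builds a generator-backed page function over range(size) and then checks that both paged-list flavours slice exactly like a plain list:

from youtube_dl.utils import InAdvancePagedList, OnDemandPagedList


def testPL(size, pagesize, sliceargs, expected):
    def get_page(pagenum):
        # one "page" of the virtual list 0..size-1
        firstid = pagenum * pagesize
        upto = min(size, firstid + pagesize)
        for i in range(firstid, upto):
            yield i

    pl = OnDemandPagedList(get_page, pagesize)
    assert pl.getslice(*sliceargs) == expected

    iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
    assert iapl.getslice(*sliceargs) == expected


testPL(5, 2, (), [0, 1, 2, 3, 4])
testPL(5, 2, (2, 4), [2, 3])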

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
@@ -8,18 +8,19 @@ import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
+    clean_html,
     compat_HTTPError,
     compat_urllib_parse,
     compat_urllib_request,
-    clean_html,
-    get_element_by_attribute,
+    compat_urlparse,
     ExtractorError,
+    get_element_by_attribute,
+    InAdvancePagedList,
+    int_or_none,
     RegexNotFoundError,
-    smuggle_url,
     std_headers,
     unsmuggle_url,
     urlencode_postdata,
-    int_or_none,
 )
 
 
@@ -533,32 +534,55 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
 
 
 class VimeoLikesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
     IE_NAME = 'vimeo:likes'
     IE_DESC = 'Vimeo user likes'
     _TEST = {
-        'url': 'https://vimeo.com/user20132939/likes',
-        'playlist_mincount': 4,
-        'add_ies': ['Generic'],
+        'url': 'https://vimeo.com/user755559/likes/',
+        'playlist_mincount': 293,
         "info_dict": {
-            "description": "Videos Philipp Hagemeister likes on Vimeo.",
-            "title": "Vimeo / Philipp Hagemeister's likes",
-        },
-        'params': {
-            'extract_flat': False,
+            "description": "See all the videos urza likes",
+            "title": 'Videos urza likes',
         },
     }
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
-        rss_url = '%s//vimeo.com/user%s/likes/rss' % (
-            self.http_scheme(), user_id)
-        surl = smuggle_url(rss_url, {
-            'force_videoid': '%s_likes' % user_id,
-            'to_generic': True,
-        })
+        webpage = self._download_webpage(url, user_id)
+        page_count = self._int(
+            self._search_regex(
+                r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
+                    .*?</a></li>\s*<li\s+class="pagination_next">
+                ''', webpage, 'page count'),
+            'page count', fatal=True)
+        PAGE_SIZE = 12
+        title = self._html_search_regex(
+            r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
+        description = self._html_search_meta('description', webpage)
+
+        def _get_page(idx):
+            page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
+                self.http_scheme(), user_id, idx + 1)
+            webpage = self._download_webpage(
+                page_url, user_id,
+                note='Downloading page %d/%d' % (idx + 1, page_count))
+            video_list = self._search_regex(
+                r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
+                webpage, 'video content')
+            paths = re.findall(
+                r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
+            for path in paths:
+                yield {
+                    '_type': 'url',
+                    'url': compat_urlparse.urljoin(page_url, path),
+                }
+
+        pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
 
         return {
-            '_type': 'url',
-            'url': surl,
+            '_type': 'playlist',
+            'id': 'user%s_likes' % user_id,
+            'title': title,
+            'description': description,
+            'entries': pl,
         }
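To make the numbers concrete (PAGE_SIZE comes from the patch, the like count of 293 from the test's playlist_mincount, and the slice bounds are arbitrary): with 12 likes per page the pagination works out to 25 pages, and because the entries are exposed as an InAdvancePagedList, a consumer that slices the playlist with getslice(start, end) only ever requests the pages that overlap that range.

# Back-of-the-envelope check with the assumed numbers above (not from the patch):
PAGE_SIZE = 12                        # likes shown per page, as in the extractor
likes = 293                           # at least this many, per playlist_mincount
page_count = -(-likes // PAGE_SIZE)   # ceiling division -> 25 pages

# InAdvancePagedList.getslice(10, 20) only requests the overlapping pages:
start, end = 10, 20
start_page = start // PAGE_SIZE       # 0 -> /likes/page:1/sort:date
end_page = end // PAGE_SIZE + 1       # 2 -> pages 0 and 1 cover entries 10..19
print(page_count, list(range(start_page, end_page)))   # 25 [0, 1]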

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -26,7 +26,7 @@ from ..utils import (
     get_element_by_attribute,
     ExtractorError,
     int_or_none,
-    PagedList,
+    OnDemandPagedList,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -1341,7 +1341,7 @@ class YoutubeUserIE(InfoExtractor):
                     'id': video_id,
                     'title': title,
                 }
-        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
+        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
 
         return self.playlist_result(url_results, playlist_title=username)
 

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -1384,14 +1384,16 @@ def check_executable(exe, args=[]):
 
 
 class PagedList(object):
-    def __init__(self, pagefunc, pagesize):
-        self._pagefunc = pagefunc
-        self._pagesize = pagesize
-
     def __len__(self):
         # This is only useful for tests
         return len(self.getslice())
 
+
+class OnDemandPagedList(PagedList):
+    def __init__(self, pagefunc, pagesize):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+
     def getslice(self, start=0, end=None):
         res = []
         for pagenum in itertools.count(start // self._pagesize):
@@ -1430,6 +1432,35 @@ class PagedList(object):
         return res
 
 
+class InAdvancePagedList(PagedList):
+    def __init__(self, pagefunc, pagecount, pagesize):
+        self._pagefunc = pagefunc
+        self._pagecount = pagecount
+        self._pagesize = pagesize
+
+    def getslice(self, start=0, end=None):
+        res = []
+        start_page = start // self._pagesize
+        end_page = (
+            self._pagecount if end is None else (end // self._pagesize + 1))
+        skip_elems = start - start_page * self._pagesize
+        only_more = None if end is None else end - start
+        for pagenum in range(start_page, end_page):
+            page = list(self._pagefunc(pagenum))
+            if skip_elems:
+                page = page[skip_elems:]
+                skip_elems = None
+            if only_more is not None:
+                if len(page) < only_more:
+                    only_more -= len(page)
+                else:
+                    page = page[:only_more]
+                    res.extend(page)
+                    break
+            res.extend(page)
+        return res
+
+
 def uppercase_escape(s):
     unicode_escape = codecs.getdecoder('unicode_escape')
     return re.sub(
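For reference, a minimal standalone sketch (fetch_page, ITEMS and all numbers are made up for illustration; this is not part of the patch) of how the two classes differ in use: both honour the same getslice() contract, but InAdvancePagedList is constructed with the total page count, so getslice() knows exactly which page range to walk.

from youtube_dl.utils import InAdvancePagedList, OnDemandPagedList

PAGE_SIZE = 3
ITEMS = list(range(8))          # stand-in for e.g. liked-video URLs
requested = []                  # records which pages actually get fetched


def fetch_page(pagenum):
    # pretend this is one network request returning page number pagenum
    requested.append(pagenum)
    start = pagenum * PAGE_SIZE
    for item in ITEMS[start:start + PAGE_SIZE]:
        yield item


# On-demand: only the page size is known; pages are fetched as needed.
ondemand = OnDemandPagedList(fetch_page, PAGE_SIZE)
print(ondemand.getslice(0, 2))  # [0, 1]

# In advance: the page count (3 here) is known up front, e.g. scraped from
# pagination links as the Vimeo extractor above does.
del requested[:]
inadvance = InAdvancePagedList(fetch_page, 3, PAGE_SIZE)
print(inadvance.getslice(4))    # [4, 5, 6, 7]
print(requested)                # [1, 2] -- page 0 was never fetched
print(len(inadvance))           # 8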
Author: Philipp Hagemeister