mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[pornhub] Separate and fix playlist extractor (#700)
Closes #680. Authored by: mzbaulhaque
This commit is contained in:
		| @@ -1044,6 +1044,7 @@ from .pornhd import PornHdIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
|     PornHubUserIE, | ||||
|     PornHubPlaylistIE, | ||||
|     PornHubPagedVideoListIE, | ||||
|     PornHubUserVideosUploadIE, | ||||
| ) | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import itertools | ||||
| import math | ||||
| import operator | ||||
| import re | ||||
|  | ||||
| @@ -638,7 +639,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): | ||||
|  | ||||
|  | ||||
| class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.pornhub.com/model/zoe_ph/videos', | ||||
|         'only_matching': True, | ||||
| @@ -731,18 +732,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): | ||||
|     }, { | ||||
|         'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.pornhub.com/playlist/44121572', | ||||
|         'info_dict': { | ||||
|             'id': 'playlist/44121572', | ||||
|         }, | ||||
|         'playlist_mincount': 132, | ||||
|     }, { | ||||
|         'url': 'https://www.pornhub.com/playlist/4667351', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://de.pornhub.com/playlist/4667351', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos', | ||||
|         'only_matching': True, | ||||
| @@ -770,3 +759,59 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): | ||||
|         'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|  | ||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    """Extractor for ``/playlist/<id>`` URLs.

    The first page of entries is read from the playlist page itself; every
    further page is requested from the ``playlist/viewChunked`` endpoint using
    the playlist id and token scraped from that first page.
    """

    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/playlist/44121572',
        'info_dict': {
            'id': '44121572',
        },
        'playlist_count': 77,
    }, {
        'url': 'https://www.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351?page=2',
        'only_matching': True,
    }]

    # Page sizes used by the page-count arithmetic in _entries(): the formula
    # treats the initial playlist page as holding 36 items and each
    # viewChunked page as holding 40.
    # NOTE(review): values are taken from the original formula - confirm
    # against the site if the pagination ever comes up short.
    _FIRST_PAGE_SIZE = 36
    _CHUNK_PAGE_SIZE = 40

    def _entries(self, url, host, item_id):
        """Yield the playlist entries page by page.

        url is the playlist page to fetch first, host is interpolated into
        the viewChunked URL and handed to _extract_entries(), and item_id is
        passed to the download calls for progress reporting.
        """
        webpage = self._download_webpage(url, item_id, 'Downloading page 1')
        playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
        video_count = int_or_none(
            self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
        token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')

        # One page for the initial HTML plus however many chunks are needed
        # for the remainder. int() keeps the result usable as a range() bound
        # on interpreters where math.ceil() returns a float.
        remaining = video_count - self._FIRST_PAGE_SIZE
        page_count = int(math.ceil(remaining / float(self._CHUNK_PAGE_SIZE))) + 1
        page_entries = self._extract_entries(webpage, host)

        def download_page(page_num):
            """Fetch page page_num of the playlist from the chunked endpoint."""
            note = 'Downloading page {}'.format(page_num)
            page_url = 'https://www.{}/playlist/viewChunked'.format(host)
            return self._download_webpage(page_url, item_id, note, query={
                'id': playlist_id,
                'page': page_num,
                'token': token,
            })

        for page_num in range(1, page_count + 1):
            # Page 1 was already downloaded and parsed above.
            if page_num > 1:
                webpage = download_page(page_num)
                page_entries = self._extract_entries(webpage, host)
            # An empty page ends the playlist even if the computed page count
            # says there should be more.
            if not page_entries:
                break
            for entry in page_entries:
                yield entry

    def _real_extract(self, url):
        """Log in for the matched host and return the playlist result."""
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        item_id = mobj.group('id')

        self._login(host)

        # The 'url' group stops before any query string, so pagination always
        # starts from the first page regardless of a ?page=N suffix.
        return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|   | ||||
		Reference in New Issue
	
	Block a user
	 mzbaulhaque
					mzbaulhaque