mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Update to ytdl-commit-195f22f6
[generic] Improve KVS (etc) extraction
195f22f679
Closes #3716
Authored by: Grub4k, pukkandan
			
			
This commit is contained in:
		| @@ -1872,6 +1872,11 @@ from .theweatherchannel import TheWeatherChannelIE | |||||||
| from .thisamericanlife import ThisAmericanLifeIE | from .thisamericanlife import ThisAmericanLifeIE | ||||||
| from .thisav import ThisAVIE | from .thisav import ThisAVIE | ||||||
| from .thisoldhouse import ThisOldHouseIE | from .thisoldhouse import ThisOldHouseIE | ||||||
|  | from .thisvid import ( | ||||||
|  |     ThisVidIE, | ||||||
|  |     ThisVidMemberIE, | ||||||
|  |     ThisVidPlaylistIE, | ||||||
|  | ) | ||||||
| from .threespeak import ( | from .threespeak import ( | ||||||
|     ThreeSpeakIE, |     ThreeSpeakIE, | ||||||
|     ThreeSpeakUserIE, |     ThreeSpeakUserIE, | ||||||
|   | |||||||
| @@ -1396,10 +1396,16 @@ class InfoExtractor: | |||||||
|         # And then there are the jokers who advertise that they use RTA, but actually don't. |         # And then there are the jokers who advertise that they use RTA, but actually don't. | ||||||
|         AGE_LIMIT_MARKERS = [ |         AGE_LIMIT_MARKERS = [ | ||||||
|             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', |             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', | ||||||
|  |             r'>[^<]*you acknowledge you are at least (\d+) years old', | ||||||
|  |             r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b', | ||||||
|         ] |         ] | ||||||
|         if any(re.search(marker, html) for marker in AGE_LIMIT_MARKERS): | 
 | ||||||
|             return 18 |         age_limit = 0 | ||||||
|         return 0 |         for marker in AGE_LIMIT_MARKERS: | ||||||
|  |             mobj = re.search(marker, html) | ||||||
|  |             if mobj: | ||||||
|  |                 age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18))) | ||||||
|  |         return age_limit | ||||||
| 
 | 
 | ||||||
|     def _media_rating_search(self, html): |     def _media_rating_search(self, html): | ||||||
|         # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ |         # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ | ||||||
| @@ -3216,7 +3222,7 @@ class InfoExtractor: | |||||||
| 
 | 
 | ||||||
|     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): |     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|             r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)', |             r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', | ||||||
|             webpage) |             webpage) | ||||||
|         if mobj: |         if mobj: | ||||||
|             try: |             try: | ||||||
| @@ -3237,19 +3243,20 @@ class InfoExtractor: | |||||||
| 
 | 
 | ||||||
|     def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, |     def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, | ||||||
|                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): |                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): | ||||||
|         # JWPlayer backward compatibility: flattened playlists |  | ||||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 |  | ||||||
|         if 'playlist' not in jwplayer_data: |  | ||||||
|             jwplayer_data = {'playlist': [jwplayer_data]} |  | ||||||
| 
 |  | ||||||
|         entries = [] |         entries = [] | ||||||
|  |         if not isinstance(jwplayer_data, dict): | ||||||
|  |             return entries | ||||||
| 
 | 
 | ||||||
|         # JWPlayer backward compatibility: single playlist item |         playlist_items = jwplayer_data.get('playlist') | ||||||
|  |         # JWPlayer backward compatibility: single playlist item/flattened playlists | ||||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 |         # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 | ||||||
|         if not isinstance(jwplayer_data['playlist'], list): |         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 | ||||||
|             jwplayer_data['playlist'] = [jwplayer_data['playlist']] |         if not isinstance(playlist_items, list): | ||||||
|  |             playlist_items = (playlist_items or jwplayer_data, ) | ||||||
| 
 | 
 | ||||||
|         for video_data in jwplayer_data['playlist']: |         for video_data in playlist_items: | ||||||
|  |             if not isinstance(video_data, dict): | ||||||
|  |                 continue | ||||||
|             # JWPlayer backward compatibility: flattened sources |             # JWPlayer backward compatibility: flattened sources | ||||||
|             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 |             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 | ||||||
|             if 'sources' not in video_data: |             if 'sources' not in video_data: | ||||||
| @@ -3287,6 +3294,13 @@ class InfoExtractor: | |||||||
|                 'timestamp': int_or_none(video_data.get('pubdate')), |                 'timestamp': int_or_none(video_data.get('pubdate')), | ||||||
|                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), |                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), | ||||||
|                 'subtitles': subtitles, |                 'subtitles': subtitles, | ||||||
|  |                 'alt_title': clean_html(video_data.get('subtitle')),  # attributes used e.g. by Tele5 ... | ||||||
|  |                 'genre': clean_html(video_data.get('genre')), | ||||||
|  |                 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), | ||||||
|  |                 'season_number': int_or_none(video_data.get('season')), | ||||||
|  |                 'episode_number': int_or_none(video_data.get('episode')), | ||||||
|  |                 'release_year': int_or_none(video_data.get('releasedate')), | ||||||
|  |                 'age_limit': int_or_none(video_data.get('age_restriction')), | ||||||
|             } |             } | ||||||
|             # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 |             # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 | ||||||
|             if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): |             if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): | ||||||
| @@ -3304,7 +3318,7 @@ class InfoExtractor: | |||||||
| 
 | 
 | ||||||
|     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, |     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, | ||||||
|                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): |                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): | ||||||
|         urls = [] |         urls = set() | ||||||
|         formats = [] |         formats = [] | ||||||
|         for source in jwplayer_sources_data: |         for source in jwplayer_sources_data: | ||||||
|             if not isinstance(source, dict): |             if not isinstance(source, dict): | ||||||
| @@ -3313,14 +3327,14 @@ class InfoExtractor: | |||||||
|                 base_url, self._proto_relative_url(source.get('file'))) |                 base_url, self._proto_relative_url(source.get('file'))) | ||||||
|             if not source_url or source_url in urls: |             if not source_url or source_url in urls: | ||||||
|                 continue |                 continue | ||||||
|             urls.append(source_url) |             urls.add(source_url) | ||||||
|             source_type = source.get('type') or '' |             source_type = source.get('type') or '' | ||||||
|             ext = mimetype2ext(source_type) or determine_ext(source_url) |             ext = mimetype2ext(source_type) or determine_ext(source_url) | ||||||
|             if source_type == 'hls' or ext == 'm3u8': |             if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: | ||||||
|                 formats.extend(self._extract_m3u8_formats( |                 formats.extend(self._extract_m3u8_formats( | ||||||
|                     source_url, video_id, 'mp4', entry_protocol='m3u8_native', |                     source_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||||
|                     m3u8_id=m3u8_id, fatal=False)) |                     m3u8_id=m3u8_id, fatal=False)) | ||||||
|             elif source_type == 'dash' or ext == 'mpd': |             elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: | ||||||
|                 formats.extend(self._extract_mpd_formats( |                 formats.extend(self._extract_mpd_formats( | ||||||
|                     source_url, video_id, mpd_id=mpd_id, fatal=False)) |                     source_url, video_id, mpd_id=mpd_id, fatal=False)) | ||||||
|             elif ext == 'smil': |             elif ext == 'smil': | ||||||
| @@ -3335,13 +3349,12 @@ class InfoExtractor: | |||||||
|                     'ext': ext, |                     'ext': ext, | ||||||
|                 }) |                 }) | ||||||
|             else: |             else: | ||||||
|  |                 format_id = str_or_none(source.get('label')) | ||||||
|                 height = int_or_none(source.get('height')) |                 height = int_or_none(source.get('height')) | ||||||
|                 if height is None: |                 if height is None and format_id: | ||||||
|                     # Often no height is provided but there is a label in |                     # Often no height is provided but there is a label in | ||||||
|                     # format like "1080p", "720p SD", or 1080. |                     # format like "1080p", "720p SD", or 1080. | ||||||
|                     height = int_or_none(self._search_regex( |                     height = parse_resolution(format_id).get('height') | ||||||
|                         r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), |  | ||||||
|                         'height', default=None)) |  | ||||||
|                 a_format = { |                 a_format = { | ||||||
|                     'url': source_url, |                     'url': source_url, | ||||||
|                     'width': int_or_none(source.get('width')), |                     'width': int_or_none(source.get('width')), | ||||||
| @@ -3349,6 +3362,7 @@ class InfoExtractor: | |||||||
|                     'tbr': int_or_none(source.get('bitrate'), scale=1000), |                     'tbr': int_or_none(source.get('bitrate'), scale=1000), | ||||||
|                     'filesize': int_or_none(source.get('filesize')), |                     'filesize': int_or_none(source.get('filesize')), | ||||||
|                     'ext': ext, |                     'ext': ext, | ||||||
|  |                     'format_id': format_id | ||||||
|                 } |                 } | ||||||
|                 if source_url.startswith('rtmp'): |                 if source_url.startswith('rtmp'): | ||||||
|                     a_format['ext'] = 'flv' |                     a_format['ext'] = 'flv' | ||||||
|   | |||||||
| @@ -32,6 +32,7 @@ from ..utils import ( | |||||||
|     unified_timestamp, |     unified_timestamp, | ||||||
|     unsmuggle_url, |     unsmuggle_url, | ||||||
|     url_or_none, |     url_or_none, | ||||||
|  |     urljoin, | ||||||
|     variadic, |     variadic, | ||||||
|     xpath_attr, |     xpath_attr, | ||||||
|     xpath_text, |     xpath_text, | ||||||
| @@ -1867,11 +1868,13 @@ class GenericIE(InfoExtractor): | |||||||
|                 'display_id': 'kelis-4th-of-july', |                 'display_id': 'kelis-4th-of-july', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Kelis - 4th Of July', |                 'title': 'Kelis - 4th Of July', | ||||||
|                 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', |                 'description': 'Kelis - 4th Of July', | ||||||
|  |                 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', | ||||||
|             }, |             }, | ||||||
|             'params': { |             'params': { | ||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|  |             'expected_warnings': ['Untested major version'], | ||||||
|         }, { |         }, { | ||||||
|             # KVS Player |             # KVS Player | ||||||
|             'url': 'https://www.kvs-demo.com/embed/105/', |             'url': 'https://www.kvs-demo.com/embed/105/', | ||||||
| @@ -1880,35 +1883,12 @@ class GenericIE(InfoExtractor): | |||||||
|                 'display_id': 'kelis-4th-of-july', |                 'display_id': 'kelis-4th-of-july', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Kelis - 4th Of July / Embed Player', |                 'title': 'Kelis - 4th Of July / Embed Player', | ||||||
|                 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', |                 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', | ||||||
|             }, |             }, | ||||||
|             'params': { |             'params': { | ||||||
|                 'skip_download': True, |                 'skip_download': True, | ||||||
|             }, |             }, | ||||||
|         }, { |         }, { | ||||||
|             # KVS Player |  | ||||||
|             'url': 'https://thisvid.com/videos/french-boy-pantsed/', |  | ||||||
|             'md5': '3397979512c682f6b85b3b04989df224', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '2400174', |  | ||||||
|                 'display_id': 'french-boy-pantsed', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': 'French Boy Pantsed - ThisVid.com', |  | ||||||
|                 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', |  | ||||||
|             } |  | ||||||
|         }, { |  | ||||||
|             # KVS Player |  | ||||||
|             'url': 'https://thisvid.com/embed/2400174/', |  | ||||||
|             'md5': '3397979512c682f6b85b3b04989df224', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '2400174', |  | ||||||
|                 'display_id': 'french-boy-pantsed', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': 'French Boy Pantsed - ThisVid.com', |  | ||||||
|                 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', |  | ||||||
|             } |  | ||||||
|         }, { |  | ||||||
|             # KVS Player |  | ||||||
|             'url': 'https://youix.com/video/leningrad-zoj/', |             'url': 'https://youix.com/video/leningrad-zoj/', | ||||||
|             'md5': '94f96ba95706dc3880812b27b7d8a2b8', |             'md5': '94f96ba95706dc3880812b27b7d8a2b8', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
| @@ -1916,8 +1896,8 @@ class GenericIE(InfoExtractor): | |||||||
|                 'display_id': 'leningrad-zoj', |                 'display_id': 'leningrad-zoj', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', |                 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', | ||||||
|                 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', |                 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # KVS Player |             # KVS Player | ||||||
|             'url': 'https://youix.com/embed/18485', |             'url': 'https://youix.com/embed/18485', | ||||||
| @@ -1927,19 +1907,20 @@ class GenericIE(InfoExtractor): | |||||||
|                 'display_id': 'leningrad-zoj', |                 'display_id': 'leningrad-zoj', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Ленинград - ЗОЖ', |                 'title': 'Ленинград - ЗОЖ', | ||||||
|                 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', |                 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', | ||||||
|             } |             }, | ||||||
|         }, { |         }, { | ||||||
|             # KVS Player |             # KVS Player | ||||||
|             'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', |             'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', | ||||||
|             'md5': '94166bdb26b4cb1fb9214319a629fc51', |             'md5': '94166bdb26b4cb1fb9214319a629fc51', | ||||||
|             'info_dict': { |             'info_dict': { | ||||||
|                 'id': '21217', |                 'id': '21217', | ||||||
|                 'display_id': '40-nochey-40-nights-2016', |                 'display_id': '40-nochey-2016', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': '40 ночей (2016) - BogMedia.org', |                 'title': '40 ночей (2016) - BogMedia.org', | ||||||
|  |                 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', | ||||||
|                 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', |                 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             # KVS Player (for sites that serve kt_player.js via non-https urls) |             # KVS Player (for sites that serve kt_player.js via non-https urls) | ||||||
| @@ -1950,8 +1931,8 @@ class GenericIE(InfoExtractor): | |||||||
|                 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', |                 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', |                 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', | ||||||
|                 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', |                 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', | ||||||
|             } |             }, | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
|             # Reddit-hosted video that will redirect and be processed by RedditIE |             # Reddit-hosted video that will redirect and be processed by RedditIE | ||||||
| @@ -2169,7 +2150,20 @@ class GenericIE(InfoExtractor): | |||||||
|                 'direct': True, |                 'direct': True, | ||||||
|                 'age_limit': 0, |                 'age_limit': 0, | ||||||
|             } |             } | ||||||
|         } |         }, | ||||||
|  |         { | ||||||
|  |             'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', | ||||||
|  |             'md5': 'e2f0a4c329f7986280b7328e24036d60', | ||||||
|  |             'info_dict': { | ||||||
|  |                 'id': '284002', | ||||||
|  |                 'display_id': 'just-out-of-the-shower-joi', | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 'title': 'Just Out Of The Shower JOI - Shooshtime', | ||||||
|  |                 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg', | ||||||
|  |                 'height': 720, | ||||||
|  |                 'age_limit': 18, | ||||||
|  |             }, | ||||||
|  |         }, | ||||||
|     ] |     ] | ||||||
| 
 | 
 | ||||||
|     def report_following_redirect(self, new_url): |     def report_following_redirect(self, new_url): | ||||||
| @@ -2235,43 +2229,87 @@ class GenericIE(InfoExtractor): | |||||||
|             'entries': entries, |             'entries': entries, | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|     def _kvs_getrealurl(self, video_url, license_code): |     @classmethod | ||||||
|  |     def _kvs_get_real_url(cls, video_url, license_code): | ||||||
|         if not video_url.startswith('function/0/'): |         if not video_url.startswith('function/0/'): | ||||||
|             return video_url  # not obfuscated |             return video_url  # not obfuscated | ||||||
| 
 | 
 | ||||||
|         url_path, _, url_query = video_url.partition('?') |         parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) | ||||||
|         urlparts = url_path.split('/')[2:] |         license = cls._kvs_get_license_token(license_code) | ||||||
|         license = self._kvs_getlicensetoken(license_code) |         urlparts = parsed.path.split('/') | ||||||
|         newmagic = urlparts[5][:32] |  | ||||||
| 
 | 
 | ||||||
|         for o in range(len(newmagic) - 1, -1, -1): |         HASH_LENGTH = 32 | ||||||
|             new = '' |         hash = urlparts[3][:HASH_LENGTH] | ||||||
|             l = (o + sum(int(n) for n in license[o:])) % 32 |         indices = list(range(HASH_LENGTH)) | ||||||
| 
 | 
 | ||||||
|             for i in range(0, len(newmagic)): |         # Swap indices of hash according to the destination calculated from the license token | ||||||
|                 if i == o: |         accum = 0 | ||||||
|                     new += newmagic[l] |         for src in reversed(range(HASH_LENGTH)): | ||||||
|                 elif i == l: |             accum += license[src] | ||||||
|                     new += newmagic[o] |             dest = (src + accum) % HASH_LENGTH | ||||||
|                 else: |             indices[src], indices[dest] = indices[dest], indices[src] | ||||||
|                     new += newmagic[i] |  | ||||||
|             newmagic = new |  | ||||||
| 
 | 
 | ||||||
|         urlparts[5] = newmagic + urlparts[5][32:] |         urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:] | ||||||
|         return '/'.join(urlparts) + '?' + url_query |         return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) | ||||||
| 
 | 
 | ||||||
|     def _kvs_getlicensetoken(self, license): |     @staticmethod | ||||||
|         modlicense = license.replace('$', '').replace('0', '1') |     def _kvs_get_license_token(license): | ||||||
|         center = int(len(modlicense) / 2) |         license = license.replace('$', '') | ||||||
|  |         license_values = [int(char) for char in license] | ||||||
|  | 
 | ||||||
|  |         modlicense = license.replace('0', '1') | ||||||
|  |         center = len(modlicense) // 2 | ||||||
|         fronthalf = int(modlicense[:center + 1]) |         fronthalf = int(modlicense[:center + 1]) | ||||||
|         backhalf = int(modlicense[center:]) |         backhalf = int(modlicense[center:]) | ||||||
|  |         modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1] | ||||||
| 
 | 
 | ||||||
|         modlicense = str(4 * abs(fronthalf - backhalf)) |         return [ | ||||||
|         retval = '' |             (license_values[index + offset] + current) % 10 | ||||||
|         for o in range(0, center + 1): |             for index, current in enumerate(map(int, modlicense)) | ||||||
|             for i in range(1, 5): |             for offset in range(4) | ||||||
|                 retval += str((int(license[o + i]) + int(modlicense[o])) % 10) |         ] | ||||||
|         return retval | 
 | ||||||
|  |     def _extract_kvs(self, url, webpage, video_id): | ||||||
|  |         flashvars = self._search_json( | ||||||
|  |             r'(?s:<script\b[^>]*>.*?var\s+flashvars\s*=)', | ||||||
|  |             webpage, 'flashvars', video_id, transform_source=js_to_json) | ||||||
|  | 
 | ||||||
|  |         # extract the part after the last / as the display_id from the | ||||||
|  |         # canonical URL. | ||||||
|  |         display_id = self._search_regex( | ||||||
|  |             r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>' | ||||||
|  |             r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)', | ||||||
|  |             webpage, 'display_id', fatal=False) | ||||||
|  |         title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title') | ||||||
|  | 
 | ||||||
|  |         thumbnail = flashvars['preview_url'] | ||||||
|  |         if thumbnail.startswith('//'): | ||||||
|  |             protocol, _, _ = url.partition('/') | ||||||
|  |             thumbnail = protocol + thumbnail | ||||||
|  | 
 | ||||||
|  |         url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys())) | ||||||
|  |         formats = [] | ||||||
|  |         for key in url_keys: | ||||||
|  |             if '/get_file/' not in flashvars[key]: | ||||||
|  |                 continue | ||||||
|  |             format_id = flashvars.get(f'{key}_text', key) | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])), | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'ext': 'mp4', | ||||||
|  |                 **(parse_resolution(format_id) or parse_resolution(flashvars[key])), | ||||||
|  |                 'http_headers': {'Referer': url}, | ||||||
|  |             }) | ||||||
|  |             if not formats[-1].get('height'): | ||||||
|  |                 formats[-1]['quality'] = 1 | ||||||
|  | 
 | ||||||
|  |         return { | ||||||
|  |             'id': flashvars['video_id'], | ||||||
|  |             'display_id': display_id, | ||||||
|  |             'title': title, | ||||||
|  |             'thumbnail': thumbnail, | ||||||
|  |             'formats': formats, | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         if url.startswith('//'): |         if url.startswith('//'): | ||||||
| @@ -2580,6 +2618,17 @@ class GenericIE(InfoExtractor): | |||||||
|                 self.report_detected('video.js embed') |                 self.report_detected('video.js embed') | ||||||
|                 return [{'formats': formats, 'subtitles': subtitles}] |                 return [{'formats': formats, 'subtitles': subtitles}] | ||||||
| 
 | 
 | ||||||
|  |         # Look for generic KVS player (before json-ld bc of some urls that break otherwise) | ||||||
|  |         found = self._search_regex(( | ||||||
|  |             r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>', | ||||||
|  |             r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,', | ||||||
|  |         ), webpage, 'KVS player', group='ver', default=False) | ||||||
|  |         if found: | ||||||
|  |             self.report_detected('KWS Player') | ||||||
|  |             if found.split('.')[0] not in ('4', '5', '6'): | ||||||
|  |                 self.report_warning(f'Untested major version ({found}) in player engine - download may fail.') | ||||||
|  |             return [self._extract_kvs(url, webpage, video_id)] | ||||||
|  | 
 | ||||||
|         # Looking for http://schema.org/VideoObject |         # Looking for http://schema.org/VideoObject | ||||||
|         json_ld = self._search_json_ld(webpage, video_id, default={}) |         json_ld = self._search_json_ld(webpage, video_id, default={}) | ||||||
|         if json_ld.get('url') not in (url, None): |         if json_ld.get('url') not in (url, None): | ||||||
| @@ -2622,52 +2671,6 @@ class GenericIE(InfoExtractor): | |||||||
|                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) |                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) | ||||||
|             if found: |             if found: | ||||||
|                 self.report_detected('JW Player embed') |                 self.report_detected('JW Player embed') | ||||||
|         if not found: |  | ||||||
|             # Look for generic KVS player |  | ||||||
|             found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage) |  | ||||||
|             if found: |  | ||||||
|                 self.report_detected('KWS Player') |  | ||||||
|                 if found.group('maj_ver') not in ['4', '5']: |  | ||||||
|                     self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver')) |  | ||||||
|                 flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage) |  | ||||||
|                 flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json) |  | ||||||
| 
 |  | ||||||
|                 # extract the part after the last / as the display_id from the |  | ||||||
|                 # canonical URL. |  | ||||||
|                 display_id = self._search_regex( |  | ||||||
|                     r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>' |  | ||||||
|                     r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)', |  | ||||||
|                     webpage, 'display_id', fatal=False |  | ||||||
|                 ) |  | ||||||
|                 title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title') |  | ||||||
| 
 |  | ||||||
|                 thumbnail = flashvars['preview_url'] |  | ||||||
|                 if thumbnail.startswith('//'): |  | ||||||
|                     protocol, _, _ = url.partition('/') |  | ||||||
|                     thumbnail = protocol + thumbnail |  | ||||||
| 
 |  | ||||||
|                 url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys())) |  | ||||||
|                 formats = [] |  | ||||||
|                 for key in url_keys: |  | ||||||
|                     if '/get_file/' not in flashvars[key]: |  | ||||||
|                         continue |  | ||||||
|                     format_id = flashvars.get(f'{key}_text', key) |  | ||||||
|                     formats.append({ |  | ||||||
|                         'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']), |  | ||||||
|                         'format_id': format_id, |  | ||||||
|                         'ext': 'mp4', |  | ||||||
|                         **(parse_resolution(format_id) or parse_resolution(flashvars[key])) |  | ||||||
|                     }) |  | ||||||
|                     if not formats[-1].get('height'): |  | ||||||
|                         formats[-1]['quality'] = 1 |  | ||||||
| 
 |  | ||||||
|                 return [{ |  | ||||||
|                     'id': flashvars['video_id'], |  | ||||||
|                     'display_id': display_id, |  | ||||||
|                     'title': title, |  | ||||||
|                     'thumbnail': thumbnail, |  | ||||||
|                     'formats': formats, |  | ||||||
|                 }] |  | ||||||
|         if not found: |         if not found: | ||||||
|             # Broaden the search a little bit |             # Broaden the search a little bit | ||||||
|             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) |             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) | ||||||
|   | |||||||
| @@ -1,71 +1,128 @@ | |||||||
|  | import re | ||||||
|  | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     ExtractorError, | ||||||
|  |     get_element_by_class, | ||||||
|  |     int_or_none, | ||||||
|  |     merge_dicts, | ||||||
|  |     url_or_none, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class PeekVidsIE(InfoExtractor): | class PeekVidsBaseIE(InfoExtractor): | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         domain, video_id = self._match_valid_url(url).group('domain', 'id') | ||||||
|  |         webpage = self._download_webpage(url, video_id, expected_status=429) | ||||||
|  |         if '>Rate Limit Exceeded' in webpage: | ||||||
|  |             raise ExtractorError( | ||||||
|  |                 f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}', | ||||||
|  |                 video_id=video_id, expected=True) | ||||||
|  | 
 | ||||||
|  |         title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title') | ||||||
|  | 
 | ||||||
|  |         display_id = video_id | ||||||
|  |         video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') | ||||||
|  |         srcs = self._download_json( | ||||||
|  |             f'https://www.{domain}/v-alt/{video_id}', video_id, | ||||||
|  |             note='Downloading list of source files') | ||||||
|  | 
 | ||||||
|  |         formats = [] | ||||||
|  |         for k, v in srcs.items(): | ||||||
|  |             f_url = url_or_none(v) | ||||||
|  |             if not f_url: | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None) | ||||||
|  |             if not height: | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': f_url, | ||||||
|  |                 'format_id': height, | ||||||
|  |                 'height': int_or_none(height), | ||||||
|  |             }) | ||||||
|  | 
 | ||||||
|  |         if not formats: | ||||||
|  |             formats = [{'url': url} for url in srcs.values()] | ||||||
|  | 
 | ||||||
|  |         info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) | ||||||
|  |         info.pop('url', None) | ||||||
|  | 
 | ||||||
|  |         # may not have found the thumbnail if it was in a list in the ld+json | ||||||
|  |         info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) | ||||||
|  |         detail = (get_element_by_class('detail-video-block', webpage) | ||||||
|  |                   or get_element_by_class('detail-block', webpage) or '') | ||||||
|  |         info['description'] = self._html_search_regex( | ||||||
|  |             rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|<ul\b)', | ||||||
|  |             detail, 'description', default=None) or None | ||||||
|  |         info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url) | ||||||
|  | 
 | ||||||
|  |         def cat_tags(name, html): | ||||||
|  |             l = self._html_search_regex( | ||||||
|  |                 rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>', | ||||||
|  |                 html, name, default='') | ||||||
|  |             return list(filter(None, re.split(r'\s+', l))) | ||||||
|  | 
 | ||||||
|  |         return merge_dicts({ | ||||||
|  |             'id': video_id, | ||||||
|  |             'display_id': display_id, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'formats': formats, | ||||||
|  |             'categories': cat_tags('Categories', detail), | ||||||
|  |             'tags': cat_tags('Tags', detail), | ||||||
|  |             'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), | ||||||
|  |         }, info) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class PeekVidsIE(PeekVidsBaseIE): | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|         https?://(?:www\.)?peekvids\.com/ |         https?://(?:www\.)?(?P<domain>peekvids\.com)/ | ||||||
|         (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) |         (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) | ||||||
|         (?P<id>[^/?&#]*) |         (?P<id>[^/?&#]*) | ||||||
|     ''' |     ''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', |         'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', | ||||||
|         'md5': 'a00940646c428e232407e3e62f0e8ef5', |         'md5': '2ff6a357a9717dc9dc9894b51307e9a2', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'BSyLMbN0YCd', |             'id': '1262717', | ||||||
|             'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', |             'display_id': 'BSyLMbN0YCd', | ||||||
|  |             'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'description': 'Watch  Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com', |             'description': 'md5:0a61df3620de26c0af8963b1a730cd69', | ||||||
|             'timestamp': 1642579329, |             'timestamp': 1642579329, | ||||||
|             'upload_date': '20220119', |             'upload_date': '20220119', | ||||||
|             'duration': 416, |             'duration': 416, | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|  |             'uploader': 'SEXYhub.com', | ||||||
|  |             'categories': list, | ||||||
|  |             'tags': list, | ||||||
|         }, |         }, | ||||||
|     }] |     }] | ||||||
|     _DOMAIN = 'www.peekvids.com' |  | ||||||
| 
 |  | ||||||
|     def _real_extract(self, url): |  | ||||||
|         video_id = self._match_id(url) |  | ||||||
|         webpage = self._download_webpage(url, video_id) |  | ||||||
| 
 |  | ||||||
|         short_video_id = self._html_search_regex(r'<video [^>]*data-id="(.+?)"', webpage, 'short video ID') |  | ||||||
|         srcs = self._download_json( |  | ||||||
|             f'https://{self._DOMAIN}/v-alt/{short_video_id}', video_id, |  | ||||||
|             note='Downloading list of source files') |  | ||||||
|         formats = [{ |  | ||||||
|             'url': url, |  | ||||||
|             'ext': 'mp4', |  | ||||||
|             'format_id': name[8:], |  | ||||||
|         } for name, url in srcs.items() if len(name) > 8 and name.startswith('data-src')] |  | ||||||
|         if not formats: |  | ||||||
|             formats = [{'url': url} for url in srcs.values()] |  | ||||||
| 
 |  | ||||||
|         info = self._search_json_ld(webpage, video_id, expected_type='VideoObject') |  | ||||||
|         info.update({ |  | ||||||
|             'id': video_id, |  | ||||||
|             'age_limit': 18, |  | ||||||
|             'formats': formats, |  | ||||||
|         }) |  | ||||||
|         return info |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class PlayVidsIE(PeekVidsIE):  # XXX: Do not subclass from concrete IE | class PlayVidsIE(PeekVidsBaseIE): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)' |     _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', |         'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', | ||||||
|         'md5': 'cd7dfd8a2e815a45402369c76e3c1825', |         'md5': '2f12e50213dd65f142175da633c4564c', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'U3pBrYhsjXM', |             'id': '1978030', | ||||||
|             'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', |             'display_id': 'U3pBrYhsjXM', | ||||||
|  |             'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'description': 'Watch  Dane Jones - Cute redhead with perfect tits with Mini Vamp video in HD, uploaded by SEXYhub.com', |             'description': 'md5:0a61df3620de26c0af8963b1a730cd69', | ||||||
|             'timestamp': 1640435839, |             'timestamp': 1640435839, | ||||||
|             'upload_date': '20211225', |             'upload_date': '20211225', | ||||||
|             'duration': 416, |             'duration': 416, | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|             'age_limit': 18, |             'age_limit': 18, | ||||||
|  |             'uploader': 'SEXYhub.com', | ||||||
|  |             'categories': list, | ||||||
|  |             'tags': list, | ||||||
|         }, |         }, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', |         'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', | ||||||
| @@ -73,5 +130,62 @@ class PlayVidsIE(PeekVidsIE):  # XXX: Do not subclass from concrete IE | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', |         'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line', | ||||||
|  |         'md5': 'e783986e596cafbf46411a174ab42ba6', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '762385', | ||||||
|  |             'display_id': 'bKmGLe3IwjZ', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6', | ||||||
|  |             'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef', | ||||||
|  |             'timestamp': 1516958544, | ||||||
|  |             'upload_date': '20180126', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'duration': 480, | ||||||
|  |             'uploader': 'Brazzers', | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'view_count': int, | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'categories': list, | ||||||
|  |             'tags': list, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.playvids.com/v/47iUho33toY', | ||||||
|  |         'md5': 'b056b5049d34b648c1e86497cf4febce', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '700621', | ||||||
|  |             'display_id': '47iUho33toY', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE', | ||||||
|  |             'description': None, | ||||||
|  |             'timestamp': 1507052209, | ||||||
|  |             'upload_date': '20171003', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'duration': 332, | ||||||
|  |             'uploader': 'Cacerenele', | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'view_count': int, | ||||||
|  |             'categories': list, | ||||||
|  |             'tags': list, | ||||||
|  |         }, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances', | ||||||
|  |         'md5': 'efa09be9f031314b7b7e3bc6510cd0df', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1523518', | ||||||
|  |             'display_id': 'z3_7iwWCmqt', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances', | ||||||
|  |             'description': None, | ||||||
|  |             'timestamp': 1607470323, | ||||||
|  |             'upload_date': '20201208', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'duration': 593, | ||||||
|  |             'uploader': 'yorours', | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'view_count': int, | ||||||
|  |             'categories': list, | ||||||
|  |             'tags': list, | ||||||
|  |         }, | ||||||
|     }] |     }] | ||||||
|     _DOMAIN = 'www.playvids.com' |  | ||||||
|   | |||||||
							
								
								
									
										226
									
								
								yt_dlp/extractor/thisvid.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								yt_dlp/extractor/thisvid.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | |||||||
|  | import itertools | ||||||
|  | import re | ||||||
|  | import urllib.parse | ||||||
|  | 
 | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     clean_html, | ||||||
|  |     get_element_by_class, | ||||||
|  |     int_or_none, | ||||||
|  |     url_or_none, | ||||||
|  |     urljoin, | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ThisVidIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/', | ||||||
|  |         'md5': '839becb572995687e11a69dc4358a386', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '3533241', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Sitting on ball tight jeans', | ||||||
|  |             'description': 'md5:372353bb995883d1b65fddf507489acd', | ||||||
|  |             'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg', | ||||||
|  |             'uploader_id': '150629', | ||||||
|  |             'uploader': 'jeanslevisjeans', | ||||||
|  |             'display_id': 'sitting-on-ball-tight-jeans', | ||||||
|  |             'age_limit': 18, | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://thisvid.com/embed/3533241/', | ||||||
|  |         'md5': '839becb572995687e11a69dc4358a386', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '3533241', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Sitting on ball tight jeans', | ||||||
|  |             'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg', | ||||||
|  |             'uploader_id': '150629', | ||||||
|  |             'uploader': 'jeanslevisjeans', | ||||||
|  |             'display_id': 'sitting-on-ball-tight-jeans', | ||||||
|  |             'age_limit': 18, | ||||||
|  |         } | ||||||
|  |     }] | ||||||
|  | 
 | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         main_id, type_ = re.match(self._VALID_URL, url).group('id', 'type') | ||||||
|  |         webpage = self._download_webpage(url, main_id) | ||||||
|  | 
 | ||||||
|  |         title = self._html_search_regex( | ||||||
|  |             r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>', | ||||||
|  |             webpage, 'title') | ||||||
|  | 
 | ||||||
|  |         if type_ == 'embed': | ||||||
|  |             # look for more metadata | ||||||
|  |             video_alt_url = url_or_none(self._search_regex( | ||||||
|  |                 rf'''video_alt_url\s*:\s+'({self._VALID_URL}/)',''', | ||||||
|  |                 webpage, 'video_alt_url', default=None)) | ||||||
|  |             if video_alt_url and video_alt_url != url: | ||||||
|  |                 webpage = self._download_webpage( | ||||||
|  |                     video_alt_url, main_id, | ||||||
|  |                     note='Redirecting embed to main page', fatal=False) or webpage | ||||||
|  | 
 | ||||||
|  |         video_holder = get_element_by_class('video-holder', webpage) or '' | ||||||
|  |         if '>This video is a private video' in video_holder: | ||||||
|  |             self.raise_login_required( | ||||||
|  |                 (clean_html(video_holder) or 'Private video').partition('\n')[0]) | ||||||
|  | 
 | ||||||
|  |         uploader = self._html_search_regex( | ||||||
|  |             r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''', | ||||||
|  |             webpage, 'uploader', default='') | ||||||
|  |         uploader = re.split(r'''/["'][^>]*>\s*''', uploader) | ||||||
|  |         if len(uploader) == 2: | ||||||
|  |             # id must be non-empty, uploader could be '' | ||||||
|  |             uploader_id, uploader = uploader | ||||||
|  |             uploader = uploader or None | ||||||
|  |         else: | ||||||
|  |             uploader_id = uploader = None | ||||||
|  | 
 | ||||||
|  |         return self.url_result( | ||||||
|  |             url, ie='Generic', url_transparent=True, | ||||||
|  |             title=title, | ||||||
|  |             age_limit=18, | ||||||
|  |             uploader=uploader, | ||||||
|  |             uploader_id=uploader_id) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ThisVidPlaylistBaseIE(InfoExtractor): | ||||||
|  |     _PLAYLIST_URL_RE = None | ||||||
|  | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def _find_urls(cls, html): | ||||||
|  |         for m in re.finditer(rf'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>{cls._PLAYLIST_URL_RE}\b)[^>]+>''', html): | ||||||
|  |             yield m.group('url') | ||||||
|  | 
 | ||||||
|  |     def _generate_playlist_entries(self, url, playlist_id, html=None): | ||||||
|  |         page_url = url | ||||||
|  |         for page in itertools.count(1): | ||||||
|  |             if not html: | ||||||
|  |                 html = self._download_webpage( | ||||||
|  |                     page_url, playlist_id, note=f'Downloading page {page}', | ||||||
|  |                     fatal=False) or '' | ||||||
|  | 
 | ||||||
|  |             yield from self._find_urls(html) | ||||||
|  | 
 | ||||||
|  |             next_page = get_element_by_class('pagination-next', html) or '' | ||||||
|  |             if next_page: | ||||||
|  |                 # member list page | ||||||
|  |                 next_page = urljoin(url, self._search_regex( | ||||||
|  |                     r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''', | ||||||
|  |                     next_page, 'next page link', group='url', default=None)) | ||||||
|  | 
 | ||||||
|  |             # in case a member page should have pagination-next with empty link, not just `else:` | ||||||
|  |             if next_page is None: | ||||||
|  |                 # playlist page | ||||||
|  |                 parsed_url = urllib.parse.urlparse(page_url) | ||||||
|  |                 base_path, _, num = parsed_url.path.rpartition('/') | ||||||
|  |                 num = int_or_none(num) | ||||||
|  |                 if num is None: | ||||||
|  |                     base_path, num = parsed_url.path.rstrip('/'), 1 | ||||||
|  |                 parsed_url = parsed_url._replace(path=f'{base_path}/{num + 1}') | ||||||
|  |                 next_page = urllib.parse.urlunparse(parsed_url) | ||||||
|  |                 if page_url == next_page: | ||||||
|  |                     next_page = None | ||||||
|  | 
 | ||||||
|  |             if not next_page: | ||||||
|  |                 return | ||||||
|  |             page_url, html = next_page, None | ||||||
|  | 
 | ||||||
|  |     def _make_playlist_result(self, url): | ||||||
|  |         playlist_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, playlist_id) | ||||||
|  | 
 | ||||||
|  |         title = re.split( | ||||||
|  |             r'(?i)\s*\|\s*ThisVid\.com\s*$', | ||||||
|  |             self._og_search_title(webpage, default=None) | ||||||
|  |             or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None | ||||||
|  | 
 | ||||||
|  |         return self.playlist_from_matches( | ||||||
|  |             self._generate_playlist_entries(url, playlist_id, webpage), | ||||||
|  |             playlist_id=playlist_id, playlist_title=title, ie=ThisVidIE) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ThisVidMemberIE(ThisVidPlaylistBaseIE): | ||||||
|  |     _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://thisvid.com/members/2140501/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '2140501', | ||||||
|  |             'title': 'Rafflesia\'s Profile', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 16, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://thisvid.com/members/2140501/favourite_videos/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '2140501', | ||||||
|  |             'title': 'Rafflesia\'s Favourite Videos', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 15, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://thisvid.com/members/636468/public_videos/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '636468', | ||||||
|  |             'title': 'Happymouth\'s Public Videos', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 196, | ||||||
|  |     }] | ||||||
|  |     _PLAYLIST_URL_RE = ThisVidIE._VALID_URL | ||||||
|  | 
 | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         return self._make_playlist_result(url) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ThisVidPlaylistIE(ThisVidPlaylistBaseIE): | ||||||
|  |     _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '6615', | ||||||
|  |             'title': 'Underwear Stuff', | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 200, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '1072387', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'Big Italian Booty 28', | ||||||
|  |             'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2', | ||||||
|  |             'uploader_id': '367912', | ||||||
|  |             'uploader': 'Jcmusclefun', | ||||||
|  |             'age_limit': 18, | ||||||
|  |             'display_id': 'big-italian-booty-28', | ||||||
|  |             'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+1072387/preview\.jpg', | ||||||
|  |         }, | ||||||
|  |         'params': { | ||||||
|  |             'noplaylist': True, | ||||||
|  |         }, | ||||||
|  |     }] | ||||||
|  |     _PLAYLIST_URL_RE = _VALID_URL | ||||||
|  | 
 | ||||||
|  |     def _generate_playlist_entries(self, url, playlist_id, html=None): | ||||||
|  |         for wrapped_url in super()._generate_playlist_entries(url, playlist_id, html): | ||||||
|  |             video_id = re.match(self._VALID_URL, wrapped_url).group('video_id') | ||||||
|  |             yield urljoin(url, f'/videos/{video_id}/') | ||||||
|  | 
 | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         playlist_id, video_id = self._match_valid_url(url).group('id', 'video_id') | ||||||
|  | 
 | ||||||
|  |         if not self._yes_playlist(playlist_id, video_id): | ||||||
|  |             redirect_url = urljoin(url, f'/videos/{video_id}/') | ||||||
|  |             return self.url_result(redirect_url, ThisVidIE) | ||||||
|  | 
 | ||||||
|  |         result = self._make_playlist_result(url) | ||||||
|  | 
 | ||||||
|  |         # Fix duplicated title (`the title - the title` => `the title`) | ||||||
|  |         title = result['title'] | ||||||
|  |         t_len = len(title) | ||||||
|  |         if t_len > 5 and t_len % 2 != 0: | ||||||
|  |             t_len = t_len // 2 | ||||||
|  |             if title[t_len] == '-': | ||||||
|  |                 first, second = map(str.strip, (title[:t_len], title[t_len + 1:])) | ||||||
|  |                 if first and first == second: | ||||||
|  |                     result['title'] = first | ||||||
|  | 
 | ||||||
|  |         return result | ||||||
		Reference in New Issue
	
	Block a user
	 Simon Sawicki
					Simon Sawicki