mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[ie/bitchute] Fix extractor (#13081)
Closes #13080 Authored by: bashonly
This commit is contained in:
		| @@ -1,30 +1,32 @@ | ||||
| import functools | ||||
| import json | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..networking import HEADRequest | ||||
| from ..networking.exceptions import HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     OnDemandPagedList, | ||||
|     clean_html, | ||||
|     extract_attributes, | ||||
|     determine_ext, | ||||
|     format_field, | ||||
|     get_element_by_class, | ||||
|     get_element_by_id, | ||||
|     get_element_html_by_class, | ||||
|     get_elements_html_by_class, | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     parse_count, | ||||
|     parse_duration, | ||||
|     traverse_obj, | ||||
|     unified_strdate, | ||||
|     parse_iso8601, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
| from ..utils.traversal import traverse_obj | ||||
| 
 | ||||
| 
 | ||||
| class BitChuteIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)' | ||||
|     _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})'] | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', | ||||
| @@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'This is the first video on #BitChute !', | ||||
|             'description': 'md5:a0337e7b1fe39e32336974af8173a034', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'thumbnail': r're:https?://.+/.+\.jpg$', | ||||
|             'uploader': 'BitChute', | ||||
|             'upload_date': '20170103', | ||||
|             'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', | ||||
|             'channel': 'BitChute', | ||||
|             'channel_url': 'https://www.bitchute.com/channel/bitchute/', | ||||
|             'uploader_id': 'I5NgtHZn9vPj', | ||||
|             'channel_id': '1VBwRfyNcKdX', | ||||
|             'view_count': int, | ||||
|             'duration': 16.0, | ||||
|             'timestamp': 1483425443, | ||||
|         }, | ||||
|     }, { | ||||
|         # test case: video with different channel and uploader | ||||
| @@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor): | ||||
|             'id': 'Yti_j9A-UZ4', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Israel at War | Full Measure', | ||||
|             'description': 'md5:38cf7bc6f42da1a877835539111c69ef', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:e60198b89971966d6030d22b3268f08f', | ||||
|             'thumbnail': r're:https?://.+/.+\.jpg$', | ||||
|             'uploader': 'sharylattkisson', | ||||
|             'upload_date': '20231106', | ||||
|             'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', | ||||
|             'channel': 'Full Measure with Sharyl Attkisson', | ||||
|             'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', | ||||
|             'uploader_id': '9K0kUWA9zmd9', | ||||
|             'channel_id': 'NpdxoCRv3ZLb', | ||||
|             'view_count': int, | ||||
|             'duration': 554.0, | ||||
|             'timestamp': 1699296106, | ||||
|         }, | ||||
|     }, { | ||||
|         # video not downloadable in browser, but we can recover it | ||||
| @@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'filesize': 71537926, | ||||
|             'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control', | ||||
|             'description': 'md5:228ee93bd840a24938f536aeac9cf749', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e', | ||||
|             'thumbnail': r're:https?://.+/.+\.jpg$', | ||||
|             'uploader': 'BitChute', | ||||
|             'upload_date': '20181113', | ||||
|             'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', | ||||
|             'channel': 'BitChute', | ||||
|             'channel_url': 'https://www.bitchute.com/channel/bitchute/', | ||||
|             'uploader_id': 'I5NgtHZn9vPj', | ||||
|             'channel_id': '1VBwRfyNcKdX', | ||||
|             'view_count': int, | ||||
|             'duration': 1701.0, | ||||
|             'tags': ['bitchute'], | ||||
|             'timestamp': 1542130287, | ||||
|         }, | ||||
|         'params': {'check_formats': None}, | ||||
|     }, { | ||||
|         # restricted video | ||||
|         'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/', | ||||
|         'info_dict': { | ||||
|             'id': 'WEnQU7XGcTdl', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft', | ||||
|         }, | ||||
|         'params': {'skip_download': True}, | ||||
|         'skip': 'Georestricted in DE', | ||||
|     }, { | ||||
|         'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', | ||||
|         'only_matching': True, | ||||
| @@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_BYPASS = False | ||||
| 
 | ||||
|     _HEADERS = { | ||||
|         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', | ||||
|         'Referer': 'https://www.bitchute.com/', | ||||
|     } | ||||
|     _UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/' | ||||
|     _CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/' | ||||
| 
 | ||||
|     def _check_format(self, video_url, video_id): | ||||
|         urls = orderedSet( | ||||
| @@ -112,7 +117,7 @@ class BitChuteIE(InfoExtractor): | ||||
|         for url in urls: | ||||
|             try: | ||||
|                 response = self._request_webpage( | ||||
|                     HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS) | ||||
|                     HEADRequest(url), video_id=video_id, note=f'Checking {url}') | ||||
|             except ExtractorError as e: | ||||
|                 self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}') | ||||
|                 continue | ||||
| @@ -121,54 +126,79 @@ class BitChuteIE(InfoExtractor): | ||||
|                 'filesize': int_or_none(response.headers.get('Content-Length')), | ||||
|             } | ||||
| 
 | ||||
|     def _raise_if_restricted(self, webpage): | ||||
|         page_title = clean_html(get_element_by_class('page-title', webpage)) or '' | ||||
|         if re.fullmatch(r'(?:Channel|Video) Restricted', page_title): | ||||
|             reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title | ||||
|             self.raise_geo_restricted(reason) | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _make_url(html): | ||||
|         path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href') | ||||
|         return urljoin('https://www.bitchute.com', path) | ||||
|     def _call_api(self, endpoint, data, display_id, fatal=True): | ||||
|         note = endpoint.rpartition('/')[2] | ||||
|         try: | ||||
|             return self._download_json( | ||||
|                 f'https://api.bitchute.com/api/beta/{endpoint}', display_id, | ||||
|                 f'Downloading {note} API JSON', f'Unable to download {note} API JSON', | ||||
|                 data=json.dumps(data).encode(), | ||||
|                 headers={ | ||||
|                     'Accept': 'application/json', | ||||
|                     'Content-Type': 'application/json', | ||||
|                 }) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, HTTPError) and e.cause.status == 403: | ||||
|                 errors = '. '.join(traverse_obj(e.cause.response.read().decode(), ( | ||||
|                     {json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str}))) | ||||
|                 if errors and 'location' in errors: | ||||
|                     # Can always be fatal since the video/media call will reach this code first | ||||
|                     self.raise_geo_restricted(errors) | ||||
|             if fatal: | ||||
|                 raise | ||||
|             self.report_warning(e.msg) | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage( | ||||
|             f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) | ||||
| 
 | ||||
|         self._raise_if_restricted(webpage) | ||||
|         publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) | ||||
|         entries = self._parse_html5_media_entries(url, webpage, video_id) | ||||
|         data = {'video_id': video_id} | ||||
|         media_url = self._call_api('video/media', data, video_id)['media_url'] | ||||
| 
 | ||||
|         formats = [] | ||||
|         for format_ in traverse_obj(entries, (0, 'formats', ...)): | ||||
|         if determine_ext(media_url) == 'm3u8': | ||||
|             formats.extend( | ||||
|                 self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True)) | ||||
|         else: | ||||
|             if self.get_param('check_formats') is not False: | ||||
|                 format_.update(self._check_format(format_.pop('url'), video_id) or {}) | ||||
|                 if 'url' not in format_: | ||||
|                     continue | ||||
|             formats.append(format_) | ||||
|                 if fmt := self._check_format(media_url, video_id): | ||||
|                     formats.append(fmt) | ||||
|             else: | ||||
|                 formats.append({'url': media_url}) | ||||
| 
 | ||||
|         if not formats: | ||||
|             self.raise_no_formats( | ||||
|                 'Video is unavailable. Please make sure this video is playable in the browser ' | ||||
|                 'before reporting this issue.', expected=True, video_id=video_id) | ||||
| 
 | ||||
|         details = get_element_by_class('details', webpage) or '' | ||||
|         uploader_html = get_element_html_by_class('creator', details) or '' | ||||
|         channel_html = get_element_html_by_class('name', details) or '' | ||||
|         video = self._call_api('video', data, video_id, fatal=False) | ||||
|         channel = None | ||||
|         if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})): | ||||
|             channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False) | ||||
| 
 | ||||
|         return { | ||||
|             **traverse_obj(video, { | ||||
|                 'title': ('video_name', {str}), | ||||
|                 'description': ('description', {str}), | ||||
|                 'thumbnail': ('thumbnail_url', {url_or_none}), | ||||
|                 'channel': ('channel', 'channel_name', {str}), | ||||
|                 'channel_id': ('channel', 'channel_id', {str}), | ||||
|                 'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}), | ||||
|                 'uploader_id': ('profile_id', {str}), | ||||
|                 'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter), | ||||
|                 'timestamp': ('date_published', {parse_iso8601}), | ||||
|                 'duration': ('duration', {parse_duration}), | ||||
|                 'tags': ('hashtags', ..., {str}, filter, all, filter), | ||||
|                 'view_count': ('view_count', {int_or_none}), | ||||
|                 'is_live': ('state_id', {lambda x: x == 'live'}), | ||||
|             }), | ||||
|             **traverse_obj(channel, { | ||||
|                 'channel': ('channel_name', {str}), | ||||
|                 'channel_id': ('channel_id', {str}), | ||||
|                 'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter), | ||||
|                 'uploader': ('profile_name', {str}), | ||||
|                 'uploader_id': ('profile_id', {str}), | ||||
|                 'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter), | ||||
|             }), | ||||
|             'id': video_id, | ||||
|             'title': self._html_extract_title(webpage) or self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage, default=None), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader': clean_html(uploader_html), | ||||
|             'uploader_url': self._make_url(uploader_html), | ||||
|             'channel': clean_html(channel_html), | ||||
|             'channel_url': self._make_url(channel_html), | ||||
|             'upload_date': unified_strdate(self._search_regex( | ||||
|                 r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)), | ||||
|             'formats': formats, | ||||
|         } | ||||
| 
 | ||||
| @@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor): | ||||
|                     'ext': 'mp4', | ||||
|                     'title': 'This is the first video on #BitChute !', | ||||
|                     'description': 'md5:a0337e7b1fe39e32336974af8173a034', | ||||
|                     'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                     'thumbnail': r're:https?://.+/.+\.jpg$', | ||||
|                     'uploader': 'BitChute', | ||||
|                     'upload_date': '20170103', | ||||
|                     'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', | ||||
| @@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor): | ||||
|                     'channel_url': 'https://www.bitchute.com/channel/bitchute/', | ||||
|                     'duration': 16, | ||||
|                     'view_count': int, | ||||
|                     'uploader_id': 'I5NgtHZn9vPj', | ||||
|                     'channel_id': '1VBwRfyNcKdX', | ||||
|                     'timestamp': 1483425443, | ||||
|                 }, | ||||
|             }, | ||||
|         ], | ||||
| @@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor): | ||||
|             'title': 'Bruce MacDonald and "The Light of Darkness"', | ||||
|             'description': 'md5:747724ef404eebdfc04277714f81863e', | ||||
|         }, | ||||
|         'skip': '404 Not Found', | ||||
|     }, { | ||||
|         'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', | ||||
|         'only_matching': True, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly