	[youtube] Correct invalid JSON (Fixes #2353)
youtube_dl/extractor/common.py
@@ -271,8 +271,11 @@ class InfoExtractor(object):
 
     def _download_json(self, url_or_request, video_id,
                        note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
+                       transform_source=None):
         json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            json_string = transform_source(json_string)
         try:
             return json.loads(json_string)
         except ValueError as ve:
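
The new keyword argument gives callers a hook to rewrite the raw response body before it is parsed. A minimal standalone sketch of the same fetch, transform, parse pattern (the fetch_json helper and the urllib usage are illustrative, not part of the extractor code):

import json
import urllib.request


def fetch_json(url, transform_source=None):
    # Illustrative stand-in for _download_json: fetch the body as text,
    # optionally let the caller rewrite it, then hand it to json.loads.
    with urllib.request.urlopen(url) as response:
        json_string = response.read().decode('utf-8')
    if transform_source:
        json_string = transform_source(json_string)
    return json.loads(json_string)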
youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
     unified_strdate,
     orderedSet,
     write_json_file,
+    uppercase_escape,
 )
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
             # Download all channel pages using the json-based channel_ajax query
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-
-                page = json.loads(page)
-
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
 
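
The switch to _download_json with transform_source=uppercase_escape is the fix referenced in the commit title: the channel_ajax payload can contain \UXXXXXXXX escapes, which the JSON grammar does not allow (it only defines \uXXXX), so json.loads raised ValueError. A small self-contained illustration of that failure mode (the payload below is made up, not a real channel_ajax response):

import json

payload = '{"content_html": "<span>\\U0001F3B5 title</span>"}'
try:
    json.loads(payload)
except ValueError as error:
    # json only knows \uXXXX escapes, so the eight-digit form is rejected.
    print('invalid JSON:', error)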
youtube_dl/utils.py
@@ -1214,3 +1214,9 @@ class PagedList(object):
             if end == nextfirstid:
                 break
         return res
+
+
+def uppercase_escape(s):
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: compat_chr(int(m.group(1), base=16)), s)
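
For reference, a Python 3 sketch of the new helper with a quick before/after check (compat_chr is youtube-dl's Python 2/3 shim for chr, so plain chr is used here; the sample payload is made up):

import json
import re


def uppercase_escape(s):
    # Replace each invalid \UXXXXXXXX escape with the literal character it
    # names, leaving the rest of the string untouched.
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), base=16)), s)


payload = '{"content_html": "\\U0001F3B5 title"}'     # rejected by json.loads as-is
print(json.loads(uppercase_escape(payload))['content_html'])  # -> '🎵 title'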
Philipp Hagemeister