mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[weibo] Misc improvements
This commit is contained in:
		| @@ -8,7 +8,8 @@ import random | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_urlparse, |     compat_parse_qs, | ||||||
|  |     compat_str, | ||||||
| ) | ) | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     js_to_json, |     js_to_json, | ||||||
| @@ -31,36 +32,34 @@ class WeiboIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         # to get Referer url for genvisitor |         # to get Referer url for genvisitor | ||||||
|         webpage, urlh = self._download_webpage_handle(url, video_id, note="first visit the page") |         webpage, urlh = self._download_webpage_handle(url, video_id) | ||||||
|  |  | ||||||
|         visitor_url = urlh.geturl() |         visitor_url = urlh.geturl() | ||||||
|         headers = { |  | ||||||
|             'Referer': visitor_url |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         fp = { |         if 'passport.weibo.com' in visitor_url: | ||||||
|             "os": "2", |             # first visit | ||||||
|             "browser": "Gecko57,0,0,0", |             visitor_data = self._download_json( | ||||||
|             "fonts": "undefined", |                 'https://passport.weibo.com/visitor/genvisitor', video_id, | ||||||
|             "screenInfo": "1440*900*24", |                 note='Generating first-visit data', | ||||||
|             "plugins": "" |                 transform_source=strip_jsonp, | ||||||
|         } |                 headers={'Referer': visitor_url}, | ||||||
|                 data=urlencode_postdata({ |                 data=urlencode_postdata({ | ||||||
|             "cb": "gen_callback", |                     'cb': 'gen_callback', | ||||||
|             "fp": json.dumps(fp), |                     'fp': json.dumps({ | ||||||
|         }) |                         'os': '2', | ||||||
|  |                         'browser': 'Gecko57,0,0,0', | ||||||
|  |                         'fonts': 'undefined', | ||||||
|  |                         'screenInfo': '1440*900*24', | ||||||
|  |                         'plugins': '', | ||||||
|  |                     }), | ||||||
|  |                 })) | ||||||
|  |  | ||||||
|         genvisitor_url = 'https://passport.weibo.com/visitor/genvisitor' |             tid = visitor_data['data']['tid'] | ||||||
|         webpage = self._download_webpage(genvisitor_url, video_id, data=data, headers=headers, note="gen visitor") |             cnfd = '%03d' % visitor_data['data']['confidence'] | ||||||
|  |  | ||||||
|         p = strip_jsonp(webpage) |  | ||||||
|         i1 = p.find('{') |  | ||||||
|         i2 = p.rfind('}') |  | ||||||
|         j = p[i1:i2 + 1]  # get JSON object |  | ||||||
|         d = json.loads(j) |  | ||||||
|         tid = d["data"]["tid"] |  | ||||||
|         cnfd = "%03d" % d["data"]["confidence"] |  | ||||||
|  |  | ||||||
|  |             self._download_webpage( | ||||||
|  |                 'https://passport.weibo.com/visitor/visitor', video_id, | ||||||
|  |                 note='Running first-visit callback', | ||||||
|                 query={ |                 query={ | ||||||
|                     'a': 'incarnate', |                     'a': 'incarnate', | ||||||
|                     't': tid, |                     't': tid, | ||||||
| @@ -68,33 +67,36 @@ class WeiboIE(InfoExtractor): | |||||||
|                     'c': cnfd, |                     'c': cnfd, | ||||||
|                     'cb': 'cross_domain', |                     'cb': 'cross_domain', | ||||||
|                     'from': 'weibo', |                     'from': 'weibo', | ||||||
|             '_rand': random.random() |                     '_rand': random.random(), | ||||||
|         } |                 }) | ||||||
|         gencallback_url = "https://passport.weibo.com/visitor/visitor" |  | ||||||
|         self._download_webpage(gencallback_url, video_id, note="gen callback", query=query) |  | ||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id, note="retry to visit the page") |             webpage = self._download_webpage( | ||||||
|  |                 url, video_id, note='Revisiting webpage') | ||||||
|  |  | ||||||
|         title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') |         title = self._html_search_regex( | ||||||
|  |             r'<title>(.+?)</title>', webpage, 'title') | ||||||
|  |  | ||||||
|         video_sources_text = self._search_regex(r'video-sources=\\\"(.+?)\"', webpage, 'video_sources') |         video_formats = compat_parse_qs(self._search_regex( | ||||||
|  |             r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')) | ||||||
|         video_formats = compat_urlparse.parse_qs(video_sources_text) |  | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         supported_resolutions = ('720', '480') |         supported_resolutions = (480, 720) | ||||||
|         for res in supported_resolutions: |         for res in supported_resolutions: | ||||||
|             f = video_formats.get(res) |             vid_urls = video_formats.get(compat_str(res)) | ||||||
|             if isinstance(f, list): |             if not vid_urls or not isinstance(vid_urls, list): | ||||||
|                 if len(f) > 0: |                 continue | ||||||
|                     vid_url = f[0] |  | ||||||
|  |             vid_url = vid_urls[0] | ||||||
|             formats.append({ |             formats.append({ | ||||||
|                 'url': vid_url, |                 'url': vid_url, | ||||||
|                         'format': 'mp4', |                 'height': res, | ||||||
|                         'height': int(res), |  | ||||||
|             }) |             }) | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|         uploader = self._og_search_property('nick-name', webpage, 'uploader', default=None) |  | ||||||
|  |         uploader = self._og_search_property( | ||||||
|  |             'nick-name', webpage, 'uploader', default=None) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': title, |             'title': title, | ||||||
| @@ -118,12 +120,17 @@ class WeiboMobileIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         # to get Referer url for genvisitor |         # to get Referer url for genvisitor | ||||||
|         webpage = self._download_webpage(url, video_id, note="visit the page") |         webpage = self._download_webpage(url, video_id, note='visit the page') | ||||||
|         js_code = self._search_regex(r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\] \|\| {};', webpage, 'js_code', flags=re.DOTALL) |  | ||||||
|         weibo_info = self._parse_json(js_code, video_id, transform_source=js_to_json) |         weibo_info = self._parse_json(self._search_regex( | ||||||
|         page_info = weibo_info.get('status').get('page_info') |             r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};', | ||||||
|         title = weibo_info.get('status').get('status_title') |             webpage, 'js_code', flags=re.DOTALL), | ||||||
|         uploader = weibo_info.get('status').get('user').get('screen_name') |             video_id, transform_source=js_to_json) | ||||||
|  |  | ||||||
|  |         status_data = weibo_info.get('status', {}) | ||||||
|  |         page_info = status_data.get('page_info') | ||||||
|  |         title = status_data['status_title'] | ||||||
|  |         uploader = status_data.get('user', {}).get('screen_name') | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan