mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[wsj:article] Add extractor
This commit is contained in:
		 John Hawkinson
					John Hawkinson
				
			
				
					committed by
					
						 Sergey M․
						Sergey M․
					
				
			
			
				
	
			
			
			 Sergey M․
						Sergey M․
					
				
			
						parent
						
							0254f93b08
						
					
				
				
					commit
					3266d08af2
				
			| @@ -10,10 +10,11 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class WSJIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?: | ||||
|             video-api\.wsj\.com/api-video/player/iframe\.html\?guid=| | ||||
|             (?:www\.)?wsj\.com/video/[^/]+/ | ||||
|             https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=| | ||||
|             https?://(?:www\.)?wsj\.com/video/[^/]+/| | ||||
|             wsj: | ||||
|         ) | ||||
|         (?P<id>[a-zA-Z0-9-]+)''' | ||||
|     IE_DESC = 'Wall Street Journal' | ||||
| @@ -87,3 +88,24 @@ class WSJIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'categories': info.get('keywords'), | ||||
|         } | ||||
|  | ||||
|  | ||||
class WSJArticleIE(InfoExtractor):
    """Extractor for wsj.com article pages.

    Downloads the article page, looks for a video GUID in a ``data-src``
    attribute and hands it over to ``WSJIE`` through a ``wsj:<guid>`` URL.
    """

    # The id stops at the first '/', '?', '#' or '&' so that a query string
    # or fragment (the test URL below ends in a bare '?') does not leak into
    # the article id.
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
        'info_dict': {
            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
            'ext': 'mp4',
            'upload_date': '20170221',
            'uploader_id': 'ralcaraz',
            'title': 'Bao Bao the Panda Leaves for China',
        }
    }]

    def _real_extract(self, url):
        """Return a url_result that delegates the embedded video to WSJIE."""
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)
        # Require a full 36-character GUID (hex digits and dashes, either
        # case) so that unrelated data-src attributes are not mistaken for
        # a video id.
        video_id = self._search_regex(
            r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
        return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
|   | ||||
		Reference in New Issue
	
	Block a user