mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[wsj:article] Add extractor
This commit is contained in:
		 John Hawkinson
					John Hawkinson
				
			
				
					committed by
					
						 Sergey M․
						Sergey M․
					
				
			
			
				
	
			
			
			 Sergey M․
						Sergey M․
					
				
			
						parent
						
							0254f93b08
						
					
				
				
					commit
					3266d08af2
				
			| @@ -1233,7 +1233,10 @@ from .wrzuta import ( | |||||||
|     WrzutaIE, |     WrzutaIE, | ||||||
|     WrzutaPlaylistIE, |     WrzutaPlaylistIE, | ||||||
| ) | ) | ||||||
| from .wsj import WSJIE | from .wsj import ( | ||||||
|  |     WSJIE, | ||||||
|  |     WSJArticleIE, | ||||||
|  | ) | ||||||
| from .xbef import XBefIE | from .xbef import XBefIE | ||||||
| from .xboxclips import XboxClipsIE | from .xboxclips import XboxClipsIE | ||||||
| from .xfileshare import XFileShareIE | from .xfileshare import XFileShareIE | ||||||
|   | |||||||
| @@ -10,10 +10,11 @@ from ..utils import ( | |||||||
|  |  | ||||||
|  |  | ||||||
| class WSJIE(InfoExtractor): | class WSJIE(InfoExtractor): | ||||||
|     _VALID_URL = r'''(?x)https?:// |     _VALID_URL = r'''(?x) | ||||||
|         (?: |         (?: | ||||||
|             video-api\.wsj\.com/api-video/player/iframe\.html\?guid=| |             https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=| | ||||||
|             (?:www\.)?wsj\.com/video/[^/]+/ |             https?://(?:www\.)?wsj\.com/video/[^/]+/| | ||||||
|  |             wsj: | ||||||
|         ) |         ) | ||||||
|         (?P<id>[a-zA-Z0-9-]+)''' |         (?P<id>[a-zA-Z0-9-]+)''' | ||||||
|     IE_DESC = 'Wall Street Journal' |     IE_DESC = 'Wall Street Journal' | ||||||
| @@ -87,3 +88,24 @@ class WSJIE(InfoExtractor): | |||||||
|             'title': title, |             'title': title, | ||||||
|             'categories': info.get('keywords'), |             'categories': info.get('keywords'), | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class WSJArticleIE(InfoExtractor): | ||||||
|  |     _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>\w[^/]+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'upload_date': '20170221', | ||||||
|  |             'uploader_id': 'ralcaraz', | ||||||
|  |             'title': 'Bao Bao the Panda Leaves for China', | ||||||
|  |         } | ||||||
|  |     }] | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         article_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, article_id) | ||||||
|  |         video_id = self._search_regex(r'data-src=["\']([A-Z0-9\-]+)', | ||||||
|  |                                       webpage, 'video id') | ||||||
|  |         return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user