mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Document and test categories (#2923)
This commit is contained in:
		| @@ -113,6 +113,8 @@ class InfoExtractor(object): | |||||||
|     webpage_url:    The url to the video webpage, if given to youtube-dl it |     webpage_url:    The url to the video webpage, if given to youtube-dl it | ||||||
|                     should allow to get the same result again. (It will be set |                     should allow to get the same result again. (It will be set | ||||||
|                     by YoutubeDL if it's missing) |                     by YoutubeDL if it's missing) | ||||||
|  |     categories:     A list of categories that the video falls in, for example | ||||||
|  |                     ["Sports", "Berlin"] | ||||||
|  |  | ||||||
|     Unless mentioned otherwise, the fields should be Unicode strings. |     Unless mentioned otherwise, the fields should be Unicode strings. | ||||||
|  |  | ||||||
|   | |||||||
| @@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                 u"uploader": u"Philipp Hagemeister", |                 u"uploader": u"Philipp Hagemeister", | ||||||
|                 u"uploader_id": u"phihag", |                 u"uploader_id": u"phihag", | ||||||
|                 u"upload_date": u"20121002", |                 u"upload_date": u"20121002", | ||||||
|                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." |                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", | ||||||
|  |                 u"categories": [u'Science & Technology'], | ||||||
|             } |             } | ||||||
|         }, |         }, | ||||||
|         { |         { | ||||||
| @@ -1136,18 +1137,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|  |  | ||||||
|         # upload date |         # upload date | ||||||
|         upload_date = None |         upload_date = None | ||||||
|         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) |         mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) |             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) | ||||||
|             upload_date = unified_strdate(upload_date) |             upload_date = unified_strdate(upload_date) | ||||||
|  |  | ||||||
|  |  | ||||||
|         video_categories = [] |  | ||||||
|         # categories |  | ||||||
|         m_cat_container = get_element_by_id("eow-category", video_webpage) |         m_cat_container = get_element_by_id("eow-category", video_webpage) | ||||||
|         if m_cat_container: |         if m_cat_container: | ||||||
|             video_categories = re.findall(r'<a[^<]+>(.*?)</a>', |             category = self._html_search_regex( | ||||||
|                                 m_cat_container, re.DOTALL) |                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'cateory', | ||||||
|  |                 default=None) | ||||||
|  |             video_categories = None if category is None else [category] | ||||||
|  |         else: | ||||||
|  |             video_categories = None | ||||||
|  |  | ||||||
|         # description |         # description | ||||||
|         video_description = get_element_by_id("eow-description", video_webpage) |         video_description = get_element_by_id("eow-description", video_webpage) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister