mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[generic] Add support for RSS feeds (Fixes #667)
This commit is contained in:
		| @@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['title'], 'python language') | ||||
|         self.assertTrue(len(result['entries']) == 15) | ||||
|  | ||||
|     def test_generic_rss_feed(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = GenericIE(dl) | ||||
|         result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||
|         self.assertEqual(result['title'], 'Zero Punctuation') | ||||
|         self.assertTrue(len(result['entries']) > 10) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| @@ -159,6 +160,25 @@ class GenericIE(InfoExtractor): | ||||
|             raise ExtractorError('Invalid URL protocol') | ||||
|         return response | ||||
|  | ||||
|     def _extract_rss(self, url, video_id, doc): | ||||
|         playlist_title = doc.find('./channel/title').text | ||||
|         playlist_desc_el = doc.find('./channel/description') | ||||
|         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text | ||||
|  | ||||
|         entries = [{ | ||||
|             '_type': 'url', | ||||
|             'url': e.find('link').text, | ||||
|             'title': e.find('title').text, | ||||
|         } for e in doc.findall('./channel/item')] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': url, | ||||
|             'title': playlist_title, | ||||
|             'description': playlist_desc, | ||||
|             'entries': entries, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
|         if not parsed_url.scheme: | ||||
| @@ -219,6 +239,14 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Is it an RSS feed? | ||||
|         try: | ||||
|             doc = xml.etree.ElementTree.fromstring(webpage) | ||||
|             if doc.tag == 'rss': | ||||
|                 return self._extract_rss(url, video_id, doc) | ||||
|         except xml.etree.ElementTree.ParseError: | ||||
|             pass | ||||
|  | ||||
|         # it's tempting to parse this further, but you would | ||||
|         # have to take into account all the variations like | ||||
|         #   Video Title - Site Name | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister