Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-30 22:25:19 +00:00)
			
		
		
		
	[generic] Add support for RSS feeds (Fixes #667)
This commit is contained in:
		| @@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertEqual(result['title'], 'python language') |         self.assertEqual(result['title'], 'python language') | ||||||
|         self.assertTrue(len(result['entries']) == 15) |         self.assertTrue(len(result['entries']) == 15) | ||||||
|  |  | ||||||
|  |     def test_generic_rss_feed(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = GenericIE(dl) | ||||||
|  |         result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml') | ||||||
|  |         self.assertEqual(result['title'], 'Zero Punctuation') | ||||||
|  |         self.assertTrue(len(result['entries']) > 10) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
|  | import xml.etree.ElementTree | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from .youtube import YoutubeIE | from .youtube import YoutubeIE | ||||||
| @@ -159,6 +160,25 @@ class GenericIE(InfoExtractor): | |||||||
|             raise ExtractorError('Invalid URL protocol') |             raise ExtractorError('Invalid URL protocol') | ||||||
|         return response |         return response | ||||||
|  |  | ||||||
|  |     def _extract_rss(self, url, video_id, doc): | ||||||
|  |         playlist_title = doc.find('./channel/title').text | ||||||
|  |         playlist_desc_el = doc.find('./channel/description') | ||||||
|  |         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text | ||||||
|  |  | ||||||
|  |         entries = [{ | ||||||
|  |             '_type': 'url', | ||||||
|  |             'url': e.find('link').text, | ||||||
|  |             'title': e.find('title').text, | ||||||
|  |         } for e in doc.findall('./channel/item')] | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': url, | ||||||
|  |             'title': playlist_title, | ||||||
|  |             'description': playlist_desc, | ||||||
|  |             'entries': entries, | ||||||
|  |         } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         parsed_url = compat_urlparse.urlparse(url) |         parsed_url = compat_urlparse.urlparse(url) | ||||||
|         if not parsed_url.scheme: |         if not parsed_url.scheme: | ||||||
| @@ -219,6 +239,14 @@ class GenericIE(InfoExtractor): | |||||||
|  |  | ||||||
|         self.report_extraction(video_id) |         self.report_extraction(video_id) | ||||||
|  |  | ||||||
|  |         # Is it an RSS feed? | ||||||
|  |         try: | ||||||
|  |             doc = xml.etree.ElementTree.fromstring(webpage) | ||||||
|  |             if doc.tag == 'rss': | ||||||
|  |                 return self._extract_rss(url, video_id, doc) | ||||||
|  |         except xml.etree.ElementTree.ParseError: | ||||||
|  |             pass | ||||||
|  |  | ||||||
|         # it's tempting to parse this further, but you would |         # it's tempting to parse this further, but you would | ||||||
|         # have to take into account all the variations like |         # have to take into account all the variations like | ||||||
|         #   Video Title - Site Name |         #   Video Title - Site Name | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister