mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[livestream:original] Add support for folder urls (closes #2631)
The webpage only contains shortened links for the videos. Since the server doesn't support HEAD requests, we use a specific extractor for them.
This commit is contained in:
		| @@ -30,6 +30,7 @@ from youtube_dl.extractor import ( | |||||||
|     SoundcloudPlaylistIE, |     SoundcloudPlaylistIE, | ||||||
|     TeacherTubeClassroomIE, |     TeacherTubeClassroomIE, | ||||||
|     LivestreamIE, |     LivestreamIE, | ||||||
|  |     LivestreamOriginalIE, | ||||||
|     NHLVideocenterIE, |     NHLVideocenterIE, | ||||||
|     BambuserChannelIE, |     BambuserChannelIE, | ||||||
|     BandcampAlbumIE, |     BandcampAlbumIE, | ||||||
| @@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertEqual(result['title'], 'TEDCity2.0 (English)') |         self.assertEqual(result['title'], 'TEDCity2.0 (English)') | ||||||
|         self.assertTrue(len(result['entries']) >= 4) |         self.assertTrue(len(result['entries']) >= 4) | ||||||
|  |  | ||||||
|  |     def test_livestreamoriginal_folder(self): | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = LivestreamOriginalIE(dl) | ||||||
|  |         result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') | ||||||
|  |         self.assertTrue(len(result['entries']) >= 28) | ||||||
|  |  | ||||||
|     def test_nhl_videocenter(self): |     def test_nhl_videocenter(self): | ||||||
|         dl = FakeYDL() |         dl = FakeYDL() | ||||||
|         ie = NHLVideocenterIE(dl) |         ie = NHLVideocenterIE(dl) | ||||||
|   | |||||||
| @@ -147,7 +147,11 @@ from .ku6 import Ku6IE | |||||||
| from .la7 import LA7IE | from .la7 import LA7IE | ||||||
| from .lifenews import LifeNewsIE | from .lifenews import LifeNewsIE | ||||||
| from .liveleak import LiveLeakIE | from .liveleak import LiveLeakIE | ||||||
| from .livestream import LivestreamIE, LivestreamOriginalIE | from .livestream import ( | ||||||
|  |     LivestreamIE, | ||||||
|  |     LivestreamOriginalIE, | ||||||
|  |     LivestreamShortenerIE, | ||||||
|  | ) | ||||||
| from .lynda import ( | from .lynda import ( | ||||||
|     LyndaIE, |     LyndaIE, | ||||||
|     LyndaCourseIE |     LyndaCourseIE | ||||||
|   | |||||||
| @@ -459,6 +459,9 @@ class InfoExtractor(object): | |||||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes |         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||||
|         return self._html_search_regex(regexes, html, name, **kargs) |         return self._html_search_regex(regexes, html, name, **kargs) | ||||||
|  |  | ||||||
|  |     def _og_search_url(self, html, **kargs): | ||||||
|  |         return self._og_search_property('url', html, **kargs) | ||||||
|  |  | ||||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False): |     def _html_search_meta(self, name, html, display_name=None, fatal=False): | ||||||
|         if display_name is None: |         if display_name is None: | ||||||
|             display_name = name |             display_name = name | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ from ..utils import ( | |||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|     xpath_with_ns, |     xpath_with_ns, | ||||||
|     compat_str, |     compat_str, | ||||||
|  |     orderedSet, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor): | |||||||
| # The original version of Livestream uses a different system | # The original version of Livestream uses a different system | ||||||
| class LivestreamOriginalIE(InfoExtractor): | class LivestreamOriginalIE(InfoExtractor): | ||||||
|     IE_NAME = 'livestream:original' |     IE_NAME = 'livestream:original' | ||||||
|     _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' |     _VALID_URL = r'''(?x)https?://www\.livestream\.com/ | ||||||
|  |         (?P<user>[^/]+)/(?P<type>video|folder) | ||||||
|  |         (?:\?.*?Id=|/)(?P<id>.*?)(&|$) | ||||||
|  |         ''' | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', |         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
| @@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _extract_video(self, user, video_id): | ||||||
|         mobj = re.match(self._VALID_URL, url) |  | ||||||
|         video_id = mobj.group('id') |  | ||||||
|         user = mobj.group('user') |  | ||||||
|         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) |         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) | ||||||
|  |  | ||||||
|         info = self._download_xml(api_url, video_id) |         info = self._download_xml(api_url, video_id) | ||||||
| @@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor): | |||||||
|             'ext': 'flv', |             'ext': 'flv', | ||||||
|             'thumbnail': thumbnail_url, |             'thumbnail': thumbnail_url, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |     def _extract_folder(self, url, folder_id): | ||||||
|  |         webpage = self._download_webpage(url, folder_id) | ||||||
|  |         urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage)) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'playlist', | ||||||
|  |             'id': folder_id, | ||||||
|  |             'entries': [{ | ||||||
|  |                 '_type': 'url', | ||||||
|  |                 'url': video_url, | ||||||
|  |             } for video_url in urls], | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         id = mobj.group('id') | ||||||
|  |         user = mobj.group('user') | ||||||
|  |         url_type = mobj.group('type') | ||||||
|  |         if url_type == 'folder': | ||||||
|  |             return self._extract_folder(url, id) | ||||||
|  |         else: | ||||||
|  |             return self._extract_video(user, id) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # The server doesn't support HEAD requests, so the generic extractor can't | ||||||
|  | # detect the redirection | ||||||
|  | class LivestreamShortenerIE(InfoExtractor): | ||||||
|  |     IE_NAME = 'livestream:shortener' | ||||||
|  |     IE_DESC = False  # Do not list | ||||||
|  |     _VALID_URL = r'https?://livestre\.am/(?P<id>.+)' | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         mobj = re.match(self._VALID_URL, url) | ||||||
|  |         id = mobj.group('id') | ||||||
|  |         webpage = self._download_webpage(url, id) | ||||||
|  |  | ||||||
|  |         return { | ||||||
|  |             '_type': 'url', | ||||||
|  |             'url': self._og_search_url(webpage), | ||||||
|  |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz