mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[extractor/rtl.lu] Add extractor (#4222)
Closes #1721 Authored by: HobbyistDev
This commit is contained in:
		| @@ -141,3 +141,155 @@ class RtlNlIE(InfoExtractor): | ||||
|             'duration': parse_duration(material.get('duration')), | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
class RTLLuBaseIE(InfoExtractor):
    """Shared extraction logic for rtl.lu pages that embed an <rtl-player> or <rtl-audioplayer> tag."""

    # Each pattern captures the double-quoted value of one attribute of the player tag
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    # IDs that denote a live stream. Kept identical to the `id` group of
    # RTLLuLiveIE._VALID_URL so that every URL accepted by that extractor
    # (e.g. a future "live-3") is flagged as live, not only a fixed list.
    _LIVE_ID_REGEX = r'live(?:-\d+)?|lauschteren'

    def get_media_url(self, webpage, video_id, media_type):
        """Return the URL captured by _MEDIA_REGEX[media_type] in webpage.

        media_type must be a key of _MEDIA_REGEX ('video', 'audio' or 'thumbnail').
        video_id is not used here; it is kept so existing callers keep working.
        default=None is passed to _search_regex, so a missing tag is not fatal.
        """
        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        """Build the (formats, subtitles) pair from the player tags found in webpage.

        The HLS manifest of <rtl-player> (if any) supplies formats and subtitles;
        the source of <rtl-audioplayer> (if any) is appended as an audio-only mp3 format.
        """
        video_url = self.get_media_url(webpage, video_id, 'video')
        audio_url = self.get_media_url(webpage, video_id, 'audio')

        formats, subtitles = [], {}
        if video_url is not None:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
        if audio_url is not None:
            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        """Download the page at url and return its info dict."""
        # Imported locally so this block does not rely on the file-level imports
        import re

        video_id = self._match_id(url)
        is_live = re.fullmatch(self._LIVE_ID_REGEX, video_id) is not None

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # The context can be read from the <rtl-comments context=<context>> tag in the webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            # Prefer the player's poster attribute, fall back to the OpenGraph thumbnail
            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'is_live': is_live,
        }
| 
 | ||||
| 
 | ||||
class RTLLuTeleVODIE(RTLLuBaseIE):
    # Numeric-ID video pages: /tele/<slug>/v/<id>[.html] and /video/<id>[.html]
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            # /tele/<slug>/v/<id>.html form
            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
            'info_dict': {
                'id': '3266757',
                'ext': 'mp4',
                'title': 'Informatiounsversammlung Héichwaasser',
                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
            },
        },
        {
            # /video/<id> form
            'url': 'https://www.rtl.lu/video/3295215',
            'info_dict': {
                'id': '3295215',
                'ext': 'mp4',
                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
            },
        },
    ]
| 
 | ||||
| 
 | ||||
class RTLLuArticleIE(RTLLuBaseIE):
    # Article pages (/<section>/<subsection>/a/<id>.html) on the www, 5minutes and today subdomains
    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [
        {
            # Audio-only
            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
            'info_dict': {
                'id': '1934360',
                'ext': 'mp3',
                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
            },
        },
        {
            # 5minutes
            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
            'info_dict': {
                'id': '1853173',
                'ext': 'mp4',
                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
                'description': 'md5:ac031da0740e997a5cf4633173634fee',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
            },
        },
        {
            # today.lu
            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
            'info_dict': {
                'id': '1936203',
                'ext': 'mp4',
                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
            },
        },
    ]
| 
 | ||||
| 
 | ||||
class RTLLuLiveIE(RTLLuBaseIE):
    # Live pages under /tele/ and /radio/; the id is the last path component
    # ("live", "live-<n>" or "lauschteren")
    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [
        {
            # Tele:live
            'url': 'https://www.rtl.lu/tele/live',
            'info_dict': {
                'id': 'live',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Tele:live-2
            'url': 'https://www.rtl.lu/tele/live-2',
            'info_dict': {
                'id': 'live-2',
                'ext': 'mp4',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
                'live_status': 'is_live',
            },
        },
        {
            # Radio:lauschteren
            'url': 'https://www.rtl.lu/radio/lauschteren',
            'info_dict': {
                'id': 'lauschteren',
                'ext': 'mp4',
                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
                'live_status': 'is_live',
            },
        },
    ]
| 
 | ||||
| 
 | ||||
class RTLLuRadioIE(RTLLuBaseIE):
    # Radio show pages: /radio/<show>/s/<id>[.html]
    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
            'info_dict': {
                'id': '4033058',
                'ext': 'mp3',
                'title': '5 vir 12 - Stau um Stau',
                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
            },
        },
    ]
|   | ||||
		Reference in New Issue
	
	Block a user
	 HobbyistDev
					HobbyistDev