diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index 21f52798e..bb66c2095 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -10,9 +10,13 @@ __yt_dlp()
     diropts="--cache-dir"
 
     if [[ ${prev} =~ ${fileopts} ]]; then
+        local IFS=$'\n'
+        type compopt &>/dev/null && compopt -o filenames
         COMPREPLY=( $(compgen -f -- ${cur}) )
         return 0
     elif [[ ${prev} =~ ${diropts} ]]; then
+        local IFS=$'\n'
+        type compopt &>/dev/null && compopt -o dirnames
         COMPREPLY=( $(compgen -d -- ${cur}) )
         return 0
     fi
diff --git a/test/test_download.py b/test/test_download.py
index 3f36869d9..c7842735c 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -14,6 +14,7 @@
 from test.helper import (
     assertGreaterEqual,
+    assertLessEqual,
     expect_info_dict,
     expect_warnings,
     get_params,
@@ -121,10 +122,13 @@ def print_skipping(reason):
         params = get_params(test_case.get('params', {}))
         params['outtmpl'] = tname + '_' + params['outtmpl']
         if is_playlist and 'playlist' not in test_case:
-            params.setdefault('extract_flat', 'in_playlist')
-            params.setdefault('playlistend', test_case.get(
-                'playlist_mincount', test_case.get('playlist_count', -2) + 1))
+            params.setdefault('playlistend', max(
+                test_case.get('playlist_mincount', -1),
+                test_case.get('playlist_count', -2) + 1,
+                test_case.get('playlist_maxcount', -2) + 1))
             params.setdefault('skip_download', True)
+            if 'playlist_duration_sum' not in test_case:
+                params.setdefault('extract_flat', 'in_playlist')
 
         ydl = YoutubeDL(params, auto_init=False)
         ydl.add_default_info_extractors()
@@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None):
                 try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
         try_rm_tcs_files()
         try:
+            test_url = test_case['url']
             try_num = 1
             while True:
                 try:
@@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None):
                     # for outside error handling, and returns the exit code
                     # instead of the result dict.
                     res_dict = ydl.extract_info(
-                        test_case['url'],
+                        test_url,
                         force_generic_extractor=params.get('force_generic_extractor', False))
                 except (DownloadError, ExtractorError) as err:
                     # Check if the exception is not a network related one
@@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None):
             self.assertTrue('entries' in res_dict)
             expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
+            num_entries = len(res_dict.get('entries', []))
             if 'playlist_mincount' in test_case:
+                mincount = test_case['playlist_mincount']
                 assertGreaterEqual(
-                    self,
-                    len(res_dict['entries']),
-                    test_case['playlist_mincount'],
-                    'Expected at least %d in playlist %s, but got only %d' % (
-                        test_case['playlist_mincount'], test_case['url'],
-                        len(res_dict['entries'])))
+                    self, num_entries, mincount,
+                    f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
             if 'playlist_count' in test_case:
+                count = test_case['playlist_count']
+                got = num_entries if num_entries <= count else 'more'
                 self.assertEqual(
-                    len(res_dict['entries']),
-                    test_case['playlist_count'],
-                    'Expected %d entries in playlist %s, but got %d.' % (
-                        test_case['playlist_count'],
-                        test_case['url'],
-                        len(res_dict['entries']),
-                    ))
+                    num_entries, count,
+                    f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
+            if 'playlist_maxcount' in test_case:
+                maxcount = test_case['playlist_maxcount']
+                assertLessEqual(
+                    self, num_entries, maxcount,
+                    f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
             if 'playlist_duration_sum' in test_case:
                 got_duration = sum(e['duration'] for e in res_dict['entries'])
                 self.assertEqual(
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 0f5c2c97e..0c6535fc7 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -900,7 +900,9 @@ def _real_extract(self, url):
             headers=headers))
 
         geo_blocked = traverse_obj(play_info, (
-            'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
+            ('result', ('raw', 'data')), 'plugins',
+            lambda _, v: v['name'] == 'AreaLimitPanel',
+            'config', 'is_block', {bool}, any))
         premium_only = play_info.get('code') == -10403
 
         video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
@@ -914,7 +916,7 @@ def _real_extract(self, url):
 
         if traverse_obj(play_info, ((
             ('result', 'play_check', 'play_detail'),  # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
-            ('raw', 'data', 'play_video_type'),  # 'preview' vs 'whole'
+            (('result', ('raw', 'data')), 'play_video_type'),  # 'preview' vs 'whole' vs 'none'
         ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
             self.report_warning(
                 'Only preview format is available, '
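(Aside: the bilibili.py hunk above relies on `traverse_obj` branch paths. A nested tuple like `('result', ('raw', 'data'))` tries each alternative path, `{dict}` filters by type, and the trailing `any` collapses the branches to the first match. A minimal sketch with made-up payload shapes, not the real Bilibili API responses:)

```python
from yt_dlp.utils import traverse_obj

# Made-up payloads standing in for the two response shapes the extractor handles
old_style = {'result': {'video_info': {'cid': 1}}}
new_style = {'raw': {'data': {'video_info': {'cid': 2}}}}

for play_info in (old_style, new_style):
    print(traverse_obj(play_info, (
        ('result', ('raw', 'data')),  # branch: try 'result', then 'raw' -> 'data'
        'video_info', {dict}, any)))  # keep only dicts; `any` takes the first hit
# -> {'cid': 1}, then {'cid': 2}
```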
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 0d5e5b0e7..14fbd6ce8 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -8,6 +8,8 @@
     get_element_by_class,
     int_or_none,
     join_nonempty,
+    make_archive_id,
+    orderedSet,
     parse_duration,
     remove_end,
     traverse_obj,
@@ -16,6 +18,7 @@
     unified_timestamp,
     url_or_none,
     urljoin,
+    variadic,
 )
@@ -591,102 +594,179 @@ class NhkRadiruIE(InfoExtractor):
     IE_DESC = 'NHK らじる (Radiru/Rajiru)'
     _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
     _TESTS = [{
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
-        'skip': 'Episode expired on 2024-06-09',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=LG96ZW5KZ4_01_4251382',
+        'skip': 'Episode expires on 2025-07-14',
         'info_dict': {
-            'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
-            'id': '0449_01_4003239',
+            'title': 'クラシックの庭\u3000特集「ドボルザークを聴く」(1)交響曲を中心に',
+            'id': 'LG96ZW5KZ4_01_4251382',
             'ext': 'm4a',
-            'uploader': 'NHK FM 東京',
-            'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
-            'series': 'ジャズ・トゥナイト',
-            'channel': 'NHK FM 東京',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
-            'upload_date': '20240601',
-            'series_id': '0449_01',
-            'release_date': '20240601',
-            'timestamp': 1717257600,
-            'release_timestamp': 1717250400,
+            'description': 'md5:652d3c38a25b77959c716421eba1617a',
+            'uploader': 'NHK FM・東京',
+            'channel': 'NHK FM・東京',
+            'duration': 6597.0,
+            'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/LG96ZW5KZ4/LG96ZW5KZ4-eyecatch_a67c6e949325016c0724f2ed3eec8a2f.jpg',
+            'categories': ['音楽', 'クラシック・オペラ'],
+            'cast': ['田添菜穂子'],
+            'series': 'クラシックの庭',
+            'series_id': 'LG96ZW5KZ4',
+            'episode': '特集「ドボルザークを聴く」(1)交響曲を中心に',
+            'episode_id': 'QP1Q2ZXZY3',
+            'timestamp': 1751871000,
+            'upload_date': '20250707',
+            'release_timestamp': 1751864403,
+            'release_date': '20250707',
         },
     }, {
         # playlist, airs every weekday so it should _hopefully_ be okay forever
-        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
+        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=Z9L1V2M24L_01',
         'info_dict': {
-            'id': '0458_01',
+            'id': 'Z9L1V2M24L_01',
             'title': 'ベストオブクラシック',
             'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
-            'series_id': '0458_01',
+            'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/Z9L1V2M24L/Z9L1V2M24L-eyecatch_83ed28b4782907998875965fee60a351.jpg',
+            'series_id': 'Z9L1V2M24L_01',
             'uploader': 'NHK FM',
             'channel': 'NHK FM',
             'series': 'ベストオブクラシック',
         },
         'playlist_mincount': 3,
-    }, {
-        # one with letters in the id
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
-        'note': 'Expires on 2025-03-31',
-        'info_dict': {
-            'id': 'F683_01_3910688',
-            'ext': 'm4a',
-            'title': '夏目漱石「文鳥」第1回',
-            'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
-            'series_id': 'F683_01',
-            'description': '朗読:浅井理アナウンサー',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
-            'upload_date': '20240106',
-            'release_date': '20240106',
-            'uploader': 'NHK R1',
-            'release_timestamp': 1704511800,
-            'channel': 'NHK R1',
-            'timestamp': 1704512700,
-        },
-        'expected_warnings': ['Unable to download JSON metadata',
-                              'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
     }, {
         # news
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=18439M2W42_02_4251212',
+        'skip': 'Expires on 2025-07-15',
         'info_dict': {
-            'id': 'F261_01_4012173',
+            'id': '18439M2W42_02_4251212',
             'ext': 'm4a',
-            'channel': 'NHKラジオ第1',
+            'title': 'マイあさ! 午前5時のNHKニュース 2025年7月8日',
             'uploader': 'NHKラジオ第1',
+            'channel': 'NHKラジオ第1',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
             'series': 'NHKラジオニュース',
-            'title': '午前0時のNHKニュース',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
-            'release_timestamp': 1718290800,
-            'release_date': '20240613',
-            'timestamp': 1718291400,
-            'upload_date': '20240613',
+            'timestamp': 1751919420,
+            'upload_date': '20250707',
+            'release_timestamp': 1751918400,
+            'release_date': '20250707',
         },
     }, {
         # fallback when extended metadata fails
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
-        'skip': 'Expires on 2024-06-07',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=J8792PY43V_20_4253945',
+        'skip': 'Expires on 2025-09-01',
        'info_dict': {
-            'id': '2834_01_4009298',
-            'title': 'まち☆キラ!開成町特集',
+            'id': 'J8792PY43V_20_4253945',
             'ext': 'm4a',
-            'release_date': '20240531',
-            'upload_date': '20240531',
-            'series': 'はま☆キラ!',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
-            'channel': 'NHK R1,FM',
-            'description': '',
-            'timestamp': 1717123800,
-            'uploader': 'NHK R1,FM',
-            'release_timestamp': 1717120800,
-            'series_id': '2834_01',
+            'title': '「後絶たない筋肉増強剤の使用」ワールドリポート',
+            'description': '大濱 敦(ソウル支局)',
+            'uploader': 'NHK R1',
+            'channel': 'NHK R1',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/J8792PY43V/img/corner/box_31_thumbnail.jpg',
+            'series': 'マイあさ! ワールドリポート',
+            'series_id': 'J8792PY43V_20',
+            'timestamp': 1751837100,
+            'upload_date': '20250706',
+            'release_timestamp': 1751835600,
+            'release_date': '20250706',
         },
-        'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
+        'expected_warnings': ['Failed to download extended metadata: HTTP Error 404: Not Found'],
     }]
 
     _API_URL_TMPL = None
 
+    # The `_format_*` and `_make_*` functions are ported from: https://www.nhk.or.jp/radio/assets/js/timetable_detail_new.js
+
+    def _format_act_list(self, act_list):
+        role_groups = {}
+        for act in traverse_obj(act_list, (..., {dict})):
+            role = act.get('role')
+            if role not in role_groups:
+                role_groups[role] = []
+            role_groups[role].append(act)
+
+        formatted_roles = []
+        for role, acts in role_groups.items():
+            for i, act in enumerate(acts):
+                res = f'【{role}】' if i == 0 and role is not None else ''
+                if title := act.get('title'):
+                    res += f'{title}…'
+                formatted_roles.append(join_nonempty(res, act.get('name'), delim=''))
+        return join_nonempty(*formatted_roles, delim=',')
+
+    def _make_artists(self, track, key):
+        artists = []
+        for artist in traverse_obj(track, (key, ..., {dict})):
+            if res := join_nonempty(*traverse_obj(artist, ((
+                ('role', filter, {'{}…'.format}),
+                ('part', filter, {'({})'.format}),
+                ('name', filter),
+            ), {str})), delim=''):
+                artists.append(res)
+
+        return '、'.join(artists) or None
+
+    def _make_duration(self, track, key):
+        d = traverse_obj(track, (key, {parse_duration}))
+        if d is None:
+            return None
+        hours, remainder = divmod(d, 3600)
+        minutes, seconds = divmod(remainder, 60)
+        res = '('
+        if hours > 0:
+            res += f'{int(hours)}時間'
+        if minutes > 0:
+            res += f'{int(minutes)}分'
+        res += f'{int(seconds):02}秒)'
+        return res
+
+    def _format_music_list(self, music_list):
+        tracks = []
+        for track in traverse_obj(music_list, (..., {dict})):
+            track_details = traverse_obj(track, ((
+                ('name', filter, {'「{}」'.format}),
+                ('lyricist', filter, {'{}:作詞'.format}),
+                ('composer', filter, {'{}:作曲'.format}),
+                ('arranger', filter, {'{}:編曲'.format}),
+            ), {str}))
+
+            track_details.append(self._make_artists(track, 'byArtist'))
+            track_details.append(self._make_duration(track, 'duration'))
+
+            if label := join_nonempty('label', 'code', delim=' ', from_dict=track):
+                track_details.append(f'<{label}>')
+            if location := traverse_obj(track, ('location', {str})):
+                track_details.append(f'~{location}~')
+            tracks.append(join_nonempty(*track_details, delim='\n'))
+        return '\n\n'.join(tracks)
+
+    def _format_description(self, response):
+        detailed_description = traverse_obj(response, ('detailedDescription', {dict})) or {}
+        return join_nonempty(
+            join_nonempty('epg80', 'epg200', delim='\n\n', from_dict=detailed_description),
+            traverse_obj(response, ('misc', 'actList', {self._format_act_list})),
+            traverse_obj(response, ('misc', 'musicList', {self._format_music_list})),
+            delim='\n\n')
+
+    def _get_thumbnails(self, data, keys, name=None, preference=-1):
+        thumbnails = []
+        for size, thumb in traverse_obj(data, (
+            *variadic(keys, (str, bytes, dict, set)), {dict.items},
+            lambda _, v: v[0] != 'copyright' and url_or_none(v[1]['url']),
+        )):
+            thumbnails.append({
+                'url': thumb['url'],
+                'width': int_or_none(thumb.get('width')),
+                'height': int_or_none(thumb.get('height')),
+                'preference': preference,
+                'id': join_nonempty(name, size),
+            })
+            preference -= 1
+        return thumbnails
+
     def _extract_extended_metadata(self, episode_id, aa_vinfo):
         service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
+        date_id = aa_vinfo[3]
+
         detail_url = try_call(
-            lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
+            lambda: self._API_URL_TMPL.format(broadcastEventId=join_nonempty(service, area, date_id)))
         if not detail_url:
             return {}
@@ -699,36 +779,37 @@ def _extract_extended_metadata(self, episode_id, aa_vinfo):
         if error := traverse_obj(response, ('error', {dict})):
             self.report_warning(
                 'Failed to get extended metadata. API returned '
-                f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
+                f'Error {join_nonempty("statuscode", "message", from_dict=error, delim=": ")}')
             return {}
 
-        full_meta = traverse_obj(response, ('list', service, 0, {dict}))
-        if not full_meta:
-            self.report_warning('Failed to get extended metadata. API returned empty list.')
-            return {}
+        station = traverse_obj(response, ('publishedOn', 'broadcastDisplayName', {str}))
 
-        station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
-        thumbnails = [{
-            'id': str(id_),
-            'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
-            **traverse_obj(thumb, {
-                'url': 'url',
-                'width': ('width', {int_or_none}),
-                'height': ('height', {int_or_none}),
-            }),
-        } for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
+        thumbnails = []
+        thumbnails.extend(self._get_thumbnails(response, ('about', 'eyecatch')))
+        for num, dct in enumerate(traverse_obj(response, ('about', 'eyecatchList', ...))):
+            thumbnails.extend(self._get_thumbnails(dct, None, join_nonempty('list', num), -2))
+        thumbnails.extend(
+            self._get_thumbnails(response, ('about', 'partOfSeries', 'eyecatch'), 'series', -3))
 
         return filter_dict({
+            'description': self._format_description(response),
+            'cast': traverse_obj(response, ('misc', 'actList', ..., 'name', {str})),
+            'thumbnails': thumbnails,
+            **traverse_obj(response, {
+                'title': ('name', {str}),
+                'timestamp': ('endDate', {unified_timestamp}),
+                'release_timestamp': ('startDate', {unified_timestamp}),
+                'duration': ('duration', {parse_duration}),
+            }),
+            **traverse_obj(response, ('identifierGroup', {
+                'series': ('radioSeriesName', {str}),
+                'series_id': ('radioSeriesId', {str}),
+                'episode': ('radioEpisodeName', {str}),
+                'episode_id': ('radioEpisodeId', {str}),
+                'categories': ('genre', ..., ['name1', 'name2'], {str}, all, {orderedSet}),
+            })),
             'channel': station,
             'uploader': station,
-            'description': join_nonempty(
-                'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
-            'thumbnails': thumbnails,
-            **traverse_obj(full_meta, {
-                'title': ('title', {str}),
-                'timestamp': ('end_time', {unified_timestamp}),
-                'release_timestamp': ('start_time', {unified_timestamp}),
-            }),
         })
 
     def _extract_episode_info(self, episode, programme_id, series_meta):
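(Aside: the rewritten metadata code above uses `join_nonempty` in a few distinct ways — positional values with the default `-` delimiter to build the `broadcastEventId`, falsy-value dropping, and `from_dict` lookups. A quick sketch; all sample values are invented:)

```python
from yt_dlp.utils import join_nonempty

# Positional values, default '-' delimiter, as in join_nonempty(service, area, date_id);
# 'r1', '130' and the date string are invented sample values
print(join_nonempty('r1', '130', '2025070712000'))  # r1-130-2025070712000

# Falsy values are dropped, so no dangling delimiters appear
print(join_nonempty('list', None, 2))  # list-2

# from_dict resolves the given names against a mapping first, as in the <label code> line
print(join_nonempty('label', 'code', delim=' ', from_dict={'label': 'NHK', 'code': None}))  # NHK
```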
@@ -782,7 +863,9 @@ def _real_extract(self, url):
         site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
         programme_id = f'{site_id}_{corner_id}'
 
-        if site_id == 'F261':  # XXX: News programmes use old API (for now?)
+        # XXX: News programmes use the old API
+        # Can't move this to NhkRadioNewsPageIE because news items still use the normal URL format
+        if site_id == '18439M2W42':
             meta = self._download_json(
                 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
             series_meta = traverse_obj(meta, {
@@ -843,8 +926,8 @@ class NhkRadioNewsPageIE(InfoExtractor):
         'url': 'https://www.nhk.or.jp/radionews/',
         'playlist_mincount': 5,
         'info_dict': {
-            'id': 'F261_01',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+            'id': '18439M2W42_01',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
             'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
             'channel': 'NHKラジオ第1',
             'uploader': 'NHKラジオ第1',
@@ -853,7 +936,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
+        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=18439M2W42_01', NhkRadiruIE)
 
 
 class NhkRadiruLiveIE(InfoExtractor):
@@ -863,11 +946,12 @@ class NhkRadiruLiveIE(InfoExtractor):
         # radio 1, no area specified
         'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
         'info_dict': {
-            'id': 'r1-tokyo',
-            'title': 're:^NHKネットラジオ第1 東京.+$',
+            'id': 'bs-r1-130',
+            'title': 're:^NHKラジオ第1・東京.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r1/r1-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive r1-tokyo'],
         },
     }, {
         # radio 2, area specified
@@ -875,26 +959,28 @@ class NhkRadiruLiveIE(InfoExtractor):
         'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
         'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
         'info_dict': {
-            'id': 'r2-fukuoka',
-            'title': 're:^NHKネットラジオ第2 福岡.+$',
+            'id': 'bs-r2-400',
+            'title': 're:^NHKラジオ第2.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r2/r2-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive r2-fukuoka'],
         },
     }, {
         # fm, area specified
         'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
         'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
         'info_dict': {
-            'id': 'fm-sapporo',
-            'title': 're:^NHKネットラジオFM 札幌.+$',
+            'id': 'bs-r3-010',
+            'title': 're:^NHK FM・札幌.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r3/r3-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive fm-sapporo'],
         },
     }]
 
-    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
+    _NOA_STATION_IDS = {'r1': 'r1', 'r2': 'r2', 'fm': 'r3'}
 
     def _real_extract(self, url):
         station = self._match_id(url)
@@ -911,12 +997,15 @@ def _real_extract(self, url):
         noa_info = self._download_json(
             f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
             station, note=f'Downloading {area} station metadata', fatal=False)
-        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
+        broadcast_service = traverse_obj(noa_info, (self._NOA_STATION_IDS.get(station), 'publishedOn'))
 
         return {
-            'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
-            'id': join_nonempty(station, area),
-            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
+            **traverse_obj(broadcast_service, {
+                'title': ('broadcastDisplayName', {str}),
+                'id': ('id', {str}),
+            }),
+            '_old_archive_ids': [make_archive_id(self, join_nonempty(station, area))],
+            'thumbnails': traverse_obj(broadcast_service, ('logo', ..., {
                 'url': 'url',
                 'width': ('width', {int_or_none}),
                 'height': ('height', {int_or_none}),
diff --git a/yt_dlp/extractor/youtube/_tab.py b/yt_dlp/extractor/youtube/_tab.py
index c018ee8cf..226e5ede3 100644
--- a/yt_dlp/extractor/youtube/_tab.py
+++ b/yt_dlp/extractor/youtube/_tab.py
@@ -317,17 +317,31 @@ def _extract_lockup_view_model(self, view_model):
         content_id = view_model.get('contentId')
         if not content_id:
             return
+
         content_type = view_model.get('contentType')
-        if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+        if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO':
+            ie = YoutubeIE
+            url = f'https://www.youtube.com/watch?v={content_id}'
+            thumb_keys = (None,)
+        elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+            ie = YoutubeTabIE
+            url = f'https://www.youtube.com/playlist?list={content_id}'
+            thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail')
+        else:
             self.report_warning(
-                f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}', only_once=True)
+                f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}',
+                only_once=True)
             return
+
         return self.url_result(
-            f'https://www.youtube.com/playlist?list={content_id}', ie=YoutubeTabIE, video_id=content_id,
+            url, ie, content_id,
             title=traverse_obj(view_model, (
                 'metadata', 'lockupMetadataViewModel', 'title', 'content', {str})),
             thumbnails=self._extract_thumbnails(view_model, (
-                'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
+                'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
+            duration=traverse_obj(view_model, (
+                'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
+                'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)))
 
     def _rich_entries(self, rich_grid_renderer):
         if lockup_view_model := traverse_obj(rich_grid_renderer, ('content', 'lockupViewModel', {dict})):
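(For reference, the new test keys wired up in the test_download.py hunk combine like this in an extractor's `_TESTS`. This entry is hypothetical; the URL and numbers are placeholders:)

```python
_TESTS = [{
    'url': 'https://example.com/playlist/123',  # placeholder URL
    'info_dict': {'id': '123'},
    'playlist_mincount': 3,    # checked with assertGreaterEqual
    'playlist_maxcount': 10,   # new: checked with assertLessEqual
    # presence of this key disables extract_flat so entry durations are available
    'playlist_duration_sum': 600,
}]
```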