mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[svt] Various improvements
+ [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv * [svt:base] Detect geo restriction * [svt:base] Extract series related metadata
This commit is contained in:
		| @@ -7,13 +7,13 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     dict_get, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SVTBaseIE(InfoExtractor): | ||||
|     def _extract_video(self, info, video_id): | ||||
|         video_info = self._get_video_info(info) | ||||
|  | ||||
|     def _extract_video(self, video_info, video_id): | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
|             player_type = vr.get('playerType') | ||||
| @@ -37,6 +37,8 @@ class SVTBaseIE(InfoExtractor): | ||||
|                     'format_id': player_type, | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | ||||
|             self.raise_geo_restricted('This video is only available in Sweden') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
| @@ -52,15 +54,32 @@ class SVTBaseIE(InfoExtractor): | ||||
|  | ||||
|                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) | ||||
|  | ||||
|         duration = video_info.get('materialLength') | ||||
|         age_limit = 18 if video_info.get('inappropriateForChildren') else 0 | ||||
|         title = video_info.get('title') | ||||
|  | ||||
|         series = video_info.get('programTitle') | ||||
|         season_number = int_or_none(video_info.get('season')) | ||||
|         episode = video_info.get('episodeTitle') | ||||
|         episode_number = int_or_none(video_info.get('episodeNumber')) | ||||
|  | ||||
|         duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | ||||
|         age_limit = None | ||||
|         adult = dict_get( | ||||
|             video_info, ('inappropriateForChildren', 'blockedForChildren'), | ||||
|             skip_false_values=False) | ||||
|         if adult is not None: | ||||
|             age_limit = 18 if adult else 0 | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -85,9 +104,6 @@ class SVTIE(SVTBaseIE): | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _get_video_info(self, info): | ||||
|         return info['video'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         widget_id = mobj.group('widget_id') | ||||
| @@ -97,7 +113,7 @@ class SVTIE(SVTBaseIE): | ||||
|             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), | ||||
|             article_id) | ||||
|  | ||||
|         info_dict = self._extract_video(info, article_id) | ||||
|         info_dict = self._extract_video(info['video'], article_id) | ||||
|         info_dict['title'] = info['context']['title'] | ||||
|         return info_dict | ||||
|  | ||||
| @@ -105,7 +121,7 @@ class SVTIE(SVTBaseIE): | ||||
| class SVTPlayIE(SVTBaseIE): | ||||
|     IE_DESC = 'SVT Play and Öppet arkiv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | ||||
|         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | ||||
|         'info_dict': { | ||||
| @@ -121,25 +137,47 @@ class SVTPlayIE(SVTBaseIE): | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _get_video_info(self, info): | ||||
|         return info['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'] | ||||
|     }, { | ||||
|         # geo restricted to Sweden | ||||
|         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data = self._parse_json(self._search_regex( | ||||
|             r'root\["__svtplay"\]\s*=\s*([^;]+);', webpage, 'embedded data'), video_id) | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'root\["__svtplay"\]\s*=\s*([^;]+);', | ||||
|                 webpage, 'embedded data', default='{}'), | ||||
|             video_id, fatal=False) | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         info_dict = self._extract_video(data, video_id) | ||||
|         info_dict.update({ | ||||
|             'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | ||||
|             'thumbnail': thumbnail, | ||||
|         }) | ||||
|         if data: | ||||
|             video_info = try_get( | ||||
|                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], | ||||
|                 dict) | ||||
|             if video_info: | ||||
|                 info_dict = self._extract_video(video_info, video_id) | ||||
|                 info_dict.update({ | ||||
|                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | ||||
|                     'thumbnail': thumbnail, | ||||
|                 }) | ||||
|                 return info_dict | ||||
|  | ||||
|         return info_dict | ||||
|         video_id = self._search_regex( | ||||
|             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | ||||
|             webpage, 'video id', default=None) | ||||
|  | ||||
|         if video_id: | ||||
|             data = self._download_json( | ||||
|                 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) | ||||
|             info_dict = self._extract_video(data, video_id) | ||||
|             if not info_dict.get('title'): | ||||
|                 info_dict['title'] = re.sub( | ||||
|                     r'\s*\|\s*.+?$', '', | ||||
|                     info_dict.get('episode') or self._og_search_title(webpage)) | ||||
|             return info_dict | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․