mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors
This commit is contained in:
		
							
								
								
									
										84
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | |||||||
|  | # coding: utf-8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
|  | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  |     parse_iso8601, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
class AMPIE(InfoExtractor):
    """Shared base for extractors that read Akamai Adaptive Media Player feeds.

    Subclasses call ``_extract_feed_info(url)`` with the URL of a JSON feed
    and receive an info dict built from the feed's ``channel.item`` entry.
    """

    @staticmethod
    def _media_node_list(node):
        """Normalize a feed node to a list of entries.

        The feed represents a single entry as a bare dict and several
        entries as a list; a missing or empty node becomes an empty list.
        """
        if not node:
            return []
        if isinstance(node, dict):
            return [node]
        return node

    def _get_media_node(self, item, name, default=None):
        """Look up ``name`` on a feed item.

        Tries, in order: ``media-<name>`` inside the item's ``media-group``
        (or the item itself when there is no group), ``media-<name>`` on the
        item, and finally the bare ``name`` on the item, falling back to
        ``default``.
        """
        media_name = 'media-%s' % name
        media_group = item.get('media-group') or item
        return (
            media_group.get(media_name) or
            item.get(media_name) or
            item.get(name, default))

    def _extract_feed_info(self, url):
        """Download and parse an Akamai Adaptive Media Player feed.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnails``, ``timestamp``, ``duration``, ``subtitles`` and
        ``formats``.  ``id`` is the item's ``guid``; callers may overwrite it.
        """
        item = self._download_json(
            url, None,
            'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')['channel']['item']

        video_id = item['guid']

        thumbnails = []
        for thumbnail_data in self._media_node_list(
                self._get_media_node(item, 'thumbnail')):
            thumbnail = thumbnail_data.get('@attributes') or {}
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                # An entry without a URL is useless as a thumbnail.
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail_url, 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in self._media_node_list(
                self._get_media_node(item, 'subTitle')):
            subtitle = subtitle_data.get('@attributes') or {}
            href = subtitle.get('href')
            if not href:
                continue
            lang = subtitle.get('lang') or 'en'
            # Append instead of assigning so that several tracks sharing a
            # language do not overwrite each other.
            subtitles.setdefault(lang, []).append({'url': href})

        formats = []
        # A missing content node becomes an empty list here, so such an item
        # simply yields no formats instead of failing with a TypeError.
        media_content = self._media_node_list(
            self._get_media_node(item, 'content'))
        for media_data in media_content:
            media = media_data.get('@attributes') or {}
            media_url = media.get('url')
            if not media_url:
                continue
            media_type = media.get('type')
            if media_type == 'video/f4m':
                f4m_formats = self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media_url, video_id, m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                # Any other type is treated as a directly downloadable file.
                category = (media_data.get('media-category') or {}).get('@attributes') or {}
                formats.append({
                    'format_id': category.get('label'),
                    'url': media_url,
                    'preference': 1,
                    'vbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        self._sort_formats(formats)

        # The duration is read from the first content entry only.
        first_media = (media_content[0].get('@attributes') or {}) if media_content else {}

        return {
            'id': video_id,
            'title': self._get_media_node(item, 'title'),
            'description': self._get_media_node(item, 'description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': int_or_none(first_media.get('duration')),
            # Previously collected but never returned, so feed subtitles were
            # dropped and callers indexing the result for them failed.
            'subtitles': subtitles,
            'formats': formats,
        }
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import itertools | import itertools | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .amp import AMPIE | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_HTTPError, |     compat_HTTPError, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
| @@ -19,7 +19,7 @@ from ..utils import ( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class DramaFeverBaseIE(InfoExtractor): | class DramaFeverBaseIE(AMPIE): | ||||||
|     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' |     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' | ||||||
|     _NETRC_MACHINE = 'dramafever' |     _NETRC_MACHINE = 'dramafever' | ||||||
|  |  | ||||||
| @@ -80,60 +80,24 @@ class DramaFeverIE(DramaFeverBaseIE): | |||||||
|             'timestamp': 1404336058, |             'timestamp': 1404336058, | ||||||
|             'upload_date': '20140702', |             'upload_date': '20140702', | ||||||
|             'duration': 343, |             'duration': 343, | ||||||
|         } |         }, | ||||||
|  |         'params': { | ||||||
|  |             # m3u8 download | ||||||
|  |             'skip_download': True, | ||||||
|  |         }, | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url).replace('/', '.') |         video_id = self._match_id(url).replace('/', '.') | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             feed = self._download_json( |             info = self._extract_feed_info('http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) | ||||||
|                 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, |  | ||||||
|                 video_id, 'Downloading episode JSON')['channel']['item'] |  | ||||||
|         except ExtractorError as e: |         except ExtractorError as e: | ||||||
|             if isinstance(e.cause, compat_HTTPError): |             if isinstance(e.cause, compat_HTTPError): | ||||||
|                 raise ExtractorError( |                 raise ExtractorError( | ||||||
|                     'Currently unavailable in your country.', expected=True) |                     'Currently unavailable in your country.', expected=True) | ||||||
|             raise |             raise | ||||||
|  |  | ||||||
|         media_group = feed.get('media-group', {}) |  | ||||||
|  |  | ||||||
|         formats = [] |  | ||||||
|         for media_content in media_group['media-content']: |  | ||||||
|             src = media_content.get('@attributes', {}).get('url') |  | ||||||
|             if not src: |  | ||||||
|                 continue |  | ||||||
|             ext = determine_ext(src) |  | ||||||
|             if ext == 'f4m': |  | ||||||
|                 formats.extend(self._extract_f4m_formats( |  | ||||||
|                     src, video_id, f4m_id='hds')) |  | ||||||
|             elif ext == 'm3u8': |  | ||||||
|                 formats.extend(self._extract_m3u8_formats( |  | ||||||
|                     src, video_id, 'mp4', m3u8_id='hls')) |  | ||||||
|             else: |  | ||||||
|                 formats.append({ |  | ||||||
|                     'url': src, |  | ||||||
|                 }) |  | ||||||
|         self._sort_formats(formats) |  | ||||||
|  |  | ||||||
|         title = media_group.get('media-title') |  | ||||||
|         description = media_group.get('media-description') |  | ||||||
|         duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) |  | ||||||
|         thumbnail = self._proto_relative_url( |  | ||||||
|             media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) |  | ||||||
|         timestamp = parse_iso8601(feed.get('pubDate'), ' ') |  | ||||||
|  |  | ||||||
|         subtitles = {} |  | ||||||
|         for media_subtitle in media_group.get('media-subTitle', []): |  | ||||||
|             lang = media_subtitle.get('@attributes', {}).get('lang') |  | ||||||
|             href = media_subtitle.get('@attributes', {}).get('href') |  | ||||||
|             if not lang or not href: |  | ||||||
|                 continue |  | ||||||
|             subtitles[lang] = [{ |  | ||||||
|                 'ext': 'ttml', |  | ||||||
|                 'url': href, |  | ||||||
|             }] |  | ||||||
|  |  | ||||||
|         series_id, episode_number = video_id.split('.') |         series_id, episode_number = video_id.split('.') | ||||||
|         episode_info = self._download_json( |         episode_info = self._download_json( | ||||||
|             # We only need a single episode info, so restricting page size to one episode |             # We only need a single episode info, so restricting page size to one episode | ||||||
| @@ -146,21 +110,12 @@ class DramaFeverIE(DramaFeverBaseIE): | |||||||
|             if value: |             if value: | ||||||
|                 subfile = value[0].get('subfile') or value[0].get('new_subfile') |                 subfile = value[0].get('subfile') or value[0].get('new_subfile') | ||||||
|                 if subfile and subfile != 'http://www.dramafever.com/st/': |                 if subfile and subfile != 'http://www.dramafever.com/st/': | ||||||
|                     subtitles.setdefault('English', []).append({ |                     info['subtitiles'].setdefault('English', []).append({ | ||||||
|                         'ext': 'srt', |                         'ext': 'srt', | ||||||
|                         'url': subfile, |                         'url': subfile, | ||||||
|                     }) |                     }) | ||||||
|  |  | ||||||
|         return { |         return info | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'thumbnail': thumbnail, |  | ||||||
|             'timestamp': timestamp, |  | ||||||
|             'duration': duration, |  | ||||||
|             'formats': formats, |  | ||||||
|             'subtitles': subtitles, |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class DramaFeverSeriesIE(DramaFeverBaseIE): | class DramaFeverSeriesIE(DramaFeverBaseIE): | ||||||
|   | |||||||
| @@ -2,14 +2,14 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .amp import AMPIE | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
|     int_or_none, |     int_or_none, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class FoxNewsIE(InfoExtractor): | class FoxNewsIE(AMPIE): | ||||||
|     IE_DESC = 'Fox News and Fox Business Video' |     IE_DESC = 'Fox News and Fox Business Video' | ||||||
|     _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' |     _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||||
|     _TESTS = [ |     _TESTS = [ | ||||||
| @@ -20,10 +20,10 @@ class FoxNewsIE(InfoExtractor): | |||||||
|                 'id': '3937480', |                 'id': '3937480', | ||||||
|                 'ext': 'flv', |                 'ext': 'flv', | ||||||
|                 'title': 'Frozen in Time', |                 'title': 'Frozen in Time', | ||||||
|                 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', |                 'description': '16-year-old girl is size of toddler', | ||||||
|                 'duration': 265, |                 'duration': 265, | ||||||
|                 'timestamp': 1304411491, |                 #'timestamp': 1304411491, | ||||||
|                 'upload_date': '20110503', |                 #'upload_date': '20110503', | ||||||
|                 'thumbnail': 're:^https?://.*\.jpg$', |                 'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
| @@ -34,10 +34,10 @@ class FoxNewsIE(InfoExtractor): | |||||||
|                 'id': '3922535568001', |                 'id': '3922535568001', | ||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", |                 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", | ||||||
|                 'description': "Congressman discusses the president's executive action", |                 'description': "Congressman discusses president's plan", | ||||||
|                 'duration': 292, |                 'duration': 292, | ||||||
|                 'timestamp': 1417662047, |                 #'timestamp': 1417662047, | ||||||
|                 'upload_date': '20141204', |                 #'upload_date': '20141204', | ||||||
|                 'thumbnail': 're:^https?://.*\.jpg$', |                 'thumbnail': 're:^https?://.*\.jpg$', | ||||||
|             }, |             }, | ||||||
|         }, |         }, | ||||||
| @@ -56,48 +56,6 @@ class FoxNewsIE(InfoExtractor): | |||||||
|         video_id = mobj.group('id') |         video_id = mobj.group('id') | ||||||
|         host = mobj.group('host') |         host = mobj.group('host') | ||||||
|  |  | ||||||
|         video = self._download_json( |         info = self._extract_feed_info('http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) | ||||||
|             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) |         info['id'] = video_id | ||||||
|  |         return info | ||||||
|         item = video['channel']['item'] |  | ||||||
|         title = item['title'] |  | ||||||
|         description = item['description'] |  | ||||||
|         timestamp = parse_iso8601(item['dc-date']) |  | ||||||
|  |  | ||||||
|         media_group = item['media-group'] |  | ||||||
|         duration = None |  | ||||||
|         formats = [] |  | ||||||
|         for media in media_group['media-content']: |  | ||||||
|             attributes = media['@attributes'] |  | ||||||
|             video_url = attributes['url'] |  | ||||||
|             if video_url.endswith('.f4m'): |  | ||||||
|                 formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id)) |  | ||||||
|             elif video_url.endswith('.m3u8'): |  | ||||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv')) |  | ||||||
|             elif not video_url.endswith('.smil'): |  | ||||||
|                 duration = int_or_none(attributes.get('duration')) |  | ||||||
|                 formats.append({ |  | ||||||
|                     'url': video_url, |  | ||||||
|                     'format_id': media['media-category']['@attributes']['label'], |  | ||||||
|                     'preference': 1, |  | ||||||
|                     'vbr': int_or_none(attributes.get('bitrate')), |  | ||||||
|                     'filesize': int_or_none(attributes.get('fileSize')) |  | ||||||
|                 }) |  | ||||||
|         self._sort_formats(formats) |  | ||||||
|  |  | ||||||
|         media_thumbnail = media_group['media-thumbnail']['@attributes'] |  | ||||||
|         thumbnails = [{ |  | ||||||
|             'url': media_thumbnail['url'], |  | ||||||
|             'width': int_or_none(media_thumbnail.get('width')), |  | ||||||
|             'height': int_or_none(media_thumbnail.get('height')), |  | ||||||
|         }] if media_thumbnail else [] |  | ||||||
|  |  | ||||||
|         return { |  | ||||||
|             'id': video_id, |  | ||||||
|             'title': title, |  | ||||||
|             'description': description, |  | ||||||
|             'duration': duration, |  | ||||||
|             'timestamp': timestamp, |  | ||||||
|             'formats': formats, |  | ||||||
|             'thumbnails': thumbnails, |  | ||||||
|         } |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 remitamine
					remitamine