mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 00:25:15 +00:00 
			
		
		
		
	[bbc] improve extraction
- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`
This commit is contained in:
		@@ -229,51 +229,6 @@ class BBCCoUkIE(InfoExtractor):
 | 
				
			|||||||
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
 | 
					        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
 | 
				
			||||||
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]
 | 
					        return [ref.get('href') for ref in asx.findall('./Entry/ref')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_connection(self, connection, programme_id):
 | 
					 | 
				
			||||||
        formats = []
 | 
					 | 
				
			||||||
        kind = connection.get('kind')
 | 
					 | 
				
			||||||
        protocol = connection.get('protocol')
 | 
					 | 
				
			||||||
        supplier = connection.get('supplier')
 | 
					 | 
				
			||||||
        if protocol == 'http':
 | 
					 | 
				
			||||||
            href = connection.get('href')
 | 
					 | 
				
			||||||
            transfer_format = connection.get('transferFormat')
 | 
					 | 
				
			||||||
            # ASX playlist
 | 
					 | 
				
			||||||
            if supplier == 'asx':
 | 
					 | 
				
			||||||
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
 | 
					 | 
				
			||||||
                    formats.append({
 | 
					 | 
				
			||||||
                        'url': ref,
 | 
					 | 
				
			||||||
                        'format_id': 'ref%s_%s' % (i, supplier),
 | 
					 | 
				
			||||||
                    })
 | 
					 | 
				
			||||||
            # Skip DASH until supported
 | 
					 | 
				
			||||||
            elif transfer_format == 'dash':
 | 
					 | 
				
			||||||
                pass
 | 
					 | 
				
			||||||
            elif transfer_format == 'hls':
 | 
					 | 
				
			||||||
                formats.extend(self._extract_m3u8_formats(
 | 
					 | 
				
			||||||
                    href, programme_id, ext='mp4', entry_protocol='m3u8_native',
 | 
					 | 
				
			||||||
                    m3u8_id=supplier, fatal=False))
 | 
					 | 
				
			||||||
            # Direct link
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                formats.append({
 | 
					 | 
				
			||||||
                    'url': href,
 | 
					 | 
				
			||||||
                    'format_id': supplier or kind or protocol,
 | 
					 | 
				
			||||||
                })
 | 
					 | 
				
			||||||
        elif protocol == 'rtmp':
 | 
					 | 
				
			||||||
            application = connection.get('application', 'ondemand')
 | 
					 | 
				
			||||||
            auth_string = connection.get('authString')
 | 
					 | 
				
			||||||
            identifier = connection.get('identifier')
 | 
					 | 
				
			||||||
            server = connection.get('server')
 | 
					 | 
				
			||||||
            formats.append({
 | 
					 | 
				
			||||||
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
 | 
					 | 
				
			||||||
                'play_path': identifier,
 | 
					 | 
				
			||||||
                'app': '%s?%s' % (application, auth_string),
 | 
					 | 
				
			||||||
                'page_url': 'http://www.bbc.co.uk',
 | 
					 | 
				
			||||||
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
 | 
					 | 
				
			||||||
                'rtmp_live': False,
 | 
					 | 
				
			||||||
                'ext': 'flv',
 | 
					 | 
				
			||||||
                'format_id': supplier,
 | 
					 | 
				
			||||||
            })
 | 
					 | 
				
			||||||
        return formats
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _extract_items(self, playlist):
 | 
					    def _extract_items(self, playlist):
 | 
				
			||||||
        return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
 | 
					        return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -294,46 +249,6 @@ class BBCCoUkIE(InfoExtractor):
 | 
				
			|||||||
    def _extract_connections(self, media):
 | 
					    def _extract_connections(self, media):
 | 
				
			||||||
        return self._findall_ns(media, './{%s}connection')
 | 
					        return self._findall_ns(media, './{%s}connection')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_video(self, media, programme_id):
 | 
					 | 
				
			||||||
        formats = []
 | 
					 | 
				
			||||||
        vbr = int_or_none(media.get('bitrate'))
 | 
					 | 
				
			||||||
        vcodec = media.get('encoding')
 | 
					 | 
				
			||||||
        service = media.get('service')
 | 
					 | 
				
			||||||
        width = int_or_none(media.get('width'))
 | 
					 | 
				
			||||||
        height = int_or_none(media.get('height'))
 | 
					 | 
				
			||||||
        file_size = int_or_none(media.get('media_file_size'))
 | 
					 | 
				
			||||||
        for connection in self._extract_connections(media):
 | 
					 | 
				
			||||||
            conn_formats = self._extract_connection(connection, programme_id)
 | 
					 | 
				
			||||||
            for format in conn_formats:
 | 
					 | 
				
			||||||
                format.update({
 | 
					 | 
				
			||||||
                    'width': width,
 | 
					 | 
				
			||||||
                    'height': height,
 | 
					 | 
				
			||||||
                    'vbr': vbr,
 | 
					 | 
				
			||||||
                    'vcodec': vcodec,
 | 
					 | 
				
			||||||
                    'filesize': file_size,
 | 
					 | 
				
			||||||
                })
 | 
					 | 
				
			||||||
                if service:
 | 
					 | 
				
			||||||
                    format['format_id'] = '%s_%s' % (service, format['format_id'])
 | 
					 | 
				
			||||||
            formats.extend(conn_formats)
 | 
					 | 
				
			||||||
        return formats
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _extract_audio(self, media, programme_id):
 | 
					 | 
				
			||||||
        formats = []
 | 
					 | 
				
			||||||
        abr = int_or_none(media.get('bitrate'))
 | 
					 | 
				
			||||||
        acodec = media.get('encoding')
 | 
					 | 
				
			||||||
        service = media.get('service')
 | 
					 | 
				
			||||||
        for connection in self._extract_connections(media):
 | 
					 | 
				
			||||||
            conn_formats = self._extract_connection(connection, programme_id)
 | 
					 | 
				
			||||||
            for format in conn_formats:
 | 
					 | 
				
			||||||
                format.update({
 | 
					 | 
				
			||||||
                    'format_id': '%s_%s' % (service, format['format_id']),
 | 
					 | 
				
			||||||
                    'abr': abr,
 | 
					 | 
				
			||||||
                    'acodec': acodec,
 | 
					 | 
				
			||||||
                    'vcodec': 'none',
 | 
					 | 
				
			||||||
                })
 | 
					 | 
				
			||||||
            formats.extend(conn_formats)
 | 
					 | 
				
			||||||
        return formats
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _get_subtitles(self, media, programme_id):
 | 
					    def _get_subtitles(self, media, programme_id):
 | 
				
			||||||
        subtitles = {}
 | 
					        subtitles = {}
 | 
				
			||||||
        for connection in self._extract_connections(media):
 | 
					        for connection in self._extract_connections(media):
 | 
				
			||||||
@@ -382,10 +297,77 @@ class BBCCoUkIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        for media in self._extract_medias(media_selection):
 | 
					        for media in self._extract_medias(media_selection):
 | 
				
			||||||
            kind = media.get('kind')
 | 
					            kind = media.get('kind')
 | 
				
			||||||
            if kind == 'audio':
 | 
					            if kind in ('video', 'audio'):
 | 
				
			||||||
                formats.extend(self._extract_audio(media, programme_id))
 | 
					                bitrate = int_or_none(media.get('bitrate'))
 | 
				
			||||||
            elif kind == 'video':
 | 
					                encoding = media.get('encoding')
 | 
				
			||||||
                formats.extend(self._extract_video(media, programme_id))
 | 
					                service = media.get('service')
 | 
				
			||||||
 | 
					                width = int_or_none(media.get('width'))
 | 
				
			||||||
 | 
					                height = int_or_none(media.get('height'))
 | 
				
			||||||
 | 
					                file_size = int_or_none(media.get('media_file_size'))
 | 
				
			||||||
 | 
					                for connection in self._extract_connections(media):
 | 
				
			||||||
 | 
					                    conn_kind = connection.get('kind')
 | 
				
			||||||
 | 
					                    protocol = connection.get('protocol')
 | 
				
			||||||
 | 
					                    supplier = connection.get('supplier')
 | 
				
			||||||
 | 
					                    href = connection.get('href')
 | 
				
			||||||
 | 
					                    transfer_format = connection.get('transferFormat')
 | 
				
			||||||
 | 
					                    format_id = supplier or conn_kind or protocol
 | 
				
			||||||
 | 
					                    if service:
 | 
				
			||||||
 | 
					                        format_id = '%s_%s' % (service, format_id)
 | 
				
			||||||
 | 
					                    # ASX playlist
 | 
				
			||||||
 | 
					                    if supplier == 'asx':
 | 
				
			||||||
 | 
					                        for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
 | 
				
			||||||
 | 
					                            formats.append({
 | 
				
			||||||
 | 
					                                'url': ref,
 | 
				
			||||||
 | 
					                                'format_id': 'ref%s_%s' % (i, format_id),
 | 
				
			||||||
 | 
					                            })
 | 
				
			||||||
 | 
					                    elif transfer_format == 'dash':
 | 
				
			||||||
 | 
					                        formats.extend(self._extract_mpd_formats(
 | 
				
			||||||
 | 
					                            href, programme_id, mpd_id=format_id, fatal=False))
 | 
				
			||||||
 | 
					                    elif transfer_format == 'hls':
 | 
				
			||||||
 | 
					                        formats.extend(self._extract_m3u8_formats(
 | 
				
			||||||
 | 
					                            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
 | 
				
			||||||
 | 
					                            m3u8_id=format_id, fatal=False))
 | 
				
			||||||
 | 
					                    elif transfer_format == 'hds':
 | 
				
			||||||
 | 
					                        formats.extend(self._extract_f4m_formats(
 | 
				
			||||||
 | 
					                            href, programme_id, f4m_id=format_id, fatal=False))
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        fmt = {
 | 
				
			||||||
 | 
					                            'format_id': format_id,
 | 
				
			||||||
 | 
					                            'filesize': file_size,
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        if kind == 'video':
 | 
				
			||||||
 | 
					                            fmt.update({
 | 
				
			||||||
 | 
					                                'width': width,
 | 
				
			||||||
 | 
					                                'height': height,
 | 
				
			||||||
 | 
					                                'vbr': bitrate,
 | 
				
			||||||
 | 
					                                'vcodec': encoding,
 | 
				
			||||||
 | 
					                            })
 | 
				
			||||||
 | 
					                        else:
 | 
				
			||||||
 | 
					                            fmt.update({
 | 
				
			||||||
 | 
					                                'abr': bitrate,
 | 
				
			||||||
 | 
					                                'acodec': encoding,
 | 
				
			||||||
 | 
					                                'vcodec': 'none',
 | 
				
			||||||
 | 
					                            })
 | 
				
			||||||
 | 
					                        if protocol == 'http':
 | 
				
			||||||
 | 
					                            # Direct link
 | 
				
			||||||
 | 
					                            fmt.update({
 | 
				
			||||||
 | 
					                                'url': href,
 | 
				
			||||||
 | 
					                            })
 | 
				
			||||||
 | 
					                        elif protocol == 'rtmp':
 | 
				
			||||||
 | 
					                            application = connection.get('application', 'ondemand')
 | 
				
			||||||
 | 
					                            auth_string = connection.get('authString')
 | 
				
			||||||
 | 
					                            identifier = connection.get('identifier')
 | 
				
			||||||
 | 
					                            server = connection.get('server')
 | 
				
			||||||
 | 
					                            fmt.update({
 | 
				
			||||||
 | 
					                                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
 | 
				
			||||||
 | 
					                                'play_path': identifier,
 | 
				
			||||||
 | 
					                                'app': '%s?%s' % (application, auth_string),
 | 
				
			||||||
 | 
					                                'page_url': 'http://www.bbc.co.uk',
 | 
				
			||||||
 | 
					                                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
 | 
				
			||||||
 | 
					                                'rtmp_live': False,
 | 
				
			||||||
 | 
					                                'ext': 'flv',
 | 
				
			||||||
 | 
					                            })
 | 
				
			||||||
 | 
					                        formats.append(fmt)
 | 
				
			||||||
            elif kind == 'captions':
 | 
					            elif kind == 'captions':
 | 
				
			||||||
                subtitles = self.extract_subtitles(media, programme_id)
 | 
					                subtitles = self.extract_subtitles(media, programme_id)
 | 
				
			||||||
        return formats, subtitles
 | 
					        return formats, subtitles
 | 
				
			||||||
@@ -820,13 +802,19 @@ class BBCIE(BBCCoUkIE):
 | 
				
			|||||||
                        # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
 | 
					                        # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
 | 
				
			||||||
                        playlist = data_playable.get('otherSettings', {}).get('playlist', {})
 | 
					                        playlist = data_playable.get('otherSettings', {}).get('playlist', {})
 | 
				
			||||||
                        if playlist:
 | 
					                        if playlist:
 | 
				
			||||||
                            for key in ('progressiveDownload', 'streaming'):
 | 
					                            entry = None
 | 
				
			||||||
 | 
					                            for key in ('streaming', 'progressiveDownload'):
 | 
				
			||||||
                                playlist_url = playlist.get('%sUrl' % key)
 | 
					                                playlist_url = playlist.get('%sUrl' % key)
 | 
				
			||||||
                                if not playlist_url:
 | 
					                                if not playlist_url:
 | 
				
			||||||
                                    continue
 | 
					                                    continue
 | 
				
			||||||
                                try:
 | 
					                                try:
 | 
				
			||||||
                                    entries.append(self._extract_from_playlist_sxml(
 | 
					                                    info = self._extract_from_playlist_sxml(
 | 
				
			||||||
                                        playlist_url, playlist_id, timestamp))
 | 
					                                        playlist_url, playlist_id, timestamp)
 | 
				
			||||||
 | 
					                                    if not entry:
 | 
				
			||||||
 | 
					                                        entry = info
 | 
				
			||||||
 | 
					                                    else:
 | 
				
			||||||
 | 
					                                        entry['title'] = info['title']
 | 
				
			||||||
 | 
					                                        entry['formats'].extend(info['formats'])
 | 
				
			||||||
                                except Exception as e:
 | 
					                                except Exception as e:
 | 
				
			||||||
                                    # Some playlist URL may fail with 500, at the same time
 | 
					                                    # Some playlist URL may fail with 500, at the same time
 | 
				
			||||||
                                    # the other one may work fine (e.g.
 | 
					                                    # the other one may work fine (e.g.
 | 
				
			||||||
@@ -834,6 +822,9 @@ class BBCIE(BBCCoUkIE):
 | 
				
			|||||||
                                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
 | 
					                                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
 | 
				
			||||||
                                        continue
 | 
					                                        continue
 | 
				
			||||||
                                    raise
 | 
					                                    raise
 | 
				
			||||||
 | 
					                            if entry:
 | 
				
			||||||
 | 
					                                self._sort_formats(entry['formats'])
 | 
				
			||||||
 | 
					                                entries.append(entry)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if entries:
 | 
					        if entries:
 | 
				
			||||||
            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 | 
					            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user