mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor/generic] Add fragment_query extractor arg for DASH and HLS (#5528)
				
					
				
			* `fragment_query`: passthrough any query in generic mpd/m3u8 manifest URLs to their fragments
			* Add support for `extra_param_to_segment_url` to DASH downloader

			Authored by: bashonly, pukkandan
This commit is contained in:
		| @@ -1736,6 +1736,9 @@ The following extractors use this feature: | |||||||
| * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) | * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) | ||||||
| * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off | * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off | ||||||
| 
 | 
 | ||||||
|  | #### generic | ||||||
|  | * `fragment_query`: Pass through any query string in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg | ||||||
|  | 
 | ||||||
| #### funimation | #### funimation | ||||||
| * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` | * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` | ||||||
| * `version`: The video version to extract - `uncut` or `simulcast` | * `version`: The video version to extract - `uncut` or `simulcast` | ||||||
|   | |||||||
| @@ -1,8 +1,9 @@ | |||||||
| import time | import time | ||||||
|  | import urllib.parse | ||||||
| 
 | 
 | ||||||
| from . import get_suitable_downloader | from . import get_suitable_downloader | ||||||
| from .fragment import FragmentFD | from .fragment import FragmentFD | ||||||
| from ..utils import urljoin | from ..utils import update_url_query, urljoin | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class DashSegmentsFD(FragmentFD): | class DashSegmentsFD(FragmentFD): | ||||||
| @@ -40,7 +41,12 @@ class DashSegmentsFD(FragmentFD): | |||||||
|                 self._prepare_and_start_frag_download(ctx, fmt) |                 self._prepare_and_start_frag_download(ctx, fmt) | ||||||
|             ctx['start'] = real_start |             ctx['start'] = real_start | ||||||
| 
 | 
 | ||||||
|             fragments_to_download = self._get_fragments(fmt, ctx) |             extra_query = None | ||||||
|  |             extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') | ||||||
|  |             if extra_param_to_segment_url: | ||||||
|  |                 extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) | ||||||
|  | 
 | ||||||
|  |             fragments_to_download = self._get_fragments(fmt, ctx, extra_query) | ||||||
| 
 | 
 | ||||||
|             if real_downloader: |             if real_downloader: | ||||||
|                 self.to_screen( |                 self.to_screen( | ||||||
| @@ -57,7 +63,7 @@ class DashSegmentsFD(FragmentFD): | |||||||
|         fragments = fragments(ctx) if callable(fragments) else fragments |         fragments = fragments(ctx) if callable(fragments) else fragments | ||||||
|         return [next(iter(fragments))] if self.params.get('test') else fragments |         return [next(iter(fragments))] if self.params.get('test') else fragments | ||||||
| 
 | 
 | ||||||
|     def _get_fragments(self, fmt, ctx): |     def _get_fragments(self, fmt, ctx, extra_query): | ||||||
|         fragment_base_url = fmt.get('fragment_base_url') |         fragment_base_url = fmt.get('fragment_base_url') | ||||||
|         fragments = self._resolve_fragments(fmt['fragments'], ctx) |         fragments = self._resolve_fragments(fmt['fragments'], ctx) | ||||||
| 
 | 
 | ||||||
| @@ -70,6 +76,8 @@ class DashSegmentsFD(FragmentFD): | |||||||
|             if not fragment_url: |             if not fragment_url: | ||||||
|                 assert fragment_base_url |                 assert fragment_base_url | ||||||
|                 fragment_url = urljoin(fragment_base_url, fragment['path']) |                 fragment_url = urljoin(fragment_base_url, fragment['path']) | ||||||
|  |             if extra_query: | ||||||
|  |                 fragment_url = update_url_query(fragment_url, extra_query) | ||||||
| 
 | 
 | ||||||
|             yield { |             yield { | ||||||
|                 'frag_index': frag_index, |                 'frag_index': frag_index, | ||||||
|   | |||||||
| @@ -2189,6 +2189,13 @@ class GenericIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}') |         self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}') | ||||||
| 
 | 
 | ||||||
|  |     def _fragment_query(self, url): | ||||||
|  |         if self._configuration_arg('fragment_query'): | ||||||
|  |             query_string = urllib.parse.urlparse(url).query | ||||||
|  |             if query_string: | ||||||
|  |                 return {'extra_param_to_segment_url': query_string} | ||||||
|  |         return {} | ||||||
|  | 
 | ||||||
|     def _extract_rss(self, url, video_id, doc): |     def _extract_rss(self, url, video_id, doc): | ||||||
|         NS_MAP = { |         NS_MAP = { | ||||||
|             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', |             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', | ||||||
| @@ -2351,8 +2358,10 @@ class GenericIE(InfoExtractor): | |||||||
|             subtitles = {} |             subtitles = {} | ||||||
|             if format_id.endswith('mpegurl'): |             if format_id.endswith('mpegurl'): | ||||||
|                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) |                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) | ||||||
|  |                 info_dict.update(self._fragment_query(url)) | ||||||
|             elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): |             elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): | ||||||
|                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) |                 formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) | ||||||
|  |                 info_dict.update(self._fragment_query(url)) | ||||||
|             elif format_id == 'f4m': |             elif format_id == 'f4m': | ||||||
|                 formats = self._extract_f4m_formats(url, video_id, headers=headers) |                 formats = self._extract_f4m_formats(url, video_id, headers=headers) | ||||||
|             else: |             else: | ||||||
| @@ -2379,6 +2388,7 @@ class GenericIE(InfoExtractor): | |||||||
|         if first_bytes.startswith(b'#EXTM3U'): |         if first_bytes.startswith(b'#EXTM3U'): | ||||||
|             self.report_detected('M3U playlist') |             self.report_detected('M3U playlist') | ||||||
|             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') |             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') | ||||||
|  |             info_dict.update(self._fragment_query(url)) | ||||||
|             return info_dict |             return info_dict | ||||||
| 
 | 
 | ||||||
|         # Maybe it's a direct link to a video? |         # Maybe it's a direct link to a video? | ||||||
| @@ -2429,6 +2439,7 @@ class GenericIE(InfoExtractor): | |||||||
|                     doc, |                     doc, | ||||||
|                     mpd_base_url=full_response.geturl().rpartition('/')[0], |                     mpd_base_url=full_response.geturl().rpartition('/')[0], | ||||||
|                     mpd_url=url) |                     mpd_url=url) | ||||||
|  |                 info_dict.update(self._fragment_query(url)) | ||||||
|                 self.report_detected('DASH manifest') |                 self.report_detected('DASH manifest') | ||||||
|                 return info_dict |                 return info_dict | ||||||
|             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): |             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): | ||||||
| @@ -2541,7 +2552,10 @@ class GenericIE(InfoExtractor): | |||||||
|                         m3u8_id='hls', fatal=False) |                         m3u8_id='hls', fatal=False) | ||||||
|                     formats.extend(fmts) |                     formats.extend(fmts) | ||||||
|                     self._merge_subtitles(subs, target=subtitles) |                     self._merge_subtitles(subs, target=subtitles) | ||||||
|                 else: |                 for fmt in formats: | ||||||
|  |                     fmt.update(self._fragment_query(src)) | ||||||
|  | 
 | ||||||
|  |                 if not formats: | ||||||
|                     formats.append({ |                     formats.append({ | ||||||
|                         'url': src, |                         'url': src, | ||||||
|                         'ext': (mimetype2ext(src_type) |                         'ext': (mimetype2ext(src_type) | ||||||
| @@ -2776,8 +2790,10 @@ class GenericIE(InfoExtractor): | |||||||
|                 return [self._extract_xspf_playlist(video_url, video_id)] |                 return [self._extract_xspf_playlist(video_url, video_id)] | ||||||
|             elif ext == 'm3u8': |             elif ext == 'm3u8': | ||||||
|                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers) |                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers) | ||||||
|  |                 entry_info_dict.update(self._fragment_query(video_url)) | ||||||
|             elif ext == 'mpd': |             elif ext == 'mpd': | ||||||
|                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers) |                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers) | ||||||
|  |                 entry_info_dict.update(self._fragment_query(video_url)) | ||||||
|             elif ext == 'f4m': |             elif ext == 'f4m': | ||||||
|                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers) |                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers) | ||||||
|             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: |             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly