mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[arte] Clean up format sorting mess
We now use our standard sorting facilities. As a side effect, it's finally possible to download German videos from French URLs and vice versa.
This commit is contained in:
		| @@ -8,10 +8,10 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
|     unified_strdate, | ||||
|     determine_ext, | ||||
|     get_element_by_id, | ||||
|     get_element_by_attribute, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
| # There are different sources of video in arte.tv, the extraction process  | ||||
| @@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|             'upload_date': unified_strdate(upload_date_str), | ||||
|             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), | ||||
|         } | ||||
|         qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) | ||||
|  | ||||
|         all_formats = [] | ||||
|         formats = [] | ||||
|         for format_id, format_dict in player_info['VSR'].items(): | ||||
|             fmt = dict(format_dict) | ||||
|             fmt['format_id'] = format_id | ||||
|             all_formats.append(fmt) | ||||
|         # Some formats use the m3u8 protocol | ||||
|         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) | ||||
|         def _match_lang(f): | ||||
|             if f.get('versionCode') is None: | ||||
|                 return True | ||||
|             # Return true if that format is in the language of the url | ||||
|             if lang == 'fr': | ||||
|                 l = 'F' | ||||
|             elif lang == 'de': | ||||
|                 l = 'A' | ||||
|             else: | ||||
|                 l = lang | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         # TODO: Might want not to drop videos that do not match the requested language | ||||
|         # but to process those formats with lower precedence | ||||
|         formats = filter(_match_lang, all_formats) | ||||
|         formats = list(formats)  # in python3 filter returns an iterator | ||||
|         if not formats: | ||||
|             # Some videos are only available in the 'Originalversion' | ||||
|             # they aren't tagged as being in French or German | ||||
|             # Sometimes there are neither videos of requested lang code | ||||
|             # nor original version videos available | ||||
|             # For such cases we just take all_formats as is | ||||
|             formats = all_formats | ||||
|             if not formats: | ||||
|                 raise ExtractorError('The formats list is empty') | ||||
|             f = dict(format_dict) | ||||
|             versionCode = f.get('versionCode') | ||||
|  | ||||
|         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: | ||||
|             def sort_key(f): | ||||
|                 return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) | ||||
|         else: | ||||
|             def sort_key(f): | ||||
|                 versionCode = f.get('versionCode') | ||||
|                 if versionCode is None: | ||||
|                     versionCode = '' | ||||
|                 return ( | ||||
|                     # Sort first by quality | ||||
|                     int(f.get('height', -1)), | ||||
|                     int(f.get('bitrate', -1)), | ||||
|                     # The original version with subtitles has lower relevance | ||||
|                     re.match(r'VO-ST(F|A)', versionCode) is None, | ||||
|                     # The version with subtitles for the deaf and hard of hearing (French: sourds/malentendants) also has lower relevance | ||||
|                     re.match(r'VO?(F|A)-STM\1', versionCode) is None, | ||||
|                     # Prefer http downloads over m3u8 | ||||
|                     0 if f['url'].endswith('m3u8') else 1, | ||||
|                 ) | ||||
|         formats = sorted(formats, key=sort_key) | ||||
|         def _format(format_info): | ||||
|             info = { | ||||
|                 'format_id': format_info['format_id'], | ||||
|                 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), | ||||
|                 'width': int_or_none(format_info.get('width')), | ||||
|                 'height': int_or_none(format_info.get('height')), | ||||
|                 'tbr': int_or_none(format_info.get('bitrate')), | ||||
|             langcode = { | ||||
|                 'fr': 'F', | ||||
|                 'de': 'A', | ||||
|             }.get(lang, lang) | ||||
|             lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode] | ||||
|             lang_pref = ( | ||||
|                 None if versionCode is None else ( | ||||
|                     10 if any(re.match(r, versionCode) for r in lang_rexs) | ||||
|                     else -10)) | ||||
|             source_pref = 0 | ||||
|             if versionCode is not None: | ||||
|                 # The original version with subtitles has lower relevance | ||||
|                 if re.match(r'VO-ST(F|A)', versionCode): | ||||
|                     source_pref -= 10 | ||||
|                 # The version with subtitles for the deaf and hard of hearing (French: sourds/malentendants) also has lower relevance | ||||
|                 elif re.match(r'VO?(F|A)-STM\1', versionCode): | ||||
|                     source_pref -= 9 | ||||
|             format = { | ||||
|                 'format_id': format_id, | ||||
|                 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, | ||||
|                 'language_preference': lang_pref, | ||||
|                 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), | ||||
|                 'width': int_or_none(f.get('width')), | ||||
|                 'height': int_or_none(f.get('height')), | ||||
|                 'tbr': int_or_none(f.get('bitrate')), | ||||
|                 'quality': qfunc(f['quality']), | ||||
|                 'source_preference': source_pref, | ||||
|             } | ||||
|             if format_info['mediaType'] == 'rtmp': | ||||
|                 info['url'] = format_info['streamer'] | ||||
|                 info['play_path'] = 'mp4:' + format_info['url'] | ||||
|                 info['ext'] = 'flv' | ||||
|             else: | ||||
|                 info['url'] = format_info['url'] | ||||
|                 info['ext'] = determine_ext(info['url']) | ||||
|             return info | ||||
|         info_dict['formats'] = [_format(f) for f in formats] | ||||
|  | ||||
|             if f.get('mediaType') == 'rtmp': | ||||
|                 format['url'] = f['streamer'] | ||||
|                 format['play_path'] = 'mp4:' + f['url'] | ||||
|                 format['ext'] = 'flv' | ||||
|             else: | ||||
|                 format['url'] = f['url'] | ||||
|  | ||||
|             formats.append(format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info_dict['formats'] = formats | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -87,6 +87,11 @@ class InfoExtractor(object): | ||||
|                                  by this field, regardless of all other values. | ||||
|                                  -1 for default (order by other properties), | ||||
|                                  -2 or smaller for less than default. | ||||
|                     * language_preference  Is this in the correct requested | ||||
|                                  language? | ||||
|                                  10 if it's what the URL is about, | ||||
|                                  -1 for default (don't know), | ||||
|                                  -10 otherwise, other values reserved for now. | ||||
|                     * quality    Order number of the video quality of this | ||||
|                                  format, irrespective of the file format. | ||||
|                                  -1 for default (order by other properties), | ||||
| @@ -615,6 +620,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|             return ( | ||||
|                 preference, | ||||
|                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||
|                 f.get('quality') if f.get('quality') is not None else -1, | ||||
|                 f.get('height') if f.get('height') is not None else -1, | ||||
|                 f.get('width') if f.get('width') is not None else -1, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister