mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[arte] Clean up format sorting mess
We now use our standard sorting facilities. As a side effect, it's finally possible to download German videos from French URLs and vice versa.
This commit is contained in:
		| @@ -8,10 +8,10 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     determine_ext, |  | ||||||
|     get_element_by_id, |     get_element_by_id, | ||||||
|     get_element_by_attribute, |     get_element_by_attribute, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     qualities, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| # There are different sources of video in arte.tv, the extraction process  | # There are different sources of video in arte.tv, the extraction process  | ||||||
| @@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor): | |||||||
|             'upload_date': unified_strdate(upload_date_str), |             'upload_date': unified_strdate(upload_date_str), | ||||||
|             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), |             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), | ||||||
|         } |         } | ||||||
|  |         qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) | ||||||
|  |  | ||||||
|         all_formats = [] |         formats = [] | ||||||
|         for format_id, format_dict in player_info['VSR'].items(): |         for format_id, format_dict in player_info['VSR'].items(): | ||||||
|             fmt = dict(format_dict) |             f = dict(format_dict) | ||||||
|             fmt['format_id'] = format_id |  | ||||||
|             all_formats.append(fmt) |  | ||||||
|         # Some formats use the m3u8 protocol |  | ||||||
|         all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) |  | ||||||
|         def _match_lang(f): |  | ||||||
|             if f.get('versionCode') is None: |  | ||||||
|                 return True |  | ||||||
|             # Return true if that format is in the language of the url |  | ||||||
|             if lang == 'fr': |  | ||||||
|                 l = 'F' |  | ||||||
|             elif lang == 'de': |  | ||||||
|                 l = 'A' |  | ||||||
|             else: |  | ||||||
|                 l = lang |  | ||||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] |  | ||||||
|             return any(re.match(r, f['versionCode']) for r in regexes) |  | ||||||
|         # Some formats may not be in the same language as the url |  | ||||||
|         # TODO: Might want not to drop videos that does not match requested language |  | ||||||
|         # but to process those formats with lower precedence |  | ||||||
|         formats = filter(_match_lang, all_formats) |  | ||||||
|         formats = list(formats)  # in python3 filter returns an iterator |  | ||||||
|         if not formats: |  | ||||||
|             # Some videos are only available in the 'Originalversion' |  | ||||||
|             # they aren't tagged as being in French or German |  | ||||||
|             # Sometimes there are neither videos of requested lang code |  | ||||||
|             # nor original version videos available |  | ||||||
|             # For such cases we just take all_formats as is |  | ||||||
|             formats = all_formats |  | ||||||
|             if not formats: |  | ||||||
|                 raise ExtractorError('The formats list is empty') |  | ||||||
|  |  | ||||||
|         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: |  | ||||||
|             def sort_key(f): |  | ||||||
|                 return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) |  | ||||||
|         else: |  | ||||||
|             def sort_key(f): |  | ||||||
|             versionCode = f.get('versionCode') |             versionCode = f.get('versionCode') | ||||||
|                 if versionCode is None: |  | ||||||
|                     versionCode = '' |  | ||||||
|                 return ( |  | ||||||
|                     # Sort first by quality |  | ||||||
|                     int(f.get('height', -1)), |  | ||||||
|                     int(f.get('bitrate', -1)), |  | ||||||
|                     # The original version with subtitles has lower relevance |  | ||||||
|                     re.match(r'VO-ST(F|A)', versionCode) is None, |  | ||||||
|                     # The version with sourds/mal subtitles has also lower relevance |  | ||||||
|                     re.match(r'VO?(F|A)-STM\1', versionCode) is None, |  | ||||||
|                     # Prefer http downloads over m3u8 |  | ||||||
|                     0 if f['url'].endswith('m3u8') else 1, |  | ||||||
|                 ) |  | ||||||
|         formats = sorted(formats, key=sort_key) |  | ||||||
|         def _format(format_info): |  | ||||||
|             info = { |  | ||||||
|                 'format_id': format_info['format_id'], |  | ||||||
|                 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), |  | ||||||
|                 'width': int_or_none(format_info.get('width')), |  | ||||||
|                 'height': int_or_none(format_info.get('height')), |  | ||||||
|                 'tbr': int_or_none(format_info.get('bitrate')), |  | ||||||
|             } |  | ||||||
|             if format_info['mediaType'] == 'rtmp': |  | ||||||
|                 info['url'] = format_info['streamer'] |  | ||||||
|                 info['play_path'] = 'mp4:' + format_info['url'] |  | ||||||
|                 info['ext'] = 'flv' |  | ||||||
|             else: |  | ||||||
|                 info['url'] = format_info['url'] |  | ||||||
|                 info['ext'] = determine_ext(info['url']) |  | ||||||
|             return info |  | ||||||
|         info_dict['formats'] = [_format(f) for f in formats] |  | ||||||
|  |  | ||||||
|  |             langcode = { | ||||||
|  |                 'fr': 'F', | ||||||
|  |                 'de': 'A', | ||||||
|  |             }.get(lang, lang) | ||||||
|  |             lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode] | ||||||
|  |             lang_pref = ( | ||||||
|  |                 None if versionCode is None else ( | ||||||
|  |                     10 if any(re.match(r, versionCode) for r in lang_rexs) | ||||||
|  |                     else -10)) | ||||||
|  |             source_pref = 0 | ||||||
|  |             if versionCode is not None: | ||||||
|  |                 # The original version with subtitles has lower relevance | ||||||
|  |                 if re.match(r'VO-ST(F|A)', versionCode): | ||||||
|  |                     source_pref -= 10 | ||||||
|  |                 # The version with subtitles for the deaf and hard of hearing (sourds/malentendants) also has lower relevance | ||||||
|  |                 elif re.match(r'VO?(F|A)-STM\1', versionCode): | ||||||
|  |                     source_pref -= 9 | ||||||
|  |             format = { | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, | ||||||
|  |                 'language_preference': lang_pref, | ||||||
|  |                 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), | ||||||
|  |                 'width': int_or_none(f.get('width')), | ||||||
|  |                 'height': int_or_none(f.get('height')), | ||||||
|  |                 'tbr': int_or_none(f.get('bitrate')), | ||||||
|  |                 'quality': qfunc(f['quality']), | ||||||
|  |                 'source_preference': source_pref, | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             if f.get('mediaType') == 'rtmp': | ||||||
|  |                 format['url'] = f['streamer'] | ||||||
|  |                 format['play_path'] = 'mp4:' + f['url'] | ||||||
|  |                 format['ext'] = 'flv' | ||||||
|  |             else: | ||||||
|  |                 format['url'] = f['url'] | ||||||
|  |  | ||||||
|  |             formats.append(format) | ||||||
|  |  | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         info_dict['formats'] = formats | ||||||
|         return info_dict |         return info_dict | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -87,6 +87,11 @@ class InfoExtractor(object): | |||||||
|                                  by this field, regardless of all other values. |                                  by this field, regardless of all other values. | ||||||
|                                  -1 for default (order by other properties), |                                  -1 for default (order by other properties), | ||||||
|                                  -2 or smaller for less than default. |                                  -2 or smaller for less than default. | ||||||
|  |                     * language_preference  Is this in the correct requested | ||||||
|  |                                  language? | ||||||
|  |                                  10 if it's what the URL is about, | ||||||
|  |                                  -1 for default (don't know), | ||||||
|  |                                  -10 otherwise, other values reserved for now. | ||||||
|                     * quality    Order number of the video quality of this |                     * quality    Order number of the video quality of this | ||||||
|                                  format, irrespective of the file format. |                                  format, irrespective of the file format. | ||||||
|                                  -1 for default (order by other properties), |                                  -1 for default (order by other properties), | ||||||
| @@ -615,6 +620,7 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|             return ( |             return ( | ||||||
|                 preference, |                 preference, | ||||||
|  |                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||||
|                 f.get('quality') if f.get('quality') is not None else -1, |                 f.get('quality') if f.get('quality') is not None else -1, | ||||||
|                 f.get('height') if f.get('height') is not None else -1, |                 f.get('height') if f.get('height') is not None else -1, | ||||||
|                 f.get('width') if f.get('width') is not None else -1, |                 f.get('width') if f.get('width') is not None else -1, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister