mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[utils] add support for ttml styles
This commit is contained in:
		| @@ -1069,6 +1069,47 @@ The first line | |||||||
| ''' | ''' | ||||||
|         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) |         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) | ||||||
|  |  | ||||||
|  |         dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?> | ||||||
|  | <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> | ||||||
|  |   <head> | ||||||
|  |     <styling> | ||||||
|  |       <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" /> | ||||||
|  |       <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" /> | ||||||
|  |       <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" /> | ||||||
|  |       <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" /> | ||||||
|  |     </styling> | ||||||
|  |   </head> | ||||||
|  |   <body tts:textAlign="center" style="s0"> | ||||||
|  |     <div> | ||||||
|  |       <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p> | ||||||
|  |       <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p> | ||||||
|  |       <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p> | ||||||
|  |       <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> | ||||||
|  |     </div> | ||||||
|  |   </body> | ||||||
|  | </tt>''' | ||||||
|  |         srt_data = '''1 | ||||||
|  | 00:00:02,080 --> 00:00:05,839 | ||||||
|  | <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> | ||||||
|  |  | ||||||
|  | 2 | ||||||
|  | 00:00:02,080 --> 00:00:05,839 | ||||||
|  | <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 | ||||||
|  | </font>part 2</font></b> | ||||||
|  |  | ||||||
|  | 3 | ||||||
|  | 00:00:05,839 --> 00:00:09,560 | ||||||
|  | <u><font color="lime">line 3 | ||||||
|  | part 3</font></u> | ||||||
|  |  | ||||||
|  | 4 | ||||||
|  | 00:00:09,560 --> 00:00:12,359 | ||||||
|  | <i><u><font color="yellow"><font color="lime">inner | ||||||
|  |  </font>style</font></u></i> | ||||||
|  |  | ||||||
|  | ''' | ||||||
|  |         self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data) | ||||||
|  |  | ||||||
|     def test_cli_option(self): |     def test_cli_option(self): | ||||||
|         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) |         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) | ||||||
|         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) |         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) | ||||||
|   | |||||||
| @@ -2511,27 +2511,97 @@ def srt_subtitles_timecode(seconds): | |||||||
|  |  | ||||||
|  |  | ||||||
| def dfxp2srt(dfxp_data): | def dfxp2srt(dfxp_data): | ||||||
|  |     LEGACY_NAMESPACES = ( | ||||||
|  |         ('http://www.w3.org/ns/ttml', [ | ||||||
|  |             'http://www.w3.org/2004/11/ttaf1', | ||||||
|  |             'http://www.w3.org/2006/04/ttaf1', | ||||||
|  |             'http://www.w3.org/2006/10/ttaf1', | ||||||
|  |         ]), | ||||||
|  |         ('http://www.w3.org/ns/ttml#styling', [ | ||||||
|  |             'http://www.w3.org/ns/ttml#style', | ||||||
|  |         ]), | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     SUPPORTED_STYLING = [ | ||||||
|  |         'color', | ||||||
|  |         'fontFamily', | ||||||
|  |         'fontSize', | ||||||
|  |         'fontStyle', | ||||||
|  |         'fontWeight', | ||||||
|  |         'textDecoration' | ||||||
|  |     ] | ||||||
|  |  | ||||||
|     _x = functools.partial(xpath_with_ns, ns_map={ |     _x = functools.partial(xpath_with_ns, ns_map={ | ||||||
|         'ttml': 'http://www.w3.org/ns/ttml', |         'ttml': 'http://www.w3.org/ns/ttml', | ||||||
|         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', |         'tts': 'http://www.w3.org/ns/ttml#styling', | ||||||
|         'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', |  | ||||||
|     }) |     }) | ||||||
|  |  | ||||||
|  |     styles = {} | ||||||
|  |     default_style = {} | ||||||
|  |  | ||||||
|     class TTMLPElementParser(object): |     class TTMLPElementParser(object): | ||||||
|         out = '' |         _out = '' | ||||||
|  |         _unclosed_elements = [] | ||||||
|  |         _applied_styles = [] | ||||||
|  |  | ||||||
|         def start(self, tag, attrib): |         def start(self, tag, attrib): | ||||||
|             if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): |             if tag in (_x('ttml:br'), 'br'): | ||||||
|                 self.out += '\n' |                 self._out += '\n' | ||||||
|  |             else: | ||||||
|  |                 unclosed_elements = [] | ||||||
|  |                 style = {} | ||||||
|  |                 element_style_id = attrib.get('style') | ||||||
|  |                 if default_style: | ||||||
|  |                     style.update(default_style) | ||||||
|  |                 if element_style_id: | ||||||
|  |                     style.update(styles.get(element_style_id, {})) | ||||||
|  |                 for prop in SUPPORTED_STYLING: | ||||||
|  |                     prop_val = attrib.get(_x('tts:' + prop)) | ||||||
|  |                     if prop_val: | ||||||
|  |                         style[prop] = prop_val | ||||||
|  |                 if style: | ||||||
|  |                     font = '' | ||||||
|  |                     for k, v in sorted(style.items()): | ||||||
|  |                         if self._applied_styles and self._applied_styles[-1].get(k) == v: | ||||||
|  |                             continue | ||||||
|  |                         if k == 'color': | ||||||
|  |                             font += ' color="%s"' % v | ||||||
|  |                         elif k == 'fontSize': | ||||||
|  |                             font += ' size="%s"' % v | ||||||
|  |                         elif k == 'fontFamily': | ||||||
|  |                             font += ' face="%s"' % v | ||||||
|  |                         elif k == 'fontWeight' and v == 'bold': | ||||||
|  |                             self._out += '<b>' | ||||||
|  |                             unclosed_elements.append('b') | ||||||
|  |                         elif k == 'fontStyle' and v == 'italic': | ||||||
|  |                             self._out += '<i>' | ||||||
|  |                             unclosed_elements.append('i') | ||||||
|  |                         elif k == 'textDecoration' and v == 'underline': | ||||||
|  |                             self._out += '<u>' | ||||||
|  |                             unclosed_elements.append('u') | ||||||
|  |                     if font: | ||||||
|  |                         self._out += '<font' + font + '>' | ||||||
|  |                         unclosed_elements.append('font') | ||||||
|  |                     applied_style = {} | ||||||
|  |                     if self._applied_styles: | ||||||
|  |                         applied_style.update(self._applied_styles[-1]) | ||||||
|  |                     applied_style.update(style) | ||||||
|  |                     self._applied_styles.append(applied_style) | ||||||
|  |                 self._unclosed_elements.append(unclosed_elements) | ||||||
|  |  | ||||||
|         def end(self, tag): |         def end(self, tag): | ||||||
|             pass |             if tag not in (_x('ttml:br'), 'br'): | ||||||
|  |                 unclosed_elements = self._unclosed_elements.pop() | ||||||
|  |                 for element in reversed(unclosed_elements): | ||||||
|  |                     self._out += '</%s>' % element | ||||||
|  |                 if unclosed_elements and self._applied_styles: | ||||||
|  |                     self._applied_styles.pop() | ||||||
|  |  | ||||||
|         def data(self, data): |         def data(self, data): | ||||||
|             self.out += data |             self._out += data | ||||||
|  |  | ||||||
|         def close(self): |         def close(self): | ||||||
|             return self.out.strip() |             return self._out.strip() | ||||||
|  |  | ||||||
|     def parse_node(node): |     def parse_node(node): | ||||||
|         target = TTMLPElementParser() |         target = TTMLPElementParser() | ||||||
| @@ -2539,13 +2609,45 @@ def dfxp2srt(dfxp_data): | |||||||
|         parser.feed(xml.etree.ElementTree.tostring(node)) |         parser.feed(xml.etree.ElementTree.tostring(node)) | ||||||
|         return parser.close() |         return parser.close() | ||||||
|  |  | ||||||
|  |     for k, v in LEGACY_NAMESPACES: | ||||||
|  |         for ns in v: | ||||||
|  |             dfxp_data = dfxp_data.replace(ns, k) | ||||||
|  |  | ||||||
|     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) |     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) | ||||||
|     out = [] |     out = [] | ||||||
|     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p') |     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') | ||||||
|  |  | ||||||
|     if not paras: |     if not paras: | ||||||
|         raise ValueError('Invalid dfxp/TTML subtitle') |         raise ValueError('Invalid dfxp/TTML subtitle') | ||||||
|  |  | ||||||
|  |     repeat = False | ||||||
|  |     while True: | ||||||
|  |         for style in dfxp.findall(_x('.//ttml:style')): | ||||||
|  |             style_id = style.get('id') | ||||||
|  |             parent_style_id = style.get('style') | ||||||
|  |             if parent_style_id: | ||||||
|  |                 if parent_style_id not in styles: | ||||||
|  |                     repeat = True | ||||||
|  |                     continue | ||||||
|  |                 styles[style_id] = styles[parent_style_id].copy() | ||||||
|  |             for prop in SUPPORTED_STYLING: | ||||||
|  |                 prop_val = style.get(_x('tts:' + prop)) | ||||||
|  |                 if prop_val: | ||||||
|  |                     styles.setdefault(style_id, {})[prop] = prop_val | ||||||
|  |         if repeat: | ||||||
|  |             repeat = False | ||||||
|  |         else: | ||||||
|  |             break | ||||||
|  |  | ||||||
|  |     for p in ('body', 'div'): | ||||||
|  |         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p]) | ||||||
|  |         if ele is None: | ||||||
|  |             continue | ||||||
|  |         style = styles.get(ele.get('style')) | ||||||
|  |         if not style: | ||||||
|  |             continue | ||||||
|  |         default_style.update(style) | ||||||
|  |  | ||||||
|     for para, index in zip(paras, itertools.count(1)): |     for para, index in zip(paras, itertools.count(1)): | ||||||
|         begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) |         begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) | ||||||
|         end_time = parse_dfxp_time_expr(para.attrib.get('end')) |         end_time = parse_dfxp_time_expr(para.attrib.get('end')) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine