mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[utils] add support for ttml styles
This commit is contained in:
		| @@ -1069,6 +1069,47 @@ The first line | ||||
| ''' | ||||
|         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) | ||||
|  | ||||
|         dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?> | ||||
| <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> | ||||
|   <head> | ||||
|     <styling> | ||||
|       <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" /> | ||||
|       <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" /> | ||||
|       <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" /> | ||||
|       <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" /> | ||||
|     </styling> | ||||
|   </head> | ||||
|   <body tts:textAlign="center" style="s0"> | ||||
|     <div> | ||||
|       <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p> | ||||
|       <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p> | ||||
|       <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p> | ||||
|       <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> | ||||
|     </div> | ||||
|   </body> | ||||
| </tt>''' | ||||
|         srt_data = '''1 | ||||
| 00:00:02,080 --> 00:00:05,839 | ||||
| <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> | ||||
|  | ||||
| 2 | ||||
| 00:00:02,080 --> 00:00:05,839 | ||||
| <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 | ||||
| </font>part 2</font></b> | ||||
|  | ||||
| 3 | ||||
| 00:00:05,839 --> 00:00:09,560 | ||||
| <u><font color="lime">line 3 | ||||
| part 3</font></u> | ||||
|  | ||||
| 4 | ||||
| 00:00:09,560 --> 00:00:12,359 | ||||
| <i><u><font color="yellow"><font color="lime">inner | ||||
|  </font>style</font></u></i> | ||||
|  | ||||
| ''' | ||||
|         self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data) | ||||
|  | ||||
|     def test_cli_option(self): | ||||
|         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) | ||||
|         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) | ||||
|   | ||||
| @@ -2511,27 +2511,97 @@ def srt_subtitles_timecode(seconds): | ||||
|  | ||||
|  | ||||
| def dfxp2srt(dfxp_data): | ||||
|     LEGACY_NAMESPACES = ( | ||||
|         ('http://www.w3.org/ns/ttml', [ | ||||
|             'http://www.w3.org/2004/11/ttaf1', | ||||
|             'http://www.w3.org/2006/04/ttaf1', | ||||
|             'http://www.w3.org/2006/10/ttaf1', | ||||
|         ]), | ||||
|         ('http://www.w3.org/ns/ttml#styling', [ | ||||
|             'http://www.w3.org/ns/ttml#style', | ||||
|         ]), | ||||
|     ) | ||||
|  | ||||
|     SUPPORTED_STYLING = [ | ||||
|         'color', | ||||
|         'fontFamily', | ||||
|         'fontSize', | ||||
|         'fontStyle', | ||||
|         'fontWeight', | ||||
|         'textDecoration' | ||||
|     ] | ||||
|  | ||||
|     _x = functools.partial(xpath_with_ns, ns_map={ | ||||
|         'ttml': 'http://www.w3.org/ns/ttml', | ||||
|         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', | ||||
|         'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', | ||||
|         'tts': 'http://www.w3.org/ns/ttml#styling', | ||||
|     }) | ||||
|  | ||||
|     styles = {} | ||||
|     default_style = {} | ||||
|  | ||||
|     class TTMLPElementParser(object): | ||||
|         out = '' | ||||
|         _out = '' | ||||
|         _unclosed_elements = [] | ||||
|         _applied_styles = [] | ||||
|  | ||||
|         def start(self, tag, attrib): | ||||
|             if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): | ||||
|                 self.out += '\n' | ||||
|             if tag in (_x('ttml:br'), 'br'): | ||||
|                 self._out += '\n' | ||||
|             else: | ||||
|                 unclosed_elements = [] | ||||
|                 style = {} | ||||
|                 element_style_id = attrib.get('style') | ||||
|                 if default_style: | ||||
|                     style.update(default_style) | ||||
|                 if element_style_id: | ||||
|                     style.update(styles.get(element_style_id, {})) | ||||
|                 for prop in SUPPORTED_STYLING: | ||||
|                     prop_val = attrib.get(_x('tts:' + prop)) | ||||
|                     if prop_val: | ||||
|                         style[prop] = prop_val | ||||
|                 if style: | ||||
|                     font = '' | ||||
|                     for k, v in sorted(style.items()): | ||||
|                         if self._applied_styles and self._applied_styles[-1].get(k) == v: | ||||
|                             continue | ||||
|                         if k == 'color': | ||||
|                             font += ' color="%s"' % v | ||||
|                         elif k == 'fontSize': | ||||
|                             font += ' size="%s"' % v | ||||
|                         elif k == 'fontFamily': | ||||
|                             font += ' face="%s"' % v | ||||
|                         elif k == 'fontWeight' and v == 'bold': | ||||
|                             self._out += '<b>' | ||||
|                             unclosed_elements.append('b') | ||||
|                         elif k == 'fontStyle' and v == 'italic': | ||||
|                             self._out += '<i>' | ||||
|                             unclosed_elements.append('i') | ||||
|                         elif k == 'textDecoration' and v == 'underline': | ||||
|                             self._out += '<u>' | ||||
|                             unclosed_elements.append('u') | ||||
|                     if font: | ||||
|                         self._out += '<font' + font + '>' | ||||
|                         unclosed_elements.append('font') | ||||
|                     applied_style = {} | ||||
|                     if self._applied_styles: | ||||
|                         applied_style.update(self._applied_styles[-1]) | ||||
|                     applied_style.update(style) | ||||
|                     self._applied_styles.append(applied_style) | ||||
|                 self._unclosed_elements.append(unclosed_elements) | ||||
|  | ||||
|         def end(self, tag): | ||||
|             pass | ||||
|             if tag not in (_x('ttml:br'), 'br'): | ||||
|                 unclosed_elements = self._unclosed_elements.pop() | ||||
|                 for element in reversed(unclosed_elements): | ||||
|                     self._out += '</%s>' % element | ||||
|                 if unclosed_elements and self._applied_styles: | ||||
|                     self._applied_styles.pop() | ||||
|  | ||||
|         def data(self, data): | ||||
|             self.out += data | ||||
|             self._out += data | ||||
|  | ||||
|         def close(self): | ||||
|             return self.out.strip() | ||||
|             return self._out.strip() | ||||
|  | ||||
|     def parse_node(node): | ||||
|         target = TTMLPElementParser() | ||||
| @@ -2539,13 +2609,45 @@ def dfxp2srt(dfxp_data): | ||||
|         parser.feed(xml.etree.ElementTree.tostring(node)) | ||||
|         return parser.close() | ||||
|  | ||||
|     for k, v in LEGACY_NAMESPACES: | ||||
|         for ns in v: | ||||
|             dfxp_data = dfxp_data.replace(ns, k) | ||||
|  | ||||
|     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) | ||||
|     out = [] | ||||
|     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p') | ||||
|     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') | ||||
|  | ||||
|     if not paras: | ||||
|         raise ValueError('Invalid dfxp/TTML subtitle') | ||||
|  | ||||
|     repeat = False | ||||
|     while True: | ||||
|         for style in dfxp.findall(_x('.//ttml:style')): | ||||
|             style_id = style.get('id') | ||||
|             parent_style_id = style.get('style') | ||||
|             if parent_style_id: | ||||
|                 if parent_style_id not in styles: | ||||
|                     repeat = True | ||||
|                     continue | ||||
|                 styles[style_id] = styles[parent_style_id].copy() | ||||
|             for prop in SUPPORTED_STYLING: | ||||
|                 prop_val = style.get(_x('tts:' + prop)) | ||||
|                 if prop_val: | ||||
|                     styles.setdefault(style_id, {})[prop] = prop_val | ||||
|         if repeat: | ||||
|             repeat = False | ||||
|         else: | ||||
|             break | ||||
|  | ||||
|     for p in ('body', 'div'): | ||||
|         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p]) | ||||
|         if ele is None: | ||||
|             continue | ||||
|         style = styles.get(ele.get('style')) | ||||
|         if not style: | ||||
|             continue | ||||
|         default_style.update(style) | ||||
|  | ||||
|     for para, index in zip(paras, itertools.count(1)): | ||||
|         begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) | ||||
|         end_time = parse_dfxp_time_expr(para.attrib.get('end')) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Remita Amine
					Remita Amine