mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Merge pull request #8348 from remitamine/dfxp2srt-text
[utils] fix dfxp2srt text extraction (fixes #8055)
This commit is contained in:
		| @@ -2017,20 +2017,27 @@ def dfxp2srt(dfxp_data): | |||||||
|         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', |         'ttaf1': 'http://www.w3.org/2006/10/ttaf1', | ||||||
|     }) |     }) | ||||||
|  |  | ||||||
|  |     class TTMLPElementParser: | ||||||
|  |         out = '' | ||||||
|  |  | ||||||
|  |         def start(self, tag, attrib): | ||||||
|  |             if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): | ||||||
|  |                 self.out += '\n' | ||||||
|  |  | ||||||
|  |         def end(self, tag): | ||||||
|  |             pass | ||||||
|  |  | ||||||
|  |         def data(self, data): | ||||||
|  |             self.out += data | ||||||
|  |  | ||||||
|  |         def close(self): | ||||||
|  |             return self.out.strip() | ||||||
|  |  | ||||||
|     def parse_node(node): |     def parse_node(node): | ||||||
|         str_or_empty = functools.partial(str_or_none, default='') |         target = TTMLPElementParser() | ||||||
|  |         parser = xml.etree.ElementTree.XMLParser(target=target) | ||||||
|         out = str_or_empty(node.text) |         parser.feed(xml.etree.ElementTree.tostring(node)) | ||||||
|  |         return parser.close() | ||||||
|         for child in node: |  | ||||||
|             if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): |  | ||||||
|                 out += '\n' + str_or_empty(child.tail) |  | ||||||
|             elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'): |  | ||||||
|                 out += str_or_empty(parse_node(child)) |  | ||||||
|             else: |  | ||||||
|                 out += str_or_empty(xml.etree.ElementTree.tostring(child)) |  | ||||||
|  |  | ||||||
|         return out |  | ||||||
|  |  | ||||||
|     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) |     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) | ||||||
|     out = [] |     out = [] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 remitamine
					remitamine