mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[crunchyroll] Fix building of ass subtitles (reported in #4019)
Parse the XML document instead of using regexes; otherwise, Unicode characters are left unescaped.
This commit is contained in:
		| @@ -109,19 +109,17 @@ class CrunchyrollIE(SubtitlesInfoExtractor): | ||||
|         decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) | ||||
|         return zlib.decompress(decrypted_data) | ||||
|  | ||||
|     def _convert_subtitles_to_srt(self, subtitles): | ||||
|     def _convert_subtitles_to_srt(self, sub_root): | ||||
|         output = '' | ||||
|         for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): | ||||
|             start = start.replace('.', ',') | ||||
|             end = end.replace('.', ',') | ||||
|             text = clean_html(text) | ||||
|             text = text.replace('\\N', '\n') | ||||
|             if not text: | ||||
|                 continue | ||||
|  | ||||
|         for i, event in enumerate(sub_root.findall('./events/event'), 1): | ||||
|             start = event.attrib['start'].replace('.', ',') | ||||
|             end = event.attrib['end'].replace('.', ',') | ||||
|             text = event.attrib['text'].replace('\\N', '\n') | ||||
|             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) | ||||
|         return output | ||||
|  | ||||
|     def _convert_subtitles_to_ass(self, subtitles): | ||||
|     def _convert_subtitles_to_ass(self, sub_root): | ||||
|         output = '' | ||||
|  | ||||
|         def ass_bool(strvalue): | ||||
| @@ -130,10 +128,6 @@ class CrunchyrollIE(SubtitlesInfoExtractor): | ||||
|                 assvalue = '-1' | ||||
|             return assvalue | ||||
|  | ||||
|         sub_root = xml.etree.ElementTree.fromstring(subtitles) | ||||
|         if not sub_root: | ||||
|             return output | ||||
|  | ||||
|         output = '[Script Info]\n' | ||||
|         output += 'Title: %s\n' % sub_root.attrib["title"] | ||||
|         output += 'ScriptType: v4.00+\n' | ||||
| @@ -270,10 +264,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             sub_root = xml.etree.ElementTree.fromstring(subtitle) | ||||
|             if not sub_root: | ||||
|                 subtitles[lang_code] = '' | ||||
|             if sub_format == 'ass': | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle) | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root) | ||||
|             else: | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz