mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-03 16:15:14 +00:00 
			
		
		
		
	[fragment] Read downloaded fragments only when needed (#3069)
Authored by: Lesmiscore
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							d69e55c1d8
						
					
				
				
					commit
					d71fd41249
				
			@@ -133,19 +133,19 @@ class FragmentFD(FileDownloader):
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
 | 
					        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
 | 
				
			||||||
        if not success:
 | 
					        if not success:
 | 
				
			||||||
            return False, None
 | 
					            return False
 | 
				
			||||||
        if fragment_info_dict.get('filetime'):
 | 
					        if fragment_info_dict.get('filetime'):
 | 
				
			||||||
            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
 | 
					            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
 | 
				
			||||||
        ctx['fragment_filename_sanitized'] = fragment_filename
 | 
					        ctx['fragment_filename_sanitized'] = fragment_filename
 | 
				
			||||||
        try:
 | 
					        return True
 | 
				
			||||||
            return True, self._read_fragment(ctx)
 | 
					 | 
				
			||||||
        except FileNotFoundError:
 | 
					 | 
				
			||||||
            if not info_dict.get('is_live'):
 | 
					 | 
				
			||||||
                raise
 | 
					 | 
				
			||||||
            return False, None
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _read_fragment(self, ctx):
 | 
					    def _read_fragment(self, ctx):
 | 
				
			||||||
        down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
 | 
					        try:
 | 
				
			||||||
 | 
					            down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
 | 
				
			||||||
 | 
					        except FileNotFoundError:
 | 
				
			||||||
 | 
					            if ctx.get('live'):
 | 
				
			||||||
 | 
					                return None
 | 
				
			||||||
 | 
					            raise
 | 
				
			||||||
        ctx['fragment_filename_sanitized'] = frag_sanitized
 | 
					        ctx['fragment_filename_sanitized'] = frag_sanitized
 | 
				
			||||||
        frag_content = down.read()
 | 
					        frag_content = down.read()
 | 
				
			||||||
        down.close()
 | 
					        down.close()
 | 
				
			||||||
@@ -457,7 +457,7 @@ class FragmentFD(FileDownloader):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        def download_fragment(fragment, ctx):
 | 
					        def download_fragment(fragment, ctx):
 | 
				
			||||||
            if not interrupt_trigger[0]:
 | 
					            if not interrupt_trigger[0]:
 | 
				
			||||||
                return False, fragment['frag_index']
 | 
					                return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            frag_index = ctx['fragment_index'] = fragment['frag_index']
 | 
					            frag_index = ctx['fragment_index'] = fragment['frag_index']
 | 
				
			||||||
            ctx['last_error'] = None
 | 
					            ctx['last_error'] = None
 | 
				
			||||||
@@ -467,14 +467,12 @@ class FragmentFD(FileDownloader):
 | 
				
			|||||||
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
 | 
					                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Never skip the first fragment
 | 
					            # Never skip the first fragment
 | 
				
			||||||
            fatal = is_fatal(fragment.get('index') or (frag_index - 1))
 | 
					            fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
 | 
				
			||||||
            count, frag_content = 0, None
 | 
					 | 
				
			||||||
            while count <= fragment_retries:
 | 
					            while count <= fragment_retries:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
 | 
					                    if self._download_fragment(ctx, fragment['url'], info_dict, headers):
 | 
				
			||||||
                    if not success:
 | 
					                        break
 | 
				
			||||||
                        return False, frag_index
 | 
					                    return
 | 
				
			||||||
                    break
 | 
					 | 
				
			||||||
                except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
 | 
					                except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
 | 
				
			||||||
                    # Unavailable (possibly temporary) fragments may be served.
 | 
					                    # Unavailable (possibly temporary) fragments may be served.
 | 
				
			||||||
                    # First we try to retry then either skip or abort.
 | 
					                    # First we try to retry then either skip or abort.
 | 
				
			||||||
@@ -491,13 +489,9 @@ class FragmentFD(FileDownloader):
 | 
				
			|||||||
                        break
 | 
					                        break
 | 
				
			||||||
                    raise
 | 
					                    raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if count > fragment_retries:
 | 
					            if count > fragment_retries and fatal:
 | 
				
			||||||
                if not fatal:
 | 
					 | 
				
			||||||
                    return False, frag_index
 | 
					 | 
				
			||||||
                ctx['dest_stream'].close()
 | 
					                ctx['dest_stream'].close()
 | 
				
			||||||
                self.report_error('Giving up after %s fragment retries' % fragment_retries)
 | 
					                self.report_error('Giving up after %s fragment retries' % fragment_retries)
 | 
				
			||||||
                return False, frag_index
 | 
					 | 
				
			||||||
            return frag_content, frag_index
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def append_fragment(frag_content, frag_index, ctx):
 | 
					        def append_fragment(frag_content, frag_index, ctx):
 | 
				
			||||||
            if not frag_content:
 | 
					            if not frag_content:
 | 
				
			||||||
@@ -520,23 +514,23 @@ class FragmentFD(FileDownloader):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            def _download_fragment(fragment):
 | 
					            def _download_fragment(fragment):
 | 
				
			||||||
                ctx_copy = ctx.copy()
 | 
					                ctx_copy = ctx.copy()
 | 
				
			||||||
                frag_content, frag_index = download_fragment(fragment, ctx_copy)
 | 
					                download_fragment(fragment, ctx_copy)
 | 
				
			||||||
                return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
 | 
					                return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
 | 
					            self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
 | 
				
			||||||
            with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
 | 
					            with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
 | 
				
			||||||
                for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
 | 
					                for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
 | 
				
			||||||
                    ctx['fragment_filename_sanitized'] = frag_filename
 | 
					                    ctx['fragment_filename_sanitized'] = frag_filename
 | 
				
			||||||
                    ctx['fragment_index'] = frag_index
 | 
					                    ctx['fragment_index'] = frag_index
 | 
				
			||||||
                    result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
 | 
					                    result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
 | 
				
			||||||
                    if not result:
 | 
					                    if not result:
 | 
				
			||||||
                        return False
 | 
					                        return False
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            for fragment in fragments:
 | 
					            for fragment in fragments:
 | 
				
			||||||
                if not interrupt_trigger[0]:
 | 
					                if not interrupt_trigger[0]:
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
                frag_content, frag_index = download_fragment(fragment, ctx)
 | 
					                download_fragment(fragment, ctx)
 | 
				
			||||||
                result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
 | 
					                result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
 | 
				
			||||||
                if not result:
 | 
					                if not result:
 | 
				
			||||||
                    return False
 | 
					                    return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -263,9 +263,11 @@ class IsmFD(FragmentFD):
 | 
				
			|||||||
            count = 0
 | 
					            count = 0
 | 
				
			||||||
            while count <= fragment_retries:
 | 
					            while count <= fragment_retries:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
 | 
					                    success = self._download_fragment(ctx, segment['url'], info_dict)
 | 
				
			||||||
                    if not success:
 | 
					                    if not success:
 | 
				
			||||||
                        return False
 | 
					                        return False
 | 
				
			||||||
 | 
					                    frag_content = self._read_fragment(ctx)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if not extra_state['ism_track_written']:
 | 
					                    if not extra_state['ism_track_written']:
 | 
				
			||||||
                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
 | 
					                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
 | 
				
			||||||
                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
 | 
					                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -171,9 +171,10 @@ body > figure > img {
 | 
				
			|||||||
                assert fragment_base_url
 | 
					                assert fragment_base_url
 | 
				
			||||||
                fragment_url = urljoin(fragment_base_url, fragment['path'])
 | 
					                fragment_url = urljoin(fragment_base_url, fragment['path'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
 | 
					            success = self._download_fragment(ctx, fragment_url, info_dict)
 | 
				
			||||||
            if not success:
 | 
					            if not success:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					            frag_content = self._read_fragment(ctx)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            mime_type = b'image/jpeg'
 | 
					            mime_type = b'image/jpeg'
 | 
				
			||||||
            if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
 | 
					            if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -115,9 +115,10 @@ class YoutubeLiveChatFD(FragmentFD):
 | 
				
			|||||||
            count = 0
 | 
					            count = 0
 | 
				
			||||||
            while count <= fragment_retries:
 | 
					            while count <= fragment_retries:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    success, raw_fragment = dl_fragment(url, request_data, headers)
 | 
					                    success = dl_fragment(url, request_data, headers)
 | 
				
			||||||
                    if not success:
 | 
					                    if not success:
 | 
				
			||||||
                        return False, None, None, None
 | 
					                        return False, None, None, None
 | 
				
			||||||
 | 
					                    raw_fragment = self._read_fragment(ctx)
 | 
				
			||||||
                    try:
 | 
					                    try:
 | 
				
			||||||
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 | 
					                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 | 
				
			||||||
                    except RegexNotFoundError:
 | 
					                    except RegexNotFoundError:
 | 
				
			||||||
@@ -145,9 +146,10 @@ class YoutubeLiveChatFD(FragmentFD):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        self._prepare_and_start_frag_download(ctx, info_dict)
 | 
					        self._prepare_and_start_frag_download(ctx, info_dict)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        success, raw_fragment = dl_fragment(info_dict['url'])
 | 
					        success = dl_fragment(info_dict['url'])
 | 
				
			||||||
        if not success:
 | 
					        if not success:
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					        raw_fragment = self._read_fragment(ctx)
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 | 
					            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
 | 
				
			||||||
        except RegexNotFoundError:
 | 
					        except RegexNotFoundError:
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user