From e07c63f2aac5c35ec3aaed8afb25911feec49d21 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Thu, 4 Nov 2021 23:53:30 +0100 Subject: [PATCH 01/15] add live chat extraction to separate branch --- yt_dlp/extractor/twitch.py | 80 +++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index a36de3c01..9357e1719 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -542,6 +542,73 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration): } for path in images], } + def _download_chat(self, vod_id): + live_chat = list() + + request_url = f'https://api.twitch.tv/v5/videos/{vod_id}/comments' + query_params = { + 'client_id': self._CLIENT_ID + } + + self.to_screen('Downloading chat fragment JSONs') + + # TODO: question: is it OK to use this config value for this purpose? + max_retries = self.get_param('extractor_retries') + retries = 0 + pagenum = 1 + while True: + response_json = self._download_json( + request_url, + vod_id, + fatal=False, + note='Downloading chat fragment JSON page %d' % pagenum, + errnote='Live chat fragment download failed.', + query=query_params) + + if response_json is False: + self.report_warning(f'Unable to fetch next chat history fragment. {retries}. try of {max_retries}') + + if retries < max_retries: + retries += 1 + continue + else: + self.report_warning('Chat history download failed: retry limit reached') + # TODO: when this happens, should I forget a partial chat history, or is it better to keep it too? + # I think if I keep it, it might be better to persist a warning that it is incomplete + # live_chat.clear() + break + + live_chat.extend(response_json.get('comments') or []) + next_fragment_cursor = str_or_none(response_json.get('_next')) + + if next_fragment_cursor is None: + break + + query_params['cursor'] = next_fragment_cursor + pagenum += 1 + + chat_history_length = len(live_chat) + + self.to_screen('Extracted %d chat messages' % chat_history_length) + if chat_history_length == 0: + return None + + return self._extract_chat(live_chat, request_url) + + def _extract_chat(self, chat_history, request_url): + return { + 'live_chat': [ # subtitle tag + { # JSON subformat as URL + 'url': request_url, + 'ext': 'json' + }, + { # JSON subformat as data + 'data': json.dumps(chat_history), + 'ext': 'json' + } + ] + } + def _real_extract(self, url): vod_id = self._match_id(url) @@ -561,16 +628,9 @@ def _real_extract(self, url): if 't' in query: info['start_time'] = parse_duration(query['t'][0]) - if info.get('timestamp') is not None: - info['subtitles'] = { - 'rechat': [{ - 'url': update_url_query( - f'https://api.twitch.tv/v5/videos/{vod_id}/comments', { - 'client_id': self._CLIENT_ID, - }), - 'ext': 'json', - }], - } + if ('live_chat' in self.get_param('subtitleslangs', [])) \ + and info.get('timestamp') is not None: + info['subtitles'] = self._download_chat(vod_id) return info From f6bad349054d1adb52b5a6234c105a716d2d1e48 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Fri, 26 Nov 2021 20:35:34 +0100 Subject: [PATCH 02/15] fix import list formatting --- yt_dlp/extractor/twitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 9357e1719..ada6a588a 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -316,6 +316,7 @@ class TwitchVodIE(TwitchBaseIE): 'view_count': 
int, }, 'params': { + 'subtitleslangs': ['live_chat'], 'skip_download': True, }, }, { From 883c3c16b342f3ab8ae0e13d1a4b6724f6dfcd90 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Wed, 28 Dec 2022 18:50:42 +0100 Subject: [PATCH 03/15] fix twitch vod chat download chat download now uses the GraphQL API, instead of the old one that doesn't work anymore --- yt_dlp/extractor/twitch.py | 87 ++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index ada6a588a..07aeb9807 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -4,6 +4,7 @@ import random import re import urllib.parse +import time from .common import InfoExtractor from ..utils import ( @@ -51,6 +52,7 @@ class TwitchBaseIE(InfoExtractor): 'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad', 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41', 'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6', + 'VideoCommentsByOffsetOrCursor': 'b70a3591ff0f4e0313d126c6a1502d79a1c02baebb288227c582044aa76adf6a', } @property @@ -543,67 +545,72 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration): } for path in images], } - def _download_chat(self, vod_id): - live_chat = list() - - request_url = f'https://api.twitch.tv/v5/videos/{vod_id}/comments' - query_params = { - 'client_id': self._CLIENT_ID - } - - self.to_screen('Downloading chat fragment JSONs') - - # TODO: question: is it OK to use this config value for this purpose? - max_retries = self.get_param('extractor_retries') + def _extract_chat(self, vod_id): + chat_history = [] + has_more_pages = True + retry_sleep = 5 + max_retries = 3 retries = 0 pagenum = 1 - while True: - response_json = self._download_json( - request_url, - vod_id, - fatal=False, - note='Downloading chat fragment JSON page %d' % pagenum, - errnote='Live chat fragment download failed.', - query=query_params) + gql_ops = [ + { + 'operationName': 'VideoCommentsByOffsetOrCursor', + 'variables': { + 'videoID': vod_id, + # 'cursor': + } + } + ] - if response_json is False: - self.report_warning(f'Unable to fetch next chat history fragment. {retries}. try of {max_retries}') + self.to_screen('Downloading chat fragment pages') + + while has_more_pages: + response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False) + + if response is False: + self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}') if retries < max_retries: retries += 1 + time.sleep(retry_sleep) continue else: self.report_warning('Chat history download failed: retry limit reached') - # TODO: when this happens, should I forget a partial chat history, or is it better to keep it too? + # TODO: when this happens, should I forget a partial chat history, or is it better to keep it? 
# I think if I keep it, it might be better to persist a warning that it is incomplete - # live_chat.clear() + # chat_history.clear() break - live_chat.extend(response_json.get('comments') or []) - next_fragment_cursor = str_or_none(response_json.get('_next')) + comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments')) + chat_history.extend(traverse_obj(comments_obj, ('edges', slice, 'node'))) - if next_fragment_cursor is None: - break + has_more_pages = traverse_obj(comments_obj, ('pageInfo', 'hasNextPage')) - query_params['cursor'] = next_fragment_cursor - pagenum += 1 + if has_more_pages: + cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor')) + if cursor is None: + self.report_warning("Cannot continue downloading chat history: cursor is missing. There are additional chat pages to download.") + break - chat_history_length = len(live_chat) + pagenum += 1 + gql_ops[0]['variables']['cursor'] = cursor + if has_more_pages is None: + cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor')) + + if cursor is not None: + self.report_warning("Next page indication is missing, but found cursor. Continuing chat history download.") + else: # In this case maintenance might be needed. Purpose is to prevent silent errors. + self.report_warning("Next page indication is missing, and cursor not found.") + + chat_history_length = len(chat_history) self.to_screen('Extracted %d chat messages' % chat_history_length) if chat_history_length == 0: return None - return self._extract_chat(live_chat, request_url) - - def _extract_chat(self, chat_history, request_url): return { 'live_chat': [ # subtitle tag - { # JSON subformat as URL - 'url': request_url, - 'ext': 'json' - }, - { # JSON subformat as data + { 'data': json.dumps(chat_history), 'ext': 'json' } @@ -631,7 +638,7 @@ def _real_extract(self, url): if ('live_chat' in self.get_param('subtitleslangs', [])) \ and info.get('timestamp') is not None: - info['subtitles'] = self._download_chat(vod_id) + info['subtitles'] = self._extract_chat(vod_id) return info From faa471803d6347ff7995c21c051e8546c198a46a Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Wed, 28 Dec 2022 18:52:25 +0100 Subject: [PATCH 04/15] add version indication to the subtitles.live_chat.[].ext field --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 07aeb9807..8a24160a6 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -612,7 +612,7 @@ def _extract_chat(self, vod_id): 'live_chat': [ # subtitle tag { 'data': json.dumps(chat_history), - 'ext': 'json' + 'ext': 'twitch-gql-20221228.json' } ] } From 6a0401087a673231cb20d5341b5b08d5b02bfce6 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Mon, 15 May 2023 13:13:47 +0200 Subject: [PATCH 05/15] print twitch gql api errors when extracting chat --- yt_dlp/extractor/twitch.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 8a24160a6..a645773eb 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -581,6 +581,11 @@ def _extract_chat(self, vod_id): # chat_history.clear() break + response_errors = traverse_obj(response, (slice, 'errors')) + + if response_errors is not None and len(response_errors) > 0: + self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") + comments_obj = 
traverse_obj(response, (0, 'data', 'video', 'comments')) chat_history.extend(traverse_obj(comments_obj, ('edges', slice, 'node'))) From f0a195cdca7a87be6124b01e968f51a9cad43865 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Thu, 13 Jul 2023 20:43:01 +0200 Subject: [PATCH 06/15] twitch: fix traversing objects --- yt_dlp/extractor/twitch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index a645773eb..ba8e2767b 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -581,13 +581,12 @@ def _extract_chat(self, vod_id): # chat_history.clear() break - response_errors = traverse_obj(response, (slice, 'errors')) - + response_errors = traverse_obj(response, (..., 'errors')) if response_errors is not None and len(response_errors) > 0: self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments')) - chat_history.extend(traverse_obj(comments_obj, ('edges', slice, 'node'))) + chat_history.extend(traverse_obj(comments_obj, ('edges', ..., 'node'))) has_more_pages = traverse_obj(comments_obj, ('pageInfo', 'hasNextPage')) From 206ca4092b600db9bebe2c074f5da3d123da425b Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Fri, 14 Jul 2023 01:03:27 +0200 Subject: [PATCH 07/15] twitch chat: review changes part 1 --- yt_dlp/extractor/twitch.py | 70 +++++++++++++++----------------------- 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index ba8e2767b..b7a6b2245 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -21,6 +21,7 @@ parse_iso8601, parse_qs, qualities, + RetryManager, str_or_none, try_get, unified_timestamp, @@ -545,44 +546,34 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration): } for path in images], } - def _extract_chat(self, vod_id): + def _get_subtitles(self, vod_id): chat_history = [] has_more_pages = True - retry_sleep = 5 - max_retries = 3 - retries = 0 pagenum = 1 - gql_ops = [ - { - 'operationName': 'VideoCommentsByOffsetOrCursor', - 'variables': { - 'videoID': vod_id, - # 'cursor': - } - } - ] - - self.to_screen('Downloading chat fragment pages') + gql_ops = [{ + 'operationName': 'VideoCommentsByOffsetOrCursor', + 'variables': { 'videoID': vod_id } + # 'variables.cursor': + }] while has_more_pages: - response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False) + response = None - if response is False: - self.report_warning(f'Unable to fetch next chat history fragment. {retries + 1}. try of {max_retries}') + for retry in self.RetryManager(): + response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False) + # response = False + # TODO: delete the direct False, uncomment _download_gql + + if response is False: + retry.error = ExtractorError("f'Unable to fetch next chat history fragment.'", video_id=vod_id, ie=self) - if retries < max_retries: - retries += 1 - time.sleep(retry_sleep) - continue - else: - self.report_warning('Chat history download failed: retry limit reached') # TODO: when this happens, should I forget a partial chat history, or is it better to keep it? 
# I think if I keep it, it might be better to persist a warning that it is incomplete - # chat_history.clear() - break + + # time.sleep(5) response_errors = traverse_obj(response, (..., 'errors')) - if response_errors is not None and len(response_errors) > 0: + if response_errors: self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments')) @@ -607,19 +598,15 @@ def _extract_chat(self, vod_id): else: # In this case maintenance might be needed. Purpose is to prevent silent errors. self.report_warning("Next page indication is missing, and cursor not found.") - chat_history_length = len(chat_history) - self.to_screen('Extracted %d chat messages' % chat_history_length) - if chat_history_length == 0: - return None + if not chat_history: + return - return { - 'live_chat': [ # subtitle tag - { - 'data': json.dumps(chat_history), - 'ext': 'twitch-gql-20221228.json' - } - ] - } + self.to_screen('Extracted %d chat messages' % len(chat_history)) + + return { 'rechat': [{ + 'data': json.dumps(chat_history), + 'ext': 'twitch-gql-20221228.json' + }]} def _real_extract(self, url): vod_id = self._match_id(url) @@ -640,9 +627,8 @@ def _real_extract(self, url): if 't' in query: info['start_time'] = parse_duration(query['t'][0]) - if ('live_chat' in self.get_param('subtitleslangs', [])) \ - and info.get('timestamp') is not None: - info['subtitles'] = self._extract_chat(vod_id) + if info.get('timestamp'): + info['subtitles'] = self.extract_subtitles(vod_id) return info From 0aa41b8e8492f349a3c16d52330e38f2a3cc2eee Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Fri, 14 Jul 2023 13:38:50 +0200 Subject: [PATCH 08/15] twitch chat: review changes part 2 --- yt_dlp/extractor/twitch.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index b7a6b2245..4a20de25e 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -21,7 +21,6 @@ parse_iso8601, parse_qs, qualities, - RetryManager, str_or_none, try_get, unified_timestamp, @@ -552,7 +551,7 @@ def _get_subtitles(self, vod_id): pagenum = 1 gql_ops = [{ 'operationName': 'VideoCommentsByOffsetOrCursor', - 'variables': { 'videoID': vod_id } + 'variables': {'videoID': vod_id} # 'variables.cursor': }] @@ -561,8 +560,6 @@ def _get_subtitles(self, vod_id): for retry in self.RetryManager(): response = self._download_gql(vod_id, gql_ops, 'Downloading chat fragment page %d' % pagenum, fatal=False) - # response = False - # TODO: delete the direct False, uncomment _download_gql if response is False: retry.error = ExtractorError("f'Unable to fetch next chat history fragment.'", video_id=vod_id, ie=self) @@ -570,8 +567,6 @@ def _get_subtitles(self, vod_id): # TODO: when this happens, should I forget a partial chat history, or is it better to keep it? 
# I think if I keep it, it might be better to persist a warning that it is incomplete - # time.sleep(5) - response_errors = traverse_obj(response, (..., 'errors')) if response_errors: self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") @@ -600,10 +595,10 @@ def _get_subtitles(self, vod_id): if not chat_history: return + else: + self.write_debug(f'Extracted {len(chat_history)} chat messages') - self.to_screen('Extracted %d chat messages' % len(chat_history)) - - return { 'rechat': [{ + return {'rechat': [{ 'data': json.dumps(chat_history), 'ext': 'twitch-gql-20221228.json' }]} @@ -628,7 +623,12 @@ def _real_extract(self, url): info['start_time'] = parse_duration(query['t'][0]) if info.get('timestamp'): - info['subtitles'] = self.extract_subtitles(vod_id) + info['subtitles'] = {'rechat': [{ + 'url': update_url_query(f'https://api.twitch.tv/v5/videos/{vod_id}/comments', + {'client_id': self._CLIENT_ID}), + 'ext': 'json', + }]}, + info['__post_extractor'] = lambda: {'subtitles': self.extract_subtitles(vod_id)} return info From 43fd8ad02f35f65c0d0132dc67841567e55b1125 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Fri, 14 Jul 2023 14:47:51 +0200 Subject: [PATCH 09/15] fix setting the subtitle in the infodict --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 4a20de25e..48235c76c 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -627,7 +627,7 @@ def _real_extract(self, url): 'url': update_url_query(f'https://api.twitch.tv/v5/videos/{vod_id}/comments', {'client_id': self._CLIENT_ID}), 'ext': 'json', - }]}, + }]} info['__post_extractor'] = lambda: {'subtitles': self.extract_subtitles(vod_id)} return info From 27b8892037fb0240c0403bc7f17fd7134e0de570 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Fri, 14 Jul 2023 15:43:04 +0200 Subject: [PATCH 10/15] add extractor args for setting the IDs from args --- yt_dlp/extractor/twitch.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 48235c76c..15cf51639 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -60,6 +60,14 @@ def _CLIENT_ID(self): return self._configuration_arg( 'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key='Twitch', casesense=True)[0] + @property + def _DEVICE_ID(self): + return self._configuration_arg('device_id', [None], ie_key='Twitch')[0] + + @property + def _CLIENT_INTEGRITY(self): + return self._configuration_arg('client_integrity', [None], ie_key='Twitch', casesense=True)[0] + def _perform_login(self, username, password): def fail(message): raise ExtractorError( @@ -144,6 +152,14 @@ def _download_base_gql(self, video_id, ops, note, fatal=True): gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token') if gql_auth: headers['Authorization'] = 'OAuth ' + gql_auth.value + + # TODO: remove existence checks when the values will be generated + if self._DEVICE_ID: + headers["X-Device-Id"] = self._DEVICE_ID + + if self._CLIENT_INTEGRITY: + headers["Client-Integrity"] = self._CLIENT_INTEGRITY + return self._download_json( 'https://gql.twitch.tv/gql', video_id, note, data=json.dumps(ops).encode(), From 167812af9d10d6b3268224ed45aeecde91e79d01 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 9 Sep 2023 
21:52:57 +0200 Subject: [PATCH 11/15] read device_id extractor arg case sensitively the Device ID usually contains lowercase and uppercase letters, and twitch cares about it --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 15cf51639..414cd9866 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -62,7 +62,7 @@ def _CLIENT_ID(self): @property def _DEVICE_ID(self): - return self._configuration_arg('device_id', [None], ie_key='Twitch')[0] + return self._configuration_arg('device_id', [None], ie_key='Twitch', casesense=True)[0] @property def _CLIENT_INTEGRITY(self): From 4ddd8de69bca3779c148b9bbb87c9a0cd78fe62b Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 9 Sep 2023 21:56:36 +0200 Subject: [PATCH 12/15] revert moving comment extraction to late running function --- yt_dlp/extractor/twitch.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 414cd9866..215386c82 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -639,12 +639,7 @@ def _real_extract(self, url): info['start_time'] = parse_duration(query['t'][0]) if info.get('timestamp'): - info['subtitles'] = {'rechat': [{ - 'url': update_url_query(f'https://api.twitch.tv/v5/videos/{vod_id}/comments', - {'client_id': self._CLIENT_ID}), - 'ext': 'json', - }]} - info['__post_extractor'] = lambda: {'subtitles': self.extract_subtitles(vod_id)} + info['subtitles'] = self.extract_subtitles(vod_id) return info From 72dbbb5763ab61f1b76c15204791b2e696c5ebee Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 9 Sep 2023 22:43:48 +0200 Subject: [PATCH 13/15] redo moving chat extraction to __post_exctractor --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 215386c82..32d396175 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -639,7 +639,7 @@ def _real_extract(self, url): info['start_time'] = parse_duration(query['t'][0]) if info.get('timestamp'): - info['subtitles'] = self.extract_subtitles(vod_id) + info['__post_extractor'] = lambda: {'requested_subtitles': {'rechat': traverse_obj(self.extract_subtitles(vod_id), ['rechat', 0])}} return info From c50c0642844c92337e3e9bbf8a13b87260b67498 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 12 Apr 2025 01:50:05 +0200 Subject: [PATCH 14/15] remove unused import --- yt_dlp/extractor/twitch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 32d396175..7d981f565 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -4,7 +4,6 @@ import random import re import urllib.parse -import time from .common import InfoExtractor from ..utils import ( From 96a5cd570b775f07289612c5a6e642c0ceb1c290 Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 12 Apr 2025 01:54:21 +0200 Subject: [PATCH 15/15] formatting changes for ruff --- yt_dlp/extractor/twitch.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 7d981f565..1521be127 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ 
-154,10 +154,10 @@ def _download_base_gql(self, video_id, ops, note, fatal=True): # TODO: remove existence checks when the values will be generated if self._DEVICE_ID: - headers["X-Device-Id"] = self._DEVICE_ID + headers['X-Device-Id'] = self._DEVICE_ID if self._CLIENT_INTEGRITY: - headers["Client-Integrity"] = self._CLIENT_INTEGRITY + headers['Client-Integrity'] = self._CLIENT_INTEGRITY return self._download_json( 'https://gql.twitch.tv/gql', video_id, note, @@ -566,7 +566,7 @@ def _get_subtitles(self, vod_id): pagenum = 1 gql_ops = [{ 'operationName': 'VideoCommentsByOffsetOrCursor', - 'variables': {'videoID': vod_id} + 'variables': {'videoID': vod_id}, # 'variables.cursor': }] @@ -584,7 +584,7 @@ def _get_subtitles(self, vod_id): response_errors = traverse_obj(response, (..., 'errors')) if response_errors: - self.report_warning(f"Error response recevied for fetching next chat history fragment: {response_errors}") + self.report_warning(f'Error response recevied for fetching next chat history fragment: {response_errors}') comments_obj = traverse_obj(response, (0, 'data', 'video', 'comments')) chat_history.extend(traverse_obj(comments_obj, ('edges', ..., 'node'))) @@ -594,7 +594,7 @@ def _get_subtitles(self, vod_id): if has_more_pages: cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor')) if cursor is None: - self.report_warning("Cannot continue downloading chat history: cursor is missing. There are additional chat pages to download.") + self.report_warning('Cannot continue downloading chat history: cursor is missing. There are additional chat pages to download.') break pagenum += 1 @@ -604,9 +604,9 @@ def _get_subtitles(self, vod_id): cursor = traverse_obj(comments_obj, ('edges', 0, 'cursor')) if cursor is not None: - self.report_warning("Next page indication is missing, but found cursor. Continuing chat history download.") + self.report_warning('Next page indication is missing, but found cursor. Continuing chat history download.') else: # In this case maintenance might be needed. Purpose is to prevent silent errors. - self.report_warning("Next page indication is missing, and cursor not found.") + self.report_warning('Next page indication is missing, and cursor not found.') if not chat_history: return @@ -615,7 +615,7 @@ def _get_subtitles(self, vod_id): return {'rechat': [{ 'data': json.dumps(chat_history), - 'ext': 'twitch-gql-20221228.json' + 'ext': 'twitch-gql-20221228.json', }]} def _real_extract(self, url):
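
A minimal usage sketch, assuming the series above is applied to a yt-dlp checkout. The VOD URL, device ID, and client-integrity token below are placeholders; 'device_id' and 'client_integrity' are the optional extractor args introduced in the "add extractor args" patch, and the chat history is exposed under the 'rechat' subtitle tag.

# Usage sketch only; values in ALL CAPS are placeholders, not real credentials.
import yt_dlp

ydl_opts = {
    'skip_download': True,         # fetch only metadata and the chat "subtitle"
    'writesubtitles': True,        # extract_subtitles() only runs when subtitle writing is requested
    'subtitleslangs': ['rechat'],  # tag name this extractor uses for the chat history
    'extractor_args': {
        'twitch': {
            # Both args are optional; omit them to send no extra headers.
            'device_id': ['PLACEHOLDER_DEVICE_ID'],
            'client_integrity': ['PLACEHOLDER_INTEGRITY_TOKEN'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.twitch.tv/videos/123456789'])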