mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 09:28:33 +00:00
add live chat extraction to separate branch
This commit is contained in:
parent
74e90dd9b8
commit
e07c63f2aa
@ -542,6 +542,73 @@ def _extract_storyboard(self, item_id, storyboard_json_url, duration):
|
|||||||
} for path in images],
|
} for path in images],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _download_chat(self, vod_id):
|
||||||
|
live_chat = list()
|
||||||
|
|
||||||
|
request_url = f'https://api.twitch.tv/v5/videos/{vod_id}/comments'
|
||||||
|
query_params = {
|
||||||
|
'client_id': self._CLIENT_ID
|
||||||
|
}
|
||||||
|
|
||||||
|
self.to_screen('Downloading chat fragment JSONs')
|
||||||
|
|
||||||
|
# TODO: question: is it OK to use this config value for this purpose?
|
||||||
|
max_retries = self.get_param('extractor_retries')
|
||||||
|
retries = 0
|
||||||
|
pagenum = 1
|
||||||
|
while True:
|
||||||
|
response_json = self._download_json(
|
||||||
|
request_url,
|
||||||
|
vod_id,
|
||||||
|
fatal=False,
|
||||||
|
note='Downloading chat fragment JSON page %d' % pagenum,
|
||||||
|
errnote='Live chat fragment download failed.',
|
||||||
|
query=query_params)
|
||||||
|
|
||||||
|
if response_json is False:
|
||||||
|
self.report_warning(f'Unable to fetch next chat history fragment. {retries}. try of {max_retries}')
|
||||||
|
|
||||||
|
if retries < max_retries:
|
||||||
|
retries += 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
self.report_warning('Chat history download failed: retry limit reached')
|
||||||
|
# TODO: when this happens, should I forget a partial chat history, or is it better to keep it too?
|
||||||
|
# I think if I keep it, it might be better to persist a warning that it is incomplete
|
||||||
|
# live_chat.clear()
|
||||||
|
break
|
||||||
|
|
||||||
|
live_chat.extend(response_json.get('comments') or [])
|
||||||
|
next_fragment_cursor = str_or_none(response_json.get('_next'))
|
||||||
|
|
||||||
|
if next_fragment_cursor is None:
|
||||||
|
break
|
||||||
|
|
||||||
|
query_params['cursor'] = next_fragment_cursor
|
||||||
|
pagenum += 1
|
||||||
|
|
||||||
|
chat_history_length = len(live_chat)
|
||||||
|
|
||||||
|
self.to_screen('Extracted %d chat messages' % chat_history_length)
|
||||||
|
if chat_history_length == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self._extract_chat(live_chat, request_url)
|
||||||
|
|
||||||
|
def _extract_chat(self, chat_history, request_url):
|
||||||
|
return {
|
||||||
|
'live_chat': [ # subtitle tag
|
||||||
|
{ # JSON subformat as URL
|
||||||
|
'url': request_url,
|
||||||
|
'ext': 'json'
|
||||||
|
},
|
||||||
|
{ # JSON subformat as data
|
||||||
|
'data': json.dumps(chat_history),
|
||||||
|
'ext': 'json'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
vod_id = self._match_id(url)
|
vod_id = self._match_id(url)
|
||||||
|
|
||||||
@ -561,16 +628,9 @@ def _real_extract(self, url):
|
|||||||
if 't' in query:
|
if 't' in query:
|
||||||
info['start_time'] = parse_duration(query['t'][0])
|
info['start_time'] = parse_duration(query['t'][0])
|
||||||
|
|
||||||
if info.get('timestamp') is not None:
|
if ('live_chat' in self.get_param('subtitleslangs', [])) \
|
||||||
info['subtitles'] = {
|
and info.get('timestamp') is not None:
|
||||||
'rechat': [{
|
info['subtitles'] = self._download_chat(vod_id)
|
||||||
'url': update_url_query(
|
|
||||||
f'https://api.twitch.tv/v5/videos/{vod_id}/comments', {
|
|
||||||
'client_id': self._CLIENT_ID,
|
|
||||||
}),
|
|
||||||
'ext': 'json',
|
|
||||||
}],
|
|
||||||
}
|
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user