From a9b370069838e84d44ac7ad095d657003665885a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 30 May 2025 17:48:48 -0500 Subject: [PATCH 001/103] [test:postprocessors] Remove binary thumbnail test data (#13341) Authored by: bashonly --- .gitignore | 2 ++ Makefile | 5 ++-- test/test_postprocessors.py | 23 ++++++++++++++++-- .../thumbnails/foo %d bar/foo_%d.webp | Bin 3928 -> 0 bytes .../thumbnails/foo %d bar/placeholder | 0 5 files changed, 26 insertions(+), 4 deletions(-) delete mode 100644 test/testdata/thumbnails/foo %d bar/foo_%d.webp create mode 100644 test/testdata/thumbnails/foo %d bar/placeholder diff --git a/.gitignore b/.gitignore index 8fcd0de64..40bb34d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,8 @@ README.txt *.zsh *.spec test/testdata/sigs/player-*.js +test/testdata/thumbnails/empty.webp +test/testdata/thumbnails/foo\ %d\ bar/foo_%d.* # Binary /youtube-dl diff --git a/Makefile b/Makefile index 6c72ead1e..273cb3cc0 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ tar pypi-files lazy-extractors install uninstall clean-test: - rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ - *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \ + test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."* clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 603f85c65..ecc73e39e 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -8,6 +8,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import subprocess + from yt_dlp import YoutubeDL from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( @@ -47,7 +49,18 @@ def test_escaping(self): print('Skipping: ffmpeg not found') return - file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}' + test_data_dir = 'test/testdata/thumbnails' + generated_file = f'{test_data_dir}/empty.webp' + + subprocess.check_call([ + pp.executable, '-y', '-f', 'lavfi', '-i', 'color=c=black:s=320x320', + '-c:v', 'libwebp', '-pix_fmt', 'yuv420p', '-vframes', '1', generated_file, + ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file = test_data_dir + '/foo %d bar/foo_%d.{}' + initial_file = file.format('webp') + os.replace(generated_file, initial_file) + tests = (('webp', 'png'), ('png', 'jpg')) for inp, out in tests: @@ -55,11 +68,13 @@ def test_escaping(self): if os.path.exists(out_file): os.remove(out_file) pp.convert_thumbnail(file.format(inp), out) - assert os.path.exists(out_file) + self.assertTrue(os.path.exists(out_file)) for _, out in tests: os.remove(file.format(out)) + os.remove(initial_file) + class TestExec(unittest.TestCase): def test_parse_cmd(self): @@ -610,3 +625,7 @@ def test_quote_for_concat_QuotesAtEnd(self): self.assertEqual( r"'special '\'' characters '\'' galore'\'\'\'", self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp deleted file mode 100644 index d64d0839f054071849aa12f194b8b20b19e6bb59..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3928 zcmb`~_dgVX;|B0EvWr7mA)$~Jm7J}!$vn+C%bAtz*&5!t#N zM><8u`F>xo&+GFae4byP=lv%E7)ILKgM{C&GlLaE?Cfjmla3>)I?=d&Zkv^~-GLWfe{ zK>u^3wjFzU%IaXRpvHx8A_<BxfTx=;c=8V{e2M1TL%=v@KU&^381D# z$)%j_5oUzXGr*LID8MsK6PkZ&CmB!FDx)bE3YYB{I{g9U08+2VWr#`!!keagUZ#(U ziG}Szb^2>YOQ;KpA9|)^`9H=~U$>r3SB#uq^T?wlDNZFPH5vl8CWvd8)3MkjQS^KW z`|(CDZ$n}8?|rjF>cov{+=^mRXJv(;?@bBzN&Lh_M*==DUGHe%aB(L*1yCoez9M zyGr;+(Ovgt@q6(?+CVOdR%MzzUQ6ZbYOZ0KCa|hEZ^@ZpfsJt0Uk#jaKDqpsgU|WP zxIX1V?Lj&^#(PQAykx~TRh6}TPc{gld91w>tRl2y3(jryT8-n)Pht{#6ZocJx9t6d z!LK{ltSgrSB7RZtqsH5Ez zP-JpJu4^L2#3yrl49UB7v8Yav>N(Tx60!j`{!Oom8QZ2~2ruy@j(q!%%dmp~E2RAb zP*%AC{|!CP^?G(;0#}{Lx(ALpXOshmm(2G9;&=thDR-GH0O6ts6P8%#ShAAXW^znkqhw0nIrZp5%o`b!ZuV9+)E;a|@Z}y= zu(%%zpEXRx)ZJp+bZ{N(R7k*1A*p~*CG{iSv9v6Yp82v-mr7cqvV9ahKm#JZS0=ot zDlO!LcOd+80*d=7V`Zbe`T07*4#q{aS5HdUZ_a}HtG;Rf{ljdko$$oRzh_UCezmWO zZ*nc-G}vl@cQiJDRCpdiF5c+roHeq{VHvAvme+dm4xJQl#LrdAWW$i`&aRi}-7( z)&e{#56KI6QjOr9z1}2LxqeADdiZzlj>$plMm-F}YCYS0ykxa8Q1#mxDpd{^rWd#) z;qxawT1T$wDG=#Z8+yXlz75~-PRlt`i00>Ugew=7R}_Zx%C zpT|rizqBo)P#6_>$*F%v?-d7UoldpQYO`zhejAM0(a%TS(K0C{`;jEuD8a%^i;E13 z{Mr1qQg@SC4@vH@9PUFw0|)GEH?LR(>kFP87S@rKgLb|R2*=_B7z88K<`=Fg*jS7m zZh;!+3r4c7>FirW5|T#&e*u;fL|3-{uNd<6*B`KCT9W94H$8C!h~c)1o82-U22zSr zB7Du!8)SJvlz>XhwjK7AUWhB-c&2?SYGFS$)AbWVi9NeGC66 z&W7Fz9Q8znbl#k1O5D6`vy6Tg6D4a{AQj8OYgN9TL?`iVgp(~ZnbY6b;7GK4O13ji z1zhQ6-9B+TyP2FRlEcTNFc9#*_@f18(2hgrC`KZts{ZLJ1g?1qd%RS!)&)I&&y_|=*9Md)i!>(jZAgz7Sqyy$)gPg-N@=|5qID2}H|^uB@3enIKc;0*Ng4o!dJ zWQEy!^t!68U32z>ldxR+mj_;&UPI!MpFP}YBBD5sIyegIymbctib)QR(o%9O!(?Td zK6ORqa2|mB(FFU{Hz&t#;4fTan)(J!ISByq{23BphZdQAu^Ap7>1TI(DUOs*k@q+( z)}()Zg3@>qX}O?{QdXx;bWV>Wv+UkDz2Uj=$ga+VdA?hN`{qQU`O8tW!(+A1Cdrrf zQ!~t~J{wvC7tD^IN%cu8z=l>4zz7{@1^p?j2FTurKBqPxTMLee zZ?YMmSyr&6E?Ib-xa!wp7JjDbaSycO8S23Cgqmg7ut!YPis}WOqV^B>W1|iXs-4HD ziZt)lXm_08nQU+i86xctG7rEl)lZ6y(5x4+pDTTkL#Jh_@<`7AUOYwcV~g3X!J37qr==x8LBMG zs3Qm@2KATnz@or5y!U*>>I3sKRitt-Nm4{%BA8*vSO1I z-TBkal#y}l{wy@%JLk_t$eXc_7B`4C-bw1=oQT zOG=~n5vf9O?%PIK%RDFGQ$)vm`smN{*N}#qk5ATfnpFC*{9&a&MxS-w*4WP?h0#=u zKwOQsYe=OBy2$fTQVJS5yrWDU#7uADW)i$p58A`>Zi}|LVP-*s0Ot0A0qx+$ix<^b z*yefreyF;#<-WA|{t`&J!K-*ZJ#s-_aVI^lQ~byImxuQf2Xm09S+v=3ux_Genm64m zCSA!QyfR(!-dd*)jE^zcaLF;SQU|#28br_KBUw}w`_jVO_kMmUC-e0IumQgMR2D(( zXapmum}1d2Y+r)9M{o?>leMG1PJfe*R^9HRd7v}39@H9@iQ51$tWN$V#V~E3zDl+s z+`U!n+_m?nCBY@E%UlGdNY{fU-0!#4qBbvEY~$2ER?OqPcw>%4s+4HpC>3a%*UH99qJk@ z1Dk4^boGJ3AE6f`rtPwh#mb(np{kAw7Jo<~)-v#2?KuF2)@*Wli4fN8()8)7yq?<} zgnBk};ECdiD0CWAcwaf^y!^_fnJ-Y|>EG(VM}%~{N7fHb6d#K=3}7^k3YC}PvAcqZ zU_b^qkmj1{?b+4>U;#~+kx_b*65p>x}GJ?dp4i_=%eiu*JwNvt?$*Hz5zS2*Izit$A^lTYd}grzO;` zO|OQ+po8&Lv91}gUJFyD2oRiUVnufu)6QVbZjJo(i)pX$LW0w4LAgSzMpjJ9g@Ng# zmuDjGdn&6v4yUb9FaIxRDC~Ns1y>j$eH0n~N3%telVR))|K*a%;sB@6DNj(raSBi^ z{n&TFPhp&U-TMjm?O|^cQzI1Eb2+rjgEe*9TZB|CF66SI_09)tqv95Sz%Hya^-egh zTm18|Rdqv`aVvB5PsJT`QiXdd6UHQjz#I`{TgD&C4?Q;>J!3-Wr$=Q!NgEAsYMrsZ zj*DFL`U7%Dyo+@b6#L#{e(1bdcy#ob=VU*)PUvC)t7R`*mkFq zjJs{iz-RcCNt23{KZ!Tf;Kat bLpo5mJGW#CT`nX2$#Kl+y#GFf|MdR>k&2#! diff --git a/test/testdata/thumbnails/foo %d bar/placeholder b/test/testdata/thumbnails/foo %d bar/placeholder new file mode 100644 index 000000000..e69de29bb From 6d265388c6e943419ac99e9151cf75a3265f980f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 30 May 2025 17:51:25 -0500 Subject: [PATCH 002/103] [ie/10play] Fix extractor (#13349) Closes #12337 Authored by: bashonly --- yt_dlp/extractor/tenplay.py | 87 ++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index cc7bc3b2f..825da6516 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -6,32 +6,32 @@ class TenPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?Ptpv\d{6}[a-z]{5})' + IE_NAME = '10play' + _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?Ptpv\d{6}[a-z]{5})' _NETRC_MACHINE = '10play' _TESTS = [{ - 'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz', + # Geo-restricted to Australia + 'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf', 'info_dict': { - 'id': '6336940246112', + 'id': '7440980000013868', 'ext': 'mp4', - 'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut', - 'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut', - 'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach', - 'duration': 74, - 'season': 'Season 41', - 'season_number': 41, - 'series': 'Neighbours', - 'thumbnail': r're:https://.*\.jpg', + 'title': 'Myles\'s Journey', + 'alt_title': 'Myles\'s Journey', + 'description': 'Relive Myles\'s epic Brains V Brawn II journey to reach the game\'s final two', 'uploader': 'Channel 10', - 'age_limit': 15, - 'timestamp': 1694386800, - 'upload_date': '20230910', 'uploader_id': '2199827728001', + 'age_limit': 15, + 'duration': 249, + 'thumbnail': r're:https://.+/.+\.jpg', + 'series': 'Australian Survivor', + 'season': 'Season 10', + 'season_number': 10, + 'timestamp': 1744629420, + 'upload_date': '20250414', }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only available in Australia', + 'params': {'skip_download': 'm3u8'}, }, { + # Geo-restricted to Australia 'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp', 'info_dict': { 'id': '9000000000091177', @@ -45,17 +45,38 @@ class TenPlayIE(InfoExtractor): 'season': 'Season 42', 'season_number': 42, 'series': 'Neighbours', - 'thumbnail': r're:https://.*\.jpg', + 'thumbnail': r're:https://.+/.+\.jpg', 'age_limit': 15, 'timestamp': 1725517860, 'upload_date': '20240905', 'uploader': 'Channel 10', 'uploader_id': '2199827728001', }, - 'params': { - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + # Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback + 'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt', + 'info_dict': { + 'id': '9000000000084116', + 'ext': 'mp4', + 'uploader': 'Channel 10', + 'uploader_id': '2199827728001', + 'duration': 1297, + 'title': 'The Tiny Chef Show - S1 Ep. 2', + 'alt_title': 'S1 Ep. 2 - Popcorn/banana', + 'description': 'md5:d4758b52b5375dfaa67a78261dcb5763', + 'age_limit': 0, + 'series': 'The Tiny Chef Show', + 'season_number': 1, + 'episode_number': 2, + 'timestamp': 1747957740, + 'thumbnail': r're:https://.+/.+\.jpg', + 'upload_date': '20250522', + 'season': 'Season 1', + 'episode': 'Episode 2', }, - 'skip': 'Only available in Australia', + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'], }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, @@ -86,8 +107,11 @@ def _real_extract(self, url): if '10play-not-in-oz' in m3u8_url: self.raise_geo_restricted(countries=['AU']) # Attempt to get a higher quality stream - m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000') - formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4') + formats = self._extract_m3u8_formats( + m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000'), + content_id, 'mp4', fatal=False) + if not formats: + formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4') return { 'id': content_id, @@ -112,21 +136,22 @@ def _real_extract(self, url): class TenPlaySeasonIE(InfoExtractor): + IE_NAME = '10play:season' _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P[^/?#]+)/episodes/(?P[^/?#]+)/?(?:$|[?#])' _TESTS = [{ - 'url': 'https://10play.com.au/masterchef/episodes/season-14', + 'url': 'https://10play.com.au/masterchef/episodes/season-15', 'info_dict': { - 'title': 'Season 14', - 'id': 'MjMyOTIy', + 'title': 'Season 15', + 'id': 'MTQ2NjMxOQ==', }, - 'playlist_mincount': 64, + 'playlist_mincount': 50, }, { - 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022', + 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024', 'info_dict': { - 'title': 'Season 2022', + 'title': 'Season 2024', 'id': 'Mjc0OTIw', }, - 'playlist_mincount': 256, + 'playlist_mincount': 159, }] def _entries(self, load_more_url, display_id=None): From d30a49742cfa22e61c47df4ac0e7334d648fb85d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 30 May 2025 18:16:47 -0500 Subject: [PATCH 003/103] [ie/youtube] Improve signature extraction debug output (#13327) Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 3d4bdfd56..d82225718 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3398,8 +3398,15 @@ def build_fragments(f): self._decrypt_signature(encrypted_sig, video_id, player_url), ) except ExtractorError as e: - self.report_warning('Signature extraction failed: Some formats may be missing', - video_id=video_id, only_once=True) + self.report_warning( + f'Signature extraction failed: Some formats may be missing\n' + f' player = {player_url}\n' + f' {bug_reports_message(before="")}', + video_id=video_id, only_once=True) + self.write_debug( + f'{video_id}: Signature extraction failure info:\n' + f' encrypted sig = {encrypted_sig}\n' + f' player = {player_url}') self.write_debug(e, only_once=True) continue From 3fe72e9eea38d9a58211cde42cfaa577ce020e2c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 30 May 2025 18:20:59 -0500 Subject: [PATCH 004/103] [ie/weverse] Support login with oauth refresh tokens (#13284) Closes #7806 Authored by: bashonly --- yt_dlp/extractor/weverse.py | 243 +++++++++++++++++++++++++++--------- 1 file changed, 186 insertions(+), 57 deletions(-) diff --git a/yt_dlp/extractor/weverse.py b/yt_dlp/extractor/weverse.py index 42b1189fe..c13ab8e23 100644 --- a/yt_dlp/extractor/weverse.py +++ b/yt_dlp/extractor/weverse.py @@ -1,4 +1,5 @@ import base64 +import functools import hashlib import hmac import itertools @@ -17,99 +18,227 @@ UserNotLive, float_or_none, int_or_none, + join_nonempty, + jwt_decode_hs256, str_or_none, - traverse_obj, try_call, update_url_query, url_or_none, ) +from ..utils.traversal import require, traverse_obj class WeverseBaseIE(InfoExtractor): _NETRC_MACHINE = 'weverse' - _ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api' + _ACCOUNT_API_BASE = 'https://accountapi.weverse.io' + _CLIENT_PLATFORM = 'WEB' + _SIGNING_KEY = b'1b9cb6378d959b45714bec49971ade22e6e24e42' + _ACCESS_TOKEN_KEY = 'we2_access_token' + _REFRESH_TOKEN_KEY = 'we2_refresh_token' + _DEVICE_ID_KEY = 'we2_device_id' _API_HEADERS = { 'Accept': 'application/json', + 'Origin': 'https://weverse.io', 'Referer': 'https://weverse.io/', - 'WEV-device-Id': str(uuid.uuid4()), } + _LOGIN_HINT_TMPL = ( + 'You can log in using your refresh token with --username "{}" --password "REFRESH_TOKEN" ' + '(replace REFRESH_TOKEN with the actual value of the "{}" cookie found in your web browser). ' + 'You can add an optional username suffix, e.g. --username "{}" , ' + 'if you need to manage multiple accounts. ') + _LOGIN_ERRORS_MAP = { + 'login_required': 'This content is only available for logged-in users. ', + 'invalid_username': '"{}" is not valid login username for this extractor. ', + 'invalid_password': ( + 'Your password is not a valid refresh token. Make sure that ' + 'you are passing the refresh token, and NOT the access token. '), + 'no_refresh_token': ( + 'Your access token has expired and there is no refresh token available. ' + 'Refresh your session/cookies in the web browser and try again. '), + 'expired_refresh_token': ( + 'Your refresh token has expired. Log in to the site again using ' + 'your web browser to get a new refresh token or export fresh cookies. '), + } + _OAUTH_PREFIX = 'oauth' + _oauth_tokens = {} + _device_id = None - def _perform_login(self, username, password): - if self._API_HEADERS.get('Authorization'): - return - - headers = { - 'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a', - 'x-acc-app-version': '3.3.6', - 'x-acc-language': 'en', - 'x-acc-service-id': 'weverse', - 'x-acc-trace-id': str(uuid.uuid4()), - 'x-clog-user-device-id': str(uuid.uuid4()), + @property + def _oauth_headers(self): + return { + **self._API_HEADERS, + 'X-ACC-APP-SECRET': '5419526f1c624b38b10787e5c10b2a7a', + 'X-ACC-SERVICE-ID': 'weverse', + 'X-ACC-TRACE-ID': str(uuid.uuid4()), } - valid_username = traverse_obj(self._download_json( - f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username', - query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword') - if not valid_username: - raise ExtractorError('Invalid username provided', expected=True) - headers['content-type'] = 'application/json' + @functools.cached_property + def _oauth_cache_key(self): + username = self._get_login_info()[0] + if not username: + return 'cookies' + return join_nonempty(self._OAUTH_PREFIX, username.partition('+')[2]) + + @property + def _is_logged_in(self): + return bool(self._oauth_tokens.get(self._ACCESS_TOKEN_KEY)) + + def _access_token_is_valid(self): + response = self._download_json( + f'{self._ACCOUNT_API_BASE}/api/v1/token/validate', None, + 'Validating access token', 'Unable to valid access token', + expected_status=401, headers={ + **self._oauth_headers, + 'Authorization': f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}', + }) + return traverse_obj(response, ('expiresIn', {int}), default=0) > 60 + + def _token_is_expired(self, key): + is_expired = jwt_decode_hs256(self._oauth_tokens[key])['exp'] - time.time() < 3600 + if key == self._REFRESH_TOKEN_KEY or not is_expired: + return is_expired + return not self._access_token_is_valid() + + def _refresh_access_token(self): + if not self._oauth_tokens.get(self._REFRESH_TOKEN_KEY): + self._report_login_error('no_refresh_token') + if self._token_is_expired(self._REFRESH_TOKEN_KEY): + self._report_login_error('expired_refresh_token') + + headers = {'Content-Type': 'application/json'} + if self._is_logged_in: + headers['Authorization'] = f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}' + try: - auth = self._download_json( - f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({ - 'email': username, - 'otpSessionId': 'BY_PASS', - 'password': password, - }, separators=(',', ':')).encode(), headers=headers, note='Logging in') + response = self._download_json( + f'{self._ACCOUNT_API_BASE}/api/v1/token/refresh', None, + 'Refreshing access token', 'Unable to refresh access token', + headers={**self._oauth_headers, **headers}, + data=json.dumps({ + 'refreshToken': self._oauth_tokens[self._REFRESH_TOKEN_KEY], + }, separators=(',', ':')).encode()) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 401: - raise ExtractorError('Invalid password provided', expected=True) + self._oauth_tokens.clear() + if self._oauth_cache_key == 'cookies': + self.cookiejar.clear(domain='.weverse.io', path='/', name=self._ACCESS_TOKEN_KEY) + self.cookiejar.clear(domain='.weverse.io', path='/', name=self._REFRESH_TOKEN_KEY) + else: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, self._oauth_tokens) + self._report_login_error('expired_refresh_token') raise - WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {auth["accessToken"]}' + self._oauth_tokens.update(traverse_obj(response, { + self._ACCESS_TOKEN_KEY: ('accessToken', {str}, {require('access token')}), + self._REFRESH_TOKEN_KEY: ('refreshToken', {str}, {require('refresh token')}), + })) - def _real_initialize(self): - if self._API_HEADERS.get('Authorization'): + if self._oauth_cache_key == 'cookies': + self._set_cookie('.weverse.io', self._ACCESS_TOKEN_KEY, self._oauth_tokens[self._ACCESS_TOKEN_KEY]) + self._set_cookie('.weverse.io', self._REFRESH_TOKEN_KEY, self._oauth_tokens[self._REFRESH_TOKEN_KEY]) + else: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, self._oauth_tokens) + + def _get_authorization_header(self): + if not self._is_logged_in: + return {} + if self._token_is_expired(self._ACCESS_TOKEN_KEY): + self._refresh_access_token() + return {'Authorization': f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}'} + + def _report_login_error(self, error_id): + error_msg = self._LOGIN_ERRORS_MAP[error_id] + username = self._get_login_info()[0] + + if error_id == 'invalid_username': + error_msg = error_msg.format(username) + username = f'{self._OAUTH_PREFIX}+{username}' + elif not username: + username = f'{self._OAUTH_PREFIX}+USERNAME' + + raise ExtractorError(join_nonempty( + error_msg, self._LOGIN_HINT_TMPL.format(self._OAUTH_PREFIX, self._REFRESH_TOKEN_KEY, username), + 'Or else you can u', self._login_hint(method='session_cookies')[1:], delim=''), expected=True) + + def _perform_login(self, username, password): + if self._is_logged_in: return - token = try_call(lambda: self._get_cookies('https://weverse.io/')['we2_access_token'].value) - if token: - WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {token}' + if username.partition('+')[0] != self._OAUTH_PREFIX: + self._report_login_error('invalid_username') + + self._oauth_tokens.update(self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key, default={})) + if self._is_logged_in and self._access_token_is_valid(): + return + + rt_key = self._REFRESH_TOKEN_KEY + if not self._oauth_tokens.get(rt_key) or self._token_is_expired(rt_key): + if try_call(lambda: jwt_decode_hs256(password)['scope']) != 'refresh': + self._report_login_error('invalid_password') + self._oauth_tokens[rt_key] = password + + self._refresh_access_token() + + def _real_initialize(self): + cookies = self._get_cookies('https://weverse.io/') + + if not self._device_id: + self._device_id = traverse_obj(cookies, (self._DEVICE_ID_KEY, 'value')) or str(uuid.uuid4()) + + if self._is_logged_in: + return + + self._oauth_tokens.update(traverse_obj(cookies, { + self._ACCESS_TOKEN_KEY: (self._ACCESS_TOKEN_KEY, 'value'), + self._REFRESH_TOKEN_KEY: (self._REFRESH_TOKEN_KEY, 'value'), + })) + if self._is_logged_in and not self._access_token_is_valid(): + self._refresh_access_token() def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'): # Ref: https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/2488.a09b41ff.chunk.js # From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js: - key = b'1b9cb6378d959b45714bec49971ade22e6e24e42' api_path = update_url_query(ep, { # 'gcc': 'US', 'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4', 'language': 'en', - 'os': 'WEB', - 'platform': 'WEB', + 'os': self._CLIENT_PLATFORM, + 'platform': self._CLIENT_PLATFORM, 'wpf': 'pc', }) - wmsgpad = int(time.time() * 1000) - wmd = base64.b64encode(hmac.HMAC( - key, f'{api_path[:255]}{wmsgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode() - headers = {'Content-Type': 'application/json'} if data else {} - try: - return self._download_json( - f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note, - data=data, headers={**self._API_HEADERS, **headers}, query={ - 'wmsgpad': wmsgpad, - 'wmd': wmd, - }) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - self.raise_login_required( - 'Session token has expired. Log in again or refresh cookies in browser') - elif isinstance(e.cause, HTTPError) and e.cause.status == 403: - if 'Authorization' in self._API_HEADERS: - raise ExtractorError('Your account does not have access to this content', expected=True) - self.raise_login_required() - raise + for is_retry in (False, True): + wmsgpad = int(time.time() * 1000) + wmd = base64.b64encode(hmac.HMAC( + self._SIGNING_KEY, f'{api_path[:255]}{wmsgpad}'.encode(), + digestmod=hashlib.sha1).digest()).decode() + + try: + return self._download_json( + f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note, + data=data, headers={ + **self._API_HEADERS, + **self._get_authorization_header(), + **({'Content-Type': 'application/json'} if data else {}), + 'WEV-device-Id': self._device_id, + }, query={ + 'wmsgpad': wmsgpad, + 'wmd': wmd, + }) + except ExtractorError as e: + if is_retry or not isinstance(e.cause, HTTPError): + raise + elif self._is_logged_in and e.cause.status == 401: + self._refresh_access_token() + continue + elif e.cause.status == 403: + if self._is_logged_in: + raise ExtractorError( + 'Your account does not have access to this content', expected=True) + self._report_login_error('login_required') + raise def _call_post_api(self, video_id): - path = '' if 'Authorization' in self._API_HEADERS else '/preview' + path = '' if self._is_logged_in else '/preview' return self._call_api(f'/post/v1.0/post-{video_id}{path}?fieldSet=postV1', video_id) def _get_community_id(self, channel): From 943083edcd3df45aaa597a6967bc6c95b720f54c Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Sun, 1 Jun 2025 13:26:33 -0400 Subject: [PATCH 005/103] [ie/adobepass] Fix Philo MSO authentication (#13335) Closes #2603 Authored by: Sipherdrakon --- yt_dlp/extractor/adobepass.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 91c40b32e..8c2d9d934 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1574,18 +1574,29 @@ def extract_redirect_url(html, url=None, fatal=False): post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Philo': # Philo has very unique authentication method - self._download_webpage( - 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({ + self._request_webpage( + 'https://idp.philo.com/auth/init/login_code', video_id, + 'Requesting Philo auth code', data=json.dumps({ 'ident': username, 'device': 'web', 'send_confirm_link': False, 'send_token': True, - })) + 'device_ident': f'web-{uuid.uuid4().hex}', + 'include_login_link': True, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + philo_code = getpass.getpass('Type auth code you have received [Return]: ') - self._download_webpage( - 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code, - })) + self._request_webpage( + 'https://idp.philo.com/auth/update/login_code', video_id, + 'Submitting token', data=json.dumps({'token': philo_code}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Verizon': From 85c8a405e3651dc041b758f4744d4fb3c4c55e01 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:09:47 -0500 Subject: [PATCH 006/103] [ie] Improve JSON LD thumbnails extraction (#13368) Authored by: bashonly, doe1080 Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- test/test_InfoExtractor.py | 14 ++++++++++++++ yt_dlp/extractor/common.py | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c6ff6209a..bc89b2955 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -314,6 +314,20 @@ def test_search_json_ld_realworld(self): }, {}, ), + ( + # test thumbnail_url key without URL scheme + r''' +''', + { + 'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}], + }, + {}, + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d5607296d..1174bd4f5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1675,9 +1675,9 @@ def extract_video_object(e): 'ext': mimetype2ext(e.get('encodingFormat')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnails': [{'url': unescapeHTML(url)} - for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) - if url_or_none(url)], + 'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), { + 'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}), + })), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), # author can be an instance of 'Organization' or 'Person' types. From 148a1eb4c59e127965396c7a6e6acf1979de459e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:18:24 -0500 Subject: [PATCH 007/103] [ie/odnoklassniki] Detect and raise when login is required (#13361) Closes #13360 Authored by: bashonly --- yt_dlp/extractor/odnoklassniki.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index d27d1c3f0..18eba42e6 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -273,6 +273,8 @@ def _extract_desktop(self, url): return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'})) elif error: raise ExtractorError(error, expected=True) + elif '>Access to this video is restricted' in webpage: + self.raise_login_required() player = self._parse_json( unescapeHTML(self._search_regex( @@ -429,7 +431,7 @@ def _extract_mobile(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - f'http://m.ok.ru/video/{video_id}', video_id, + f'https://m.ok.ru/video/{video_id}', video_id, note='Downloading mobile webpage') error = self._search_regex( From c723c4e5e78263df178dbe69844a3d05f3ef9e35 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:20:29 -0500 Subject: [PATCH 008/103] [ie/vimeo] Extract subtitles from player subdomain (#13350) Closes #12198 Authored by: bashonly --- yt_dlp/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 09497b699..b268fad56 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -236,7 +236,7 @@ def _parse_config(self, config, video_id): for tt in (request.get('text_tracks') or []): subtitles.setdefault(tt['lang'], []).append({ 'ext': 'vtt', - 'url': urljoin('https://vimeo.com', tt['url']), + 'url': urljoin('https://player.vimeo.com/', tt['url']), }) thumbnails = [] From e1b6062f8c4a3fa33c65269d48d09ec78de765a2 Mon Sep 17 00:00:00 2001 From: barsnick Date: Tue, 3 Jun 2025 04:29:03 +0200 Subject: [PATCH 009/103] [ie/svt:play] Fix extractor (#13329) Closes #13312 Authored by: barsnick, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/svt.py | 134 +++++++++++--------------------- 2 files changed, 44 insertions(+), 91 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b0c52e0fc..34c98b537 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2017,7 +2017,6 @@ SverigesRadioPublicationIE, ) from .svt import ( - SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 6a72f8d42..a48d7858d 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -6,10 +6,13 @@ determine_ext, dict_get, int_or_none, - traverse_obj, try_get, unified_timestamp, ) +from ..utils.traversal import ( + require, + traverse_obj, +) class SVTBaseIE(InfoExtractor): @@ -97,40 +100,8 @@ def _extract_video(self, video_info, video_id): } -class SVTIE(SVTBaseIE): - _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P\d+)&.*?\barticleId=(?P\d+)' - _EMBED_REGEX = [rf'(?: