From 9639c599f0ed5c15cb7b79b222a70bdf6d0da5e7 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 10:00:24 +0600 Subject: [PATCH 01/12] api/twitter: handle empty body properly --- api/src/processing/services/twitter.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index b4a1d557..0f5f15e6 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -112,7 +112,17 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { // get new token & retry if old one expired if ([403, 429].includes(tweet.status)) { guestToken = await getGuestToken(dispatcher, true); - tweet = await requestTweet(dispatcher, id, guestToken) + if (cookie) { + tweet = await requestTweet(dispatcher, id, guestToken, cookie); + } else { + tweet = await requestTweet(dispatcher, id, guestToken); + } + } + + const contentLength = tweet.headers.get("content-length"); + + if (!contentLength || tweet.headers.get("content-length") === '0') { + return { error: "content.post.unavailable" } } tweet = await tweet.json(); From 69dd37c5c3368765e4190d65ba497588cc96c804 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 10:25:49 +0600 Subject: [PATCH 02/12] api/twitter: handle 403 with no cookie in `requestTweet()` --- api/src/processing/services/twitter.js | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index 0f5f15e6..8fd26ee3 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -89,13 +89,16 @@ const requestTweet = async(dispatcher, tweetId, token, cookie) => { // we might have been missing the `ct0` cookie, retry if (result.status === 403 && result.headers.get('set-cookie')) { - result = await fetch(graphqlTweetURL, { - headers: { - ...headers, - 'x-csrf-token': cookie.values().ct0 - }, - dispatcher - }); + const cookieValues = cookie?.values(); + if (cookieValues?.ct0) { + result = await fetch(graphqlTweetURL, { + headers: { + ...headers, + 'x-csrf-token': cookieValues.ct0 + }, + dispatcher + }); + } } return result From 75b498ed77e3ad917245cb0f9220d8a7b9b7e269 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 12:34:04 +0600 Subject: [PATCH 03/12] api/twitter: add fallback to syndication api it's back yet again, now for good, i suppose --- api/src/processing/services/twitter.js | 126 ++++++++++++++++++------- 1 file changed, 93 insertions(+), 33 deletions(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index 8fd26ee3..36ab2470 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -24,6 +24,11 @@ const badContainerEnd = new Date(1702605600000); function needsFixing(media) { const representativeId = media.source_status_id_str ?? media.id_str; + + // syndication api doesn't have media ids in its response, + // so we just assume it's all good + if (!representativeId) return false; + const mediaTimestamp = new Date( Number((BigInt(representativeId) >> 22n) + TWITTER_EPOCH) ); @@ -53,6 +58,25 @@ const getGuestToken = async (dispatcher, forceReload = false) => { } } +const requestSyndication = async(dispatcher, tweetId) => { + // thank you + // https://github.com/yt-dlp/yt-dlp/blob/05c8023a27dd37c49163c0498bf98e3e3c1cb4b9/yt_dlp/extractor/twitter.py#L1334 + const token = (id) => ((Number(id) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, ''); + const syndicationUrl = new URL("https://cdn.syndication.twimg.com/tweet-result"); + + syndicationUrl.searchParams.set("id", tweetId); + syndicationUrl.searchParams.set("token", token(tweetId)); + + const result = await fetch(syndicationUrl, { + headers: { + "user-agent": genericUserAgent + }, + dispatcher + }); + + return result; +} + const requestTweet = async(dispatcher, tweetId, token, cookie) => { const graphqlTweetURL = new URL(graphqlURL); @@ -87,7 +111,7 @@ const requestTweet = async(dispatcher, tweetId, token, cookie) => { let result = await fetch(graphqlTweetURL, { headers, dispatcher }); updateCookie(cookie, result.headers); - // we might have been missing the `ct0` cookie, retry + // we might have been missing the ct0 cookie, retry if (result.status === 403 && result.headers.get('set-cookie')) { const cookieValues = cookie?.values(); if (cookieValues?.ct0) { @@ -104,12 +128,31 @@ const requestTweet = async(dispatcher, tweetId, token, cookie) => { return result } +const testResponse = (result) => { + const contentLength = result.headers.get("content-length"); + + if (!contentLength || contentLength === '0') { + return false; + } + + if (!result.headers.get("content-type").startsWith("application/json")) { + return false; + } + + return true; +} + export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { const cookie = await getCookie('twitter'); + let syndication = false; + let guestToken = await getGuestToken(dispatcher); if (!guestToken) return { error: "fetch.fail" }; + // for now we assume that graphql api will come back after some time, + // so we try it first + let tweet = await requestTweet(dispatcher, id, guestToken); // get new token & retry if old one expired @@ -122,48 +165,65 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { } } - const contentLength = tweet.headers.get("content-length"); + const testGraphql = testResponse(tweet); - if (!contentLength || tweet.headers.get("content-length") === '0') { - return { error: "content.post.unavailable" } + // if graphql requests fail, then resort to tweet embed api + if (!testGraphql) { + syndication = true; + tweet = await requestSyndication(dispatcher, id); + + const testSyndication = testResponse(tweet); + + // if even syndication request failed, then cry out loud + if (!testSyndication) { + return { error: "fetch.fail" }; + } } tweet = await tweet.json(); - let tweetTypename = tweet?.data?.tweetResult?.result?.__typename; + let media; - if (!tweetTypename) { - return { error: "fetch.empty" } - } + if (!syndication) { + let tweetTypename = tweet?.data?.tweetResult?.result?.__typename; - if (tweetTypename === "TweetUnavailable") { - const reason = tweet?.data?.tweetResult?.result?.reason; - switch(reason) { - case "Protected": - return { error: "content.post.private" } - case "NsfwLoggedOut": - if (cookie) { - tweet = await requestTweet(dispatcher, id, guestToken, cookie); - tweet = await tweet.json(); - tweetTypename = tweet?.data?.tweetResult?.result?.__typename; - } else return { error: "content.post.age" } + if (!tweetTypename) { + return { error: "fetch.empty" } } + + if (tweetTypename === "TweetUnavailable") { + const reason = tweet?.data?.tweetResult?.result?.reason; + switch(reason) { + case "Protected": + return { error: "content.post.private" } + case "NsfwLoggedOut": + if (cookie) { + tweet = await requestTweet(dispatcher, id, guestToken, cookie); + tweet = await tweet.json(); + tweetTypename = tweet?.data?.tweetResult?.result?.__typename; + } else return { error: "content.post.age" } + } + } + + if (!["Tweet", "TweetWithVisibilityResults"].includes(tweetTypename)) { + return { error: "content.post.unavailable" } + } + + let tweetResult = tweet.data.tweetResult.result, + baseTweet = tweetResult.legacy, + repostedTweet = baseTweet?.retweeted_status_result?.result.legacy.extended_entities; + + if (tweetTypename === "TweetWithVisibilityResults") { + baseTweet = tweetResult.tweet.legacy; + repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; + } + + media = (repostedTweet?.media || baseTweet?.extended_entities?.media); + } else { + media = tweet.mediaDetails; } - if (!["Tweet", "TweetWithVisibilityResults"].includes(tweetTypename)) { - return { error: "content.post.unavailable" } - } - - let tweetResult = tweet.data.tweetResult.result, - baseTweet = tweetResult.legacy, - repostedTweet = baseTweet?.retweeted_status_result?.result.legacy.extended_entities; - - if (tweetTypename === "TweetWithVisibilityResults") { - baseTweet = tweetResult.tweet.legacy; - repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; - } - - let media = (repostedTweet?.media || baseTweet?.extended_entities?.media); + if (!media) return { error: "fetch.empty" } // check if there's a video at given index (/video/) if (index >= 0 && index < media?.length) { From 30460586c4eef58d5ce0359bc9bf634c9e5547de Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 12:34:07 +0600 Subject: [PATCH 04/12] api/tests/twitter: add a gif test --- api/src/util/tests/twitter.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/api/src/util/tests/twitter.json b/api/src/util/tests/twitter.json index 4fc5900f..4139e39d 100644 --- a/api/src/util/tests/twitter.json +++ b/api/src/util/tests/twitter.json @@ -169,6 +169,15 @@ "status": "tunnel" } }, + { + "name": "gif", + "url": "https://x.com/thelastromances/status/1897839691212202479", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } + }, { "name": "inexistent post", "url": "https://twitter.com/test/status/9487653", From 69421a11ad00c8e4d6b03bba9c10417144585f71 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 13:58:24 +0600 Subject: [PATCH 05/12] api/twitter: refactor, move graphql media extraction to a function --- api/src/processing/services/twitter.js | 85 +++++++++++++------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index 36ab2470..e96c0dad 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -128,6 +128,43 @@ const requestTweet = async(dispatcher, tweetId, token, cookie) => { return result } +const extractGraphqlMedia = async (tweet, dispatcher, id, guestToken, cookie) => { + let tweetTypename = tweet?.data?.tweetResult?.result?.__typename; + + if (!tweetTypename) { + return { error: "fetch.empty" } + } + + if (tweetTypename === "TweetUnavailable") { + const reason = tweet?.data?.tweetResult?.result?.reason; + switch(reason) { + case "Protected": + return { error: "content.post.private" }; + case "NsfwLoggedOut": + if (cookie) { + tweet = await requestTweet(dispatcher, id, guestToken, cookie); + tweet = await tweet.json(); + tweetTypename = tweet?.data?.tweetResult?.result?.__typename; + } else return { error: "content.post.age" }; + } + } + + if (!["Tweet", "TweetWithVisibilityResults"].includes(tweetTypename)) { + return { error: "content.post.unavailable" } + } + + let tweetResult = tweet.data.tweetResult.result, + baseTweet = tweetResult.legacy, + repostedTweet = baseTweet?.retweeted_status_result?.result.legacy.extended_entities; + + if (tweetTypename === "TweetWithVisibilityResults") { + baseTweet = tweetResult.tweet.legacy; + repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; + } + + media = (repostedTweet?.media || baseTweet?.extended_entities?.media); +} + const testResponse = (result) => { const contentLength = result.headers.get("content-length"); @@ -182,48 +219,12 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { tweet = await tweet.json(); - let media; + const media = + syndication + ? tweet.mediaDetails + : await extractGraphqlMedia(tweet, dispatcher, id, guestToken, cookie); - if (!syndication) { - let tweetTypename = tweet?.data?.tweetResult?.result?.__typename; - - if (!tweetTypename) { - return { error: "fetch.empty" } - } - - if (tweetTypename === "TweetUnavailable") { - const reason = tweet?.data?.tweetResult?.result?.reason; - switch(reason) { - case "Protected": - return { error: "content.post.private" } - case "NsfwLoggedOut": - if (cookie) { - tweet = await requestTweet(dispatcher, id, guestToken, cookie); - tweet = await tweet.json(); - tweetTypename = tweet?.data?.tweetResult?.result?.__typename; - } else return { error: "content.post.age" } - } - } - - if (!["Tweet", "TweetWithVisibilityResults"].includes(tweetTypename)) { - return { error: "content.post.unavailable" } - } - - let tweetResult = tweet.data.tweetResult.result, - baseTweet = tweetResult.legacy, - repostedTweet = baseTweet?.retweeted_status_result?.result.legacy.extended_entities; - - if (tweetTypename === "TweetWithVisibilityResults") { - baseTweet = tweetResult.tweet.legacy; - repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; - } - - media = (repostedTweet?.media || baseTweet?.extended_entities?.media); - } else { - media = tweet.mediaDetails; - } - - if (!media) return { error: "fetch.empty" } + if (!media) return { error: "fetch.empty" }; // check if there's a video at given index (/video/) if (index >= 0 && index < media?.length) { @@ -236,7 +237,7 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { service: "twitter", type: "proxy", url, filename, - }) + }); switch (media?.length) { case undefined: From 9579c3dd08e7cd49acaa1407cc0d87e3da0506b6 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 13:59:59 +0600 Subject: [PATCH 06/12] api/twitter: fix return in extractGraphqlMedia --- api/src/processing/services/twitter.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index e96c0dad..4fb57cc5 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -162,7 +162,7 @@ const extractGraphqlMedia = async (tweet, dispatcher, id, guestToken, cookie) => repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; } - media = (repostedTweet?.media || baseTweet?.extended_entities?.media); + return (repostedTweet?.media || baseTweet?.extended_entities?.media); } const testResponse = (result) => { From 39b6bb2593c9614d88e9cd91327460dd10e5a74c Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 14:01:34 +0600 Subject: [PATCH 07/12] api/twitter: change const to let for media --- api/src/processing/services/twitter.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/processing/services/twitter.js b/api/src/processing/services/twitter.js index 4fb57cc5..a4f4505e 100644 --- a/api/src/processing/services/twitter.js +++ b/api/src/processing/services/twitter.js @@ -219,7 +219,7 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { tweet = await tweet.json(); - const media = + let media = syndication ? tweet.mediaDetails : await extractGraphqlMedia(tweet, dispatcher, id, guestToken, cookie); From 440d039e2cafa12051e7e6c1912a1673ed1eb533 Mon Sep 17 00:00:00 2001 From: wukko Date: Tue, 11 Mar 2025 14:10:01 +0600 Subject: [PATCH 08/12] api/package: bump version to 10.7.8 --- api/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/package.json b/api/package.json index fc5304ed..94ae6310 100644 --- a/api/package.json +++ b/api/package.json @@ -1,7 +1,7 @@ { "name": "@imput/cobalt-api", "description": "save what you love", - "version": "10.7.7", + "version": "10.7.8", "author": "imput", "exports": "./src/cobalt.js", "type": "module", From d00d94f3dcdd22305bbf51c540ec74d254cc241e Mon Sep 17 00:00:00 2001 From: hyperdefined Date: Wed, 12 Mar 2025 07:35:27 -0400 Subject: [PATCH 09/12] api/pinterest: fix video parsing (#1153) fixes #1148 --- api/src/processing/services/pinterest.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index ea4275cb..15566cc4 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -23,7 +23,7 @@ export default async function(o) { const videoLink = [...html.matchAll(videoRegex)] .map(([, link]) => link) - .find(a => a.endsWith('.mp4') && a.includes('720p')); + .find(a => a.endsWith('.mp4')); if (videoLink) return { urls: videoLink, From 2ebe2899be68274d4e76526126a3c6984aa05127 Mon Sep 17 00:00:00 2001 From: wukko Date: Thu, 13 Mar 2025 13:23:03 +0600 Subject: [PATCH 10/12] api/youtube: return an appropriate error if a video is locked behind DRM --- api/src/processing/services/youtube.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/src/processing/services/youtube.js b/api/src/processing/services/youtube.js index a4c54a82..b12d2cee 100644 --- a/api/src/processing/services/youtube.js +++ b/api/src/processing/services/youtube.js @@ -428,6 +428,10 @@ export default async function (o) { } } + if (video?.drm_families || audio?.drm_families) { + return { error: "youtube.drm" }; + } + const fileMetadata = { title: basicInfo.title.trim(), artist: basicInfo.author.replace("- Topic", "").trim() From 5900d6aa4a55735b7ab0f6c594fff91b5c3b56c8 Mon Sep 17 00:00:00 2001 From: wukko Date: Thu, 13 Mar 2025 13:30:05 +0600 Subject: [PATCH 11/12] web/i18n/error: add youtube drm error --- web/i18n/en/error.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/i18n/en/error.json b/web/i18n/en/error.json index 2788d9e4..fe276aa0 100644 --- a/web/i18n/en/error.json +++ b/web/i18n/en/error.json @@ -67,5 +67,6 @@ "api.youtube.token_expired": "couldn't get this video because the youtube token expired and i couldn't refresh it. try again in a few seconds, but if it still doesn't work, tell the instance owner about this error!", "api.youtube.no_hls_streams": "couldn't find any matching HLS streams for this video. try downloading it without HLS!", "api.youtube.api_error": "youtube updated something about its api and i couldn't get any info about this video. try again in a few seconds, but if this issue sticks, please report it!", - "api.youtube.temporary_disabled": "youtube downloading is temporarily disabled due to restrictions from youtube's side. we're already looking for ways to go around them.\n\nwe apologize for the inconvenience and are doing our best to restore this functionality. check cobalt's socials or github for timely updates!" + "api.youtube.temporary_disabled": "youtube downloading is temporarily disabled due to restrictions from youtube's side. we're already looking for ways to go around them.\n\nwe apologize for the inconvenience and are doing our best to restore this functionality. check cobalt's socials or github for timely updates!", + "api.youtube.drm": "this youtube video is protected by widevine DRM, so i can't download it. try a different link!" } From aba23f8655abeb00ef6c6bea9ec6fc2f1ee96731 Mon Sep 17 00:00:00 2001 From: wukko Date: Thu, 13 Mar 2025 14:56:31 +0600 Subject: [PATCH 12/12] api/package: bump version to 10.7.9 --- api/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/package.json b/api/package.json index 94ae6310..1e8f506a 100644 --- a/api/package.json +++ b/api/package.json @@ -1,7 +1,7 @@ { "name": "@imput/cobalt-api", "description": "save what you love", - "version": "10.7.8", + "version": "10.7.9", "author": "imput", "exports": "./src/cobalt.js", "type": "module",