From 1477dcd4e79d6fcfe4911de40ba5ae5c9f995034 Mon Sep 17 00:00:00 2001 From: wukko Date: Wed, 2 Apr 2025 17:35:01 +0600 Subject: [PATCH 1/5] api/tests/instagram: allow the private post test to fail sometimes the visibility status isn't returned --- api/src/util/tests/instagram.json | 1 + 1 file changed, 1 insertion(+) diff --git a/api/src/util/tests/instagram.json b/api/src/util/tests/instagram.json index 1df87b9a..4adcf6f8 100644 --- a/api/src/util/tests/instagram.json +++ b/api/src/util/tests/instagram.json @@ -123,6 +123,7 @@ { "name": "private instagram post", "url": "https://www.instagram.com/p/C5_A1TQNPrYw4c2g9KAUTPUl8RVHqiAdAcOOSY0", + "canFail": true, "params": {}, "expected": { "code": 400, From b1bde25dee2836de615d0d6e598d404789a5ab70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Vuong=20=F0=9F=8D=82?= Date: Sat, 29 Mar 2025 13:29:22 +0700 Subject: [PATCH 2/5] api/reddit: add support for short links --- api/src/processing/service-config.js | 4 +++- api/src/processing/service-patterns.js | 3 ++- api/src/processing/services/reddit.js | 19 ++++++++++++++++++- api/src/processing/url.js | 8 ++++++++ api/src/util/tests/reddit.json | 18 ++++++++++++++++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/api/src/processing/service-config.js b/api/src/processing/service-config.js index 00fa4ebf..87a71c38 100644 --- a/api/src/processing/service-config.js +++ b/api/src/processing/service-config.js @@ -90,7 +90,9 @@ export const services = { "r/u_:user/comments/:id/:title", "r/u_:user/comments/:id/comment/:commentId", - "r/:sub/s/:shareId" + "r/:sub/s/:shareId", + + "video/:shortId", ], subdomains: "*", }, diff --git a/api/src/processing/service-patterns.js b/api/src/processing/service-patterns.js index 8735f123..2412fd46 100644 --- a/api/src/processing/service-patterns.js +++ b/api/src/processing/service-patterns.js @@ -23,7 +23,8 @@ export const testers = { pattern.id?.length <= 16 && !pattern.sub && !pattern.user || (pattern.sub?.length <= 22 && pattern.id?.length <= 16) || (pattern.user?.length <= 22 && pattern.id?.length <= 16) - || (pattern.sub?.length <= 22 && pattern.shareId?.length <= 16), + || (pattern.sub?.length <= 22 && pattern.shareId?.length <= 16) + || (pattern.shortId?.length <= 16), "rutube": pattern => (pattern.id?.length === 32 && pattern.key?.length <= 32) || diff --git a/api/src/processing/services/reddit.js b/api/src/processing/services/reddit.js index 50c78d35..3bd8e88f 100644 --- a/api/src/processing/services/reddit.js +++ b/api/src/processing/services/reddit.js @@ -50,6 +50,24 @@ async function getAccessToken() { export default async function(obj) { let params = obj; + const accessToken = await getAccessToken(); + + if (params.shortId) { + let url = await fetch(`https://www.reddit.com/video/${params.shortId}`, { + headers: { + 'User-Agent': genericUserAgent, + 'Authorization': `Bearer ${accessToken}` + } + }).then(r => r.url).catch(() => {}); + + if (!url) return { error: "fetch.fail" }; + + try { + params = extract(normalizeURL(url)).patternMatch; + } catch (error) { + return { error: "fetch.fail" }; + } + } if (!params.id && params.shareId) { params = await resolveRedirectingURL( @@ -63,7 +81,6 @@ export default async function(obj) { const url = new URL(`https://www.reddit.com/comments/${params.id}.json`); - const accessToken = await getAccessToken(); if (accessToken) url.hostname = 'oauth.reddit.com'; let data = await fetch( diff --git a/api/src/processing/url.js b/api/src/processing/url.js index 82299999..a0f70fed 100644 --- a/api/src/processing/url.js +++ b/api/src/processing/url.js @@ -106,6 +106,14 @@ function aliasURL(url) { url.pathname = `/share/${idPart.slice(-32)}`; } break; + + case "redd": + /* reddit short video links can be treated by changing https://v.redd.it/ + to https://reddit.com/video/.*/ + if (url.hostname === "v.redd.it" && parts.length === 2) { + url = new URL(`https://www.reddit.com/video/${parts[1]}`); + } + break; } return url; diff --git a/api/src/util/tests/reddit.json b/api/src/util/tests/reddit.json index 3afc6126..1dd10ee5 100644 --- a/api/src/util/tests/reddit.json +++ b/api/src/util/tests/reddit.json @@ -56,5 +56,23 @@ "code": 200, "status": "tunnel" } + }, + { + "name": "shortened video link", + "url": "https://v.redd.it/ifg2emt5ck0e1", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } + }, + { + "name": "shortened video link (alternative)", + "url": "https://reddit.com/video/ifg2emt5ck0e1", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } } ] \ No newline at end of file From a6240d0192053c8fef2e2642a14017862bdcaa7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Vuong=20=F0=9F=8D=82?= Date: Sat, 29 Mar 2025 20:11:56 +0700 Subject: [PATCH 3/5] api/url: replace user-agent argument with `headers` in redirect helpers --- api/src/misc/utils.js | 4 ++-- api/src/processing/services/instagram.js | 2 +- api/src/processing/services/reddit.js | 21 ++++++--------------- api/src/processing/url.js | 4 ++-- 4 files changed, 11 insertions(+), 20 deletions(-) diff --git a/api/src/misc/utils.js b/api/src/misc/utils.js index 76d7a3eb..a7c523a4 100644 --- a/api/src/misc/utils.js +++ b/api/src/misc/utils.js @@ -1,11 +1,11 @@ import { request } from 'undici'; const redirectStatuses = new Set([301, 302, 303, 307, 308]); -export async function getRedirectingURL(url, dispatcher, userAgent) { +export async function getRedirectingURL(url, dispatcher, headers) { const location = await request(url, { dispatcher, method: 'HEAD', - headers: { 'user-agent': userAgent } + headers: headers }).then(r => { if (redirectStatuses.has(r.statusCode) && r.headers['location']) { return r.headers['location']; diff --git a/api/src/processing/services/instagram.js b/api/src/processing/services/instagram.js index 9cc7dbdf..0fa25527 100644 --- a/api/src/processing/services/instagram.js +++ b/api/src/processing/services/instagram.js @@ -527,7 +527,7 @@ export default function instagram(obj) { // for some reason instagram decides to return HTML // instead of a redirect when requesting with a normal // browser user-agent - 'curl/7.88.1' + {'User-Agent': 'curl/7.88.1'} ).then(match => instagram({ ...obj, ...match, shareId: undefined diff --git a/api/src/processing/services/reddit.js b/api/src/processing/services/reddit.js index 3bd8e88f..e1eba244 100644 --- a/api/src/processing/services/reddit.js +++ b/api/src/processing/services/reddit.js @@ -53,27 +53,18 @@ export default async function(obj) { const accessToken = await getAccessToken(); if (params.shortId) { - let url = await fetch(`https://www.reddit.com/video/${params.shortId}`, { - headers: { - 'User-Agent': genericUserAgent, - 'Authorization': `Bearer ${accessToken}` - } - }).then(r => r.url).catch(() => {}); - - if (!url) return { error: "fetch.fail" }; - - try { - params = extract(normalizeURL(url)).patternMatch; - } catch (error) { - return { error: "fetch.fail" }; - } + params = await resolveRedirectingURL( + `https://www.reddit.com/video/${params.shortId}`, + obj.dispatcher, + {'User-Agent': genericUserAgent, 'Authorization': `Bearer ${accessToken}`} + ); } if (!params.id && params.shareId) { params = await resolveRedirectingURL( `https://www.reddit.com/r/${params.sub}/s/${params.shareId}`, obj.dispatcher, - genericUserAgent + {'User-Agent': genericUserAgent} ); } diff --git a/api/src/processing/url.js b/api/src/processing/url.js index a0f70fed..86c333f6 100644 --- a/api/src/processing/url.js +++ b/api/src/processing/url.js @@ -239,11 +239,11 @@ export function extract(url) { return { host, patternMatch }; } -export async function resolveRedirectingURL(url, dispatcher, userAgent) { +export async function resolveRedirectingURL(url, dispatcher, headers) { const originalService = getHostIfValid(normalizeURL(url)); if (!originalService) return; - const canonicalURL = await getRedirectingURL(url, dispatcher, userAgent); + const canonicalURL = await getRedirectingURL(url, dispatcher, headers); if (!canonicalURL) return; const { host, patternMatch } = extract(normalizeURL(canonicalURL)); From f5df78ffec4c4b5b37aa73a302bd0535719032f7 Mon Sep 17 00:00:00 2001 From: jj Date: Wed, 2 Apr 2025 12:29:18 +0000 Subject: [PATCH 4/5] api/utils: retry getting redirecting url with fetch() if request() fails --- api/src/misc/utils.js | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/api/src/misc/utils.js b/api/src/misc/utils.js index a7c523a4..62bf6351 100644 --- a/api/src/misc/utils.js +++ b/api/src/misc/utils.js @@ -2,16 +2,25 @@ import { request } from 'undici'; const redirectStatuses = new Set([301, 302, 303, 307, 308]); export async function getRedirectingURL(url, dispatcher, headers) { - const location = await request(url, { + const params = { dispatcher, method: 'HEAD', - headers: headers - }).then(r => { + headers, + redirect: 'manual' + }; + + let location = await request(url, params).then(r => { if (redirectStatuses.has(r.statusCode) && r.headers['location']) { return r.headers['location']; } }).catch(() => null); + location ??= await fetch(url, params).then(r => { + if (redirectStatuses.has(r.status) && r.headers.has('location')) { + return r.headers.get('location'); + } + }).catch(() => null); + return location; } From 07f81c5d1d1e27e8278d712e14f61995e2ba6e72 Mon Sep 17 00:00:00 2001 From: jj Date: Wed, 2 Apr 2025 12:35:45 +0000 Subject: [PATCH 5/5] api/reddit: clean up duplicated headers --- api/src/processing/services/reddit.js | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/api/src/processing/services/reddit.js b/api/src/processing/services/reddit.js index e1eba244..0f506eea 100644 --- a/api/src/processing/services/reddit.js +++ b/api/src/processing/services/reddit.js @@ -51,20 +51,23 @@ async function getAccessToken() { export default async function(obj) { let params = obj; const accessToken = await getAccessToken(); + const headers = { + 'user-agent': genericUserAgent, + authorization: accessToken && `Bearer ${accessToken}`, + accept: 'application/json' + }; if (params.shortId) { params = await resolveRedirectingURL( `https://www.reddit.com/video/${params.shortId}`, - obj.dispatcher, - {'User-Agent': genericUserAgent, 'Authorization': `Bearer ${accessToken}`} + obj.dispatcher, headers ); } if (!params.id && params.shareId) { params = await resolveRedirectingURL( `https://www.reddit.com/r/${params.sub}/s/${params.shareId}`, - obj.dispatcher, - {'User-Agent': genericUserAgent} + obj.dispatcher, headers ); } @@ -75,13 +78,7 @@ export default async function(obj) { if (accessToken) url.hostname = 'oauth.reddit.com'; let data = await fetch( - url, { - headers: { - 'User-Agent': genericUserAgent, - accept: 'application/json', - authorization: accessToken && `Bearer ${accessToken}` - } - } + url, { headers } ).then(r => r.json()).catch(() => {}); if (!data || !Array.isArray(data)) {