diff --git a/.github/workflows/docker-develop.yml b/.github/workflows/docker-develop.yml index e89eeae0..43bfc8d1 100644 --- a/.github/workflows/docker-develop.yml +++ b/.github/workflows/docker-develop.yml @@ -1,4 +1,4 @@ -name: Build Docker development image +name: Build development Docker image on: workflow_dispatch: diff --git a/.github/workflows/docker-staging.yml b/.github/workflows/docker-staging.yml new file mode 100644 index 00000000..572a9855 --- /dev/null +++ b/.github/workflows/docker-staging.yml @@ -0,0 +1,55 @@ +name: Build staging Docker image + +on: + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Get release metadata + id: release-meta + run: | + version=$(cat package.json | jq -r .version) + echo "commit_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "version=$version" >> $GITHUB_OUTPUT + echo "major_version=$(echo "$version" | cut -d. -f1)" >> $GITHUB_OUTPUT + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + tags: type=raw,value=staging + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ platforms: linux/amd64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index e25378b3..914edf2f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -1,4 +1,4 @@ -name: Build Docker image +name: Build release Docker image on: workflow_dispatch: diff --git a/README.md b/README.md index 9a5a05e7..795eb7e3 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,10 @@ this monorepo includes source code for api, frontend, and related packages: - [packages tree](/packages/) it also includes documentation in the [docs tree](/docs/): -- [cobalt api documentation](/docs/api.md) - [how to run a cobalt instance](/docs/run-an-instance.md) -- [how to protect a cobalt instance](/docs/protect-an-instance.md) (recommended if you host a public instance) - -### thank you -cobalt is sponsored by [royalehosting.net](https://royalehosting.net/?partner=cobalt). a part of our infrastructure is hosted on their network. we really appreciate their kindness and support! +- [how to protect a cobalt instance](/docs/protect-an-instance.md) +- [cobalt api instance environment variables](/docs/api-env-variables.md) +- [cobalt api documentation](/docs/api.md) ### ethics cobalt is a tool that makes downloading public content easier. it takes **zero liability**. @@ -55,6 +53,9 @@ same content can be downloaded via dev tools of any modern web browser. ### contributing if you're considering contributing to cobalt, first of all, thank you! check the [contribution guidelines here](/CONTRIBUTING.md) before getting started, they'll help you do your best right away. +### thank you +cobalt is sponsored by [royalehosting.net](https://royalehosting.net/?partner=cobalt). a part of our infrastructure is hosted on their network. we really appreciate their kindness and support! 
+ ### licenses for relevant licensing information, see the [api](api/README.md) and [web](web/README.md) READMEs. unless specified otherwise, the remainder of this repository is licensed under [AGPL-3.0](LICENSE). diff --git a/api/package.json b/api/package.json index 9e58ef16..2b58aee3 100644 --- a/api/package.json +++ b/api/package.json @@ -1,7 +1,7 @@ { "name": "@imput/cobalt-api", "description": "save what you love", - "version": "10.7.5", + "version": "10.9.1", "author": "imput", "exports": "./src/cobalt.js", "type": "module", @@ -11,7 +11,6 @@ "scripts": { "start": "node src/cobalt", "test": "node src/util/test", - "token:youtube": "node src/util/generate-youtube-tokens", "token:jwt": "node src/util/generate-jwt-secret" }, "repository": { @@ -39,7 +38,7 @@ "set-cookie-parser": "2.6.0", "undici": "^5.19.1", "url-pattern": "1.0.3", - "youtubei.js": "^13.0.0", + "youtubei.js": "^13.3.0", "zod": "^3.23.8" }, "optionalDependencies": { diff --git a/api/src/config.js b/api/src/config.js index 191e8441..bb4994c0 100644 --- a/api/src/config.js +++ b/api/src/config.js @@ -1,3 +1,4 @@ +import { Constants } from "youtubei.js"; import { getVersion } from "@imput/version-info"; import { services } from "./processing/service-config.js"; import { supportsReusePort } from "./misc/cluster.js"; @@ -27,6 +28,9 @@ const env = { rateLimitWindow: (process.env.RATELIMIT_WINDOW && parseInt(process.env.RATELIMIT_WINDOW)) || 60, rateLimitMax: (process.env.RATELIMIT_MAX && parseInt(process.env.RATELIMIT_MAX)) || 20, + sessionRateLimitWindow: (process.env.SESSION_RATELIMIT_WINDOW && parseInt(process.env.SESSION_RATELIMIT_WINDOW)) || 60, + sessionRateLimit: (process.env.SESSION_RATELIMIT && parseInt(process.env.SESSION_RATELIMIT)) || 10, + durationLimit: (process.env.DURATION_LIMIT && parseInt(process.env.DURATION_LIMIT)) || 10800, streamLifespan: (process.env.TUNNEL_LIFESPAN && parseInt(process.env.TUNNEL_LIFESPAN)) || 90, @@ -52,6 +56,11 @@ const env = { keyReloadInterval: 900, 
enabledServices, + + customInnertubeClient: process.env.CUSTOM_INNERTUBE_CLIENT, + ytSessionServer: process.env.YOUTUBE_SESSION_SERVER, + ytSessionReloadInterval: 300, + ytSessionInnertubeClient: process.env.YOUTUBE_SESSION_INNERTUBE_CLIENT, } const genericUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"; @@ -74,6 +83,12 @@ if (env.instanceCount > 1 && !env.redisURL) { throw new Error('SO_REUSEPORT is not supported'); } +if (env.customInnertubeClient && !Constants.SUPPORTED_CLIENTS.includes(env.customInnertubeClient)) { + console.error("CUSTOM_INNERTUBE_CLIENT is invalid. Provided client is not supported."); + console.error(`Supported clients are: ${Constants.SUPPORTED_CLIENTS.join(', ')}\n`); + throw new Error("Invalid CUSTOM_INNERTUBE_CLIENT"); +} + export { env, genericUserAgent, diff --git a/api/src/core/api.js b/api/src/core/api.js index e4d3dfcf..f1b54422 100644 --- a/api/src/core/api.js +++ b/api/src/core/api.js @@ -18,8 +18,10 @@ import { verifyTurnstileToken } from "../security/turnstile.js"; import { friendlyServiceName } from "../processing/service-alias.js"; import { verifyStream, getInternalStream } from "../stream/manage.js"; import { createResponse, normalizeRequest, getIP } from "../processing/request.js"; + import * as APIKeys from "../security/api-keys.js"; import * as Cookies from "../processing/cookie/manager.js"; +import * as YouTubeSession from "../processing/helpers/youtube-session.js"; const git = { branch: await getBranch(), @@ -72,8 +74,8 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { const keyGenerator = (req) => hashHmac(getIP(req), 'rate').toString('base64url'); const sessionLimiter = rateLimit({ - windowMs: 60000, - limit: 10, + windowMs: env.sessionRateLimitWindow * 1000, + limit: env.sessionRateLimit, standardHeaders: 'draft-6', legacyHeaders: false, keyGenerator, @@ -89,7 +91,7 @@ export const runAPI = async (express, app, 
__dirname, isPrimary = true) => { keyGenerator: req => req.rateLimitKey || keyGenerator(req), store: await createStore('api'), handler: handleRateExceeded - }) + }); const apiTunnelLimiter = rateLimit({ windowMs: env.rateLimitWindow * 1000, @@ -101,7 +103,7 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { handler: (_, res) => { return res.sendStatus(429) } - }) + }); app.set('trust proxy', ['loopback', 'uniquelocal']); @@ -173,7 +175,7 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { return fail(res, "error.api.auth.jwt.invalid"); } - if (!jwt.verify(token)) { + if (!jwt.verify(token, getIP(req, 32))) { return fail(res, "error.api.auth.jwt.invalid"); } @@ -219,7 +221,7 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { } try { - res.json(jwt.generate()); + res.json(jwt.generate(getIP(req, 32))); } catch { return fail(res, "error.api.generic"); } @@ -354,7 +356,7 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { }, () => { if (isPrimary) { console.log(`\n` + - Bright(Cyan("cobalt ")) + Bright("API ^ω⁠^") + "\n" + + Bright(Cyan("cobalt ")) + Bright("API ^ω^") + "\n" + "~~~~~~\n" + Bright("version: ") + version + "\n" + @@ -376,6 +378,10 @@ export const runAPI = async (express, app, __dirname, isPrimary = true) => { if (env.cookiePath) { Cookies.setup(env.cookiePath); } + + if (env.ytSessionServer) { + YouTubeSession.setup(); + } }); if (isCluster) { diff --git a/api/src/misc/utils.js b/api/src/misc/utils.js index 76d7a3eb..62bf6351 100644 --- a/api/src/misc/utils.js +++ b/api/src/misc/utils.js @@ -1,17 +1,26 @@ import { request } from 'undici'; const redirectStatuses = new Set([301, 302, 303, 307, 308]); -export async function getRedirectingURL(url, dispatcher, userAgent) { - const location = await request(url, { +export async function getRedirectingURL(url, dispatcher, headers) { + const params = { dispatcher, method: 'HEAD', - headers: { 
'user-agent': userAgent } - }).then(r => { + headers, + redirect: 'manual' + }; + + let location = await request(url, params).then(r => { if (redirectStatuses.has(r.statusCode) && r.headers['location']) { return r.headers['location']; } }).catch(() => null); + location ??= await fetch(url, params).then(r => { + if (redirectStatuses.has(r.status) && r.headers.has('location')) { + return r.headers.get('location'); + } + }).catch(() => null); + return location; } diff --git a/api/src/processing/cookie/manager.js b/api/src/processing/cookie/manager.js index 25f41c2c..9e23374b 100644 --- a/api/src/processing/cookie/manager.js +++ b/api/src/processing/cookie/manager.js @@ -13,7 +13,6 @@ const VALID_SERVICES = new Set([ 'reddit', 'twitter', 'youtube', - 'youtube_oauth' ]); const invalidCookies = {}; diff --git a/api/src/processing/helpers/youtube-session.js b/api/src/processing/helpers/youtube-session.js new file mode 100644 index 00000000..85f1a6e1 --- /dev/null +++ b/api/src/processing/helpers/youtube-session.js @@ -0,0 +1,81 @@ +import * as cluster from "../../misc/cluster.js"; + +import { Agent } from "undici"; +import { env } from "../../config.js"; +import { Green, Yellow } from "../../misc/console-text.js"; + +const defaultAgent = new Agent(); + +let session; + +const validateSession = (sessionResponse) => { + if (!sessionResponse.potoken) { + throw "no poToken in session response"; + } + + if (!sessionResponse.visitor_data) { + throw "no visitor_data in session response"; + } + + if (!sessionResponse.updated) { + throw "no last update timestamp in session response"; + } + + // https://github.com/iv-org/youtube-trusted-session-generator/blob/c2dfe3f/potoken_generator/main.py#L25 + if (sessionResponse.potoken.length < 160) { + console.error(`${Yellow('[!]')} poToken is too short and might not work (${new Date().toISOString()})`); + } +} + +const updateSession = (newSession) => { + session = newSession; +} + +const loadSession = async () => { + const sessionServerUrl 
= new URL(env.ytSessionServer); + sessionServerUrl.pathname = "/token"; + + const newSession = await fetch( + sessionServerUrl, + { dispatcher: defaultAgent } + ).then(a => a.json()); + + validateSession(newSession); + + if (!session || session.updated < newSession?.updated) { + cluster.broadcast({ youtube_session: newSession }); + updateSession(newSession); + } +} + +const wrapLoad = (initial = false) => { + loadSession() + .then(() => { + if (initial) { + console.log(`${Green('[✓]')} poToken & visitor_data loaded successfully!`); + } + }) + .catch((e) => { + console.error(`${Yellow('[!]')} Failed loading poToken & visitor_data at ${new Date().toISOString()}.`); + console.error('Error:', e); + }) +} + +export const getYouTubeSession = () => { + return session; +} + +export const setup = () => { + if (cluster.isPrimary) { + wrapLoad(true); + if (env.ytSessionReloadInterval > 0) { + setInterval(wrapLoad, env.ytSessionReloadInterval * 1000); + } + } else if (cluster.isWorker) { + process.on('message', (message) => { + if ('youtube_session' in message) { + updateSession(message.youtube_session); + } + }); + } +} diff --git a/api/src/processing/match.js b/api/src/processing/match.js index e2d6aa07..ee4fdc1a 100644 --- a/api/src/processing/match.js +++ b/api/src/processing/match.js @@ -109,7 +109,7 @@ export default async function({ host, patternMatch, params }) { } if (url.hostname === "music.youtube.com" || isAudioOnly) { - fetchInfo.quality = "max"; + fetchInfo.quality = "1080"; fetchInfo.format = "vp9"; fetchInfo.isAudioOnly = true; fetchInfo.isAudioMuted = false; diff --git a/api/src/processing/request.js b/api/src/processing/request.js index d512bfe5..61bf027b 100644 --- a/api/src/processing/request.js +++ b/api/src/processing/request.js @@ -82,14 +82,13 @@ export function normalizeRequest(request) { )); } -export function getIP(req) { +export function getIP(req, prefix = 56) { const strippedIP = req.ip.replace(/^::ffff:/, ''); const ip = ipaddr.parse(strippedIP); 
if (ip.kind() === 'ipv4') { return strippedIP; } - const prefix = 56; const v6Bytes = ip.toByteArray(); v6Bytes.fill(0, prefix / 8); diff --git a/api/src/processing/service-config.js b/api/src/processing/service-config.js index 1dc8bf30..87a71c38 100644 --- a/api/src/processing/service-config.js +++ b/api/src/processing/service-config.js @@ -90,7 +90,9 @@ export const services = { "r/u_:user/comments/:id/:title", "r/u_:user/comments/:id/comment/:commentId", - "r/:sub/s/:shareId" + "r/:sub/s/:shareId", + + "video/:shortId", ], subdomains: "*", }, @@ -136,12 +138,13 @@ export const services = { tiktok: { patterns: [ ":user/video/:postId", + "i18n/share/video/:postId", ":shortLink", "t/:shortLink", ":user/photo/:postId", "v/:postId.html" ], - subdomains: ["vt", "vm", "m"], + subdomains: ["vt", "vm", "m", "t"], }, tumblr: { patterns: [ diff --git a/api/src/processing/service-patterns.js b/api/src/processing/service-patterns.js index 8735f123..2412fd46 100644 --- a/api/src/processing/service-patterns.js +++ b/api/src/processing/service-patterns.js @@ -23,7 +23,8 @@ export const testers = { pattern.id?.length <= 16 && !pattern.sub && !pattern.user || (pattern.sub?.length <= 22 && pattern.id?.length <= 16) || (pattern.user?.length <= 22 && pattern.id?.length <= 16) - || (pattern.sub?.length <= 22 && pattern.shareId?.length <= 16), + || (pattern.sub?.length <= 22 && pattern.shareId?.length <= 16) + || (pattern.shortId?.length <= 16), "rutube": pattern => (pattern.id?.length === 32 && pattern.key?.length <= 32) || diff --git a/api/src/processing/services/instagram.js b/api/src/processing/services/instagram.js index 9cc7dbdf..0fa25527 100644 --- a/api/src/processing/services/instagram.js +++ b/api/src/processing/services/instagram.js @@ -527,7 +527,7 @@ export default function instagram(obj) { // for some reason instagram decides to return HTML // instead of a redirect when requesting with a normal // browser user-agent - 'curl/7.88.1' + {'User-Agent': 'curl/7.88.1'} 
).then(match => instagram({ ...obj, ...match, shareId: undefined diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index ea4275cb..15566cc4 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -23,7 +23,7 @@ export default async function(o) { const videoLink = [...html.matchAll(videoRegex)] .map(([, link]) => link) - .find(a => a.endsWith('.mp4') && a.includes('720p')); + .find(a => a.endsWith('.mp4')); if (videoLink) return { urls: videoLink, diff --git a/api/src/processing/services/reddit.js b/api/src/processing/services/reddit.js index 50c78d35..0f506eea 100644 --- a/api/src/processing/services/reddit.js +++ b/api/src/processing/services/reddit.js @@ -50,12 +50,24 @@ async function getAccessToken() { export default async function(obj) { let params = obj; + const accessToken = await getAccessToken(); + const headers = { + 'user-agent': genericUserAgent, + authorization: accessToken && `Bearer ${accessToken}`, + accept: 'application/json' + }; + + if (params.shortId) { + params = await resolveRedirectingURL( + `https://www.reddit.com/video/${params.shortId}`, + obj.dispatcher, headers + ); + } if (!params.id && params.shareId) { params = await resolveRedirectingURL( `https://www.reddit.com/r/${params.sub}/s/${params.shareId}`, - obj.dispatcher, - genericUserAgent + obj.dispatcher, headers ); } @@ -63,17 +75,10 @@ export default async function(obj) { const url = new URL(`https://www.reddit.com/comments/${params.id}.json`); - const accessToken = await getAccessToken(); if (accessToken) url.hostname = 'oauth.reddit.com'; let data = await fetch( - url, { - headers: { - 'User-Agent': genericUserAgent, - accept: 'application/json', - authorization: accessToken && `Bearer ${accessToken}` - } - } + url, { headers } ).then(r => r.json()).catch(() => {}); if (!data || !Array.isArray(data)) { diff --git a/api/src/processing/services/tiktok.js 
b/api/src/processing/services/tiktok.js index 6fec01d8..93e07c50 100644 --- a/api/src/processing/services/tiktok.js +++ b/api/src/processing/services/tiktok.js @@ -1,6 +1,6 @@ import Cookie from "../cookie/cookie.js"; -import { extract } from "../url.js"; +import { extract, normalizeURL } from "../url.js"; import { genericUserAgent } from "../../config.js"; import { updateCookie } from "../cookie/manager.js"; import { createStream } from "../../stream/manage.js"; @@ -23,8 +23,8 @@ export default async function(obj) { if (html.startsWith('> 22n) + TWITTER_EPOCH) ); @@ -53,6 +58,25 @@ const getGuestToken = async (dispatcher, forceReload = false) => { } } +const requestSyndication = async(dispatcher, tweetId) => { + // thank you + // https://github.com/yt-dlp/yt-dlp/blob/05c8023a27dd37c49163c0498bf98e3e3c1cb4b9/yt_dlp/extractor/twitter.py#L1334 + const token = (id) => ((Number(id) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, ''); + const syndicationUrl = new URL("https://cdn.syndication.twimg.com/tweet-result"); + + syndicationUrl.searchParams.set("id", tweetId); + syndicationUrl.searchParams.set("token", token(tweetId)); + + const result = await fetch(syndicationUrl, { + headers: { + "user-agent": genericUserAgent + }, + dispatcher + }); + + return result; +} + const requestTweet = async(dispatcher, tweetId, token, cookie) => { const graphqlTweetURL = new URL(graphqlURL); @@ -87,36 +111,24 @@ const requestTweet = async(dispatcher, tweetId, token, cookie) => { let result = await fetch(graphqlTweetURL, { headers, dispatcher }); updateCookie(cookie, result.headers); - // we might have been missing the `ct0` cookie, retry + // we might have been missing the ct0 cookie, retry if (result.status === 403 && result.headers.get('set-cookie')) { - result = await fetch(graphqlTweetURL, { - headers: { - ...headers, - 'x-csrf-token': cookie.values().ct0 - }, - dispatcher - }); + const cookieValues = cookie?.values(); + if (cookieValues?.ct0) { + result = await 
fetch(graphqlTweetURL, { + headers: { + ...headers, + 'x-csrf-token': cookieValues.ct0 + }, + dispatcher + }); + } } return result } -export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { - const cookie = await getCookie('twitter'); - - let guestToken = await getGuestToken(dispatcher); - if (!guestToken) return { error: "fetch.fail" }; - - let tweet = await requestTweet(dispatcher, id, guestToken); - - // get new token & retry if old one expired - if ([403, 429].includes(tweet.status)) { - guestToken = await getGuestToken(dispatcher, true); - tweet = await requestTweet(dispatcher, id, guestToken) - } - - tweet = await tweet.json(); - +const extractGraphqlMedia = async (tweet, dispatcher, id, guestToken, cookie) => { let tweetTypename = tweet?.data?.tweetResult?.result?.__typename; if (!tweetTypename) { @@ -127,13 +139,13 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { const reason = tweet?.data?.tweetResult?.result?.reason; switch(reason) { case "Protected": - return { error: "content.post.private" } + return { error: "content.post.private" }; case "NsfwLoggedOut": if (cookie) { tweet = await requestTweet(dispatcher, id, guestToken, cookie); tweet = await tweet.json(); tweetTypename = tweet?.data?.tweetResult?.result?.__typename; - } else return { error: "content.post.age" } + } else return { error: "content.post.age" }; } } @@ -150,7 +162,69 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { repostedTweet = baseTweet?.retweeted_status_result?.result.tweet.legacy.extended_entities; } - let media = (repostedTweet?.media || baseTweet?.extended_entities?.media); + return (repostedTweet?.media || baseTweet?.extended_entities?.media); +} + +const testResponse = (result) => { + const contentLength = result.headers.get("content-length"); + + if (!contentLength || contentLength === '0') { + return false; + } + + if (!result.headers.get("content-type").startsWith("application/json")) { 
+ return false; + } + + return true; +} + +export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { + const cookie = await getCookie('twitter'); + + let syndication = false; + + let guestToken = await getGuestToken(dispatcher); + if (!guestToken) return { error: "fetch.fail" }; + + // for now we assume that graphql api will come back after some time, + // so we try it first + + let tweet = await requestTweet(dispatcher, id, guestToken); + + // get new token & retry if old one expired + if ([403, 429].includes(tweet.status)) { + guestToken = await getGuestToken(dispatcher, true); + if (cookie) { + tweet = await requestTweet(dispatcher, id, guestToken, cookie); + } else { + tweet = await requestTweet(dispatcher, id, guestToken); + } + } + + const testGraphql = testResponse(tweet); + + // if graphql requests fail, then resort to tweet embed api + if (!testGraphql) { + syndication = true; + tweet = await requestSyndication(dispatcher, id); + + const testSyndication = testResponse(tweet); + + // if even syndication request failed, then cry out loud + if (!testSyndication) { + return { error: "fetch.fail" }; + } + } + + tweet = await tweet.json(); + + let media = + syndication + ? 
tweet.mediaDetails + : await extractGraphqlMedia(tweet, dispatcher, id, guestToken, cookie); + + if (!media) return { error: "fetch.empty" }; // check if there's a video at given index (/video/) if (index >= 0 && index < media?.length) { @@ -163,7 +237,7 @@ export default async function({ id, index, toGif, dispatcher, alwaysProxy }) { service: "twitter", type: "proxy", url, filename, - }) + }); switch (media?.length) { case undefined: diff --git a/api/src/processing/services/youtube.js b/api/src/processing/services/youtube.js index 5d655318..e1cbf018 100644 --- a/api/src/processing/services/youtube.js +++ b/api/src/processing/services/youtube.js @@ -4,7 +4,8 @@ import { fetch } from "undici"; import { Innertube, Session } from "youtubei.js"; import { env } from "../../config.js"; -import { getCookie, updateCookieValues } from "../cookie/manager.js"; +import { getCookie } from "../cookie/manager.js"; +import { getYouTubeSession } from "../helpers/youtube-session.js"; const PLAYER_REFRESH_PERIOD = 1000 * 60 * 15; // ms @@ -41,43 +42,30 @@ const hlsCodecList = { } } +const clientsWithNoCipher = ['IOS', 'ANDROID', 'YTSTUDIO_ANDROID', 'YTMUSIC_ANDROID']; + const videoQualities = [144, 240, 360, 480, 720, 1080, 1440, 2160, 4320]; -const transformSessionData = (cookie) => { - if (!cookie) - return; - - const values = { ...cookie.values() }; - const REQUIRED_VALUES = ['access_token', 'refresh_token']; - - if (REQUIRED_VALUES.some(x => typeof values[x] !== 'string')) { - return; - } - - if (values.expires) { - values.expiry_date = values.expires; - delete values.expires; - } else if (!values.expiry_date) { - return; - } - - return values; -} - -const cloneInnertube = async (customFetch) => { +const cloneInnertube = async (customFetch, useSession) => { const shouldRefreshPlayer = lastRefreshedAt + PLAYER_REFRESH_PERIOD < new Date(); const rawCookie = getCookie('youtube'); - const rawCookieValues = rawCookie?.values(); const cookie = rawCookie?.toString(); + const 
sessionTokens = getYouTubeSession(); + const retrieve_player = Boolean(sessionTokens || cookie); + + if (useSession && env.ytSessionServer && !sessionTokens?.potoken) { + throw "no_session_tokens"; + } + if (!innertube || shouldRefreshPlayer) { innertube = await Innertube.create({ fetch: customFetch, - retrieve_player: !!cookie, + retrieve_player, cookie, - po_token: rawCookieValues?.po_token, - visitor_data: rawCookieValues?.visitor_data, + po_token: useSession ? sessionTokens?.potoken : undefined, + visitor_data: useSession ? sessionTokens?.visitor_data : undefined, }); lastRefreshedAt = +new Date(); } @@ -93,73 +81,62 @@ const cloneInnertube = async (customFetch) => { innertube.session.cache ); - const oauthCookie = getCookie('youtube_oauth'); - const oauthData = transformSessionData(oauthCookie); - - if (!session.logged_in && oauthData) { - await session.oauth.init(oauthData); - session.logged_in = true; - } - - if (session.logged_in && oauthData) { - if (session.oauth.shouldRefreshToken()) { - await session.oauth.refreshAccessToken(); - } - - const cookieValues = oauthCookie.values(); - const oldExpiry = new Date(cookieValues.expiry_date); - const newExpiry = new Date(session.oauth.oauth2_tokens.expiry_date); - - if (oldExpiry.getTime() !== newExpiry.getTime()) { - updateCookieValues(oauthCookie, { - ...session.oauth.client_id, - ...session.oauth.oauth2_tokens, - expiry_date: newExpiry.toISOString() - }); - } - } - const yt = new Innertube(session); return yt; } export default async function (o) { + const quality = o.quality === "max" ? 9000 : Number(o.quality); + + let useHLS = o.youtubeHLS; + let innertubeClient = o.innertubeClient || env.customInnertubeClient || "IOS"; + + // HLS playlists from the iOS client don't contain the av1 video format. 
+ if (useHLS && o.format === "av1") { + useHLS = false; + } + + if (useHLS) { + innertubeClient = "IOS"; + } + + // iOS client doesn't have adaptive formats of resolution >1080p, + // so we use the WEB_EMBEDDED client instead for those cases + const useSession = + env.ytSessionServer && ( + ( + !useHLS + && innertubeClient === "IOS" + && ( + (quality > 1080 && o.format !== "h264") + || (quality > 1080 && o.format !== "vp9") + ) + ) + ); + + if (useSession) { + innertubeClient = env.ytSessionInnertubeClient || "WEB_EMBEDDED"; + } + let yt; try { yt = await cloneInnertube( (input, init) => fetch(input, { ...init, dispatcher: o.dispatcher - }) + }), + useSession ); } catch (e) { - if (e.message?.endsWith("decipher algorithm")) { + if (e === "no_session_tokens") { + return { error: "youtube.no_session_tokens" }; + } else if (e.message?.endsWith("decipher algorithm")) { return { error: "youtube.decipher" } } else if (e.message?.includes("refresh access token")) { return { error: "youtube.token_expired" } } else throw e; } - const cookie = getCookie('youtube')?.toString(); - - let useHLS = o.youtubeHLS; - - // HLS playlists don't contain the av1 video format, at least with the iOS client - if (useHLS && o.format === "av1") { - useHLS = false; - } - - let innertubeClient = o.innertubeClient || "ANDROID"; - - if (cookie) { - useHLS = false; - innertubeClient = "WEB_EMBEDDED"; - } - - if (useHLS) { - innertubeClient = "IOS"; - } - let info; try { info = await yt.getBasicInfo(o.id, innertubeClient); @@ -238,8 +215,6 @@ export default async function (o) { } } - const quality = o.quality === "max" ? 
9000 : Number(o.quality); - const normalizeQuality = res => { const shortestSide = Math.min(res.height, res.width); return videoQualities.find(qual => qual >= shortestSide); @@ -428,6 +403,10 @@ export default async function (o) { } } + if (video?.drm_families || audio?.drm_families) { + return { error: "youtube.drm" }; + } + const fileMetadata = { title: basicInfo.title.trim(), artist: basicInfo.author.replace("- Topic", "").trim() @@ -474,7 +453,7 @@ export default async function (o) { urls = audio.uri; } - if (innertubeClient === "WEB_EMBEDDED" && innertube) { + if (!clientsWithNoCipher.includes(innertubeClient) && innertube) { urls = audio.decipher(innertube.session.player); } @@ -509,7 +488,7 @@ export default async function (o) { filenameAttributes.resolution = `${video.width}x${video.height}`; filenameAttributes.extension = codecList[codec].container; - if (innertubeClient === "WEB_EMBEDDED" && innertube) { + if (!clientsWithNoCipher.includes(innertubeClient) && innertube) { video = video.decipher(innertube.session.player); audio = audio.decipher(innertube.session.player); } else { diff --git a/api/src/processing/url.js b/api/src/processing/url.js index 82299999..86c333f6 100644 --- a/api/src/processing/url.js +++ b/api/src/processing/url.js @@ -106,6 +106,14 @@ function aliasURL(url) { url.pathname = `/share/${idPart.slice(-32)}`; } break; + + case "redd": + /* reddit short video links can be treated by changing https://v.redd.it/ + to https://reddit.com/video/.*/ + if (url.hostname === "v.redd.it" && parts.length === 2) { + url = new URL(`https://www.reddit.com/video/${parts[1]}`); + } + break; } return url; @@ -231,11 +239,11 @@ export function extract(url) { return { host, patternMatch }; } -export async function resolveRedirectingURL(url, dispatcher, userAgent) { +export async function resolveRedirectingURL(url, dispatcher, headers) { const originalService = getHostIfValid(normalizeURL(url)); if (!originalService) return; - const canonicalURL = await 
getRedirectingURL(url, dispatcher, userAgent); + const canonicalURL = await getRedirectingURL(url, dispatcher, headers); if (!canonicalURL) return; const { host, patternMatch } = extract(normalizeURL(canonicalURL)); diff --git a/api/src/security/jwt.js b/api/src/security/jwt.js index 91d6cf9e..557f0b68 100644 --- a/api/src/security/jwt.js +++ b/api/src/security/jwt.js @@ -6,12 +6,19 @@ import { env } from "../config.js"; const toBase64URL = (b) => Buffer.from(b).toString("base64url"); const fromBase64URL = (b) => Buffer.from(b, "base64url").toString(); -const makeHmac = (header, payload) => - createHmac("sha256", env.jwtSecret) - .update(`${header}.${payload}`) - .digest("base64url"); +const makeHmac = (data) => { + return createHmac("sha256", env.jwtSecret) + .update(data) + .digest("base64url"); +} -const generate = () => { +const sign = (header, payload) => + makeHmac(`${header}.${payload}`); + +const getIPHash = (ip) => + makeHmac(ip).slice(0, 8); + +const generate = (ip) => { const exp = Math.floor(new Date().getTime() / 1000) + env.jwtLifetime; const header = toBase64URL(JSON.stringify({ @@ -21,10 +28,11 @@ const generate = () => { const payload = toBase64URL(JSON.stringify({ jti: nanoid(8), + sub: getIPHash(ip), exp, })); - const signature = makeHmac(header, payload); + const signature = sign(header, payload); return { token: `${header}.${payload}.${signature}`, @@ -32,7 +40,7 @@ const generate = () => { }; } -const verify = (jwt) => { +const verify = (jwt, ip) => { const [header, payload, signature] = jwt.split(".", 3); const timestamp = Math.floor(new Date().getTime() / 1000); @@ -40,17 +48,16 @@ const verify = (jwt) => { return false; } - const verifySignature = makeHmac(header, payload); + const verifySignature = sign(header, payload); if (verifySignature !== signature) { return false; } - if (timestamp >= JSON.parse(fromBase64URL(payload)).exp) { - return false; - } + const data = JSON.parse(fromBase64URL(payload)); - return true; + return getIPHash(ip) 
=== data.sub + && timestamp <= data.exp; } export default { diff --git a/api/src/stream/internal.js b/api/src/stream/internal.js index f55a1b91..8c97c656 100644 --- a/api/src/stream/internal.js +++ b/api/src/stream/internal.js @@ -53,14 +53,25 @@ async function handleYoutubeStream(streamInfo, res) { const cleanup = () => (res.end(), closeRequest(streamInfo.controller)); try { - const req = await fetch(streamInfo.url, { - headers: getHeaders('youtube'), - method: 'HEAD', - dispatcher: streamInfo.dispatcher, - signal - }); + let req, attempts = 3; + while (attempts--) { + req = await fetch(streamInfo.url, { + headers: getHeaders('youtube'), + method: 'HEAD', + dispatcher: streamInfo.dispatcher, + signal + }); + + streamInfo.url = req.url; + if (req.status === 403 && streamInfo.transplant) { + try { + await streamInfo.transplant(streamInfo.dispatcher); + } catch { + break; + } + } else break; + } - streamInfo.url = req.url; const size = BigInt(req.headers.get('content-length')); if (req.status !== 200 || !size) { diff --git a/api/src/util/generate-youtube-tokens.js b/api/src/util/generate-youtube-tokens.js deleted file mode 100644 index 5585705a..00000000 --- a/api/src/util/generate-youtube-tokens.js +++ /dev/null @@ -1,38 +0,0 @@ -import { Innertube } from 'youtubei.js'; -import { Red } from '../misc/console-text.js' - -const bail = (...msg) => { - console.error(...msg); - throw new Error(msg); -}; - -const tube = await Innertube.create(); - -tube.session.once( - 'auth-pending', - ({ verification_url, user_code }) => { - console.log(`${Red('[!]')} The token generated by this script is sensitive and you should not share it with anyone!`); - console.log(` By using this token, you are risking your Google account getting terminated.`); - console.log(` You should ${Red('NOT')} use your personal account!`); - console.log(); - console.log(`Open ${verification_url} in a browser and enter ${user_code} when asked for the code.`); - } -); - -tube.session.once('auth-error', 
(err) => bail('An error occurred:', err)); -tube.session.once('auth', ({ credentials }) => { - if (!credentials.access_token) { - bail('something went wrong'); - } - - console.log( - 'add this cookie to the youtube_oauth array in your cookies file:', - JSON.stringify( - Object.entries(credentials) - .map(([k, v]) => `${k}=${v instanceof Date ? v.toISOString() : v}`) - .join('; ') - ) - ); -}); - -await tube.session.signIn(); \ No newline at end of file diff --git a/api/src/util/tests/facebook.json b/api/src/util/tests/facebook.json index d0c8cc7b..70e2db68 100644 --- a/api/src/util/tests/facebook.json +++ b/api/src/util/tests/facebook.json @@ -46,7 +46,7 @@ }, { "name": "shared video link", - "url": "https://www.facebook.com/share/v/NEf87jbPTvFE8LsL/", + "url": "https://www.facebook.com/share/v/6EJK4Z8EAEAHtz8K/", "params": {}, "expected": { "code": 200, diff --git a/api/src/util/tests/instagram.json b/api/src/util/tests/instagram.json index 1df87b9a..4adcf6f8 100644 --- a/api/src/util/tests/instagram.json +++ b/api/src/util/tests/instagram.json @@ -123,6 +123,7 @@ { "name": "private instagram post", "url": "https://www.instagram.com/p/C5_A1TQNPrYw4c2g9KAUTPUl8RVHqiAdAcOOSY0", + "canFail": true, "params": {}, "expected": { "code": 400, diff --git a/api/src/util/tests/reddit.json b/api/src/util/tests/reddit.json index 3afc6126..1dd10ee5 100644 --- a/api/src/util/tests/reddit.json +++ b/api/src/util/tests/reddit.json @@ -56,5 +56,23 @@ "code": 200, "status": "tunnel" } + }, + { + "name": "shortened video link", + "url": "https://v.redd.it/ifg2emt5ck0e1", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } + }, + { + "name": "shortened video link (alternative)", + "url": "https://reddit.com/video/ifg2emt5ck0e1", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } } ] \ No newline at end of file diff --git a/api/src/util/tests/twitter.json b/api/src/util/tests/twitter.json index 4fc5900f..4139e39d 100644 --- 
a/api/src/util/tests/twitter.json +++ b/api/src/util/tests/twitter.json @@ -169,6 +169,15 @@ "status": "tunnel" } }, + { + "name": "gif", + "url": "https://x.com/thelastromances/status/1897839691212202479", + "params": {}, + "expected": { + "code": 200, + "status": "tunnel" + } + }, { "name": "inexistent post", "url": "https://twitter.com/test/status/9487653", diff --git a/api/src/util/tests/xiaohongshu.json b/api/src/util/tests/xiaohongshu.json index 0cca9393..a169cc23 100644 --- a/api/src/util/tests/xiaohongshu.json +++ b/api/src/util/tests/xiaohongshu.json @@ -1,7 +1,8 @@ [ { - "name": "long link video", - "url": "https://www.xiaohongshu.com/discovery/item/6789065900000000210035fc?source=webshare&xhsshare=pc_web&xsec_token=CBustnz_Twf1BSybpe5-D-BzUb-Bx28DPLb418TN9S9Kk&xsec_source=pc_share", + "name": "video (might have expired)", + "url": "https://www.xiaohongshu.com/explore/67cc17a3000000000e00726a?xsec_token=CBSFRtbF57so920elY1kbIX4fE1nhrwlpGZs9m6pIFpwo=", + "canFail": true, "params": {}, "expected": { "code": 200, @@ -9,8 +10,9 @@ } }, { - "name": "picker with multiple live photos", - "url": "https://www.xiaohongshu.com/explore/67847fa1000000000203e6ed?xsec_token=CBzyP7Y44PPpsM20lgxqrIIJMHqOLemusDsRcmsX0cTpk", + "name": "picker with multiple live photos (might have expired)", + "url": "https://www.xiaohongshu.com/explore/67c691b4000000000d0159cc?xsec_token=CB8p1eyB5DiFkwlUpy1BTeVsI9oOve6ppNjuDzo8V8p5w=", + "canFail": true, "params": {}, "expected": { "code": 200, @@ -18,8 +20,9 @@ } }, { - "name": "one photo", + "name": "one photo (might have expired)", "url": "https://www.xiaohongshu.com/explore/676e132d000000000b016f68?xsec_token=ABRv6LKzizOFeSaf2HnnBkdBqniB5Ak1fI8tMAHzO31jA", + "canFail": true, "params": {}, "expected": { "code": 200, @@ -27,7 +30,7 @@ } }, { - "name": "short link, might expire eventually", + "name": "short link (might have expired)", "url": "https://xhslink.com/a/czn4z6c1tic4", "canFail": true, "params": {}, diff --git 
a/api/src/util/tests/youtube.json b/api/src/util/tests/youtube.json index 0655e683..cb4964be 100644 --- a/api/src/util/tests/youtube.json +++ b/api/src/util/tests/youtube.json @@ -189,6 +189,7 @@ { "name": "hls video (h264, 1440p)", "url": "https://www.youtube.com/watch?v=vPwaXytZcgI", + "canFail": true, "params": { "youtubeVideoCodec": "h264", "videoQuality": "1440", @@ -202,6 +203,7 @@ { "name": "hls video (vp9, 360p)", "url": "https://www.youtube.com/watch?v=vPwaXytZcgI", + "canFail": true, "params": { "youtubeVideoCodec": "vp9", "videoQuality": "360", @@ -215,6 +217,7 @@ { "name": "hls video (audio mode)", "url": "https://www.youtube.com/watch?v=vPwaXytZcgI", + "canFail": true, "params": { "downloadMode": "audio", "youtubeHLS": true @@ -227,6 +230,7 @@ { "name": "hls video (audio mode, best format)", "url": "https://www.youtube.com/watch?v=vPwaXytZcgI", + "canFail": true, "params": { "downloadMode": "audio", "youtubeHLS": true, diff --git a/docs/api-env-variables.md b/docs/api-env-variables.md new file mode 100644 index 00000000..34de4b0a --- /dev/null +++ b/docs/api-env-variables.md @@ -0,0 +1,228 @@ +# cobalt api instance environment variables +you can customize your processing instance's behavior using these environment variables. all of them but `API_URL` are optional. +this document is not final and will expand over time. feel free to improve it! 
+ +### general vars +| name | default | value example | +|:--------------------|:----------|:--------------------------------------| +| API_URL | | `https://api.url.example/` | +| API_PORT | `9000` | `1337` | +| COOKIE_PATH | | `/cookies.json` | +| PROCESSING_PRIORITY | | `10` | +| API_INSTANCE_COUNT | | `6` | +| API_REDIS_URL | | `redis://localhost:6379` | +| DISABLED_SERVICES | | `bilibili,youtube` | + +[*view details*](#general) + +### networking vars +| name | default | value example | +|:--------------------|:----------|:--------------------------------------| +| API_LISTEN_ADDRESS | `0.0.0.0` | `127.0.0.1` | +| API_EXTERNAL_PROXY | | `http://user:password@127.0.0.1:8080` | +| FREEBIND_CIDR | | `2001:db8::/32` | + +[*view details*](#networking) + +### limit vars +| name | default | value example | +|:-------------------------|:--------|:--------------| +| DURATION_LIMIT | `10800` | `18000` | +| TUNNEL_LIFESPAN | `90` | `120` | +| RATELIMIT_WINDOW | `60` | `120` | +| RATELIMIT_MAX | `20` | `30` | +| SESSION_RATELIMIT_WINDOW | `60` | `60` | +| SESSION_RATELIMIT | `10` | `10` | + +[*view details*](#limits) + +### security vars +| name | default | value example | +|:------------------|:--------|:--------------------------------------| +| CORS_WILDCARD | `1` | `0` | +| CORS_URL | | `https://web.url.example` | +| TURNSTILE_SITEKEY | | `1x00000000000000000000BB` | +| TURNSTILE_SECRET | | `1x0000000000000000000000000000000AA` | +| JWT_SECRET | | see [details](#security) | +| JWT_EXPIRY | `120` | `240` | +| API_KEY_URL | | `file://keys.json` | +| API_AUTH_REQUIRED | | `1` | + +[*view details*](#security) + +### service-specific vars +| name | value example | +|:---------------------------------|:-------------------------| +| CUSTOM_INNERTUBE_CLIENT | `IOS` | +| YOUTUBE_SESSION_SERVER | `http://localhost:8080/` | +| YOUTUBE_SESSION_INNERTUBE_CLIENT | `WEB_EMBEDDED` | + +[*view details*](#service-specific) + +## general +[*jump to the table*](#general-vars) + +### 
API_URL +> [!NOTE] +> API_URL is required to run the API instance. + +the URL from which your instance will be accessible. can be external or internal, but it must be a valid URL or else tunnels will not work. + +the value is a URL. + +### API_PORT +port from which the API server will be accessible. + +the value is a number from 1024 to 65535. + +### COOKIE_PATH +path to the `cookies.json` file relative to the current working directory of your cobalt instance (usually the main (src/api) folder). + +### PROCESSING_PRIORITY +`nice` value for ffmpeg subprocesses. available only on unix systems. + +note: the higher the nice value, the lower the priority. you can [read more about nice here](https://en.wikipedia.org/wiki/Nice_(Unix)). + +the value is a number. + +### API_INSTANCE_COUNT +supported only on linux and node.js `>=23.1.0`. when configured, cobalt will spawn multiple sub-instances amongst which requests will be balanced. `API_REDIS_URL` is required to use this option. + +the value is a number. + +### API_REDIS_URL +when configured, cobalt will use this redis instance for tunnel cache. required when `API_INSTANCE_COUNT` is more than 1, because otherwise sub-instances wouldn't be able to share the cache. + +the value is a URL. + +### DISABLED_SERVICES +comma-separated list which disables certain services from being used. + +the value is a string of cobalt-supported services. + +## networking +[*jump to the table*](#networking-vars) + +### API_LISTEN_ADDRESS +defines the local address for the api instance. if you are using a docker container, you usually don't need to configure this. + +the value is a local IP address. + +### API_EXTERNAL_PROXY +URL of the proxy that will be passed to [`ProxyAgent`](https://undici.nodejs.org/#/docs/api/ProxyAgent) and used for all external requests. HTTP(S) only. + +if some feature breaks when using a proxy, please make a new issue about it! + +the value is a URL. 
+ +### FREEBIND_CIDR +IPv6 prefix used for randomly assigning addresses to cobalt requests. available only on linux systems. + +setting a `FREEBIND_CIDR` allows cobalt to pick a random IP for every download and use it for all requests it makes for that particular download. + +to use freebind in cobalt, you need to follow its [setup instructions](https://github.com/imputnet/freebind.js?tab=readme-ov-file#setup) first. + +if you want to use this option and run cobalt in a docker container, you also need to set the `API_LISTEN_ADDRESS` env variable to `127.0.0.1` and set `network_mode` for the container to `host`. + +the value is an IPv6 range. + +## limits +[*jump to the table*](#limit-vars) + +### DURATION_LIMIT +media duration limit, in **seconds** + +the value is a number. + +### TUNNEL_LIFESPAN +the duration for which tunnel info is stored in ram, **in seconds**. + +it's recommended to keep this value either default or as low as possible to preserve efficiency and user privacy. + +the value is a number. + +### RATELIMIT_WINDOW +rate limit time window for api requests, but not session requests, in **seconds**. + +the value is a number. + +### RATELIMIT_MAX +amount of api requests to be allowed within the time window of `RATELIMIT_WINDOW`. + +the value is a number. + +### SESSION_RATELIMIT_WINDOW +rate limit time window for session creation requests, in **seconds**. + +the value is a number. + +### SESSION_RATELIMIT +amount of session requests to be allowed within the time window of `SESSION_RATELIMIT_WINDOW`. + +the value is a number. + +## security +[*jump to the table*](#security-vars) + +> [!NOTE] +> in order to enable turnstile bot protection, `TURNSTILE_SITEKEY`, `TURNSTILE_SECRET`, and `JWT_SECRET` must be set. all three at once. + +### CORS_WILDCARD +defines whether cross-origin resource sharing is enabled. when enabled, your instance will be accessible from foreign web pages. + +the value is a number. 0: disabled. 1: enabled. 
+ +### CORS_URL +configures the [cross-origin resource sharing origin](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Access-Control-Allow-Origin). your instance will be available only from this URL if `CORS_WILDCARD` is set to `0`. + +the value is a URL. + +### TURNSTILE_SITEKEY +[cloudflare turnstile](https://www.cloudflare.com/products/turnstile/) sitekey used by the web client to request & solve a challenge to prove that the user is not a bot. + +the value is a specific key. + +### TURNSTILE_SECRET +[cloudflare turnstile](https://www.cloudflare.com/products/turnstile/) secret used by the processing instance to verify that the client solved the challenge successfully. + +the value is a specific key. + +### JWT_SECRET +the secret used for issuing JWT tokens for request authentication. the value must be a random, secure, and long string (over 16 characters). + +the value is a specific key. + +### JWT_EXPIRY +the duration of how long a cobalt-issued JWT token will remain valid, in seconds. + +the value is a number. + +### API_KEY_URL +the URL to the external or local key database. for local files you have to specify a local path using the `file://` protocol. + +see [the api key section](/docs/protect-an-instance.md#api-key-file-format) in the "how to protect your cobalt instance" document for more details. + +the value is a URL. + +### API_AUTH_REQUIRED +when set to `1`, the user always needs to be authenticated in some way before they can access the API (either via an api key or via turnstile, if enabled). + +the value is a number. + +## service-specific +[*jump to the table*](#service-specific-vars) + +### CUSTOM_INNERTUBE_CLIENT +innertube client that will be used instead of the default one. + +the value is a string. + +### YOUTUBE_SESSION_SERVER +URL to an instance of [yt-session-generator](https://github.com/imputnet/yt-session-generator). used for automatically pulling `poToken` & `visitor_data` for youtube. can be local or remote. 
+ +the value is a URL. + +### YOUTUBE_SESSION_INNERTUBE_CLIENT +innertube client that's compatible with botguard's (web) `poToken` and `visitor_data`. + +the value is a string. diff --git a/docs/configure-for-youtube.md b/docs/configure-for-youtube.md deleted file mode 100644 index fe286d86..00000000 --- a/docs/configure-for-youtube.md +++ /dev/null @@ -1,33 +0,0 @@ -# how to configure a cobalt instance for youtube -if you get various errors when attempting to download videos that are: -publicly available, not region locked, and not age-restricted; -then your instance's ip address may have bad reputation. - -in this case you have to use disposable google accounts. -there's no other known workaround as of time of writing this document. - -> [!CAUTION] -> **NEVER** use your personal google account for downloading videos via any means. -> you can use any google accounts that you're willing to sacrifice, -> but be prepared to have them **permanently suspended**. -> -> we recommend that you use accounts that don't link back to your personal google account or identity, just in case. -> -> use incognito mode when signing in. -> we also recommend using vpn/proxy services (such as [mullvad](https://mullvad.net/)). - -1. if you haven't done it already, clone the cobalt repo, go to the cloned directory, and run `pnpm install` - -2. run `pnpm -C api token:youtube` - -3. follow instructions, use incognito mode in your browser when signing in. -i cannot stress this enough, but again, **DO NOT USE YOUR PERSONAL GOOGLE ACCOUNT**. - -4. once you have the oauth token, add it to `youtube_oauth` in your cookies file. -you can see an [example here](/docs/examples/cookies.example.json). -you can have several account tokens in this file, if you like. - -5. all done! enjoy freedom. - -### liability -you're responsible for any damage done to any of your google accounts or any other damages. you do this by yourself and at your own risk. 
diff --git a/docs/examples/cookies.example.json b/docs/examples/cookies.example.json index 7996adeb..d788b2dd 100644 --- a/docs/examples/cookies.example.json +++ b/docs/examples/cookies.example.json @@ -11,7 +11,7 @@ "twitter": [ "auth_token=; ct0=" ], - "youtube_oauth": [ - "" + "youtube": [ + "cookie=; b=" ] } diff --git a/docs/examples/docker-compose.example.yml b/docs/examples/docker-compose.example.yml index e56c0a21..b2ad73c1 100644 --- a/docs/examples/docker-compose.example.yml +++ b/docs/examples/docker-compose.example.yml @@ -41,3 +41,13 @@ services: command: --cleanup --scope cobalt --interval 900 --include-restarting volumes: - /var/run/docker.sock:/var/run/docker.sock + + # if needed, use this image for automatically generating poToken & visitor_data + # yt-session-generator: + # image: ghcr.io/imputnet/yt-session-generator:webserver + + # init: true + # restart: unless-stopped + # container_name: yt-session-generator + # labels: + # - com.centurylinklabs.watchtower.scope=cobalt diff --git a/docs/protect-an-instance.md b/docs/protect-an-instance.md index 9b4131c1..30584102 100644 --- a/docs/protect-an-instance.md +++ b/docs/protect-an-instance.md @@ -114,7 +114,7 @@ if you want to use your instance outside of web interface, you'll need an api ke > > if api keys leak, you'll have to update/remove all UUIDs to revoke them. -1. create a `keys.json` file following [the schema and example here](/docs//run-an-instance.md#api-key-file-format). +1. create a `keys.json` file following [the schema and example down below](#api-key-file-format). 2. expose the `keys.json` to the docker container: ```yml @@ -148,3 +148,55 @@ environment: ### why not make keys exclusive by default? keys may be useful for going around rate limiting, while keeping the rest of api rate limited, with no turnstile in place. 
+ +## api key file format +the file is a JSON-serialized object with the following structure: +```typescript + +type KeyFileContents = Record< + UUIDv4String, + { + name?: string, + limit?: number | "unlimited", + ips?: (CIDRString | IPString)[], + userAgents?: string[] + } +>; +``` + +where *`UUIDv4String`* is a stringified version of a UUIDv4 identifier. +- **name** is a field for your own reference, it is not used by cobalt anywhere. + +- **`limit`** specifies how many requests the API key can make during the window specified in the `RATELIMIT_WINDOW` env. + - when omitted, the limit specified in `RATELIMIT_MAX` will be used. + - it can be also set to `"unlimited"`, in which case the API key bypasses all rate limits. + +- **`ips`** contains an array of allowlisted IP ranges, which can be specified both as individual ips or CIDR ranges (e.g. *`["192.168.42.69", "2001:db8::48", "10.0.0.0/8", "fe80::/10"]`*). + - when specified, only requests from these ip ranges can use the specified api key. + - when omitted, any IP can be used to make requests with that API key. + +- **`userAgents`** contains an array of allowed user agents, with support for wildcards (e.g. *`["cobaltbot/1.0", "Mozilla/5.0 * Chrome/*"]`*). + - when specified, requests with a `user-agent` that does not appear in this array will be rejected. + - when omitted, any user agent can be specified to make requests with that API key. + +- if both `ips` and `userAgents` are set, the tokens will be limited by both parameters. +- if cobalt detects any problem with your key file, it will be ignored and a warning will be printed to the console. 
+ +an example key file could look like this: +```json +{ + "b5c7160a-b655-4c7a-b500-de839f094550": { + "limit": 10, + "ips": ["10.0.0.0/8", "192.168.42.42"], + "userAgents": ["*Chrome*"] + }, + "b00b1234-a3e5-99b1-c6d1-dba4512ae190": { + "limit": "unlimited", + "ips": ["192.168.1.2"], + "userAgents": ["cobaltbot/1.0"] + } +} +``` + +if you are configuring a key file, **do not use the UUID from the example** but instead generate your own. you can do this by running the following command if you have node.js installed: +`node -e "console.log(crypto.randomUUID())"` diff --git a/docs/run-an-instance.md b/docs/run-an-instance.md index ea31cfc5..9aa7c909 100644 --- a/docs/run-an-instance.md +++ b/docs/run-an-instance.md @@ -1,4 +1,6 @@ # how to run a cobalt instance +this tutorial will help you run your own cobalt processing instance. if your instance is public-facing, we highly recommend that you also [protect it from abuse](/docs/protect-an-instance.md) using turnstile or api keys or both. + ## using docker compose and package from github (recommended) to run the cobalt docker package, you need to have `docker` and `docker-compose` installed and configured. @@ -54,91 +56,5 @@ sudo apt install nscd sudo service nscd start ``` -## list of environment variables for api -| variable name | default | example | description | -|:----------------------|:----------|:------------------------|:------------| -| `API_PORT` | `9000` | `9000` | changes port from which api server is accessible. | -| `API_LISTEN_ADDRESS` | `0.0.0.0` | `127.0.0.1` | changes address from which api server is accessible. **if you are using docker, you usually don't need to configure this.** | -| `API_URL` | ➖ | `https://api.cobalt.tools/` | changes url from which api server is accessible.
***REQUIRED TO RUN THE API***. | -| `API_NAME` | `unknown` | `ams-1` | api server name that is shown in `/api/serverInfo`. | -| `API_EXTERNAL_PROXY` | ➖ | `http://user:password@127.0.0.1:8080`| url of the proxy that will be passed to [`ProxyAgent`](https://undici.nodejs.org/#/docs/api/ProxyAgent) and used for all external requests. HTTP(S) only. | -| `CORS_WILDCARD` | `1` | `0` | toggles cross-origin resource sharing.
`0`: disabled. `1`: enabled. | -| `CORS_URL` | not used | `https://cobalt.tools` | cross-origin resource sharing url. api will be available only from this url if `CORS_WILDCARD` is set to `0`. | -| `COOKIE_PATH` | not used | `/cookies.json` | path for cookie file relative to main folder. | -| `PROCESSING_PRIORITY` | not used | `10` | changes `nice` value* for ffmpeg subprocess. available only on unix systems. | -| `FREEBIND_CIDR` | ➖ | `2001:db8::/32` | IPv6 prefix used for randomly assigning addresses to cobalt requests. only supported on linux systems. see below for more info. | -| `RATELIMIT_WINDOW` | `60` | `120` | rate limit time window in **seconds**. | -| `RATELIMIT_MAX` | `20` | `30` | max requests per time window. requests above this amount will be blocked for the rate limit window duration. | -| `DURATION_LIMIT` | `10800` | `18000` | max allowed video duration in **seconds**. | -| `TUNNEL_LIFESPAN` | `90` | `120` | the duration for which tunnel info is stored in ram, **in seconds**. | -| `TURNSTILE_SITEKEY` | ➖ | `1x00000000000000000000BB` | [cloudflare turnstile](https://www.cloudflare.com/products/turnstile/) sitekey used by browser clients to request a challenge.\*\* | -| `TURNSTILE_SECRET` | ➖ | `1x0000000000000000000000000000000AA` | [cloudflare turnstile](https://www.cloudflare.com/products/turnstile/) secret used by cobalt to verify the client successfully solved the challenge.\*\* | -| `JWT_SECRET` | ➖ | ➖ | the secret used for issuing JWT tokens for request authentication. to choose a value, generate a random, secure, long string (ideally >=16 characters).\*\* | -| `JWT_EXPIRY` | `120` | `240` | the duration of how long a cobalt-issued JWT token will remain valid, in seconds. | -| `API_KEY_URL` | ➖ | `file://keys.json` | the location of the api key database. for loading API keys, cobalt supports HTTP(S) urls, or local files by specifying a local path using the `file://` protocol. see the "api key file format" below for more details. 
| -| `API_AUTH_REQUIRED` | ➖ | `1` | when set to `1`, the user always needs to be authenticated in some way before they can access the API (either via an api key or via turnstile, if enabled). | -| `API_REDIS_URL` | ➖ | `redis://localhost:6379` | when set, cobalt uses redis instead of internal memory for the tunnel cache. | -| `API_INSTANCE_COUNT` | ➖ | `2` | supported only on Linux and node.js `>=23.1.0`. when configured, cobalt will spawn multiple sub-instances amongst which requests will be balanced. | -| `DISABLED_SERVICES` | ➖ | `bilibili,youtube` | comma-separated list which disables certain services from being used. | - -\* the higher the nice value, the lower the priority. [read more here](https://en.wikipedia.org/wiki/Nice_(Unix)). - -\*\* in order to enable turnstile bot protection, all three **`TURNSTILE_SITEKEY`, `TURNSTILE_SECRET` and `JWT_SECRET`** need to be set. - -#### FREEBIND_CIDR -setting a `FREEBIND_CIDR` allows cobalt to pick a random IP for every download and use it for all -requests it makes for that particular download. to use freebind in cobalt, you need to follow its [setup instructions](https://github.com/imputnet/freebind.js?tab=readme-ov-file#setup) first. if you configure this option while running cobalt -in a docker container, you also need to set the `API_LISTEN_ADDRESS` env to `127.0.0.1`, and set -`network_mode` for the container to `host`. - -## api key file format -the file is a JSON-serialized object with the following structure: -```typescript - -type KeyFileContents = Record< - UUIDv4String, - { - name?: string, - limit?: number | "unlimited", - ips?: (CIDRString | IPString)[], - userAgents?: string[] - } ->; -``` - -where *`UUIDv4String`* is a stringified version of a UUIDv4 identifier. -- **name** is a field for your own reference, it is not used by cobalt anywhere. - -- **`limit`** specifies how many requests the API key can make during the window specified in the `RATELIMIT_WINDOW` env. 
- - when omitted, the limit specified in `RATELIMIT_MAX` will be used. - - it can be also set to `"unlimited"`, in which case the API key bypasses all rate limits. - -- **`ips`** contains an array of allowlisted IP ranges, which can be specified both as individual ips or CIDR ranges (e.g. *`["192.168.42.69", "2001:db8::48", "10.0.0.0/8", "fe80::/10"]`*). - - when specified, only requests from these ip ranges can use the specified api key. - - when omitted, any IP can be used to make requests with that API key. - -- **`userAgents`** contains an array of allowed user agents, with support for wildcards (e.g. *`["cobaltbot/1.0", "Mozilla/5.0 * Chrome/*"]`*). - - when specified, requests with a `user-agent` that does not appear in this array will be rejected. - - when omitted, any user agent can be specified to make requests with that API key. - -- if both `ips` and `userAgents` are set, the tokens will be limited by both parameters. -- if cobalt detects any problem with your key file, it will be ignored and a warning will be printed to the console. - -an example key file could look like this: -```json -{ - "b5c7160a-b655-4c7a-b500-de839f094550": { - "limit": 10, - "ips": ["10.0.0.0/8", "192.168.42.42"], - "userAgents": ["*Chrome*"] - }, - "b00b1234-a3e5-99b1-c6d1-dba4512ae190": { - "limit": "unlimited", - "ips": ["192.168.1.2"], - "userAgents": ["cobaltbot/1.0"] - } -} -``` - -if you are configuring a key file, **do not use the UUID from the example** but instead generate your own. you can do this by running the following command if you have node.js installed: -`node -e "console.log(crypto.randomUUID())"` +## list of environment variables +[this section has moved](/docs/api-env-variables.md) to a dedicated document that is way easier to understand and maintain. go check it out! 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f21a795d..c20a3a8d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -56,8 +56,8 @@ importers: specifier: 1.0.3 version: 1.0.3 youtubei.js: - specifier: ^13.0.0 - version: 13.0.0 + specifier: ^13.3.0 + version: 13.3.0 zod: specifier: ^3.23.8 version: 3.23.8 @@ -188,8 +188,8 @@ packages: resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==} engines: {node: '>=6.0.0'} - '@bufbuild/protobuf@2.1.0': - resolution: {integrity: sha512-+2Mx67Y3skJ4NCD/qNSdBJNWtu6x6Qr53jeNg+QcwiL6mt0wK+3jwHH2x1p7xaYH6Ve2JKOVn0OxU35WsmqI9A==} + '@bufbuild/protobuf@2.2.5': + resolution: {integrity: sha512-/g5EzJifw5GF8aren8wZ/G5oMuPoGeS6MQD3ca8ddcvdXR5UELUfdTZITCGNhNXynY/AYl3Z4plmxdj/tRl/hQ==} '@datastructures-js/heap@4.3.3': resolution: {integrity: sha512-UcUu/DLh/aM4W3C8zZfwxxm6/6FIZUlm3mcAXuNOCa6Aj4iizNvNXQyb8DjZQH2jKSQbMRyNlngP6TPimuGjpQ==} @@ -1468,8 +1468,8 @@ packages: resolution: {integrity: sha512-bZsjR/iRjl1Nk1UkjGpAzLNfQtzuijhn2g+pbZb98HQ1Gk8vM9hfbxeMBP+M2/UUdwj0RqGG3mlvk2MsAqwvEw==} engines: {node: 20 || >=22} - jintr@3.2.0: - resolution: {integrity: sha512-psD1yf05kMKDNsUdW1l5YhO59pHScQ6OIHHb8W5SKSM2dCOFPsqolmIuSHgVA8+3Dc47NJR181CXZ4alCAPTkA==} + jintr@3.3.0: + resolution: {integrity: sha512-ZsaajJ4Hr5XR0tSPhOZOTjFhxA0qscKNSOs41NRjx7ZOGwpfdp8NKIBEUtvUPbA37JXyv1sJlgeOOZHjr3h76Q==} joycon@3.1.1: resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==} @@ -2286,8 +2286,8 @@ packages: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} - youtubei.js@13.0.0: - resolution: {integrity: sha512-b1QkN9bfgphK+5tI4qteSK54kNxmPhoedvMw0jl4uSn+L8gbDbJ4z52amNuYNcOdp4X/SI3JuUb+f5V0DPJ8Vw==} + youtubei.js@13.3.0: + resolution: {integrity: 
sha512-tbl7rxltpgKoSsmfGUe9JqWUAzv6HFLqrOn0N85EbTn5DLt24EXrjClnXdxyr3PBARMJ3LC4vbll100a0ABsYw==} zod@3.23.8: resolution: {integrity: sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==} @@ -2299,7 +2299,7 @@ snapshots: '@jridgewell/gen-mapping': 0.3.5 '@jridgewell/trace-mapping': 0.3.25 - '@bufbuild/protobuf@2.1.0': {} + '@bufbuild/protobuf@2.2.5': {} '@datastructures-js/heap@4.3.3': {} @@ -3519,7 +3519,7 @@ snapshots: dependencies: '@isaacs/cliui': 8.0.2 - jintr@3.2.0: + jintr@3.3.0: dependencies: acorn: 8.14.0 @@ -4242,10 +4242,10 @@ snapshots: yocto-queue@0.1.0: {} - youtubei.js@13.0.0: + youtubei.js@13.3.0: dependencies: - '@bufbuild/protobuf': 2.1.0 - jintr: 3.2.0 + '@bufbuild/protobuf': 2.2.5 + jintr: 3.3.0 tslib: 2.6.3 undici: 5.28.4 diff --git a/web/i18n/en/error.json b/web/i18n/en/error.json index 2788d9e4..2c347951 100644 --- a/web/i18n/en/error.json +++ b/web/i18n/en/error.json @@ -67,5 +67,7 @@ "api.youtube.token_expired": "couldn't get this video because the youtube token expired and i couldn't refresh it. try again in a few seconds, but if it still doesn't work, tell the instance owner about this error!", "api.youtube.no_hls_streams": "couldn't find any matching HLS streams for this video. try downloading it without HLS!", "api.youtube.api_error": "youtube updated something about its api and i couldn't get any info about this video. try again in a few seconds, but if this issue sticks, please report it!", - "api.youtube.temporary_disabled": "youtube downloading is temporarily disabled due to restrictions from youtube's side. we're already looking for ways to go around them.\n\nwe apologize for the inconvenience and are doing our best to restore this functionality. check cobalt's socials or github for timely updates!" + "api.youtube.temporary_disabled": "youtube downloading is temporarily disabled due to restrictions from youtube's side. 
we're already looking for ways to go around them.\n\nwe apologize for the inconvenience and are doing our best to restore this functionality. check cobalt's socials or github for timely updates!", + "api.youtube.drm": "this youtube video is protected by widevine DRM, so i can't download it. try a different link!", + "api.youtube.no_session_tokens": "couldn't get required session tokens for youtube. this may be caused by a restriction on youtube's side. try again in a few seconds, but if this issue sticks, please report it!" } diff --git a/web/package.json b/web/package.json index 24b59501..96900d0c 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "@imput/cobalt-web", - "version": "10.7.5", + "version": "10.9", "type": "module", "private": true, "scripts": { diff --git a/web/src/components/dialog/SavingDialog.svelte b/web/src/components/dialog/SavingDialog.svelte index 03127353..8f881c75 100644 --- a/web/src/components/dialog/SavingDialog.svelte +++ b/web/src/components/dialog/SavingDialog.svelte @@ -145,7 +145,7 @@ } .dialog-inner-container:focus-visible { - box-shadow: none; + box-shadow: none!important; } .dialog-inner-container { diff --git a/web/src/lib/api/api.ts b/web/src/lib/api/api.ts index 89fba727..07829480 100644 --- a/web/src/lib/api/api.ts +++ b/web/src/lib/api/api.ts @@ -3,7 +3,7 @@ import { get } from "svelte/store"; import settings from "$lib/state/settings"; import lazySettingGetter from "$lib/settings/lazy-get"; -import { getSession } from "$lib/api/session"; +import { getSession, resetSession } from "$lib/api/session"; import { currentApiURL } from "$lib/api/api-url"; import { turnstileEnabled, turnstileSolved } from "$lib/state/turnstile"; import cachedInfo from "$lib/state/server-info"; @@ -43,10 +43,10 @@ const getAuthorization = async () => { } } -const request = async (url: string) => { +const request = async (url: string, justRetried = false) => { const getSetting = lazySettingGetter(get(settings)); - const 
request = { + const requestBody = { url, downloadMode: getSetting("save", "downloadMode"), @@ -100,7 +100,7 @@ const request = async (url: string) => { method: "POST", redirect: "manual", signal: AbortSignal.timeout(20000), - body: JSON.stringify(request), + body: JSON.stringify(requestBody), headers: { "Accept": "application/json", "Content-Type": "application/json", @@ -119,9 +119,37 @@ const request = async (url: string) => { } }); + if ( + response?.status === 'error' + && response?.error.code === 'error.api.auth.jwt.invalid' + && !justRetried + ) { + resetSession(); + await waitForTurnstile().catch(() => {}); + return request(url, true); + } + return response; } +// resolves once turnstileSolved emits true. a svelte store calls its +// subscriber synchronously inside subscribe(), before unsub is assigned, +// so a value delivered during that call is unsubscribed afterwards instead. +const waitForTurnstile = async () => { + await getAuthorization(); + return new Promise<void>(resolve => { + let unsub: (() => void) | undefined; + let done = false; + unsub = turnstileSolved.subscribe(solved => { + if (!solved) return; + done = true; + unsub?.(); + resolve(); + }); + if (done) unsub(); + }); +} + const probeCobaltTunnel = async (url: string) => { const request = await fetch(`${url}&p=1`).catch(() => {}); if (request?.status === 200) { diff --git a/web/src/lib/api/session.ts b/web/src/lib/api/session.ts index 5b3e542b..40304672 100644 --- a/web/src/lib/api/session.ts +++ b/web/src/lib/api/session.ts @@ -62,3 +62,5 @@ export const getSession = async () => { } return newSession; } + +export const resetSession = () => cache = undefined;