api: initial subtitles functionality with youtube support

this took way more effort than i expected, mostly because of youtube locking everything down to shit

local processing doesn't function with subtitles yet, wasm needs to be updated
This commit is contained in:
wukko 2025-06-18 20:19:19 +06:00
parent 967552b26b
commit 259a0758f1
No known key found for this signature in database
GPG Key ID: 3E30B3F26C7B4AA2
5 changed files with 149 additions and 38 deletions

View File

@ -31,7 +31,8 @@ export default function({
createFilename(r.filenameAttributes, filenameStyle, isAudioOnly, isAudioMuted) : r.filename,
fileMetadata: !disableMetadata ? r.fileMetadata : false,
requestIP,
originalRequest: r.originalRequest
originalRequest: r.originalRequest,
subtitles: r.subtitles,
},
params = {};

View File

@ -114,6 +114,7 @@ export default async function({ host, patternMatch, params, isSession, isApiKey
isAudioMuted,
dubLang: params.youtubeDubLang,
youtubeHLS,
subtitleLang: params.subtitleLang,
}
if (url.hostname === "music.youtube.com" || isAudioOnly) {

View File

@ -87,6 +87,83 @@ const cloneInnertube = async (customFetch, useSession) => {
return yt;
}
/**
 * Downloads an HLS manifest and returns the variants found in it,
 * ordered from the highest to the lowest bandwidth.
 *
 * Every failure (missing URL, network error, non-200 response,
 * unparsable playlist, playlist without variants) is reported the same way:
 * as `{ error: "youtube.no_hls_streams" }`.
 *
 * @param {string | undefined} hlsManifest - URL of the HLS manifest
 * @param {object} [dispatcher] - forwarded to `fetch` as the `dispatcher` option
 * @returns {Promise<object[] | { error: string }>} sorted variants, or an error object
 */
const getHlsVariants = async (hlsManifest, dispatcher) => {
    const noStreams = { error: "youtube.no_hls_streams" };

    if (!hlsManifest) {
        return noStreams;
    }

    // network failures and non-200 responses both collapse to `undefined`
    const fetchedHlsManifest =
        await fetch(hlsManifest, { dispatcher })
            .then(r => r.status === 200 ? r.text() : undefined)
            .catch(() => {});

    if (!fetchedHlsManifest) {
        return noStreams;
    }

    let variants;
    try {
        variants = HLS.parse(fetchedHlsManifest)?.variants;
    } catch {
        // a malformed playlist is treated like a missing one
        return noStreams;
    }

    // validate BEFORE sorting: a parsed playlist that carries no `variants`
    // would otherwise blow up on `.sort` instead of reaching this check
    if (!Array.isArray(variants) || variants.length === 0) {
        return noStreams;
    }

    return variants.sort(
        (a, b) => Number(b.bandwidth) - Number(a.bandwidth)
    );
}
/**
 * Finds a subtitle track whose language code starts with `subtitleLang`
 * and returns a URL it can be downloaded from.
 *
 * Auto-generated (`kind === 'asr'`) caption tracks are never selected.
 * This is best-effort: any failure along the way yields `undefined`
 * instead of throwing, so a missing subtitle never breaks the request.
 *
 * @param {object} info - video info; reads `captions.caption_tracks` and
 *                        `streaming_data.hls_manifest_url`
 * @param {object} [dispatcher] - forwarded to `fetch` / `getHlsVariants`
 * @param {string} subtitleLang - language prefix to match (e.g. "en")
 * @returns {Promise<{ url: string, language: string } | undefined>}
 */
const getSubtitles = async (info, dispatcher, subtitleLang) => {
    const preferredCap = info.captions?.caption_tracks?.find(caption =>
        caption.kind !== 'asr' && caption.language_code?.startsWith(subtitleLang)
    );

    const captionsUrl = preferredCap?.base_url;
    if (!captionsUrl) return;

    if (!captionsUrl.includes("exp=xpe")) {
        const url = new URL(captionsUrl);
        url.searchParams.set('fmt', 'vtt');

        return {
            url: url.toString(),
            language: preferredCap.language_code,
        }
    }

    // if we have exp=xpe in the url, then captions are
    // locked down and can't be accessed without a yummy potoken,
    // so instead we just use subtitles from HLS
    const hlsVariants = await getHlsVariants(
        // streaming_data may be absent; getHlsVariants handles a missing url
        info.streaming_data?.hls_manifest_url,
        dispatcher
    );
    if (hlsVariants?.error) return;

    // all variants usually have the same set of subtitles
    const hlsSubtitles = hlsVariants?.[0]?.subtitles;
    if (!hlsSubtitles?.length) return;

    // NOTE(review): a rendition may come without a language — confirm with hls-parser
    const preferredHls = hlsSubtitles.find(
        subtitle => subtitle.language?.startsWith(subtitleLang)
    );
    if (!preferredHls) return;

    const fetchedHlsSubs =
        await fetch(preferredHls.uri, { dispatcher })
            .then(r => r.status === 200 ? r.text() : undefined)
            .catch(() => {});

    // fetch failed or returned non-200: nothing to parse
    if (!fetchedHlsSubs) return;

    let parsedSubs;
    try {
        parsedSubs = HLS.parse(fetchedHlsSubs);
    } catch {
        // malformed subtitle playlist
        return;
    }

    // never hand back a result without a usable url, otherwise
    // the caller sees a truthy object that has nothing to download
    const segmentUrl = parsedSubs?.segments?.[0]?.uri;
    if (!segmentUrl) return;

    return {
        url: segmentUrl,
        language: preferredHls.language,
    }
}
export default async function (o) {
const quality = o.quality === "max" ? 9000 : Number(o.quality);
@ -98,7 +175,8 @@ export default async function (o) {
useHLS = false;
}
if (useHLS) {
// we can get subtitles reliably only from the iOS client
if (useHLS || o.subtitleLang) {
innertubeClient = "IOS";
}
@ -222,37 +300,16 @@ export default async function (o) {
return videoQualities.find(qual => qual >= shortestSide);
}
let video, audio, dubbedLanguage,
let video, audio, subtitles, dubbedLanguage,
codec = o.format || "h264", itag = o.itag;
if (useHLS) {
const hlsManifest = info.streaming_data.hls_manifest_url;
if (!hlsManifest) {
return { error: "youtube.no_hls_streams" };
}
const fetchedHlsManifest = await fetch(hlsManifest, {
dispatcher: o.dispatcher,
}).then(r => {
if (r.status === 200) {
return r.text();
} else {
throw new Error("couldn't fetch the HLS playlist");
}
}).catch(() => { });
if (!fetchedHlsManifest) {
return { error: "youtube.no_hls_streams" };
}
const variants = HLS.parse(fetchedHlsManifest).variants.sort(
(a, b) => Number(b.bandwidth) - Number(a.bandwidth)
const variants = await getHlsVariants(
info.streaming_data.hls_manifest_url,
o.dispatcher
);
if (!variants || variants.length === 0) {
return { error: "youtube.no_hls_streams" };
}
if (variants?.error) return variants;
const matchHlsCodec = codecs => (
codecs.includes(hlsCodecList[codec].videoCodec)
@ -403,6 +460,13 @@ export default async function (o) {
if (!video) video = sorted_formats[codec].bestVideo;
}
if (o.subtitleLang && !o.isAudioOnly && info.captions?.caption_tracks?.length) {
const videoSubtitles = await getSubtitles(info, o.dispatcher, o.subtitleLang);
if (videoSubtitles) {
subtitles = videoSubtitles;
}
}
}
if (video?.drm_families || audio?.drm_families) {
@ -426,6 +490,10 @@ export default async function (o) {
}
}
if (subtitles) {
fileMetadata.sublanguage = subtitles.language;
}
const filenameAttributes = {
service: "youtube",
id: o.id,
@ -508,6 +576,7 @@ export default async function (o) {
video,
audio,
],
subtitles: subtitles?.url,
filenameAttributes,
fileMetadata,
isHLS: useHLS,

View File

@ -41,7 +41,10 @@ export function createStream(obj) {
audioFormat: obj.audioFormat,
isHLS: obj.isHLS || false,
originalRequest: obj.originalRequest
originalRequest: obj.originalRequest,
// url to a subtitle file
subtitles: obj.subtitles,
};
// FIXME: this is now a Promise, but it is not awaited
@ -94,6 +97,18 @@ export function createProxyTunnels(info) {
);
}
if (info.subtitles) {
proxyTunnels.push(
createStream({
url: info.subtitles,
type: "proxy",
service: `${info?.service}-subtitles`,
headers: info?.headers,
requestIP: info?.requestIP
})
);
}
return proxyTunnels;
}
@ -111,7 +126,7 @@ export function getInternalTunnelFromURL(url) {
return getInternalTunnel(id);
}
export function createInternalStream(url, obj = {}) {
export function createInternalStream(url, obj = {}, isSubtitles) {
assert(typeof url === 'string');
let dispatcher = obj.dispatcher;
@ -132,9 +147,12 @@ export function createInternalStream(url, obj = {}) {
headers = new Map(Object.entries(obj.headers));
}
// subtitles don't need special treatment unlike big media files
const service = isSubtitles ? `${obj.service}-subtitles` : obj.service;
internalStreamCache.set(streamID, {
url,
service: obj.service,
service,
headers,
controller,
dispatcher,
@ -245,6 +263,14 @@ function wrapStream(streamInfo) {
}
} else throw 'invalid urls';
if (streamInfo.subtitles) {
streamInfo.subtitles = createInternalStream(
streamInfo.subtitles,
streamInfo,
/*isSubtitles=*/true
);
}
return streamInfo;
}

View File

@ -25,6 +25,7 @@ const metadataTags = [
"album_artist",
"track",
"date",
"sublanguage"
];
const convertMetadataToFFmpeg = (metadata) => {
@ -32,6 +33,10 @@ const convertMetadataToFFmpeg = (metadata) => {
for (const [ name, value ] of Object.entries(metadata)) {
if (metadataTags.includes(name)) {
if (name === "sublanguage") {
args.push('-metadata:s:s:0', `language=${value}`);
continue;
}
args.push('-metadata', `${name}=${value.replace(/[\u0000-\u0009]/g, "")}`); // skipcq: JS-0004
} else {
throw `${name} metadata tag is not supported.`;
@ -109,9 +114,6 @@ const merge = async (streamInfo, res) => {
streamInfo.urls.map(destroyInternalStream)
);
const headers = getHeaders(streamInfo.service);
const rawHeaders = toRawHeaders(headers);
try {
if (streamInfo.urls.length !== 2) return shutdown();
@ -119,13 +121,21 @@ const merge = async (streamInfo, res) => {
let args = [
'-loglevel', '-8',
'-headers', rawHeaders,
'-i', streamInfo.urls[0],
'-headers', rawHeaders,
'-i', streamInfo.urls[1],
];
if (streamInfo.subtitles) {
args.push(
'-i', streamInfo.subtitles,
'-map', '2:s'
);
};
args.push(
'-map', '0:v',
'-map', '1:a',
]
);
args = args.concat(ffmpegArgs[format]);
@ -137,8 +147,12 @@ const merge = async (streamInfo, res) => {
}
}
if (streamInfo.subtitles && format === "mp4") {
args.push('-c:s', 'mov_text');
}
if (streamInfo.metadata) {
args = args.concat(convertMetadataToFFmpeg(streamInfo.metadata))
args = args.concat(convertMetadataToFFmpeg(streamInfo.metadata));
}
args.push('-f', format, 'pipe:3');