From 47d15c2592c1127b0cc25c3017061d9b5cfd2453 Mon Sep 17 00:00:00 2001
From: vectflow
Date: Sat, 29 Jun 2024 00:47:33 -0400
Subject: [PATCH 1/4] feat(linkedin): service to scrape linkedin video post for cdn urls serving the video

---
Reviewer note: linkedin.js below was revised during review (html entities
restored, fallback priority fixed, malformed source lists handled); the blob
id on its index line predates these changes.

 src/modules/processing/services/linkedin.js | 86 +++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/modules/processing/services/linkedin.js

diff --git a/src/modules/processing/services/linkedin.js b/src/modules/processing/services/linkedin.js
new file mode 100644
index 00000000..71acefd8
--- /dev/null
+++ b/src/modules/processing/services/linkedin.js
@@ -0,0 +1,86 @@
+import { genericUserAgent } from "../../config.js";
+
+// maps a path segment of a source url (index 6 when split on "/") to its quality
+const qualityMatch = {
+    "mp4-640p-30fp-crf28": 640,
+    "mp4-720p-30fp-crf28": 720
+};
+
+/**
+ * Scrapes a linkedin post page for the cdn url that serves its video.
+ *
+ * @param {object} obj
+ * @param {string} obj.postId - activity id, interpolated into the fetched feed url
+ * @param {string|number} obj.quality - "max" or a value >= 720 selects 720, anything else 640
+ * @returns {Promise<object>} `{ urls, filename, audioFilename }` or `{ error }`
+ */
+export default async function (obj) {
+    const html = await fetch(
+        `https://www.linkedin.com/feed/update/urn:li:activity:${obj.postId}`,
+        { headers: { "user-agent": genericUserAgent } }
+    )
+        .then((res) => res.text())
+        // best effort: any request failure surfaces as ErrorCouldntFetch below
+        .catch(() => {});
+
+    if (!html) {
+        return { error: "ErrorCouldntFetch" };
+    }
+
+    let data;
+    try {
+        // the sources are html-escaped json inside the data-sources attribute
+        const json = html
+            .split('data-sources="')[1]
+            .split('" data-poster-url="')[0]
+            .replaceAll("&quot;", '"')
+            // decode &amp; last so nothing gets unescaped twice
+            .replaceAll("&amp;", "&");
+        data = JSON.parse(json);
+    } catch (error) {
+        return { error: "ErrorCouldntFetch" };
+    }
+
+    // anything but a list cannot be iterated for sources
+    if (!Array.isArray(data)) {
+        return { error: "ErrorEmptyDownload" };
+    }
+
+    const quality = obj.quality === "max" || obj.quality >= 720 ? 720 : 640;
+    const filenameBase = `linkedin_${obj.postId}`;
+    const pick = (url) => ({
+        urls: url,
+        filename: `${filenameBase}.mp4`,
+        audioFilename: `${filenameBase}_audio`
+    });
+
+    let knownFallback;
+    let unknownFallback;
+
+    for (const source of data) {
+        // skip entries without a usable url instead of throwing on .split
+        if (typeof source?.src !== "string") {
+            continue;
+        }
+
+        const videoQuality = qualityMatch[source.src.split("/")[6]];
+
+        if (videoQuality === quality) {
+            return pick(source.src);
+        }
+
+        if (videoQuality) {
+            knownFallback = source.src;
+        } else if (!unknownFallback) {
+            unknownFallback = source.src;
+        }
+    }
+
+    // no exact match: prioritize a known quality over an unknown one
+    const fallbackUrl = knownFallback || unknownFallback;
+    if (fallbackUrl) {
+        return pick(fallbackUrl);
+    }
+
+    return { error: "ErrorEmptyDownload" };
+}
From 7f5a33c6756adbdb5ba79dc15a67c944894318b6 Mon Sep 17 00:00:00 2001 From: vectflow Date: Sat, 29 Jun 2024 00:50:53 -0400 Subject: [PATCH 2/4] feat(linkedin): correctly normalize and parse linkedin post urls --- src/modules/processing/match.js | 7 +++++++ src/modules/processing/servicesConfig.json | 6 +++++- src/modules/processing/servicesPatternTesters.js | 3 +++ src/modules/processing/url.js | 6 ++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/modules/processing/match.js b/src/modules/processing/match.js index 3e38c4db..118c4e69 100644 --- a/src/modules/processing/match.js +++ b/src/modules/processing/match.js @@ -25,6 +25,7 @@ import twitch from "./services/twitch.js"; import rutube from "./services/rutube.js"; import dailymotion from "./services/dailymotion.js"; import loom from "./services/loom.js"; +import linkedin from "./services/linkedin.js"; let freebind; @@ -193,6 +194,12 @@ export default async function(host, patternMatch, lang, obj) { id: patternMatch.id }); break; + case "linkedin": + r = await linkedin({ + postId: patternMatch.id, + quality: obj.vQuality + }); + break; default: return createResponse("error", { t: loc(lang, 'ErrorUnsupported') diff --git a/src/modules/processing/servicesConfig.json b/src/modules/processing/servicesConfig.json
index d727b9a5..de6fd265 100644 --- a/src/modules/processing/servicesConfig.json +++ b/src/modules/processing/servicesConfig.json @@ -33,7 +33,6 @@ "vk": { "alias": "vk video & clips", "patterns": ["video:userId_:videoId", "clip:userId_:videoId", "clips:duplicate?z=clip:userId_:videoId"], - "subdomains": ["m"], "enabled": true }, "ok": { @@ -118,6 +117,11 @@ "alias": "loom videos", "patterns": ["share/:id"], "enabled": true + }, + "linkedin": { + "alias": "linkedin videos", + "patterns": ["feed/update/urn\\:li\\:activity\\:(:id)"], + "enabled": true } } } diff --git a/src/modules/processing/servicesPatternTesters.js b/src/modules/processing/servicesPatternTesters.js index ddeea31f..6cc38675 100644 --- a/src/modules/processing/servicesPatternTesters.js +++ b/src/modules/processing/servicesPatternTesters.js @@ -9,6 +9,9 @@ export const testers = { patternMatch.postId?.length <= 12 || (patternMatch.username?.length <= 30 && patternMatch.storyId?.length <= 24), + "linkedin": (patternMatch) => + patternMatch.id?.length === 19, + "loom": (patternMatch) => patternMatch.id?.length <= 32, diff --git a/src/modules/processing/url.js b/src/modules/processing/url.js index 111f1f6f..c2f861b1 100644 --- a/src/modules/processing/url.js +++ b/src/modules/processing/url.js @@ -70,6 +70,12 @@ function aliasURL(url) { url.hostname = 'instagram.com'; } break; + case "linkedin": + if (parts[1] === "posts") { + const postId = parts.pop().split("-").at(-2) + url = new URL(`https://linkedin.com/feed/update/urn:li:activity:${postId}`) + } + break; } return url From 80b4108a481e55ae74eae4c6d5377edad20d9d25 Mon Sep 17 00:00:00 2001 From: vectflow Date: Sat, 29 Jun 2024 00:51:47 -0400 Subject: [PATCH 3/4] test(linkedin): tests for linkedin functionality --- src/util/tests.json | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/util/tests.json b/src/util/tests.json index 501ac2c0..97687a1d 100644 --- a/src/util/tests.json +++ b/src/util/tests.json @@ 
-1160,5 +1160,42 @@ "code": 200, "status": "stream" } + }], + "linkedin": [{ + "name": "regular video (share link)", + "url": "https://www.linkedin.com/posts/jasonyoong_in-the-early-days-of-a-startup-sam-altman-activity-7211912701411827712-oR9m?utm_source=share", + "params": {}, + "expected": { + "code": 200, + "status": "stream" + } + }, { + "name": "regular video (feed link)", + "url": "https://www.linkedin.com/feed/update/urn:li:activity:7211912701411827712/", + "params": {}, + "expected": { + "code": 200, + "status": "stream" + } + }, { + "name": "regular video (isAudioMuted)", + "url": "https://www.linkedin.com/feed/update/urn:li:activity:7211912701411827712/", + "params": { + "isAudioMuted": true + }, + "expected": { + "code": 200, + "status": "stream" + } + }, { + "name": "regular video (isAudioOnly)", + "url": "https://www.linkedin.com/feed/update/urn:li:activity:7211912701411827712/", + "params": { + "isAudioOnly": true + }, + "expected": { + "code": 200, + "status": "stream" + } }] } From b730d9ac08b9045ee2794f5806193d2af0c5ab8c Mon Sep 17 00:00:00 2001 From: vectflow Date: Sat, 29 Jun 2024 00:53:44 -0400 Subject: [PATCH 4/4] docs(linkedin): add linkedin service to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d2bb064c..5fd1dd7d 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ this list is not final and keeps expanding over time. if support for a service y | bilibili.com & bilibili.tv | ✅ | ✅ | ✅ | ➖ | ➖ | | dailymotion | ✅ | ✅ | ✅ | ✅ | ✅ | | instagram posts & reels | ✅ | ✅ | ✅ | ➖ | ➖ | +| linkedin | ✅ | ✅ | ✅ | ❌ | ❌ | | loom | ✅ | ❌ | ✅ | ✅ | ➖ | | ok video | ✅ | ❌ | ✅ | ✅ | ✅ | | pinterest | ✅ | ✅ | ✅ | ➖ | ➖ | @@ -45,6 +46,7 @@ this list is not final and keeps expanding over time. if support for a service y | service | notes or features | | :-------- | :----- | | instagram | supports reels, photos, and videos. lets you pick what to save from multi-media posts. 
| +| linkedin | supports post and feed links. | | pinterest | supports photos, gifs, videos and stories. | | reddit | supports gifs and videos. | | rutube | supports yappy & private links. |