1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2025-12-14 02:35:06 +00:00

Allow got as request backend

This commit is contained in:
Cadence Fish
2020-03-15 19:50:29 +13:00
parent a861df2662
commit 3efc4928a5
10 changed files with 439 additions and 51 deletions

View File

@@ -71,41 +71,41 @@ function fetchUserFromHTML(username) {
if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => {
}).then(async g => {
const res = await g.response()
if (res.status === 404) {
throw constants.symbols.NOT_FOUND
} else {
return res.text().then(text => {
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const sharedData = extractSharedData(text)
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
history.report("user", true)
if (constants.caching.db_user_id) {
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
db.prepare(
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
).run({
username: user.data.username,
user_id: user.data.id,
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
updated: Date.now(),
updated_version: constants.database_version,
biography: user.data.biography || null,
post_count: user.posts || 0,
following_count: user.following || 0,
followed_by_count: user.followedBy || 0,
external_url: user.data.external_url || null,
full_name: user.data.full_name || null,
is_private: +user.data.is_private,
is_verified: +user.data.is_verified,
profile_pic_url: user.data.profile_pic_url
})
}
return user
})
const text = await g.text()
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const sharedData = extractSharedData(text)
const user = new User(sharedData.entry_data.ProfilePage[0].graphql.user)
history.report("user", true)
if (constants.caching.db_user_id) {
const existing = db.prepare("SELECT created, updated_version FROM Users WHERE username = ?").get(user.data.username)
db.prepare(
"REPLACE INTO Users (username, user_id, created, updated, updated_version, biography, post_count, following_count, followed_by_count, external_url, full_name, is_private, is_verified, profile_pic_url) VALUES "
+"(@username, @user_id, @created, @updated, @updated_version, @biography, @post_count, @following_count, @followed_by_count, @external_url, @full_name, @is_private, @is_verified, @profile_pic_url)"
).run({
username: user.data.username,
user_id: user.data.id,
created: existing && existing.updated_version === constants.database_version ? existing.created : Date.now(),
updated: Date.now(),
updated_version: constants.database_version,
biography: user.data.biography || null,
post_count: user.posts || 0,
following_count: user.following || 0,
followed_by_count: user.followedBy || 0,
external_url: user.data.external_url || null,
full_name: user.data.full_name || null,
is_private: +user.data.is_private,
is_verified: +user.data.is_verified,
profile_pic_url: user.data.profile_pic_url
})
}
return user
}
}).catch(error => {
if (error === constants.symbols.INSTAGRAM_DEMANDS_LOGIN || error === constants.symbols.RATE_LIMITED) {
@@ -202,8 +202,7 @@ function fetchTimelinePage(userID, after) {
return requestCache.getOrFetchPromise(`page/${userID}/${after}`, () => {
return switcher.request("timeline_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
}).then(g => g.json()).then(root => {
/** @type {import("./types").PagedEdges<import("./types").TimelineEntryN2>} */
const timeline = root.data.user.edge_owner_to_timeline_media
history.report("timeline", true)
@@ -259,7 +258,6 @@ function fetchShortcodeData(shortcode) {
return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => {
return switcher.request("post_graphql", `https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
/** @type {import("./types").TimelineEntryN3} */
const data = root.data.shortcode_media

View File

@@ -20,6 +20,7 @@ let constants = {
reel_graphql: true
}
},
request_backend: "node-fetch", // one of: "node-fetch", "got"
// After setting your privacy policy, I suggest you read src/site/html/.well-known/dnt-policy.txt. If you comply with it,
// change this to `true` to serve it, which will make extensions like Privacy Badger automatically whitelist the domain.
does_not_track: false,

View File

@@ -1,16 +1,39 @@
const fetch = require("node-fetch").default
const NodeFetch = require("./requestbackends/node-fetch")
const Got = require("./requestbackends/got")
const constants = require("../constants")
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
const backendStatusLineMap = new Map([
["node-fetch", "NF "],
["got", "GOT"]
])
/**
* @returns {import("./requestbackends/reference")}
*/
function request(url, options = {}, settings = {}) {
if (settings.statusLine === undefined) settings.statusLine = "OUT"
if (settings.log === undefined) settings.log = true
if (settings.log) console.log(` -> [${settings.statusLine}] ${url}`) // todo: make more like pinski?
// @ts-ignore
return fetch(url, Object.assign({
headers: {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
},
redirect: "manual"
}, options))
if (settings.log) console.log(` -> [${settings.statusLine}-${backendStatusLineMap.get(constants.request_backend)}] ${url}`) // todo: make more like pinski?
if (constants.request_backend === "node-fetch") {
return new NodeFetch(url, Object.assign({
headers: {
"User-Agent": userAgent
},
redirect: "manual"
}, options))
} else if (constants.request_backend === "got") {
return new Got(url, Object.assign({
headers: {
"User-Agent": userAgent
},
followRedirect: false
}, options))
} else {
throw new Error("Invalid value for setting `request_backend`.")
}
}
module.exports.request = request

View File

@@ -0,0 +1,46 @@
try {
var got = require("got").default
} catch (e) {}
/**
 * Request backend that performs HTTP requests with the optional `got` package.
 * Exposes the same methods as the node-fetch backend (stream, response, json, text, check).
 * The request is started lazily, the first time a method needs it, and is reused afterwards.
 */
class Got {
	/**
	 * @param {string} url address to request
	 * @param {any} options options passed through to `got`; they override the defaults set here
	 * @param {any} [stream] unused, kept so existing call signatures stay valid
	 */
	constructor(url, options, stream) {
		if (!got) throw new Error("`got` is not installed, either install it or set a different request backend.")
		this.url = url
		// By default got rejects with an HTTPError for 4xx/5xx responses, which would stop
		// response() and check() from ever seeing statuses such as 404 or 429.
		// Disable that so status codes are reported the same way as by the node-fetch backend.
		this.options = Object.assign({throwHttpErrors: false}, options)
		/** The pending got request, created by send(). */
		this.instance = null
	}

	/**
	 * Open the URL as a stream. This is a separate request from the one made by send().
	 * @returns {Promise<any>} resolves to the stream returned by `got.stream`
	 */
	stream() {
		return Promise.resolve(got.stream(this.url, this.options))
	}

	/**
	 * Start the request if it has not been started yet.
	 * @returns {this}
	 */
	send() {
		if (!this.instance) {
			this.instance = got(this.url, this.options)
		}
		return this
	}

	/**
	 * @returns {Promise<import("./reference").GrabResponse>}
	 */
	response() {
		return this.send().instance.then(res => ({
			status: res.statusCode
		}))
	}

	/**
	 * Run a test function against the response. If the test throws or rejects, so does this.
	 * @param {(res: import("./reference").GrabResponse) => any} test
	 * @returns {Promise<this>}
	 */
	async check(test) {
		await this.send().response().then(res => test(res))
		return this
	}

	/**
	 * @returns {Promise<any>} the response body parsed as JSON
	 */
	json() {
		return this.send().instance.json()
	}

	/**
	 * @returns {Promise<string>} the response body as text
	 */
	text() {
		return this.send().instance.text()
	}
}
module.exports = Got

View File

@@ -0,0 +1,30 @@
const fetch = require("node-fetch").default
/**
 * Request backend that performs HTTP requests with node-fetch.
 * The request is started as soon as the object is constructed,
 * and every method reads from that single pending fetch.
 */
class NodeFetch {
	/**
	 * @param {string} url address to request
	 * @param {any} options options passed straight to `fetch`
	 */
	constructor(url, options) {
		this.instance = fetch(url, options)
	}

	/**
	 * @returns {Promise<any>} resolves to the `body` of the fetch response
	 */
	async stream() {
		const fetched = await this.instance
		return fetched.body
	}

	/**
	 * @returns {Promise<any>} the pending fetch itself, resolving to its response
	 */
	response() {
		return this.instance
	}

	/**
	 * @returns {Promise<any>} the response body parsed as JSON
	 */
	async json() {
		const fetched = await this.instance
		return fetched.json()
	}

	/**
	 * @returns {Promise<string>} the response body as text
	 */
	async text() {
		const fetched = await this.instance
		return fetched.text()
	}

	/**
	 * Run a test function against the response. If the test throws or rejects, so does this.
	 * @param {(res: any) => any} test
	 * @returns {Promise<this>}
	 */
	async check(test) {
		const fetched = await this.response()
		await test(fetched)
		return this
	}
}
module.exports = NodeFetch

View File

@@ -0,0 +1,45 @@
/**
* @typedef GrabResponse
* @property {number} status
*/
// @ts-nocheck
/**
* Reference shape for request backends. It exists only so other modules can
* refer to its type; every method here is an empty stub and the constructor always throws.
*/
class GrabReference {
/**
* @param {string} url
* @param {any} options
*/
constructor(url, options) {
// Guard against accidental use: this class documents an interface and does no work.
throw new Error("This is the reference class, do not instantiate it.")
}
// Please help me type this
/**
* Stub. Declared to resolve to the response body stream.
* @returns {Promise<any>}
*/
stream() {}
/**
* Stub. Declared to resolve to an object carrying the response status.
* @returns {Promise<GrabResponse>}
*/
response() {}
/**
* Stub. Declared to resolve to the JSON body.
* @returns {Promise<any>}
*/
json() {}
/**
* Stub. Declared to resolve to the text body.
* @returns {Promise<string>}
*/
text() {}
/**
* Stub. Declared to take a test function that receives the response,
* and to resolve to a backend instance.
* @param {(res: GrabResponse) => any} test
* @returns {Promise<GrabReference>}
*/
check(test) {}
}
module.exports = GrabReference

View File

@@ -21,15 +21,14 @@ class TorSwitcher {
* If the test function fails, its error will be rejected here.
* Only include rate limit logic in the test function!
* @param {string} url
* @param {(res: import("node-fetch").Response) => Promise<T>} test
* @returns {Promise<T>}
* @template T the return value of the test function
* @param {(res: import("./requestbackends/reference").GrabResponse) => any} test
* @returns {Promise<import("./requestbackends/reference")>}
*/
request(type, url, test) {
if (this.torManager && constants.tor.for[type]) {
return this.torManager.request(url, test)
} else {
return request(url).then(res => test(res))
return request(url).check(test)
}
}
}

View File

@@ -36,18 +36,25 @@ module.exports = [
Some thumbnails aren't square and would otherwise be stretched on the page without this.
If I cropped the images client side, it would have to be done with CSS background-image, which means no <img srcset>.
*/
return request(verifyResult.url, {}, {log: false}).then(res => {
return request(verifyResult.url, {}, {log: false}).stream().then(body => {
const converter = sharp().resize(width, width, {position: "entropy"})
body.on("error", error => {
console.error("Response stream emitted an error:", error)
})
converter.on("error", error => {
console.error("Sharp instance emitted an error:", error)
})
const piped = body.pipe(converter)
piped.on("error", error => {
console.error("Piped stream emitted na error:", error)
})
return {
statusCode: 200,
contentType: "image/jpeg",
headers: {
"Cache-Control": constants.caching.image_cache_control
},
stream: res.body.pipe(converter)
stream: piped
}
})
} else {