1
0
mirror of https://git.sr.ht/~cadence/bibliogram synced 2025-12-14 18:45:06 +00:00

Optional Tor support

This commit is contained in:
Cadence Fish
2020-02-03 00:43:56 +13:00
parent b944042fe0
commit 96fa4758c0
11 changed files with 597 additions and 125 deletions

View File

@@ -1,5 +1,6 @@
const constants = require("./constants")
const {request} = require("./utils/request")
const switcher = require("./utils/torswitcher")
const {extractSharedData} = require("./utils/body")
const {TtlCache, RequestCache} = require("./cache")
const RequestHistory = require("./structures/RequestHistory")
@@ -50,17 +51,19 @@ function fetchTimelinePage(userID, after) {
after: after
}))
return requestCache.getOrFetchPromise("page/"+after, () => {
return request(`https://www.instagram.com/graphql/query/?${p.toString()}`).then(res => res.json()).then(root => {
if (!root.data) {
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
/** @type {import("./types").PagedEdges<import("./types").TimelineEntryN2>} */
const timeline = root.data.user.edge_owner_to_timeline_media
history.report("timeline", true)
return timeline
}).catch(error => {
if (error === constants.symbols.RATE_LIMITED) {
history.report("timeline", false)
console.error("missing data from timeline request, 429?", root) //todo: please make this better.
throw new Error("missing data from timeline request, 429?")
} else {
/** @type {import("./types").PagedEdges<import("./types").TimelineEntryN2>} */
const timeline = root.data.user.edge_owner_to_timeline_media
history.report("timeline", true)
return timeline
}
throw error
})
})
}
@@ -105,26 +108,28 @@ function fetchShortcodeData(shortcode) {
p.set("query_hash", constants.external.shortcode_query_hash)
p.set("variables", JSON.stringify({shortcode}))
return requestCache.getOrFetchPromise("shortcode/"+shortcode, () => {
return request(`https://www.instagram.com/graphql/query/?${p.toString()}`).then(res => res.json()).then(root => {
if (!root.data) {
history.report("post", false)
console.error("missing data from post request, 429?", root) //todo: please make this better.
throw new Error("missing data from post request, 429?")
return switcher.request(`https://www.instagram.com/graphql/query/?${p.toString()}`, async res => {
if (res.status === 429) throw constants.symbols.RATE_LIMITED
return res
}).then(res => res.json()).then(root => {
/** @type {import("./types").TimelineEntryN3} */
const data = root.data.shortcode_media
if (data == null) {
// the thing doesn't exist
throw constants.symbols.NOT_FOUND
} else {
/** @type {import("./types").TimelineEntryN3} */
const data = root.data.shortcode_media
if (data == null) {
// the thing doesn't exist
throw constants.symbols.NOT_FOUND
} else {
history.report("post", true)
if (constants.caching.db_post_n3) {
db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)")
.run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)})
}
return data
history.report("post", true)
if (constants.caching.db_post_n3) {
db.prepare("REPLACE INTO Posts (shortcode, id, id_as_numeric, username, json) VALUES (@shortcode, @id, @id_as_numeric, @username, @json)")
.run({shortcode: data.shortcode, id: data.id, id_as_numeric: data.id, username: data.owner.username, json: JSON.stringify(data)})
}
return data
}
}).catch(error => {
if (error === constants.symbols.RATE_LIMITED) {
history.report("post", false)
}
throw error
})
})
}

View File

@@ -7,6 +7,8 @@
let constants = {
// Things that server owners _should_ change!
website_origin: "http://localhost:10407",
use_tor: false, // Whether to enable Tor support at all
tor_password: null, // No effect without `use_tor = true`. If `null`, node will run its own Tor process instead.
// Things that server owners _could_ change if they want to.
settings: {
@@ -25,6 +27,7 @@ let constants = {
external: {
user_query_hash: "c9100bf9110dd6361671f113dd02e7d6",
timeline_query_hash: "e769aa130647d2354c40ea6a439bfc08",
timeline_query_hash_2: "42323d64886122307be10013ad2dcc44", // https://github.com/rarcega/instagram-scraper/blob/dc022081dbefc81500c5f70cce5c70cfd2816e3c/instagram_scraper/constants.py#L30
shortcode_query_hash: "2b0673e0dc4580674a88d426fe00ea90",
timeline_fetch_first: 12,
username_regex: "[\\w.]+",
@@ -45,7 +48,8 @@ let constants = {
TYPE_GALLERY_VIDEO: Symbol("TYPE_GALLERY_VIDEO"),
NOT_FOUND: Symbol("NOT_FOUND"),
NO_SHARED_DATA: Symbol("NO_SHARED_DATA"),
INSTAGRAM_DEMANDS_LOGIN: Symbol("INSTAGRAM_DEMANDS_LOGIN")
INSTAGRAM_DEMANDS_LOGIN: Symbol("INSTAGRAM_DEMANDS_LOGIN"),
RATE_LIMITED: Symbol("RATE_LIMITED")
},
database_version: 1

View File

@@ -1,13 +1,16 @@
const fetch = require("node-fetch").default
function request(url) {
console.log("-> [OUT]", String(url)) // todo: make more like pinski?
return fetch(url, {
function request(url, options = {}, settings = {}) {
if (settings.statusLine === undefined) settings.statusLine = "OUT"
if (settings.log === undefined) settings.log = true
if (settings.log) console.log(`-> [${settings.statusLine}] ${url}`) // todo: make more like pinski?
// @ts-ignore
return fetch(url, Object.assign({
headers: {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
},
redirect: "manual"
})
}, options))
}
module.exports.request = request

79
src/lib/utils/tor.js Normal file
View File

@@ -0,0 +1,79 @@
const SocksProxyAgent = require("socks-proxy-agent")
const {connect} = require("net");
const constants = require("../constants")
const {request} = require("./request")
/**
 * Sends requests through a Tor SOCKS proxy and asks the Tor controller for
 * fresh circuits when a response is rejected by the caller's test function.
 */
class TorManager {
	/**
	 * @param {import("@deadcanaries/granax/lib/controller")} tor
	 * @param {number} port port of the SOCKS listener on localhost
	 */
	constructor(tor, port) {
		this.tor = tor
		this.port = port
		this.agent = new SocksProxyAgent("socks5://localhost:"+this.port)
	}

	/**
	 * Request the URL through Tor and pass the response to the test function.
	 * If the test function succeeds, its return value is returned as-is (even when falsy).
	 * If the test function throws, the circuits are cleaned and the request is tried again,
	 * up to `maxAttempts` times in total. Once the attempts are used up, the last error
	 * from the test function is rejected here so that the caller can react to it.
	 * Errors from the underlying request itself are not retried.
	 * @param {string} url
	 * @param {(res: import("node-fetch").Response) => Promise<T>} test
	 * @param {number} [maxAttempts] total number of tries; values below 1 are treated as 1. Pass `Infinity` to retry without limit.
	 * @returns {Promise<T>}
	 * @template T the return value of the test function
	 */
	async request(url, test, maxAttempts = 5) {
		const attempts = maxAttempts >= 1 ? maxAttempts : 1
		let lastError
		for (let attempt = 1; attempt <= attempts; attempt++) {
			const res = await request(url, {agent: this.agent}, {log: true, statusLine: "TOR"})
			try {
				return await test(res)
			} catch (e) {
				lastError = e
				// always ask for a fresh circuit after a failed test, so that the next request
				// (from this loop or from a later caller) does not reuse the rejected one
				await this.newCircuit()
			}
		}
		throw lastError
	}

	/**
	 * Ask the Tor controller to clean its circuits.
	 * Resolves when the controller calls back; callback arguments are ignored.
	 * @returns {Promise<void>}
	 */
	newCircuit() {
		return new Promise(resolve => {
			this.tor.cleanCircuits(() => resolve())
		})
	}
}
try {
var granax = require("@deadcanaries/granax")
} catch (e) {}
/** @type {Promise<TorManager>} */
module.exports = new Promise(resolve => {
if (granax) {
/** @type {import("@deadcanaries/granax/lib/controller")} */
// @ts-ignore
let tor
if (constants.tor_password == null) {
// @ts-ignore
tor = new granax()
} else {
tor = new granax.TorController(connect(9051), {authOnConnect: false})
tor.authenticate(`"${constants.tor_password}"`, err => {
if (err) console.log("Tor auth error:", err)
})
}
console.log("Starting tor...")
tor.once("ready", () => {
tor.getInfo("net/listeners/socks", (err, result) => {
if (err) throw err
// result is string containing something like "127.0.0.1:36977"
// yes, the string contains double quotes!
const port = +result.match(/:(\d+)/)[1]
const torManager = new TorManager(tor, port)
console.log("Tor is ready, using SOCKS port "+port)
resolve(torManager)
})
})
tor.on("error", function() {
console.log("Tor error!")
console.log(...arguments)
})
} else {
console.log("Note: Tor functionality not installed. You may wish to run `npm install @deadcanaries/granax`. (78+ MB download required.)")
resolve(null)
}
})

View File

@@ -0,0 +1,41 @@
const constants = require("../constants")
const {request} = require("./request")
/**
 * Routes requests through a TorManager when one has been set,
 * and through the plain request function otherwise.
 */
class TorSwitcher {
	constructor() {
		// stays null until setManager is called
		this.torManager = null
	}

	/**
	 * Store the manager that later requests will be sent through.
	 */
	setManager(torManager) {
		this.torManager = torManager
	}

	/**
	 * Fetch the URL and hand the response object to the test function.
	 * With a manager set, the manager's own request method does all of the work.
	 * Without one, the URL is requested directly and the test function is called
	 * on the response: its return value is resolved here, its error is rejected here.
	 * The test function should contain rate limit logic only!
	 * @param {string} url
	 * @param {(res: import("node-fetch").Response) => Promise<T>} test
	 * @returns {Promise<T>}
	 * @template T the return value of the test function
	 */
	request(url, test) {
		const manager = this.torManager
		if (!manager) {
			const pending = request(url)
			return pending.then(response => test(response))
		}
		return manager.request(url, test)
	}
}
// Single shared switcher, exported for the rest of the program.
const switcher = new TorSwitcher()

// The tor module is only loaded when Tor support is switched on in the constants.
// It exports a promise; a null result leaves the switcher on direct requests.
if (constants.use_tor) {
	const attachManager = manager => {
		if (manager) switcher.setManager(manager)
	}
	require("./tor").then(attachManager)
}

module.exports = switcher

View File

@@ -3,8 +3,6 @@ const {request} = require("../../lib/utils/request")
const {proxy} = require("pinski/plugins")
const sharp = require("sharp")
const VERIFY_SUCCESS = Symbol("VERIFY_SUCCESS")
/**
* Check that a resource is on Instagram.
* @param {URL} completeURL
@@ -38,7 +36,7 @@ module.exports = [
Some thumbnails aren't square and would otherwise be stretched on the page without this.
If I cropped the images client side, it would have to be done with CSS background-image, which means no <img srcset>.
*/
return request(verifyResult.url).then(res => {
return request(verifyResult.url, {}, {log: false}).then(res => {
const converter = sharp().resize(width, width, {position: "entropy"})
return {
statusCode: 200,

View File

@@ -175,6 +175,7 @@ body
color: #111
background-color: rgba(40, 40, 40, 0.25)
text-decoration: none
overflow: hidden
@include sized
&:hover

View File

@@ -17,6 +17,9 @@ subdirs("pug", (err, dirs) => {
pinski.addAPIDir("html/static/js/templates/api")
pinski.addSassDir("sass")
pinski.addAPIDir("api")
pinski.muteLogsStartingWith("/imageproxy")
pinski.muteLogsStartingWith("/videoproxy")
pinski.muteLogsStartingWith("/static")
pinski.startServer()
pinski.enableWS()