// Skip to content
//
// Instantly share code, notes, and snippets.
//
// @luavixen
// Last active December 22, 2022 01:42
// Show Gist options
//   • Save luavixen/347cfff0038b159da51496d73ad6a09d to your computer and use it in GitHub Desktop.
// Save luavixen/347cfff0038b159da51496d73ad6a09d to your computer and use it in GitHub Desktop.
// Mirrors posts from federated users to your instance.
//
// Mastodownload v1.1
// Mirrors posts from federated users to your instance.
//
// Requires Node.js 18 or higher.
//
// Usage:
// $ mkdir mastodownload && cd mastodownload
// $ wget 'https://gist.githubusercontent.com/luavixen/347cfff0038b159da51496d73ad6a09d/raw/mastodownload.js'
// $ npm install debug
// $ node ./mastodownload.js
//
// CONFIG //////////////////////////////////////////////////
// Accounts to mirror. Each entry takes:
//   account - fediverse handle of the form '@user@host' (the leading '@' is
//             optional); it is split into handle and host by parse()
//   max     - optional cap on the number of posts mirrored for that account;
//             main() falls back to 80 when it is missing or not finite
//
// NOTE: the handles that used to be listed here were destroyed by e-mail
// obfuscation when this file was copied from the web and could not be parsed
// at all. The entries below are PLACEHOLDERS on reserved example domains -
// replace them with the real accounts you want to mirror.
var targets = [
  { account: '@user1@example.com' },
  { account: '@user2@example.com' },
  { account: '@user3@example.org' },
  { account: '@user4@example.org', max: 120 },
  { account: '@user5@example.net', max: 100 },
  { account: '@user6@example.net' },
]
// How many runner loops main() starts; each runner works on one account at a time.
const concurrency = 2

// When false, the status listing request adds exclude_replies=true.
const downloadReplies = true

// When false, the status listing request adds exclude_reblogs=true.
const downloadReblogs = true

// Number of tries allowed for a single search before the error is rethrown.
const retryAttempts = 3

// Base delay in milliseconds between tries; multiplied by the attempt number.
const retryTimeout = 5000

// Host of the instance whose /api/v2/search endpoint is queried.
const instanceHost = 'vixen.zone'

// Sent verbatim as the Authorization header of every search request.
const instanceToken = 'Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

// Sent as the User-Agent header of every request this script makes.
const agent = 'Mozilla/5.0 (compatible; SillyFoxCrawlingThroughYourPosts/1.1; +https://gist.githubusercontent.com/luavixen/347cfff0038b159da51496d73ad6a09d)'
// IMPLEMENTATION //////////////////////////////////////////
var debug = require('debug')
debug.enable('*')
// HTTP status codes treated as transient: a failure carrying one of these is
// flagged as worth retrying.
var retryable = [408, 425, 429, 502, 503, 504]

// Calls fetch(...options) and returns the response body decoded as JSON.
//
// Every failure is thrown as an Error with two extra properties:
//   err.retry    - true when trying again may succeed: fetch() itself failed,
//                  or the HTTP status is listed in `retryable`
//   err.response - the Response that was received, or null when fetch()
//                  rejected before any response arrived
//
// A decoded body that contains an `error` field counts as a failure even if
// the HTTP status is OK.
var request = async function (...options) {
  var err
  var response, body
  try {
    response = await fetch(...options)
  } catch (cause) {
    err = new Error('Request fetch failed: ' + cause?.message, { cause })
    err.retry = true
    err.response = null
    throw err
  }
  try {
    body = await response.json()
  } catch (cause) {
    err = new Error('Request decode failed: ' + cause?.message, { cause })
    // The body of a transient failure (rate limit, bad gateway, ...) is not
    // necessarily JSON, e.g. an HTML error page. Decide from the status code
    // so those responses stay retryable instead of aborting the download.
    err.retry = retryable.includes(response.status)
    err.response = response
    throw err
  }
  var message = body?.error
  if (message != null || !response.ok) {
    // Without an `error` field, report the status code rather than "undefined".
    err = new Error(`Request failed: "${message ?? 'HTTP ' + response.status}"`)
    err.retry = retryable.includes(response.status)
    err.response = response
    throw err
  }
  return body
}
// Asks the configured instance (instanceHost) to search for `query` with
// resolve=true, limited to one status, and returns the decoded response.
//
//   log   - logging function, called with a 'searching: ...' message
//   query - the search text (download() passes a status URL)
//
// Errors thrown by request() are rethrown with the query attached as
// err.query.
async function requestSearch(log, query) {
  const endpoint = new URL(`https://${instanceHost}/api/v2/search`)
  const parameters = [
    ['type', 'statuses'],
    ['limit', '1'],
    ['resolve', 'true'],
    ['q', query],
  ]
  for (const [key, value] of parameters) {
    endpoint.searchParams.set(key, value)
  }

  log('searching: ' + query)

  const headers = {
    'Accept': 'application/json',
    'Authorization': instanceToken,
    'User-Agent': agent,
  }
  try {
    const found = await request(endpoint, { headers })
    return found
  } catch (failure) {
    // Remember which query failed so the log output can be traced back.
    failure.query = query
    throw failure
  }
}
// Looks an account up on its own host through /api/v1/accounts/lookup and
// returns the decoded response (download() reads its `id`).
//
//   log    - logging function, called with a 'looking up: ...' message
//   host   - host name of the account's instance
//   handle - account name on that host; URI-encoded into the `acct` parameter
async function requestLookup(log, host, handle) {
  const acct = encodeURIComponent(handle)
  const url = `https://${host}/api/v1/accounts/lookup?acct=` + acct
  log('looking up: ' + url)
  const headers = { 'User-Agent': agent }
  return await request(url, { headers })
}
// Async generator over an account's statuses, fetched from the account's own
// host in pages of up to 40.
//
//   log  - logging function, called with the URL of every page fetched
//   host - host name of the account's instance
//   id   - account id on that host
//
// Replies / reblogs are excluded when downloadReplies / downloadReblogs are
// false. Iteration ends at the first empty page.
async function* requestListing(log, host, id) {
  const endpoint = new URL(`https://${host}/api/v1/accounts/${id}/statuses`)
  const { searchParams } = endpoint
  searchParams.set('limit', '40')
  if (!downloadReplies) {
    searchParams.set('exclude_replies', 'true')
  }
  if (!downloadReblogs) {
    searchParams.set('exclude_reblogs', 'true')
  }

  while (true) {
    log('fetching statuses: ' + endpoint)
    const page = await request(endpoint, { headers: { 'User-Agent': agent } })
    if (page.length < 1) {
      return
    }
    for (const entry of page) {
      yield entry
    }
    // Continue from the last status of this page.
    const last = page.at(-1)
    searchParams.set('max_id', last.id)
  }
}
// Returns a promise that resolves (with no value) after `timeout`
// milliseconds, as scheduled by setTimeout.
function timeout(timeout) {
  return new Promise((wake) => {
    setTimeout(wake, timeout)
  })
}
// Mirrors one account: looks the account up on its own host, walks its
// statuses and passes each status URL to requestSearch().
//
//   log    - logging function
//   host   - host name of the account's instance
//   handle - account name on that host
//   max    - stop after this many statuses have been searched successfully;
//            a falsy value means no limit
//
// A failed search is retried while err.retry is set and fewer than
// retryAttempts tries were made, waiting attempt * retryTimeout milliseconds
// in between. Any other failure is rethrown and ends the download.
async function download(log, host, handle, max) {
  const user = await requestLookup(log, host, handle)
  let mirrored = 0

  for await (const status of requestListing(log, host, user.id)) {
    let target = status.url
    if (!target) {
      continue
    }

    // URLs ending in "<digits>/activity" are rewritten to the account's own
    // "/@handle/<digits>" form before searching.
    const reblog = /(\d+)\/activity$/.exec(target)
    if (reblog !== null) {
      log('rewrite reblog: ' + target)
      target = `https://${host}/@${handle}/${reblog[1]}`
    }

    let failures = 0
    let settled = false
    while (!settled) {
      try {
        await requestSearch(log, target)
        settled = true
      } catch (err) {
        if (!err.retry) {
          throw err
        }
        failures += 1
        if (failures >= retryAttempts) {
          throw err
        }
        log('retrying after delay: %O', err)
        await timeout(failures * retryTimeout)
        log('retrying now')
      }
    }

    mirrored += 1
    if (max && max <= mirrored) {
      break
    }
  }

  log('done')
}
// Runs download() for one account with a logger namespaced "handle@host".
// A failure is logged rather than rethrown, so the returned promise does not
// reject because of a failed download.
//
//   location - { host, handle } as produced by parse()
//   max      - passed through to download()
async function task(location, max) {
  const log = debug(`${location.handle}@${location.host}`)
  log('starting task')
  try {
    await download(log, location.host, location.handle, max)
  } catch (failure) {
    log('task failed: %O', failure)
    return
  }
  log('task complete')
}
// Splits an account string such as '@user@host' (leading '@' optional) into
// its parts and returns { host, handle }.
// Throws a SyntaxError when no "name@host" pair can be found in the input.
function parse(account) {
  const match = /@?([\w._-]+)@([\w._-]+)/.exec(account)
  if (match === null) {
    throw new SyntaxError(`Invalid account "${account}"`)
  }
  const [, handle, host] = match
  return { host, handle }
}
// Parses every configured target and works through them with `concurrency`
// runners in parallel; each runner takes the next unclaimed target, runs
// task() on it and repeats until none are left.
//
// A target's `max` is rounded down; a missing, zero or non-finite value
// becomes 80. Resolves once all runners have finished.
async function main() {
  const list = targets.map(({ account, max }) => ({
    location: parse(account),
    max: max && isFinite(max) ? Math.floor(max) : 80,
  }))

  // One iterator shared by all runners, so each entry is handed out once.
  const queue = list.values()
  const runner = async () => {
    for (const { location, max } of queue) {
      await task(location, max)
    }
  }

  const runners = []
  while (runners.length < concurrency) {
    runners.push(runner())
  }
  await Promise.all(runners)
}
// Entry point. task() logs the failure of an individual account itself, so a
// rejection arriving here typically comes from setup - e.g. the SyntaxError
// parse() throws for a malformed account - and is printed with console.error.
main().catch((err) => console.error(err))
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment