Last active
December 22, 2022 01:42
-
-
Save luavixen/347cfff0038b159da51496d73ad6a09d to your computer and use it in GitHub Desktop.
Mirrors posts from federated users to your instance.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// Mastodownload v1.1
// Mirrors posts from federated users to your instance.
//
// Requires Node.js 18 or higher.
//
// Usage:
//   $ mkdir mastodownload && cd mastodownload
//   $ wget 'https://gist.githubusercontent.com/luavixen/347cfff0038b159da51496d73ad6a09d/raw/mastodownload.js'
//   $ npm install debug
//   $ node ./mastodownload.js
//
// CONFIG //////////////////////////////////////////////////

// Accounts to mirror. `max` optionally caps how many statuses are mirrored for
// that account; main() falls back to 80 when it is missing or not finite.
// NOTE(review): the account values below look like e-mail-obfuscation
// placeholders rather than real '@user@host' handles - confirm and replace.
var targets = [
  { account: '@[email protected]' },
  { account: '@[email protected]' },
  { account: '@[email protected]' },
  { account: '@[email protected]', max: 120 },
  { account: '@[email protected]', max: 100 },
  { account: '@[email protected]' },
]

// How many runner loops main() starts side by side.
var concurrency = 2

// When false, the status listing asks the remote host to exclude replies / reblogs.
var downloadReplies = true
var downloadReblogs = true

// A retryable search is attempted at most `retryAttempts` times in total; the
// wait before each retry is `retryTimeout` milliseconds times the attempt number.
var retryAttempts = 3
var retryTimeout = 5000

// Host whose search API is queried, and the Authorization header value sent to it.
var instanceHost = 'vixen.zone'
var instanceToken = 'Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

// User-Agent header sent with every request.
var agent = 'Mozilla/5.0 (compatible; SillyFoxCrawlingThroughYourPosts/1.1; +https://gist.githubusercontent.com/luavixen/347cfff0038b159da51496d73ad6a09d)'
// IMPLEMENTATION //////////////////////////////////////////

// Logger factory from the `debug` package; task() builds one logger per account.
var debug = require('debug')

// Enable output for every debug namespace.
debug.enable('*')
// HTTP status codes treated as transient, i.e. worth retrying.
var retryable = [408, 425, 429, 502, 503, 504]

/**
 * Performs a fetch() and decodes the response body as JSON.
 *
 * Every failure is thrown as an Error carrying two extra properties:
 *   - `retry`:    true when trying again may succeed
 *   - `response`: the Response object, or null when fetch() itself failed
 *
 * @param {...*} options Arguments forwarded unchanged to fetch().
 * @returns {Promise<*>} The decoded JSON body.
 * @throws {Error} When fetch() rejects, the body is not valid JSON, the body
 *   carries an `error` field, or the response status is not ok.
 */
var request = async function (...options) {
  let response
  try {
    response = await fetch(...options)
  } catch (cause) {
    const err = new Error('Request fetch failed: ' + cause?.message, { cause })
    err.retry = true
    err.response = null
    throw err
  }

  let body
  try {
    body = await response.json()
  } catch (cause) {
    const err = new Error('Request decode failed: ' + cause?.message, { cause })
    // A non-JSON body does not make a transient status permanent (proxies and
    // rate limiters may answer with HTML or nothing), so decide retryability
    // from the status code exactly like the JSON error path below.
    err.retry = retryable.includes(response.status)
    err.response = response
    throw err
  }

  const message = body?.error
  if (message != null || !response.ok) {
    // Fall back to the status code so the message never reads "undefined".
    const reason = message ?? `HTTP ${response.status}`
    const err = new Error(`Request failed: "${reason}"`)
    err.retry = retryable.includes(response.status)
    err.response = response
    throw err
  }

  return body
}
/**
 * Runs a resolving status search for `query` against the configured instance
 * (`instanceHost`), authenticated with `instanceToken`.
 *
 * @param {Function} log Logger called with a progress message.
 * @param {string} query Search text; download() passes a status URL.
 * @returns {Promise<*>} Whatever request() returns for the search endpoint.
 * @throws Rethrows any request() failure after tagging it with `query`.
 */
var requestSearch = async function (log, query) {
  const endpoint = new URL(`https://${instanceHost}/api/v2/search`)
  const parameters = {
    type: 'statuses',
    limit: '1',
    resolve: 'true',
    q: query,
  }
  for (const name of Object.keys(parameters)) {
    endpoint.searchParams.set(name, parameters[name])
  }

  log(`searching: ${query}`)

  const headers = {
    'Accept': 'application/json',
    'Authorization': instanceToken,
    'User-Agent': agent,
  }
  try {
    return await request(endpoint, { headers })
  } catch (failure) {
    // Record which query failed so the caller's log shows it.
    failure.query = query
    throw failure
  }
}
/**
 * Looks up an account by handle on the given host's account-lookup endpoint.
 *
 * @param {Function} log Logger called with the URL being requested.
 * @param {string} host Host to query.
 * @param {string} handle Account name; URI-encoded into the `acct` parameter.
 * @returns {Promise<*>} The decoded response from request(); download() reads its `id`.
 */
var requestLookup = async function (log, host, handle) {
  const acct = encodeURIComponent(handle)
  const url = `https://${host}/api/v1/accounts/lookup?acct=${acct}`
  log(`looking up: ${url}`)
  const init = { headers: { 'User-Agent': agent } }
  return await request(url, init)
}
/**
 * Async generator over an account's statuses, fetched 40 per request.
 *
 * After each page the `max_id` parameter is set to the id of the last status
 * received, and the generator finishes when a page comes back empty.
 * Replies / reblogs are excluded when the corresponding config flag is off.
 *
 * @param {Function} log Logger called with each URL before it is fetched.
 * @param {string} host Host that serves the account.
 * @param {string} id Account id on that host.
 * @yields {*} One status object at a time, in the order the host returns them.
 */
var requestListing = async function* (log, host, id) {
  const endpoint = new URL(`https://${host}/api/v1/accounts/${id}/statuses`)
  const params = endpoint.searchParams
  params.set('limit', '40')
  if (!downloadReplies) {
    params.set('exclude_replies', 'true')
  }
  if (!downloadReblogs) {
    params.set('exclude_reblogs', 'true')
  }

  while (true) {
    log(`fetching statuses: ${endpoint}`)
    const page = await request(endpoint, { headers: { 'User-Agent': agent } })
    if (page.length < 1) {
      return
    }
    for (const entry of page) {
      yield entry
    }
    // Continue below the oldest status seen so far.
    params.set('max_id', page.at(-1).id)
  }
}
/**
 * Returns a promise that resolves (with no value) after the given delay.
 *
 * @param {number} timeout Delay in milliseconds, handed to setTimeout.
 * @returns {Promise<void>}
 */
var timeout = function (timeout) {
  return new Promise(function (done) {
    setTimeout(done, timeout)
  })
}
/**
 * Mirrors one account: looks the account up on its host, walks its statuses
 * and submits each status URL to requestSearch().
 *
 * Statuses without a URL are skipped. A URL ending in "<digits>/activity" is
 * rewritten to "https://<host>/@<handle>/<digits>" before searching.
 * A failed search is retried while the error is flagged `retry` and fewer than
 * `retryAttempts` attempts were made, waiting attempt * `retryTimeout` ms in
 * between; any other failure aborts the whole download.
 *
 * @param {Function} log Logger for progress messages.
 * @param {string} host Host serving the account.
 * @param {string} handle Account name on that host.
 * @param {number} max Stop after this many searched statuses (falsy = no cap).
 * @returns {Promise<void>}
 */
var download = async function (log, host, handle, max) {
  const account = await requestLookup(log, host, handle)
  let mirrored = 0

  for await (const status of requestListing(log, host, account.id)) {
    let target = status.url
    if (!target) {
      continue
    }

    const activity = /(\d+)\/activity$/.exec(target)
    if (activity) {
      log(`rewrite reblog: ${target}`)
      target = `https://${host}/@${handle}/${activity[1]}`
    }

    let failures = 0
    let resolved = false
    while (!resolved) {
      try {
        await requestSearch(log, target)
        resolved = true
      } catch (err) {
        // Give up on non-retryable errors and once the attempt budget is spent.
        if (!err.retry) {
          throw err
        }
        failures += 1
        if (!(failures < retryAttempts)) {
          throw err
        }
        log('retrying after delay: %O', err)
        await timeout(failures * retryTimeout)
        log('retrying now')
      }
    }

    mirrored += 1
    if (max && max <= mirrored) {
      break
    }
  }

  log('done')
}
/**
 * Runs download() for one account with a logger named "<handle>@<host>".
 * Failures are logged and swallowed so one account cannot stop the others.
 *
 * @param {{host: string, handle: string}} location Parsed account.
 * @param {number} max Status cap passed through to download().
 * @returns {Promise<void>} Always resolves.
 */
var task = async function (location, max) {
  const { handle, host } = location
  const log = debug(`${handle}@${host}`)
  log('starting task')
  try {
    await download(log, host, handle, max)
    log('task complete')
  } catch (failure) {
    log('task failed: %O', failure)
  }
}
/**
 * Splits an account string of the form "@handle@host" (leading "@" optional)
 * into its two parts. The pattern is searched for anywhere in the input.
 *
 * @param {string} account Account string from the config.
 * @returns {{host: string, handle: string}}
 * @throws {SyntaxError} When no "handle@host" pair is found.
 */
var parse = function (account) {
  const match = /@?([\w._-]+)@([\w._-]+)/.exec(account)
  if (!match) {
    throw new SyntaxError(`Invalid account "${account}"`)
  }
  const [, handle, host] = match
  return { host, handle }
}
/**
 * Builds the work queue from `targets` and drains it with `concurrency`
 * runner loops, each handling one account at a time.
 *
 * A target's `max` is floored when it is truthy and finite; otherwise the cap
 * defaults to 80.
 *
 * @returns {Promise<void>} Resolves once every runner has emptied the queue.
 * @throws {SyntaxError} If parse() rejects one of the configured accounts.
 */
var main = async function () {
  const queue = targets.map(function (target) {
    const location = parse(target.account)
    const max = target.max && isFinite(target.max) ? Math.floor(target.max) : 80
    return { location, max }
  })

  // Each runner keeps pulling from the shared queue until nothing is left.
  const drain = async () => {
    let job
    while ((job = queue.shift()) != null) {
      await task(job.location, job.max)
    }
  }

  const runners = []
  for (let slot = 0; slot < concurrency; slot += 1) {
    runners.push(drain())
  }
  await Promise.all(runners)
}
// Entry point. Anything main() rejects with (e.g. an invalid account in the
// config) is printed, and the process is flagged as failed so shells and
// schedulers see a non-zero exit status instead of a silent success.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment