atproto relay resources -- docker compose file + a script that uses https://github.com/mary-ext/atproto-scraping to subscribe to all known PDSes
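The script further down pulls the list of known PDSes from the state.json published by that repo; a minimal sketch of the shape it relies on (only the keys of pdses are consumed, the value type here is an assumption):

// Sketch of the portion of atproto-scraping's state.json that the script uses.
// Keys of `pdses` are PDS base URLs; the value type is an assumption.
interface ScrapedState {
  pdses: Record<string, unknown>; // e.g. { "https://pds.example.com": { ... } }
}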
docker compose file:
version: "3.9" | |
volumes: | |
relay: | |
name: relay | |
services: | |
relay: | |
ports: | |
- 2470:2470 | |
build: | |
context: ./indigo | |
dockerfile: cmd/relay/Dockerfile | |
command: /relay | |
environment: | |
- RELAY_ADMIN_KEY=Password1 | |
- RELAY_DEFAULT_REPO_LIMIT=600_000 # ~lines up with the number of repos on each bsky.social PDS | |
volumes: | |
- relay:/data |
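Once the relay container is up, it can help to confirm it is reachable before requesting crawls. A minimal sketch, assuming the relay is published on localhost:2470 as in the compose file above and exposes the usual /xrpc/_health route (not verified here):

// Quick reachability check against the relay started by the compose file above.
// Assumes the standard /xrpc/_health endpoint; adjust RELAY_URL if the port mapping differs.
const RELAY_URL = process.env.RELAY_URL ?? "http://localhost:2470";

async function checkRelayHealth(): Promise<void> {
  const res = await fetch(`${RELAY_URL}/xrpc/_health`);
  if (!res.ok) throw new Error(`Relay health check failed: ${res.status} ${res.statusText}`);
  console.log("Relay is up:", await res.json());
}

void checkRelayHealth();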
crawl request script:
declare global {
  namespace NodeJS {
    interface ProcessEnv {
      BGS_ADDRESS: string;
      BGS_ADMIN_KEY: string;
    }
  }
}

for (const envVar of ["BGS_ADDRESS", "BGS_ADMIN_KEY"]) {
  if (!process.env[envVar]) throw new Error(`Missing env var ${envVar}`);
}

// Ask the relay's admin API to crawl every known PDS.
async function main() {
  const pdses = (await fetchPdses()).map((url) => new URL(url));
  console.log("Requesting crawls...");
  await Promise.all(pdses.map(async (url) => {
    try {
      const res = await fetch(`${process.env.BGS_ADDRESS}/admin/pds/requestCrawl`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${process.env.BGS_ADMIN_KEY}`,
        },
        body: JSON.stringify({
          hostname: "https://" + url.hostname,
          per_second: 200,
          per_hour: 150 * 60 * 60,
          per_day: 120 * 60 * 60 * 24,
          crawl_rate: 50,
          repo_limit: 1_000_000,
        }),
      });
      if (!res.ok) {
        const message = await res.json().then((r: any) => r?.error || "unknown error");
        console.error(`Error requesting crawl for ${url.hostname}: ${res.status} ${res.statusText} — ${message}`);
      }
    } catch (err) {
      console.error(`Network error requesting crawl for ${url.hostname}: ${err}`);
    }
  }));
  console.log("Done crawling!");
}

// Fetch the list of known PDS hosts from atproto-scraping's state.json.
async function fetchPdses(): Promise<Array<string>> {
  const data = await fetch(
    "https://raw.githubusercontent.com/mary-ext/atproto-scraping/refs/heads/trunk/state.json",
  ).then((res) => res.ok ? res.json() as any : null);
  if (!data?.pdses) throw new Error("Failed to fetch PDSes");
  const pdses = Object.keys(data.pdses).filter((pds) => pds.startsWith("https://"));
  return pdses;
}

void main();
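To run the script against the relay from the compose file, setting BGS_ADDRESS=http://localhost:2470 and BGS_ADMIN_KEY=Password1 and executing the file with a TypeScript runner should work (the choice of runner is an assumption). The script issues every requestCrawl at once via Promise.all; if the relay's admin endpoint ever starts rejecting that burst, a chunked variant keeps the number of in-flight requests bounded. A sketch under that assumption, with requestCrawl standing in for a hypothetical wrapper around the fetch call above:

// Hypothetical chunked alternative to the Promise.all call in main():
// processes the PDS list in batches of `size`, so at most `size` requests are in flight at once.
async function inChunks<T>(items: T[], size: number, fn: (item: T) => Promise<void>): Promise<void> {
  for (let i = 0; i < items.length; i += size) {
    await Promise.all(items.slice(i, i + size).map(fn));
  }
}

// Usage (hypothetical): await inChunks(pdses, 25, (url) => requestCrawl(url));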