Skip to content

Instantly share code, notes, and snippets.

@futurGH
Last active March 21, 2025 03:44
Show Gist options
  • Save futurGH/2ee18d385eff3ba98f5b35b9dcac0aed to your computer and use it in GitHub Desktop.
Save futurGH/2ee18d385eff3ba98f5b35b9dcac0aed to your computer and use it in GitHub Desktop.
atproto relay resources -- docker compose file + a script that uses https://github.com/mary-ext/atproto-scraping to subscribe to all known PDSes
# Docker Compose stack that builds the relay image from a local ./indigo
# checkout (cmd/relay/Dockerfile) and runs it with the /relay command.
version: "3.9"

volumes:
  # Named volume mounted at /data inside the relay container (see services.relay.volumes).
  relay:
    name: relay

services:
  relay:
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      - "2470:2470"
    build:
      context: ./indigo
      dockerfile: cmd/relay/Dockerfile
    command: /relay
    environment:
      # NOTE(review): "Password1" is a placeholder default. Export RELAY_ADMIN_KEY
      # in the shell (or an .env file) to override it before exposing port 2470.
      - RELAY_ADMIN_KEY=${RELAY_ADMIN_KEY:-Password1}
      - RELAY_DEFAULT_REPO_LIMIT=600_000 # ~lines up with the number of repos on each bsky.social PDS
    volumes:
      - relay:/data
/**
 * Declares the environment variables this script reads so that
 * `process.env.BGS_ADDRESS` / `process.env.BGS_ADMIN_KEY` are typed as `string`.
 * This is a compile-time declaration only; presence is still checked at runtime.
 */
declare global {
  namespace NodeJS {
    interface ProcessEnv {
      /** Base URL of the relay; `/admin/pds/requestCrawl` is appended to it. */
      BGS_ADDRESS: string;
      /** Admin key sent to the relay as an `Authorization: Bearer` token. */
      BGS_ADMIN_KEY: string;
    }
  }
}

// `declare global` is only legal inside a module. The file has no other
// import/export, so this empty export marks it as one (avoids TS2669).
export {};
// Fail fast at startup: report the first required environment variable that
// is unset or empty before any network request is made.
const missingEnvVar = ["BGS_ADDRESS", "BGS_ADMIN_KEY"].find((name) => !process.env[name]);
if (missingEnvVar !== undefined) {
  throw new Error(`Missing env var ${missingEnvVar}`);
}
/**
 * Builds the trailing detail of a failed-request log line from the response body.
 *
 * Returns the body's `error` field when the body is JSON and that field is
 * truthy; otherwise returns "unknown error". Never throws, so an unparseable
 * body cannot be mistaken for a network failure by the caller.
 */
async function describeCrawlFailure(res: { json(): Promise<unknown> }): Promise<string> {
  let body: unknown;
  try {
    body = await res.json();
  } catch {
    // Body was empty or not JSON (e.g. an HTML error page from a proxy).
    return "unknown error";
  }
  if (typeof body === "object" && body !== null && "error" in body) {
    const reason = (body as { error?: unknown }).error;
    if (typeof reason === "string" && reason) return reason;
    // Non-string payloads are serialized rather than printed as "[object Object]".
    if (reason) return JSON.stringify(reason);
  }
  return "unknown error";
}

/**
 * Asks the relay at `BGS_ADDRESS` to crawl every PDS returned by `fetchPdses`.
 *
 * One `POST {BGS_ADDRESS}/admin/pds/requestCrawl` is issued per PDS, all
 * concurrently, authenticated with `BGS_ADMIN_KEY` as a Bearer token. Failures
 * are logged per host and never abort the remaining requests.
 *
 * @throws Whatever `fetchPdses` throws if the PDS list cannot be loaded.
 */
async function main(): Promise<void> {
  // Skip (and report) entries that are not parseable URLs instead of letting a
  // single malformed entry abort the whole run.
  const pdses = (await fetchPdses()).flatMap((raw) => {
    try {
      return [new URL(raw)];
    } catch {
      console.error(`Skipping invalid PDS URL: ${raw}`);
      return [];
    }
  });

  console.log("Requesting crawls...");
  await Promise.all(
    pdses.map(async (url) => {
      try {
        const res = await fetch(`${process.env.BGS_ADDRESS}/admin/pds/requestCrawl`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${process.env.BGS_ADMIN_KEY}`,
          },
          body: JSON.stringify({
            // NOTE(review): only the hostname is sent, so a non-default port in
            // the source URL is dropped — confirm that is what the relay expects.
            hostname: "https://" + url.hostname,
            // Limits sent along with the crawl request; the hourly and daily
            // values are written as an average per-second figure times the
            // number of seconds in the window.
            per_second: 200,
            per_hour: 150 * 60 * 60,
            per_day: 120 * 60 * 60 * 24,
            crawl_rate: 50,
            repo_limit: 1_000_000,
          }),
        });
        if (!res.ok) {
          console.error(
            `Error requesting crawl for ${url.hostname}: ${res.status} ${res.statusText} — ${await describeCrawlFailure(res)}`,
          );
        }
      } catch (err) {
        // Only genuine transport failures reach here now that body parsing
        // errors are handled inside describeCrawlFailure.
        console.error(`Network error requesting crawl for ${url.hostname}: ${err}`);
      }
    }),
  );
  console.log("Done crawling!");
}
/**
 * Downloads the `state.json` snapshot from the mary-ext/atproto-scraping
 * repository and returns the keys of its `pdses` object that begin with
 * "https://".
 *
 * @returns The matching keys, in the order `Object.keys` yields them.
 * @throws Error("Failed to fetch PDSes") when the response is not OK or the
 *   payload has no truthy `pdses` field.
 */
async function fetchPdses(): Promise<Array<string>> {
  const response = await fetch(
    "https://raw.githubusercontent.com/mary-ext/atproto-scraping/refs/heads/trunk/state.json",
  );
  // A non-OK response is treated the same as a payload without `pdses`.
  const state = response.ok
    ? ((await response.json()) as { pdses?: Record<string, unknown> } | null)
    : null;
  if (!state?.pdses) {
    throw new Error("Failed to fetch PDSes");
  }

  const httpsOnly: Array<string> = [];
  for (const key of Object.keys(state.pdses)) {
    if (key.startsWith("https://")) {
      httpsOnly.push(key);
    }
  }
  return httpsOnly;
}
// Script entry point. main() logs per-host failures itself; anything that still
// rejects (e.g. the PDS list could not be loaded) is reported here and turned
// into a non-zero exit code instead of an unhandled promise rejection.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment