/*
 * Discord Visible-DOM AI Exporter
 * License: MIT
 *
 * Paste this whole file into DevTools Console while viewing a Discord channel.
 * It collects only messages that Discord has already rendered in your browser.
 * It does not extract tokens, make network requests, call Discord APIs, bypass permissions, or access hidden history.
 *
 * Common commands:
 *   discordAIDump.run()                          // auto-capture older + newer rendered messages
 *   discordAIDump.run({ download: true })        // auto-capture, then download useful files
 *   discordAIDump.stopCapture()                  // stop any auto capture/scroll
 *   discordAIDump.downloadJson()                 // full archival JSON, including chunks + audit
 *   discordAIDump.downloadMessagesJsonl()        // one message per line for embeddings/RAG
 *   discordAIDump.downloadChunksJsonl()          // transcript chunks for RAG/context windows
 *   discordAIDump.downloadMarkdown()             // readable transcript
 *   discordAIDump.configure({ redact: true })    // strip obvious PII/secrets at export time
 *   discordAIDump.audit()                        // inspect quality/completeness warnings
 *   discordAIDump.stats()                        // current collection stats
 *   discordAIDump.downloadAll()                  // JSON + JSONL + chunks + MD + audit
 *   discordAIDump.selfTest()                     // quick in-page diagnostics
 *
 * Optional pre-paste config:
 *   window.discordAIDumpConfig = { autoCaptureOnStart: false }; // disable automatic scroll/capture if desired
 *
 * Notes:
 * - Discord virtualizes message history. Unvisited messages are not in the DOM.
 * - The progress bar is viewport position, not percent of total channel history.
 * - Export only messages you are allowed to access and handle exported data carefully.
 * - This is for personal/authorized archival, not platform-scale scraping or model training.
 * - Redaction is best-effort. Review exported files before sharing.
 */
(() => { |
|
"use strict"; |
|
|
|
// Property name on `window` under which a previous instance is looked up (and stopped) at startup.
const GLOBAL_NAME = "discordAIDump";
// DOM element ids owned by this script; elements with these ids are removed at startup.
const APP_ID = "__discord_ai_dump_panel";
const STYLE_ID = "__discord_ai_dump_style";
// Offset added to the snowflake's millisecond field: 2015-01-01T00:00:00Z in Unix ms.
// Kept as a BigInt because snowflake arithmetic is done in BigInt.
const DISCORD_EPOCH_MS = 1420070400000n;
// Upper bound on the number of entries retained in state.errorLog.
const MAX_ERROR_LOG = 50;
/**
 * Baseline configuration. `state.config` is built by spreading this object and
 * then the optional `window.discordAIDumpConfig` on top of it.
 *
 * Note: Object.freeze is shallow, so the two array values below are not
 * themselves frozen.
 *
 * @type {Required<DiscordAIDumpConfig>}
 */
const DEFAULT_CONFIG = Object.freeze({
  autoStart: true,
  autoCaptureOnStart: true,
  autoCaptureDownloadOnDone: false,
  autoCaptureDirections: ["up", "down"],
  autoCaptureDownloadFormats: ["json", "messagesJsonl", "chunksJsonl", "markdown", "audit"],
  resetOnRouteChange: true,
  collectOnMutation: true,
  collectOnScroll: true,
  collectIntervalMs: 1500,
  collectDebounceMs: 250,
  chunkTargetChars: 12000,
  chunkOverlapMessages: 5,
  autoScrollStepPx: 1100,
  autoScrollDelayMs: 700,
  autoScrollIdleRounds: 14,
  maxAutoScrollRounds: 25000,
  includeRawDomText: false,
  includeEmbeds: true,
  includeReactions: true,
  includeMedia: true,
  includeMentions: true,
  includeReplyPreviews: true,
  includeSystemUiTextFallback: false,
  redact: false,
  redactAuthorNames: false,
  redactIds: false,
  redactUrls: false,
  stripUrlQueryParamsWhenRedacting: true,
  dropMediaUrlsWhenRedacting: false,
  includeInternalUserLinksInAIText: false,
  includeEnvironmentMetadata: false,
  maxAuditIssues: 200,
  maxMessages: 0,
  includeErrorStacks: false,
  debug: false,
});
/**
 * @typedef {Object} DiscordAIDumpConfig
 * @property {boolean=} autoStart
 * @property {boolean=} autoCaptureOnStart
 * @property {boolean=} autoCaptureDownloadOnDone
 * @property {string[]=} autoCaptureDirections
 * @property {string[]=} autoCaptureDownloadFormats
 * @property {boolean=} resetOnRouteChange
 * @property {boolean=} collectOnMutation
 * @property {boolean=} collectOnScroll
 * @property {number=} collectIntervalMs
 * @property {number=} collectDebounceMs
 * @property {number=} chunkTargetChars
 * @property {number=} chunkOverlapMessages
 * @property {number=} autoScrollStepPx
 * @property {number=} autoScrollDelayMs
 * @property {number=} autoScrollIdleRounds
 * @property {number=} maxAutoScrollRounds
 * @property {boolean=} includeRawDomText
 * @property {boolean=} includeEmbeds
 * @property {boolean=} includeReactions
 * @property {boolean=} includeMedia
 * @property {boolean=} includeMentions
 * @property {boolean=} includeReplyPreviews
 * @property {boolean=} includeSystemUiTextFallback
 * @property {boolean=} redact
 * @property {boolean=} redactAuthorNames
 * @property {boolean=} redactIds
 * @property {boolean=} redactUrls
 * @property {boolean=} stripUrlQueryParamsWhenRedacting
 * @property {boolean=} dropMediaUrlsWhenRedacting
 * @property {boolean=} includeInternalUserLinksInAIText
 * @property {boolean=} includeEnvironmentMetadata
 * @property {number=} maxAuditIssues
 * @property {number=} maxMessages
 * @property {boolean=} includeErrorStacks
 * @property {boolean=} debug
 */
/** @returns {string} The current time as an ISO-8601 UTC string. */
const nowIso = () => new Date().toISOString();
// Element ids removed from the document at startup. The literals are kept so that
// leftovers are still cleaned up if APP_ID / STYLE_ID change; the Set drops the
// entry that is currently duplicated (the style literal equals STYLE_ID).
const LEGACY_UI_IDS = Array.from(
  new Set(["__discord_ai_dump_ui", "__discord_ai_dump_style", APP_ID, STYLE_ID])
);
// If an earlier paste left an instance on window[GLOBAL_NAME], ask it to stop and
// remove its UI before this one starts. A failure there is logged, not fatal.
const previous = window[GLOBAL_NAME];
if (previous?.stop) {
  try {
    previous.stop({ removeUi: true });
  } catch (error) {
    console.warn(`[${GLOBAL_NAME}] Failed to stop previous instance before starting:`, error);
  }
}
// Remove any elements with known ids regardless of whether stop() succeeded.
for (const id of LEGACY_UI_IDS) document.getElementById(id)?.remove();
// Optional user overrides set on window before pasting the script. Anything that
// is not a plain (non-array) object is ignored; arrays are rejected explicitly
// because `typeof [] === "object"` and spreading one would add index keys to the config.
const BOOTSTRAP_CONFIG =
  window.discordAIDumpConfig &&
  typeof window.discordAIDumpConfig === "object" &&
  !Array.isArray(window.discordAIDumpConfig)
    ? window.discordAIDumpConfig
    : {};
// Single mutable state object shared by the functions in this script.
const state = {
  running: false,
  startedAt: nowIso(),
  updatedAt: nowIso(),
  routeKey: null,
  routeHistory: [],
  // Defaults overlaid with the optional pre-paste overrides.
  config: { ...DEFAULT_CONFIG, ...BOOTSTRAP_CONFIG },
  messagesById: new Map(),
  // Original id string -> generated alias (filled by aliasId).
  aliasById: new Map(),
  lastCollectStats: null,
  // Most recent entry produced by recordError, and a bounded history of them.
  lastError: null,
  errorLog: [],
  collectScheduled: false,
  intervalHandle: null,
  observer: null,
  scrollElement: null,
  scrollHandler: null,
  // { el, routeKey, foundAt, method } written by findScroller.
  scrollerCache: null,
  drag: null,
  auto: {
    active: false,
    direction: null,
    rounds: 0,
    idleRounds: 0,
    startedAt: null,
    stoppedAt: null,
    reason: null,
  },
  capture: {
    active: false,
    step: null,
    startedAt: null,
    stoppedAt: null,
    reason: null,
    downloadOnDone: false,
  },
};
/**
 * Forwards its arguments to console.debug, prefixed with "[discordAIDump]",
 * but only while state.config.debug is truthy.
 * @param {...*} args
 */
const debug = (...args) => {
  if (state.config.debug) console.debug("[discordAIDump]", ...args);
};
/**
 * Records an error in state (lastError + bounded errorLog) and refreshes the UI.
 * @param {string} scope Label for where the error happened.
 * @param {*} error Error object or any thrown value.
 * @returns {{at: string, scope: string, message: string, stack: (string|null)}} The logged entry.
 */
const recordError = (scope, error) => {
  const item = {
    at: nowIso(),
    scope,
    message: error?.message || String(error),
    // Stacks are only kept when explicitly enabled in the config.
    stack: state.config.includeErrorStacks ? error?.stack || null : null,
  };

  state.lastError = item;
  state.errorLog.push(item);
  // One entry is added per call, so dropping the oldest one keeps the cap.
  if (state.errorLog.length > MAX_ERROR_LOG) state.errorLog.shift();
  if (state.config.debug) console.warn("[discordAIDump]", scope, error);
  updateUiSafe();
  return item;
};
/**
 * Runs `fn` and returns its result; if it throws, the error is recorded under
 * `scope` and `fallback` is returned instead.
 * @param {string} scope
 * @param {*} fallback
 * @param {function(): *} fn
 * @returns {*}
 */
const safe = (scope, fallback, fn) => {
  try {
    return fn();
  } catch (error) {
    recordError(scope, error);
    return fallback;
  }
};
/**
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>} Resolves after the delay (via window.setTimeout).
 */
const sleep = (ms) => new Promise((resolve) => window.setTimeout(resolve, ms));
/**
 * Escapes a value for use inside a CSS selector. Uses CSS.escape when the page
 * provides it; otherwise backslash-escapes every character outside [a-zA-Z0-9_-].
 * @param {*} value
 * @returns {string}
 */
const cssEscape = (value) => {
  if (window.CSS?.escape) return window.CSS.escape(String(value));
  return String(value).replace(/[^a-zA-Z0-9_-]/g, "\\$&");
};
/**
 * querySelectorAll that returns a real array.
 * @param {string} selector
 * @param {ParentNode} [root=document]
 * @returns {Element[]}
 */
const qsa = (selector, root = document) => Array.from(root.querySelectorAll(selector));
/**
 * Normalizes text taken from the DOM: non-breaking spaces become spaces,
 * zero-width spaces, bidi embedding/override marks and carriage returns are
 * removed, whitespace around line breaks is trimmed, runs of spaces/tabs
 * collapse to one space, and four or more newlines collapse to three.
 * @param {*} value null/undefined are treated as "".
 * @returns {string}
 */
const cleanText = (value) =>
  String(value ?? "")
    .replace(/\u00a0/g, " ")
    .replace(/\u200b/g, "")
    .replace(/[\u202a-\u202e]/g, "")
    .replace(/\r/g, "")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/\n[ \t]+/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{4,}/g, "\n\n\n")
    .trim();
/**
 * Tighter normalization on top of cleanText: every run of spaces/tabs becomes
 * one space and three or more newlines become two.
 * @param {*} value
 * @returns {string}
 */
const normalizeForAI = (value) =>
  cleanText(value)
    .replace(/[ \t]+/g, " ")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
/**
 * Rough token estimate: one token per four characters, rounded up, never below 1.
 * @param {*} text
 * @returns {number}
 */
const estimateTokens = (text) => Math.max(1, Math.ceil(String(text || "").length / 4));
/**
 * @param {*} value
 * @returns {Array} `value` itself when it is an array, otherwise a new empty array.
 */
const toArray = (value) => (Array.isArray(value) ? value : []);
/**
 * De-duplicates `items` by the key `keyFn` returns, keeping the first item seen
 * for each key and preserving order. Falsy items and items whose key is falsy
 * are dropped.
 * @param {Iterable<*>} items
 * @param {function(*): *} keyFn
 * @returns {Array}
 */
const uniqueBy = (items, keyFn) => {
  const out = new Map();
  for (const item of items) {
    if (!item) continue;
    const key = keyFn(item);
    if (key && !out.has(key)) out.set(key, item);
  }
  return Array.from(out.values());
};
/**
 * Recursively copies a value while dropping `undefined`: undefined object
 * properties are omitted and undefined array entries are removed. `null` and
 * other primitives are returned unchanged.
 * @param {*} obj
 * @returns {*}
 */
const compactObject = (obj) => {
  if (obj === undefined) return undefined;
  if (obj === null || typeof obj !== "object") return obj;
  if (Array.isArray(obj)) return obj.map(compactObject).filter((x) => x !== undefined);

  const out = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value === undefined) continue;
    out[key] = compactObject(value);
  }
  return out;
};
/**
 * Deep-copies a value. Prefers structuredClone; if that is unavailable or
 * rejects the value (it throws on functions and other uncloneable members), a
 * JSON round-trip is used instead, which silently drops such members.
 * @param {*} value
 * @returns {*}
 */
const deepClone = (value) => {
  if (typeof structuredClone === "function") {
    try {
      return structuredClone(value);
    } catch {
      // Uncloneable content: fall through to the lossy JSON copy below.
    }
  }
  return JSON.parse(JSON.stringify(value));
};
/**
 * Cleaned text of a DOM node: innerText when non-empty, else textContent.
 * @param {?Node} node May be null/undefined.
 * @returns {string}
 */
const textOf = (node) => cleanText(node?.innerText || node?.textContent || "");
/**
 * @param {*} value
 * @returns {string[]} Every standalone 17–22 digit run in `value`, in order.
 */
const allSnowflakes = (value) => String(value || "").match(/\b\d{17,22}\b/g) || [];
/**
 * @param {*} value
 * @returns {?string} The last 17–22 digit run in `value`, or null when there is none.
 */
const lastSnowflake = (value) => allSnowflakes(value).at(-1) || null;
/**
 * Short deterministic hash used to build aliases: an FNV-1a-style 32-bit hash
 * rendered in base 36 and left-padded to 7 characters. Not cryptographic.
 * @param {*} value
 * @returns {string}
 */
const aliasHash = (value) => {
  let hash = 0x811c9dc5;
  for (const char of String(value)) {
    hash ^= char.charCodeAt(0);
    // Math.imul keeps the multiply in 32 bits; >>> 0 makes the result unsigned.
    hash = Math.imul(hash, 0x01000193) >>> 0;
  }
  return hash.toString(36).padStart(7, "0");
};
/**
 * Returns a stable alias of the form `${prefix}_${hash}` for an id, cached in
 * state.aliasById. The first alias created for an id is reused afterwards, so
 * a different `prefix` on a later call has no effect.
 * @param {*} id Falsy ids are returned unchanged.
 * @param {string} [prefix="id"]
 * @returns {*}
 */
const aliasId = (id, prefix = "id") => {
  if (!id) return id;
  const value = String(id);
  if (!state.aliasById.has(value)) state.aliasById.set(value, `${prefix}_${aliasHash(value)}`);
  return state.aliasById.get(value);
};
/**
 * Replaces every standalone 17–22 digit run in a string with its alias.
 * Non-string values are returned unchanged.
 * @param {*} value
 * @returns {*}
 */
const redactIdsInString = (value) =>
  typeof value === "string" ? value.replace(/\b\d{17,22}\b/g, (match) => aliasId(match)) : value;
/**
 * Best-effort pattern redaction. Replacements run in this order: e-mail
 * addresses, phone-number-shaped digit groups, three-part dot-separated
 * token-shaped strings, prefixed secret-shaped strings (sk-, pk-, rk-, xox?-,
 * gh?-, glpat-), and finally any run of 72+ word/dash characters.
 * @param {*} value null/undefined are treated as "".
 * @returns {string}
 */
const redactString = (value) =>
  String(value ?? "")
    .replace(/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, "[REDACTED_EMAIL]")
    .replace(/\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g, "[REDACTED_PHONE]")
    .replace(/\b(?:mfa\.)?[A-Za-z0-9_-]{24}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,}\b/g, "[REDACTED_TOKEN_LIKE]")
    .replace(/\b(?:sk|pk|rk|xox[baprs]|gh[pousr]|glpat)-[A-Za-z0-9_\-]{12,}\b/g, "[REDACTED_SECRET_LIKE]")
    .replace(/\b[A-Za-z0-9_\-]{72,}\b/g, "[REDACTED_LONG_SECRET_LIKE]");
/**
 * Removes the query string and fragment from a URL. The URL is resolved against
 * the current page location; if it cannot be parsed the input is returned as is.
 * @param {string} url
 * @returns {string}
 */
const stripQuery = (url) => {
  try {
    const parsed = new URL(url, location.href);
    parsed.search = "";
    parsed.hash = "";
    return parsed.toString();
  } catch {
    return url;
  }
};
/**
 * Applies the URL redaction settings to one URL. With redaction off the URL is
 * returned untouched. Otherwise, in priority order: media URLs may be replaced
 * wholesale, all URLs may be replaced wholesale, or the URL is kept with its
 * query/fragment stripped and/or its ids aliased, depending on config.
 * @param {string} url
 * @param {{media?: boolean}} [options] `media` marks the URL as a media URL.
 * @returns {string}
 */
const redactUrlForExport = (url, { media = false } = {}) => {
  if (!state.config.redact) return url;
  if (state.config.dropMediaUrlsWhenRedacting && media) return "[REDACTED_MEDIA_URL]";
  if (state.config.redactUrls) return "[REDACTED_URL]";

  let out = state.config.stripUrlQueryParamsWhenRedacting ? stripQuery(url) : url;
  if (state.config.redactIds) out = redactIdsInString(out);
  return out;
};
/**
 * Redacts free text: optionally strips query strings from embedded http(s)
 * URLs, then applies redactString, then optionally aliases ids.
 * This function does not itself check `state.config.redact`.
 * @param {*} value null/undefined are treated as "".
 * @returns {string}
 */
const redactTextForExport = (value) => {
  let out = String(value ?? "");
  if (state.config.stripUrlQueryParamsWhenRedacting) {
    out = out.replace(/https?:\/\/[^\s<>\"']+/gi, (match) => stripQuery(match));
  }
  out = redactString(out);
  if (state.config.redactIds) out = redactIdsInString(out);
  return out;
};
/**
 * Describes the current page from `location` and the page heading.
 *
 * Path segments are read positionally from `/channels/<1>/<2>/<3>`: segment 1
 * is the guild id or "@me", segment 2 the channel, segment 3 is treated as a
 * thread id. NOTE(review): messageIdFromHref treats the same third position as
 * a message id — confirm which URL shapes actually reach this function.
 *
 * @returns {{page_url: string, route_key: string, guild_id: ?string, dm_id: ?string,
 *   parent_channel_id: ?string, channel_id: ?string, thread_id: ?string, title: string}}
 */
const getRouteInfo = () => {
  const parts = location.pathname.split("/").filter(Boolean);
  const isChannelsRoute = parts[0] === "channels";
  const guildOrDmId = isChannelsRoute ? parts[1] ?? null : null;
  const parentChannelId = isChannelsRoute ? parts[2] ?? null : null;
  const threadId = isChannelsRoute ? parts[3] ?? null : null;
  const isDm = guildOrDmId === "@me";
  const channelId = threadId || parentChannelId || null;

  const heading =
    document.querySelector("h1") ||
    document.querySelector('[role="heading"][aria-level="1"]') ||
    document.querySelector('[aria-label][role="heading"]');

  // Fall back to the document title with the trailing "| Discord" removed.
  const title = cleanText(
    heading?.innerText || heading?.getAttribute?.("aria-label") || document.title.replace(/\s*\|\s*Discord\s*$/i, "")
  );

  return {
    page_url: location.href,
    // Off a channels route there are no ids, so the pathname itself is the key.
    route_key: [guildOrDmId, parentChannelId, threadId].filter(Boolean).join(":") || location.pathname,
    guild_id: isDm ? null : guildOrDmId,
    dm_id: isDm ? parentChannelId : null,
    parent_channel_id: threadId ? parentChannelId : null,
    channel_id: channelId,
    thread_id: threadId || null,
    title,
  };
};
/**
 * Returns the route unchanged when redaction is off; otherwise a deep copy with
 * page_url redacted and, if id redaction is on, every id-bearing field aliased.
 * @param {?Object} route Route info object; null/undefined is treated as {}.
 * @returns {Object}
 */
const redactRouteForExport = (route) => {
  if (!state.config.redact) return route;

  const clone = deepClone(route || {});
  clone.page_url = clone.page_url ? redactUrlForExport(clone.page_url) : clone.page_url;

  if (state.config.redactIds) {
    for (const key of ["route_key", "guild_id", "dm_id", "parent_channel_id", "channel_id", "thread_id"]) {
      if (clone[key]) clone[key] = redactIdsInString(clone[key]);
    }
  }

  return clone;
};
/**
 * Derives an ISO timestamp from a snowflake id: the id shifted right by 22 bits
 * is a millisecond offset from DISCORD_EPOCH_MS.
 *
 * Only plain decimal digit strings are accepted. Without this check BigInt("")
 * evaluates to 0n, so an empty id would yield the epoch itself as a bogus
 * timestamp, and hex or negative strings would be accepted too.
 *
 * @param {string|number|bigint} id
 * @returns {?string} ISO string, or null when the id is not a usable snowflake.
 */
const snowflakeTimestamp = (id) => {
  const digits = String(id ?? "").trim();
  if (!/^\d+$/.test(digits)) return null;
  try {
    const ms = Number((BigInt(digits) >> 22n) + DISCORD_EPOCH_MS);
    const date = new Date(ms);
    // Out-of-range values produce an invalid Date rather than throwing.
    return Number.isNaN(date.valueOf()) ? null : date.toISOString();
  } catch {
    return null;
  }
};
/**
 * Extracts the message id from a `/channels/<x>/<channel id>/<message id>` link.
 * @param {*} href
 * @returns {?string} The third path id, or null when the link has no such segment.
 */
const messageIdFromHref = (href) => String(href || "").match(/\/channels\/[^/]+\/\d{17,22}\/(\d{17,22})/)?.[1] || null;
/**
 * Works out which message a DOM node belongs to. Sources are tried in order:
 *   1. a `message-content-<id>` element id (the node itself, then its first descendant),
 *   2. the last snowflake in the node's id, data-list-item-id or aria-describedby,
 *   3. the first descendant channel link that carries a message id.
 * @param {?Element} node
 * @returns {?string}
 */
const messageIdFromNode = (node) => {
  if (!node) return null;

  const selfContentId = node.matches?.('[id^="message-content-"]') ? node.id : null;
  const nestedContentId = node.querySelector?.('[id^="message-content-"]')?.id || null;
  const contentId = [selfContentId, nestedContentId]
    .filter(Boolean)
    .map((id) => id.match(/^message-content-(\d{17,22})$/)?.[1])
    .find(Boolean);
  if (contentId) return contentId;

  const direct = [node.id, node.getAttribute?.("data-list-item-id"), node.getAttribute?.("aria-describedby")]
    .map(lastSnowflake)
    .find(Boolean);
  if (direct) return direct;

  return qsa('a[href*="/channels/"]', node).map((anchor) => messageIdFromHref(anchor.href)).find(Boolean) || null;
};
/**
 * Whether an element can still serve as the scroll container: it must be
 * attached to the document and either be one of the document-level scrollers
 * or have more than 20px of scrollable overflow.
 * @param {?Element} el
 * @returns {boolean}
 */
const isUsableScroller = (el) => {
  if (!el || !document.contains(el)) return false;
  if (el === document.documentElement || el === document.body || el === document.scrollingElement) return true;
  return el.scrollHeight > el.clientHeight + 20;
};
/**
 * Locates the element that scrolls the message list and caches it per route.
 *
 * Strategy: reuse the cached element when it is still usable for the current
 * route; otherwise find a known message-list element and walk up to its first
 * scrollable ancestor; otherwise scan main/section/div elements and pick the
 * best-scoring scrollable one; otherwise fall back to the document scroller.
 *
 * @param {{force?: boolean}} [options] `force` bypasses the cache.
 * @returns {Element}
 */
const findScroller = ({ force = false } = {}) => {
  const routeKey = getRouteInfo().route_key;
  if (!force && state.scrollerCache?.routeKey === routeKey && isUsableScroller(state.scrollerCache.el)) {
    return state.scrollerCache.el;
  }

  // First selector (in this order) that matches anything on the page.
  const preferred = [
    '[data-list-id="chat-messages"]',
    '[aria-label*="Messages"]',
    '[id^="chat-messages-"]',
    '[id^="message-content-"]',
  ]
    .map((selector) => document.querySelector(selector))
    .find(Boolean);

  if (preferred) {
    let el = preferred.parentElement;
    while (el && el !== document.body) {
      const style = getComputedStyle(el);
      const scrollable = /(auto|scroll)/.test(style.overflowY) && el.scrollHeight > el.clientHeight + 100;
      if (scrollable) {
        state.scrollerCache = { el, routeKey, foundAt: nowIso(), method: "ancestor" };
        return el;
      }
      el = el.parentElement;
    }
  }

  // Score = visible height plus overflow, with the overflow share capped at 5000.
  const candidates = qsa("main, section, div")
    .filter((el) => {
      const style = getComputedStyle(el);
      return /(auto|scroll)/.test(style.overflowY) && el.scrollHeight > el.clientHeight + 250;
    })
    .map((el) => ({
      el,
      score: el.clientHeight + Math.min(el.scrollHeight - el.clientHeight, 5000),
    }))
    .sort((a, b) => b.score - a.score);

  const fallback = candidates[0]?.el || document.scrollingElement || document.documentElement;
  state.scrollerCache = { el: fallback, routeKey, foundAt: nowIso(), method: candidates[0] ? "candidate_scan" : "document" };
  return fallback;
};
/**
 * Scores how well a DOM node represents message `id`; higher is better.
 * Rewards an id attribute ending in (or containing) the message id, holding
 * the exact content element, being a list item, and wrapping exactly one
 * content element; penalizes wrapping several. A small text-length bonus
 * (capped at 5 points) breaks ties.
 * @param {?Element} node
 * @param {string} id
 * @returns {number} -Infinity for a missing node.
 */
const scoreMessageNode = (node, id) => {
  if (!node) return -Infinity;

  const idText = `${node.id || ""} ${node.getAttribute?.("data-list-item-id") || ""}`;
  const exactContent = node.querySelector?.(`#${cssEscape(`message-content-${id}`)}`);
  const contentCount = qsa('[id^="message-content-"]', node).length;
  const textLength = textOf(node).length;

  let score = 0;
  if (allSnowflakes(idText).at(-1) === id) score += 120;
  else if (idText.includes(id)) score += 80;
  if (exactContent) score += 40;
  if (node.matches?.('li, [role="listitem"]')) score += 15;
  if (contentCount === 1) score += 20;
  if (contentCount > 1) score -= 20 * (contentCount - 1);
  score += Math.min(textLength, 250) / 50;
  return score;
};
/**
 * Collects one DOM node per message currently in the document, ordered top to
 * bottom by on-screen position (ties broken by message id string).
 *
 * Candidates come from message list items and from the nearest container of
 * each message content element; when several candidates map to the same
 * message id the highest-scoring one wins (the first seen wins a tie).
 *
 * Scores, ids and rect tops are computed once per node rather than inside the
 * comparison/sort callbacks.
 *
 * @returns {Element[]}
 */
const findMessageNodes = () => {
  const candidates = [];

  for (const node of qsa('[id^="chat-messages-"], [data-list-item-id*="chat-messages"]')) {
    if (messageIdFromNode(node)) candidates.push(node);
  }

  for (const contentEl of qsa('[id^="message-content-"]')) {
    const container =
      contentEl.closest('[id^="chat-messages-"], [data-list-item-id*="chat-messages"]') ||
      contentEl.closest('li, [role="listitem"], article, div');
    if (container) candidates.push(container);
  }

  const bestById = new Map();
  for (const candidate of candidates) {
    const id = messageIdFromNode(candidate);
    if (!id) continue;

    const score = scoreMessageNode(candidate, id);
    const existing = bestById.get(id);
    if (!existing || score > existing.score) bestById.set(id, { node: candidate, score });
  }

  return Array.from(bestById, ([id, { node }]) => ({ id: String(id), node, top: node.getBoundingClientRect().top }))
    .sort((a, b) => a.top - b.top || a.id.localeCompare(b.id))
    .map((entry) => entry.node);
};
/**
 * @param {*} href
 * @returns {?string} The 17–22 digit id following `/users/`, or null.
 */
const extractUserIdFromHref = (href) => String(href || "").match(/\/users\/(\d{17,22})/)?.[1] ?? null;
/**
 * Reads the author shown on a message node.
 *
 * The name comes from the first match among: the exact `message-username-<id>`
 * element, any `message-username-*` element, a username-classed element inside
 * an h3, any username-classed element, or a user link inside a slate element.
 * The user id comes from the first `/users/` link anywhere in the node.
 * NOTE(review): that link is not checked to belong to the author element.
 *
 * @param {Element} node
 * @param {string} id Message id.
 * @returns {?{id: ?string, name: ?string, inferred: boolean, source: string}}
 *   null when neither a name nor a user id was found.
 */
const extractAuthor = (node, id) => {
  const exact = node.querySelector?.(`#${cssEscape(`message-username-${id}`)}`);
  const candidate =
    exact ||
    node.querySelector?.('[id^="message-username-"]') ||
    node.querySelector?.('h3 [class*="username" i]') ||
    node.querySelector?.('[class*="username" i]') ||
    node.querySelector?.('[data-slate-node="element"] a[href*="/users/"]');

  const name = textOf(candidate);
  const userHref = qsa('a[href*="/users/"]', node).map((anchor) => anchor.href).find(Boolean) || null;
  const userId = extractUserIdFromHref(userHref);

  if (!name && !userId) return null;
  return {
    id: userId,
    name: name || null,
    inferred: false,
    source: exact ? "exact_username_id" : candidate ? "dom_username" : "user_link",
  };
};
/**
 * Copies an author record and marks it as inferred rather than read directly.
 * @param {?Object} author
 * @param {string} [source="visual_group_inference"] Value stored in `source`.
 * @returns {?Object} null when no author is given.
 */
const makeInferredAuthor = (author, source = "visual_group_inference") => {
  if (!author) return null;
  return { ...author, inferred: true, source };
};
/**
 * Finds the content element of message `id` inside `node`: the exact
 * `#message-content-<id>` match if present, else a content element whose id
 * ends with `id`, else the first content element, else null.
 * @param {Element} node
 * @param {string} id
 * @returns {?Element}
 */
const getMessageContentElement = (node, id) => {
  const exact = node.querySelector?.(`#${cssEscape(`message-content-${id}`)}`);
  if (exact) return exact;

  const contentEls = qsa('[id^="message-content-"]', node);
  return contentEls.find((el) => el.id.endsWith(id)) || contentEls[0] || null;
};
/**
 * Text of a whole message node with chrome removed. Works on a deep clone so
 * the live DOM is untouched: time elements, buttons, username elements and
 * anything classed timestamp/reaction/button are deleted before reading text.
 * @param {Element} node
 * @returns {string}
 */
const extractSystemUiTextFallback = (node) => {
  const clone = node.cloneNode(true);
  qsa(
    [
      "time",
      "button",
      '[role="button"]',
      '[id^="message-username-"]',
      '[class*="timestamp" i]',
      '[class*="reaction" i]',
      '[class*="button" i]',
    ].join(","),
    clone
  ).forEach((el) => el.remove());

  return cleanText(clone.innerText || clone.textContent);
};
/**
 * Lower-cased file extension (1–12 alphanumerics after the last dot) of a
 * URL's path, ignoring query and fragment.
 * @param {string} url Resolved against the current page location.
 * @returns {?string} null when there is no extension or the URL cannot be parsed.
 */
const extensionFromUrl = (url) => {
  try {
    const pathname = new URL(url, location.href).pathname;
    return pathname.match(/\.([a-z0-9]{1,12})$/i)?.[1]?.toLowerCase() || null;
  } catch {
    return null;
  }
};
/**
 * Last path segment of a URL, percent-decoded.
 *
 * The path is split before decoding so an encoded slash (%2F) inside the name
 * does not cut it short, and a segment with a malformed escape is returned
 * undecoded rather than being lost.
 *
 * @param {string} url Resolved against the current page location.
 * @returns {?string} null when the URL has no path segment or cannot be parsed.
 */
const filenameFromUrl = (url) => {
  let lastSegment;
  try {
    lastSegment = new URL(url, location.href).pathname.split("/").filter(Boolean).at(-1);
  } catch {
    return null;
  }
  if (!lastSegment) return null;

  try {
    return decodeURIComponent(lastSegment);
  } catch {
    // decodeURIComponent throws URIError on malformed sequences.
    return lastSegment;
  }
};
/**
 * Classifies a URL into one of: internal_user, internal_channel_or_message,
 * attachment, emoji, sticker, avatar, image, video, audio, file, link.
 * Patterns are tested in that order against the lower-cased URL string, then
 * the path extension decides between image/video/audio/file, and anything else
 * is a plain link.
 *
 * Three later checks for discord.com user/message/channel links were removed:
 * the first two patterns already match every URL they could match, so they
 * never ran. Results are unchanged.
 *
 * @param {*} url
 * @returns {string}
 */
const classifyUrl = (url) => {
  const lower = String(url || "").toLowerCase();
  const ext = extensionFromUrl(url);

  if (/discord(?:app)?\.com\/users\/\d{17,22}/.test(lower)) return "internal_user";
  if (/discord(?:app)?\.com\/channels\//.test(lower)) return "internal_channel_or_message";
  if (/\/(attachments|ephemeral-attachments)\//.test(lower)) return "attachment";
  if (/\/(emojis|emoji)\//.test(lower)) return "emoji";
  if (/\/stickers\//.test(lower)) return "sticker";
  if (/\/(avatars|embed\/avatars)\//.test(lower)) return "avatar";
  if (["png", "jpg", "jpeg", "webp", "gif", "avif", "svg"].includes(ext)) return "image";
  if (["mp4", "webm", "mov", "m4v"].includes(ext)) return "video";
  if (["mp3", "wav", "ogg", "m4a", "flac"].includes(ext)) return "audio";
  if (ext) return "file";
  return "link";
};
/**
 * Lists the hyperlinks inside a node as `{ href, text, kind }`, skipping
 * anchors with no href or a `javascript:` href and de-duplicating on
 * href + text. `text` falls back to the anchor's aria-label, then null.
 * @param {ParentNode} node
 * @returns {Array<{href: string, text: ?string, kind: string}>}
 */
const extractLinks = (node) =>
  uniqueBy(
    qsa("a[href]", node)
      .map((anchor) => {
        const href = anchor.href;
        if (!href || href.startsWith("javascript:")) return null;
        return {
          href,
          text: textOf(anchor) || cleanText(anchor.getAttribute("aria-label")) || null,
          kind: classifyUrl(href),
        };
      })
      .filter(Boolean),
    (x) => `${x.href}|${x.text || ""}`
  );
/**
 * Lists media elements (img/video/audio/source with a src) inside a node as
 * `{ kind, url, alt, filename }`. data: URLs and avatar images are skipped and
 * results are de-duplicated by URL. Returns [] when media capture is disabled.
 * @param {ParentNode} node
 * @returns {Array<{kind: string, url: string, alt: ?string, filename: ?string}>}
 */
const extractMedia = (node) => {
  if (!state.config.includeMedia) return [];

  return uniqueBy(
    qsa("img[src], video[src], audio[src], source[src]", node)
      .map((el) => {
        const url = el.currentSrc || el.src || el.getAttribute("src");
        if (!url || url.startsWith("data:")) return null;

        const kind = classifyUrl(url);
        return {
          kind,
          url,
          alt: cleanText(el.getAttribute("alt")) || null,
          filename: filenameFromUrl(url),
        };
      })
      .filter(Boolean)
      .filter((x) => x.kind !== "avatar"),
    (x) => x.url
  );
};
/**
 * Builds the attachment list for a message from its already-extracted links
 * and media: links classified as attachments (or pointing at the Discord
 * attachment CDN paths) plus media of kind attachment/image/video/audio/file.
 * De-duplicated by URL; link-derived entries come first and therefore win.
 * @param {Array<{href: string, text: ?string, kind: string}>} links
 * @param {Array<{kind: string, url: string, alt: ?string, filename: ?string}>} media
 * @returns {Array<{kind: string, url: string, filename: ?string, text: ?string}>}
 */
const extractAttachments = (links, media) =>
  uniqueBy(
    [
      ...links
        .filter((x) => x.kind === "attachment" || /cdn\.discordapp\.com\/attachments|media\.discordapp\.net\/attachments/.test(x.href))
        .map((x) => ({
          kind: classifyUrl(x.href),
          url: x.href,
          filename: filenameFromUrl(x.href) || x.text || null,
          text: x.text || null,
        })),
      ...media
        .filter((x) => ["attachment", "image", "video", "audio", "file"].includes(x.kind))
        .map((x) => ({
          kind: x.kind,
          url: x.url,
          filename: x.filename,
          text: x.alt || null,
        })),
    ],
    (x) => x.url
  );
/**
 * Collects mentions for a message from three sources: `/users/` links,
 * `/channels/` links, and `@name` / `#name` tokens found in the text.
 * Entries are de-duplicated on their full JSON form. Returns [] when mention
 * capture is disabled.
 * @param {ParentNode} node
 * @param {?string} contentText Message text; null/undefined is treated as "".
 * @returns {Array<Object>}
 */
const extractMentions = (node, contentText) => {
  if (!state.config.includeMentions) return [];

  const userLinks = qsa('a[href*="/users/"]', node).map((anchor) => ({
    type: "user",
    id: extractUserIdFromHref(anchor.href),
    text: textOf(anchor) || null,
  }));

  const channelLinks = qsa('a[href*="/channels/"]', node).map((anchor) => {
    const match = anchor.href.match(/\/channels\/([^/]+)\/(\d{17,22})(?:\/(\d{17,22}))?/);
    return {
      type: "channel_or_message",
      guild_id: match?.[1] || null,
      channel_id: match?.[2] || null,
      message_id: match?.[3] || null,
      text: textOf(anchor) || null,
      href: anchor.href,
    };
  });

  // Matches include the leading whitespace captured by (^|\s); trim removes it.
  const tokens = String(contentText ?? "").match(/(^|\s)([@#][^\s@#:,;]{2,80})/g) || [];
  const textual = Array.from(new Set(tokens.map((x) => x.trim()))).map((text) => ({
    type: text.startsWith("#") ? "channel_text" : "user_or_role_text",
    text,
  }));

  return uniqueBy([...userLinks, ...channelLinks, ...textual], (x) => JSON.stringify(x));
};
/**
 * Reads the "replying to" preview shown on a message, if any. The preview
 * element is the first match among a repliedMessage-classed element, a
 * reply-classed button, or an element whose aria-label contains "Reply".
 * @param {Element} node
 * @returns {?{message_id: ?string, user_name: ?string, preview_text: ?string, href: ?string}}
 *   null when reply capture is disabled or no preview element is found.
 */
const extractReplyPreview = (node) => {
  if (!state.config.includeReplyPreviews) return null;

  const reply =
    node.querySelector('[class*="repliedMessage" i]') ||
    node.querySelector('[class*="reply" i][role="button"]') ||
    node.querySelector('[aria-label*="Reply" i]');

  if (!reply) return null;

  const href = reply.querySelector?.('a[href*="/channels/"]')?.href || null;
  const messageId = href ? messageIdFromHref(href) : null;
  const userName = textOf(reply.querySelector?.('[id^="message-username-"], [class*="username" i]')) || null;

  return compactObject({
    message_id: messageId,
    user_name: userName,
    preview_text: cleanText(reply.innerText || reply.textContent) || null,
    href,
  });
};
/**
 * Extracts embed-like blocks from a message node. Each candidate element
 * yields `{ index, title, description, text, links, media }`; candidates with
 * no text, links or media are skipped, and results are de-duplicated on
 * title + text + link hrefs (the selectors can match nested wrappers of the
 * same embed). `index` is the position among matched candidates.
 * Returns [] when embed capture is disabled.
 * @param {ParentNode} node
 * @returns {Array<Object>}
 */
const extractEmbeds = (node) => {
  if (!state.config.includeEmbeds) return [];

  const candidates = qsa(
    [
      'article[class*="embed" i]',
      '[class*="embedWrapper" i]',
      '[class*="embedFull" i]',
      '[class*="embedGrid" i]',
      '[class*="gridContainer" i]',
    ].join(","),
    node
  );

  return uniqueBy(
    candidates
      .map((el, index) => {
        const links = extractLinks(el);
        const media = extractMedia(el);
        const text = cleanText(el.innerText || el.textContent);
        if (!text && !links.length && !media.length) return null;

        // Title: explicit embed title, else the first <strong>, else the first link.
        const titleEl =
          el.querySelector('[class*="embedTitle" i]') || el.querySelector("strong") || el.querySelector("a[href]");
        const descriptionEl = el.querySelector('[class*="embedDescription" i]');

        return compactObject({
          index,
          title: textOf(titleEl) || null,
          description: textOf(descriptionEl) || null,
          text,
          links,
          media,
        });
      })
      .filter(Boolean),
    (x) => `${x.title || ""}|${x.text || ""}|${toArray(x.links).map((link) => link.href).join(",")}`
  );
};
/**
 * Lists reaction buttons on a message as `{ label, text }`. A button counts as
 * a reaction when its aria-label mentions reaction/reacted/emoji and neither
 * its label nor its text mentions reply/more/edit/delete/copy/thread.
 * De-duplicated on label + text. Returns [] when reaction capture is disabled.
 * @param {ParentNode} node
 * @returns {Array<{label: string, text: ?string}>}
 */
const extractReactions = (node) => {
  if (!state.config.includeReactions) return [];

  const candidates = qsa('button[aria-label], [role="button"][aria-label]', node).filter((el) => {
    const label = el.getAttribute("aria-label") || "";
    const text = textOf(el);
    return /reaction|reacted|emoji/i.test(label) && !/reply|more|edit|delete|copy|thread/i.test(label + text);
  });

  return uniqueBy(
    candidates
      .map((el) => ({
        label: cleanText(el.getAttribute("aria-label")),
        text: textOf(el) || null,
      }))
      .filter((x) => x.label || x.text),
    (x) => `${x.label || ""}|${x.text || ""}`
  );
};
/**
 * Whether a message shows an edited marker: some span/time element whose text
 * is exactly "(edited)" or whose aria-label contains the word "edited".
 * @param {ParentNode} node
 * @returns {boolean}
 */
const isEdited = (node) =>
  qsa("span, time", node).some((el) => {
    const label = cleanText(el.getAttribute("aria-label") || "");
    const text = textOf(el);
    return text === "(edited)" || /\bedited\b/i.test(label);
  });
/**
 * Builds `https://discord.com/channels/<guild or @me>/<channel>/<message>`.
 * @param {string} messageId
 * @param {Object} [route=getRouteInfo()] Route info supplying guild_id and channel_id.
 * @returns {?string} null when the route has no channel id.
 */
const buildPermalink = (messageId, route = getRouteInfo()) => {
  if (!route.channel_id) return null;
  const guildPart = route.guild_id || "@me";
  return `https://discord.com/channels/${guildPart}/${route.channel_id}/${messageId}`;
};
/**
 * Renders one message as plain text: a `[timestamp] author (id)` header, the
 * body, then optional "Reply preview:", "Attachments:", "Links:" and "Embeds:"
 * lines, joined with newlines.
 *
 * Links that duplicate an attachment URL, avatar/emoji/sticker links and
 * (unless enabled in config) internal user links are left out of "Links:".
 *
 * @param {Object} message Parsed message object.
 * @returns {string}
 */
const messageToAIText = (message) => {
  const timestamp = message.timestamp || message.timestamp_from_snowflake || "unknown_time";
  const author = message.author?.name || message.author?.id || "Unknown";
  const header = `[${timestamp}] ${author} (${message.id})`;
  const body = message.content?.normalized || message.content?.text || "[no text content]";

  const extras = [];
  if (message.reply_to?.message_id || message.reply_to?.preview_text) {
    extras.push(
      `Reply preview: ${[message.reply_to.user_name, message.reply_to.preview_text, message.reply_to.message_id]
        .filter(Boolean)
        .join(" | ")}`
    );
  }
  if (message.attachments?.length) extras.push(`Attachments: ${message.attachments.map((x) => x.url).join(" ")}`);

  // Built once so each link is checked in O(1) instead of rescanning attachments.
  const attachmentUrls = new Set(toArray(message.attachments).map((attachment) => attachment.url));
  const nonAttachmentLinks = toArray(message.links).filter((link) => {
    if (attachmentUrls.has(link.href)) return false;
    if (!state.config.includeInternalUserLinksInAIText && link.kind === "internal_user") return false;
    return !["avatar", "emoji", "sticker"].includes(link.kind);
  });
  if (nonAttachmentLinks.length) extras.push(`Links: ${nonAttachmentLinks.map((x) => x.href).join(" ")}`);

  if (message.embeds?.length) {
    const embedText = message.embeds
      .map((embed) => normalizeForAI([embed.title, embed.description, embed.text].filter(Boolean).join(" | ")))
      .filter(Boolean)
      .join(" || ");
    if (embedText) extras.push(`Embeds: ${embedText}`);
  }

  return [header, body, ...extras].filter(Boolean).join("\n");
};
/**
 * Turns one message DOM node into a message record. Runs inside `safe`, so any
 * exception is recorded under "parseMessageNode" and null is returned.
 *
 * The UI-text clone is extracted at most once (it previously ran twice when
 * both the fallback and raw-text options were on), and first_seen_at /
 * last_seen_at share one timestamp.
 *
 * @param {Element} node
 * @param {?Object} [inheritedAuthor=null] Author used, marked as inferred, when
 *   the node shows none.
 * @param {Object} [route=getRouteInfo()] Route info stored in `source` and used
 *   for the permalink.
 * @returns {?Object} The message record, or null when the node has no message id.
 */
const parseMessageNode = (node, inheritedAuthor = null, route = getRouteInfo()) =>
  safe("parseMessageNode", null, () => {
    const warnings = [];
    const id = messageIdFromNode(node);
    if (!id) return null;

    const contentEl = getMessageContentElement(node, id);
    const directContentText = textOf(contentEl);

    const wantsFallback = Boolean(state.config.includeSystemUiTextFallback);
    const wantsRaw = Boolean(state.config.includeRawDomText);
    const uiText = wantsFallback || wantsRaw ? extractSystemUiTextFallback(node) : "";
    const fallbackText = wantsFallback ? uiText : "";
    const contentText = directContentText || fallbackText || "";
    const normalized = normalizeForAI(contentText);

    const explicitAuthor = extractAuthor(node, id);
    const author = explicitAuthor || makeInferredAuthor(inheritedAuthor);
    if (!author) warnings.push("author_not_found");
    if (!contentEl && fallbackText) warnings.push("content_selector_missing_used_fallback_text");
    if (!contentText) warnings.push("empty_text_maybe_media_only_or_selector_miss");

    const timeEl = node.querySelector("time[datetime]");
    const timestampFromElement = timeEl?.getAttribute("datetime") || timeEl?.dateTime || null;
    const timestampFromSnowflake = snowflakeTimestamp(id);
    if (!timestampFromElement) warnings.push("timestamp_element_not_found_used_snowflake_fallback");

    const links = extractLinks(node);
    const media = extractMedia(node);
    const attachments = extractAttachments(links, media);
    const embeds = extractEmbeds(node);
    const replyTo = extractReplyPreview(node);
    const mentions = extractMentions(node, normalized);
    const reactions = extractReactions(node);

    const seenAt = nowIso();
    const message = compactObject({
      // Placeholder; the real position is not known while parsing a single node.
      index: -1,
      id,
      type: "discord_message",
      url: buildPermalink(id, route),
      timestamp: timestampFromElement || timestampFromSnowflake,
      timestamp_from_snowflake: timestampFromSnowflake,
      author,
      content: {
        text: contentText,
        normalized,
        // undefined is dropped by compactObject, so the key only exists when enabled.
        raw_dom_text: wantsRaw ? uiText || "" : undefined,
      },
      reply_to: replyTo,
      mentions,
      links,
      attachments,
      media,
      embeds,
      reactions,
      flags: {
        edited: isEdited(node),
        author_inferred: Boolean(author?.inferred),
        partial: Boolean(warnings.length),
      },
      ai: {
        text: "",
        token_estimate: 0,
      },
      source: {
        route_key: route.route_key,
        guild_id: route.guild_id,
        dm_id: route.dm_id,
        channel_id: route.channel_id,
        parent_channel_id: route.parent_channel_id,
        thread_id: route.thread_id,
      },
      collection: {
        first_seen_at: seenAt,
        last_seen_at: seenAt,
        extraction_warnings: warnings,
      },
    });

    // Filled in last because the rendered text is built from the finished record.
    message.ai.text = messageToAIText(message);
    message.ai.token_estimate = estimateTokens(message.ai.text);
    return message;
  });
/**
 * Picks the longer of two strings, keeping `a` on a tie. A missing/empty side
 * yields the other one; if both are missing the result is "".
 * @param {?string} a
 * @param {?string} b
 * @returns {string}
 */
const preferLonger = (a, b) => {
  if (!a) return b || "";
  if (!b) return a || "";
  return b.length > a.length ? b : a;
};
/**
 * Combine two author records captured for the same message.
 *
 * - If either side is missing, the other one is returned (null when both are).
 * - A non-inferred new author replaces an inferred old one.
 * - A new author that has a name replaces an old one that has none.
 * - Otherwise the old author is kept, gaining the new id if it had none.
 */
const mergeAuthor = (oldAuthor, newAuthor) => {
  if (!oldAuthor || !newAuthor) return oldAuthor || newAuthor || null;

  const newIsExplicitUpgrade = oldAuthor.inferred && !newAuthor.inferred;
  const newSuppliesMissingName = !oldAuthor.name && newAuthor.name;
  if (newIsExplicitUpgrade || newSuppliesMissingName) return newAuthor;

  const newSuppliesMissingId = !oldAuthor.id && newAuthor.id;
  return newSuppliesMissingId ? { ...oldAuthor, id: newAuthor.id } : oldAuthor;
};
|
|
|
/**
 * Union two extraction-warning lists, preserving first-seen order.
 * Once the merged author has a name or an id, "author_not_found" is dropped.
 */
const mergeWarnings = (oldWarnings = [], newWarnings = [], mergedAuthor = null) => {
  const seen = new Set();
  for (const warning of [...toArray(oldWarnings), ...toArray(newWarnings)]) seen.add(warning);
  const combined = Array.from(seen);

  const authorKnown = mergedAuthor?.name || mergedAuthor?.id;
  if (!authorKnown) return combined;
  return combined.filter((warning) => warning !== "author_not_found");
};
|
|
|
/**
 * Merge a freshly parsed message into the record already stored for the same id.
 *
 * Scalar fields from the new parse win unless listed below; list fields are
 * unioned and de-duplicated; text fields keep the longer variant.
 *
 * Warnings that a later render has resolved are dropped so `flags.partial`
 * does not stay set forever:
 * - the timestamp-fallback warning survives only if BOTH parses lacked a
 *   <time> element, and a parse that did find one supplies the timestamp;
 * - the empty-text warning survives only if the merged text is still empty.
 *
 * @param {object|undefined|null} oldMessage previously stored record, if any
 * @param {object} newMessage record produced by the latest parse
 * @returns {object} the new record as-is when there is no old one, otherwise
 *   the merged record passed through compactObject
 */
const mergeMessage = (oldMessage, newMessage) => {
  if (!oldMessage) return newMessage;

  // These literals match the warnings pushed while parsing a message node.
  const TIMESTAMP_FALLBACK = "timestamp_element_not_found_used_snowflake_fallback";
  const EMPTY_TEXT = "empty_text_maybe_media_only_or_selector_miss";

  const oldWarnings = toArray(oldMessage.collection?.extraction_warnings);
  const newWarnings = toArray(newMessage.collection?.extraction_warnings);
  const oldUsedFallback = oldWarnings.includes(TIMESTAMP_FALLBACK);
  const newUsedFallback = newWarnings.includes(TIMESTAMP_FALLBACK);

  // Prefer a timestamp read from the DOM over one derived from the snowflake.
  const timestamp =
    oldUsedFallback && !newUsedFallback && newMessage.timestamp
      ? newMessage.timestamp
      : oldMessage.timestamp || newMessage.timestamp;

  const author = mergeAuthor(oldMessage.author, newMessage.author);
  const text = preferLonger(oldMessage.content?.text, newMessage.content?.text);

  const extractionWarnings = mergeWarnings(oldWarnings, newWarnings, author).filter((warning) => {
    if (warning === TIMESTAMP_FALLBACK) return oldUsedFallback && newUsedFallback;
    if (warning === EMPTY_TEXT) return !text;
    return true;
  });

  const merged = {
    ...oldMessage,
    ...newMessage,
    timestamp,
    timestamp_from_snowflake: oldMessage.timestamp_from_snowflake || newMessage.timestamp_from_snowflake,
    author,
    content: {
      ...oldMessage.content,
      ...newMessage.content,
      text,
      normalized: preferLonger(oldMessage.content?.normalized, newMessage.content?.normalized),
      raw_dom_text: preferLonger(oldMessage.content?.raw_dom_text, newMessage.content?.raw_dom_text),
    },
    reply_to: oldMessage.reply_to || newMessage.reply_to,
    mentions: uniqueBy([...toArray(oldMessage.mentions), ...toArray(newMessage.mentions)], (x) => JSON.stringify(x)),
    links: uniqueBy([...toArray(oldMessage.links), ...toArray(newMessage.links)], (x) => `${x.href}|${x.text || ""}`),
    attachments: uniqueBy([...toArray(oldMessage.attachments), ...toArray(newMessage.attachments)], (x) => x.url),
    media: uniqueBy([...toArray(oldMessage.media), ...toArray(newMessage.media)], (x) => x.url),
    embeds: uniqueBy([...toArray(oldMessage.embeds), ...toArray(newMessage.embeds)], (x) => `${x.title || ""}|${x.text || ""}`),
    reactions: uniqueBy([...toArray(oldMessage.reactions), ...toArray(newMessage.reactions)], (x) => `${x.label || ""}|${x.text || ""}`),
    flags: {
      edited: Boolean(oldMessage.flags?.edited || newMessage.flags?.edited),
      author_inferred: Boolean(author?.inferred),
      partial: Boolean(extractionWarnings.length),
    },
    source: newMessage.source || oldMessage.source || null,
    collection: {
      first_seen_at: oldMessage.collection?.first_seen_at || newMessage.collection?.first_seen_at || nowIso(),
      last_seen_at: nowIso(),
      extraction_warnings: extractionWarnings,
    },
  };

  // The AI text is derived from the merged fields, so rebuild it last.
  const aiText = messageToAIText(merged);
  merged.ai = { text: aiText, token_estimate: estimateTokens(aiText) };
  return compactObject(merged);
};
|
|
|
/**
 * Return all collected messages ordered by timestamp, then by id.
 *
 * Timestamps are compared by code unit rather than by locale collation, which
 * matches the `>` comparison used by the timestamp-order audit. Ids are
 * compared by length first and then by code unit, so decimal ids of different
 * digit counts sort numerically ("999" before "1000").
 *
 * @returns {object[]} a new array; the underlying map is not modified
 */
const sortedMessages = () => {
  const compareCodeUnits = (x, y) => (x < y ? -1 : x > y ? 1 : 0);
  const compareIds = (x, y) => {
    const left = String(x ?? "");
    const right = String(y ?? "");
    return left.length - right.length || compareCodeUnits(left, right);
  };
  const timestampOf = (message) => message.timestamp || message.timestamp_from_snowflake || "";

  return Array.from(state.messagesById.values()).sort(
    (a, b) => compareCodeUnits(timestampOf(a), timestampOf(b)) || compareIds(a.id, b.id)
  );
};
|
|
|
/**
 * Trim the collection down to `state.config.maxMessages` by deleting the
 * earliest messages in sorted order.
 * A missing, non-finite or non-positive limit disables trimming.
 *
 * @returns {number} how many messages were over the limit (0 when none)
 */
const enforceMaxMessages = () => {
  const limit = Number(state.config.maxMessages || 0);
  const limitDisabled = !Number.isFinite(limit) || limit <= 0;
  if (limitDisabled || state.messagesById.size <= limit) return 0;

  const overflow = state.messagesById.size - limit;
  sortedMessages()
    .slice(0, overflow)
    .forEach((message) => {
      state.messagesById.delete(message.id);
    });
  return overflow;
};
|
|
|
/**
 * Disconnect the mutation observer and remove the scroll listener, then clear
 * the observer, handler and scroll-element references held in state.
 */
const unbindObservers = () => {
  const { observer, scrollElement, scrollHandler } = state;

  if (observer) observer.disconnect();
  if (scrollElement && scrollHandler) scrollElement.removeEventListener("scroll", scrollHandler);

  Object.assign(state, { observer: null, scrollHandler: null, scrollElement: null });
};
|
|
|
/**
 * True when `node` is an element that is the exporter panel, the exporter's
 * <style> element, or something nested inside the panel.
 * Non-element nodes (text, comments, null) are never internal.
 */
const isInternalNode = (node) => {
  const isElement = Boolean(node) && node.nodeType === Node.ELEMENT_NODE;
  if (!isElement) return false;
  if (node.id === APP_ID || node.id === STYLE_ID) return true;
  return Boolean(node.closest?.(`#${cssEscape(APP_ID)}`));
};
|
|
|
/** True when `node` is the document itself, its root element, or its body. */
const isDocumentShellNode = (node) => [document, document.documentElement, document.body].includes(node);
|
|
|
/**
 * Decide whether a mutation record was caused by the exporter's own UI.
 * A record is ignored when its target is internal, or when it targets the
 * document shell and every added/removed node is internal.
 */
const shouldIgnoreMutation = (mutation) => {
  const { target } = mutation;
  if (isInternalNode(target)) return true;
  if (!isDocumentShellNode(target)) return false;

  const touched = Array.from(mutation.addedNodes).concat(Array.from(mutation.removedNodes));
  return touched.length > 0 && touched.every(isInternalNode);
};
|
|
|
/**
 * MutationObserver callback: schedule a collect pass unless every record in
 * the batch is ignorable.
 */
const handleMutations = (mutations) => {
  const hasRelevantChange = mutations.some((mutation) => !shouldIgnoreMutation(mutation));
  if (hasRelevantChange) scheduleCollect();
};
|
|
|
/**
 * Attach the mutation observer and scroll listener to the current scroller.
 *
 * Without `force`, nothing happens when the scroller is unchanged and the
 * observer is either already attached or disabled by config. Otherwise the
 * previous bindings are torn down and rebuilt according to
 * `collectOnMutation` and `collectOnScroll`.
 *
 * @param {{ force?: boolean }} [options]
 */
const bindObservers = ({ force = false } = {}) => {
  const scroller = findScroller({ force });

  const sameScroller = state.scrollElement === scroller;
  const observerSettled = state.observer || !state.config.collectOnMutation;
  if (!force && sameScroller && observerSettled) return;

  unbindObservers();
  state.scrollElement = scroller;

  const canObserve = state.config.collectOnMutation && typeof MutationObserver !== "undefined";
  if (canObserve) {
    state.observer = new MutationObserver(handleMutations);
    // Fall back to the whole body when no scroller was found.
    const root = scroller || document.body;
    state.observer.observe(root, { childList: true, subtree: true, characterData: true });
  }

  const canListen = state.config.collectOnScroll && scroller?.addEventListener;
  if (canListen) {
    state.scrollHandler = scheduleCollect;
    scroller.addEventListener("scroll", state.scrollHandler, { passive: true });
  }
};
|
|
|
/**
 * Detect navigation to a different route and react to it.
 *
 * The first call only records the route key. On a later change the transition
 * is appended to the route history, the scroller cache is dropped, collected
 * data is optionally reset (`resetOnRouteChange`), and observers are rebound.
 *
 * @returns {object} the current route info
 */
const handleRouteChange = () => {
  const route = getRouteInfo();
  const previousRouteKey = state.routeKey;

  if (!previousRouteKey) {
    state.routeKey = route.route_key;
    return route;
  }
  if (route.route_key === previousRouteKey) return route;

  state.routeHistory.push({ at: nowIso(), from: previousRouteKey, to: route.route_key });
  state.routeKey = route.route_key;
  state.scrollerCache = null;

  if (state.config.resetOnRouteChange) {
    state.messagesById.clear();
    state.aliasById.clear();
    state.startedAt = nowIso();
    state.lastCollectStats = null;
  }

  bindObservers({ force: true });
  return route;
};
|
|
|
/**
 * Parse every currently rendered message node and merge it into the store.
 *
 * Runs inside `safe("collect", …)`, which is handed a zeroed stats object as
 * its fallback value. Nodes without an id, or that fail to parse, are counted
 * as skipped. The most recent explicit (non-inferred) author is carried
 * forward and offered to later nodes that have no author of their own.
 *
 * @returns {object} stats for this pass (also stored as state.lastCollectStats)
 */
const collect = () => {
  const fallbackStats = { total: state.messagesById.size, added: 0, updated: 0, skipped: 0, visible_nodes: 0 };

  return safe("collect", fallbackStats, () => {
    const route = handleRouteChange();
    const nodes = findMessageNodes();
    const tally = { added: 0, updated: 0, skipped: 0 };
    let visibleAuthor = null;

    for (const node of nodes) {
      const id = messageIdFromNode(node);
      if (!id) {
        tally.skipped += 1;
        continue;
      }

      const explicitAuthor = extractAuthor(node, id);
      const parsed = parseMessageNode(node, explicitAuthor || visibleAuthor, route);
      if (!parsed) {
        tally.skipped += 1;
        continue;
      }

      // Remember the latest trustworthy author for the nodes that follow.
      if (explicitAuthor) visibleAuthor = explicitAuthor;
      else if (parsed.author && !parsed.author.inferred) visibleAuthor = parsed.author;

      const previous = state.messagesById.get(parsed.id);
      state.messagesById.set(parsed.id, mergeMessage(previous, parsed));
      if (previous) tally.updated += 1;
      else tally.added += 1;
    }

    const trimmed = enforceMaxMessages();

    const stats = {
      at: nowIso(),
      route_key: route.route_key,
      visible_nodes: nodes.length,
      added: tally.added,
      updated: tally.updated,
      skipped: tally.skipped,
      trimmed,
      total: state.messagesById.size,
    };

    state.lastCollectStats = stats;
    state.updatedAt = stats.at;
    updateUiSafe();
    debug("collect", stats);
    return stats;
  });
};
|
|
|
/**
 * Debounced trigger for `collect()`.
 *
 * At most one run is pending at a time. After `collectDebounceMs` the run is
 * handed to requestIdleCallback (1s timeout) when available, otherwise to
 * requestAnimationFrame.
 */
const scheduleCollect = () => {
  if (state.collectScheduled) return;
  state.collectScheduled = true;

  const runCollect = () => {
    state.collectScheduled = false;
    collect();
  };

  const deferRun = () => {
    if (window.requestIdleCallback) {
      window.requestIdleCallback(runCollect, { timeout: 1000 });
    } else {
      window.requestAnimationFrame(runCollect);
    }
  };

  window.setTimeout(deferRun, state.config.collectDebounceMs);
};
|
|
|
/**
 * Describe the scroller's viewport position.
 *
 * Tolerates a missing scroller (bindObservers treats the `findScroller()`
 * result as nullable too): all measurements are then reported as 0, which
 * yields ratio 1 and both `at_top` and `at_bottom` true.
 *
 * @returns {{top:number,max:number,ratio:number,pct:number,at_top:boolean,
 *   at_bottom:boolean,scroll_height:number,client_height:number,
 *   scroller_method:(string|null)}}
 */
const getScrollInfo = () => {
  const scroller = findScroller();
  const scrollHeight = Number(scroller?.scrollHeight) || 0;
  const clientHeight = Number(scroller?.clientHeight) || 0;

  const max = Math.max(0, scrollHeight - clientHeight);
  const top = Math.max(0, Number(scroller?.scrollTop) || 0);
  // With nothing to scroll, treat the viewport as fully scrolled.
  const ratio = max ? Math.min(1, Math.max(0, top / max)) : 1;

  return {
    top,
    max,
    ratio,
    pct: Math.round(ratio * 100),
    at_top: top <= 3,
    at_bottom: max - top <= 3,
    scroll_height: scrollHeight,
    client_height: clientHeight,
    scroller_method: state.scrollerCache?.method || null,
  };
};
|
|
|
/**
 * Group messages into transcript chunks of roughly `chunkTargetChars`
 * characters, repeating the last `chunkOverlapMessages` messages of each
 * chunk at the start of the next one.
 *
 * The overlap is normalised to a non-negative integer first. Passing the raw
 * value to `slice(-overlap)` is wrong for 0 (`slice(-0)` keeps the WHOLE
 * chunk, so every chunk would contain all earlier messages) and for negative
 * or non-numeric settings.
 *
 * @param {object[]} [messages] messages in transcript order; defaults to the
 *   sorted collection
 * @returns {object[]} chunk records of type "discord_transcript_chunk"
 */
const makeChunks = (messages = sortedMessages()) => {
  const textOf = (m) => m.ai?.text || messageToAIText(m);

  const requestedOverlap = Math.floor(Number(state.config.chunkOverlapMessages));
  const overlap = Number.isFinite(requestedOverlap) && requestedOverlap > 0 ? requestedOverlap : 0;

  const chunks = [];
  let current = [];
  let currentChars = 0;

  const pushChunk = () => {
    if (!current.length) return;
    const first = current[0];
    const last = current[current.length - 1];
    const text = current.map(textOf).join("\n\n");

    chunks.push({
      type: "discord_transcript_chunk",
      chunk_id: `discord_chunk_${String(chunks.length + 1).padStart(5, "0")}`,
      ordinal: chunks.length,
      message_start_id: first.id,
      message_end_id: last.id,
      timestamp_start: first.timestamp || first.timestamp_from_snowflake || null,
      timestamp_end: last.timestamp || last.timestamp_from_snowflake || null,
      message_ids: current.map((m) => m.id),
      message_count: current.length,
      char_count: text.length,
      token_estimate: estimateTokens(text),
      text,
    });
  };

  for (const message of messages) {
    // +2 accounts for the "\n\n" separator used when joining.
    const chars = textOf(message).length + 2;

    if (current.length && currentChars + chars > state.config.chunkTargetChars) {
      pushChunk();
      current = overlap > 0 ? current.slice(-overlap) : [];
      currentChars = current.reduce((sum, m) => sum + textOf(m).length + 2, 0);
    }

    current.push(message);
    currentChars += chars;
  }

  pushChunk();
  return chunks;
};
|
|
|
/**
 * Return a shallow copy of `author` with the name and/or id redacted according
 * to the `redact`, `redactAuthorNames` and `redactIds` config flags.
 * Falsy input is returned unchanged.
 */
const redactAuthorForExport = (author) => {
  if (!author) return author;

  const { redact, redactAuthorNames, redactIds } = state.config;
  const redacted = { ...author };

  if (redact && redactAuthorNames && redacted.name) redacted.name = "[REDACTED_AUTHOR]";
  if (redact && redactIds && redacted.id) redacted.id = aliasId(redacted.id, "user");

  return redacted;
};
|
|
|
/**
 * Deep-clone `obj` and redact ids inside the clone.
 *
 * String values stored under a key that is, or ends in, `id` / `ids`
 * (including strings inside arrays under such a key) are replaced via
 * `aliasId`; every other string goes through `redactIdsInString`.
 * Returns the input untouched when id redaction is off or the input is not
 * an object.
 */
const redactKnownIdFields = (obj) => {
  const enabled = state.config.redact && state.config.redactIds;
  if (!enabled || !obj || typeof obj !== "object") return obj;

  const idKeyPattern = /(^|_)(id|ids)$/;

  const visit = (value, key = "") => {
    if (Array.isArray(value)) return value.map((item) => visit(item, key));
    if (typeof value === "string") {
      return idKeyPattern.test(key) ? aliasId(value) : redactIdsInString(value);
    }
    if (!value || typeof value !== "object") return value;

    // Plain object: rewrite each property in place (we own the clone).
    for (const childKey of Object.keys(value)) {
      value[childKey] = visit(value[childKey], childKey);
    }
    return value;
  };

  return visit(deepClone(obj));
};
|
|
|
/**
 * Produce a redacted deep copy of a message for export.
 *
 * With `state.config.redact` off the original message is returned as-is.
 * Otherwise text fields go through `redactTextForExport`, URLs through
 * `redactUrlForExport`, and ids are aliased when `redactIds` is set.
 * Links and media nested inside embeds get the same treatment as the
 * top-level lists, including link `text` and media `alt`, which were
 * previously exported unredacted. The AI text is rebuilt from the redacted
 * fields.
 *
 * @param {object} message collected message record
 * @returns {object} the original (redaction off) or a compacted redacted clone
 */
const redactMessageForExport = (message) => {
  if (!state.config.redact) return message;

  // One definition per item kind keeps top-level and embed lists consistent.
  const redactLink = (x) => ({
    ...x,
    href: x.href ? redactUrlForExport(x.href) : x.href,
    text: x.text ? redactTextForExport(x.text) : x.text,
  });
  const redactMedia = (x) => ({
    ...x,
    url: x.url ? redactUrlForExport(x.url, { media: true }) : x.url,
    alt: x.alt ? redactTextForExport(x.alt) : x.alt,
  });
  const redactAttachment = (x) => ({
    ...x,
    url: x.url ? redactUrlForExport(x.url, { media: true }) : x.url,
    text: x.text ? redactTextForExport(x.text) : x.text,
  });

  const clone = deepClone(message);
  clone.id = state.config.redactIds ? aliasId(clone.id, "msg") : clone.id;
  clone.url = clone.url ? redactUrlForExport(clone.url) : clone.url;
  clone.author = redactAuthorForExport(clone.author);

  if (clone.content) {
    clone.content.text = redactTextForExport(clone.content.text || "");
    clone.content.normalized = normalizeForAI(redactTextForExport(clone.content.normalized || clone.content.text || ""));
    if (clone.content.raw_dom_text) clone.content.raw_dom_text = redactTextForExport(clone.content.raw_dom_text);
  }

  if (clone.reply_to?.preview_text) clone.reply_to.preview_text = redactTextForExport(clone.reply_to.preview_text);
  if (state.config.redactIds && clone.reply_to?.message_id) clone.reply_to.message_id = aliasId(clone.reply_to.message_id, "msg");
  if (clone.reply_to?.href) clone.reply_to.href = redactUrlForExport(clone.reply_to.href);

  clone.links = toArray(clone.links).map(redactLink);
  clone.media = toArray(clone.media).map(redactMedia);
  clone.attachments = toArray(clone.attachments).map(redactAttachment);
  clone.mentions = redactKnownIdFields(toArray(clone.mentions));
  clone.source = redactRouteForExport(clone.source);

  if (clone.embeds?.length) {
    clone.embeds = clone.embeds.map((embed) => ({
      ...embed,
      title: embed.title ? redactTextForExport(embed.title) : embed.title,
      description: embed.description ? redactTextForExport(embed.description) : embed.description,
      text: embed.text ? redactTextForExport(embed.text) : embed.text,
      links: toArray(embed.links).map(redactLink),
      media: toArray(embed.media).map(redactMedia),
    }));
  }

  // Rebuild the derived AI text so it cannot leak unredacted content.
  const aiText = messageToAIText(clone);
  clone.ai = { text: aiText, token_estimate: estimateTokens(aiText) };
  return compactObject(clone);
};
|
|
|
/**
 * Sorted messages prepared for export: each one is passed through
 * `redactMessageForExport` and stamped with its position as `index`.
 */
const exportMessages = () => {
  const prepared = sortedMessages().map(redactMessageForExport);
  return prepared.map((message, index) => ({ ...message, index }));
};
|
|
|
/**
 * Run quality checks over a list of messages.
 *
 * Reports duplicate ids (error), missing timestamps and authors (warn), and
 * as info: inferred authors, messages with no text/attachment/embed, every
 * stored extraction warning, and timestamps that go backwards relative to the
 * previous message.
 *
 * @param {object[]} [messages] defaults to the sorted collection
 * @returns {{at:string,message_count:number,summary:object,truncated:boolean,issues:object[]}}
 *   `issues` is capped at `state.config.maxAuditIssues`; `summary` counts all of them
 */
const auditMessages = (messages = sortedMessages()) => {
  const issues = [];
  const seenIds = new Set();
  let previousTs = null;

  const flag = (severity, type, message) => {
    issues.push({ severity, type, message_id: message.id });
  };

  for (const message of messages) {
    if (seenIds.has(message.id)) flag("error", "duplicate_id", message);
    seenIds.add(message.id);

    const ts = message.timestamp || message.timestamp_from_snowflake || null;
    const hasAuthor = message.author?.name || message.author?.id;
    const hasPayload =
      message.content?.normalized || toArray(message.attachments).length || toArray(message.embeds).length;

    if (!ts) flag("warn", "missing_timestamp", message);
    if (!hasAuthor) flag("warn", "missing_author", message);
    if (message.author?.inferred) flag("info", "author_inferred", message);
    if (!hasPayload) flag("info", "empty_text_no_attachment_or_embed", message);

    for (const warning of toArray(message.collection?.extraction_warnings)) {
      flag("info", warning, message);
    }

    if (previousTs && ts && previousTs > ts) flag("info", "timestamp_order_anomaly", message);
    if (ts) previousTs = ts;
  }

  const summary = { error: 0, warn: 0, info: 0, by_type: {} };
  for (const { severity, type } of issues) {
    summary[severity] = (summary[severity] || 0) + 1;
    summary.by_type[type] = (summary.by_type[type] || 0) + 1;
  }

  const limit = state.config.maxAuditIssues;
  return {
    at: nowIso(),
    message_count: messages.length,
    summary,
    truncated: issues.length > limit,
    issues: issues.slice(0, limit),
  };
};
|
|
|
/** Audit the current collection (unredacted, in sorted order). */
const audit = () => {
  const messages = sortedMessages();
  return auditMessages(messages);
};
|
|
|
/**
 * Snapshot of the collector: run state, route, message counts, covered time
 * range, scroll position, auto-capture state, route history and the last
 * collect result / error.
 * `chunks_estimate` is derived from total AI text length, not from actually
 * building chunks.
 */
const stats = () => {
  const messages = sortedMessages();
  const scroll = getScrollInfo();

  const countWhere = (predicate) => messages.reduce((count, m) => (predicate(m) ? count + 1 : count), 0);
  const sumOf = (measure) => messages.reduce((sum, m) => sum + measure(m), 0);

  const totalAiChars = sumOf((m) => m.ai?.text?.length || 0);
  const chunksEstimate = messages.length
    ? Math.max(1, Math.ceil(totalAiChars / state.config.chunkTargetChars))
    : 0;

  const earliest = messages[0];
  const latest = messages.at(-1);

  return {
    exporter: GLOBAL_NAME,
    running: state.running,
    started_at: state.startedAt,
    updated_at: state.updatedAt,
    source: getRouteInfo(),
    counts: {
      messages: messages.length,
      missing_authors: countWhere((m) => !m.author?.name && !m.author?.id),
      inferred_authors: countWhere((m) => m.author?.inferred),
      partial_messages: countWhere((m) => m.flags?.partial),
      attachments: sumOf((m) => toArray(m.attachments).length),
      links: sumOf((m) => toArray(m.links).length),
      chunks_estimate: chunksEstimate,
    },
    time_range: {
      start: earliest?.timestamp || earliest?.timestamp_from_snowflake || null,
      end: latest?.timestamp || latest?.timestamp_from_snowflake || null,
    },
    scroll,
    auto: { ...state.auto },
    capture: { ...state.capture },
    route_history: [...state.routeHistory],
    last_collect: state.lastCollectStats,
    last_error: state.lastError,
  };
};
|
|
|
/**
 * Build the full archival export object.
 *
 * Runs a collect pass first, then assembles redacted messages, chunks, a
 * stats snapshot and an audit of the exported messages. Route history and the
 * last collect stats have ids redacted when both `redact` and `redactIds` are
 * on. Environment metadata is included only when
 * `includeEnvironmentMetadata` is set.
 *
 * @returns {object} the export, passed through compactObject
 */
const buildExport = () => {
  collect();

  const messages = exportMessages();
  const chunks = makeChunks(messages);
  const snapshot = stats();
  const quality = auditMessages(messages);

  const source = {
    ...redactRouteForExport(snapshot.source),
    page_title: document.title,
    user_agent: state.config.includeEnvironmentMetadata ? navigator.userAgent : undefined,
    language: state.config.includeEnvironmentMetadata ? navigator.language : undefined,
  };

  const counts = { ...snapshot.counts, chunks: chunks.length };

  const collectionStatus = {
    scroll_position_pct: snapshot.scroll.pct,
    at_top: snapshot.scroll.at_top,
    at_bottom: snapshot.scroll.at_bottom,
    scroller_method: snapshot.scroll.scroller_method,
    auto: snapshot.auto,
    route_history:
      state.config.redact && state.config.redactIds
        ? redactKnownIdFields(snapshot.route_history)
        : snapshot.route_history,
    last_collect:
      state.config.redact && state.config.redactIds
        ? redactKnownIdFields(snapshot.last_collect)
        : snapshot.last_collect,
    last_error: snapshot.last_error,
    note: "Scroll position is viewport location, not percent of total channel history.",
  };

  const caveats = [
    "This export contains only messages rendered in the browser while this script was running.",
    "Discord virtualizes message history, so unvisited history is not present in the DOM.",
    "Author names can be inferred for visually grouped continuation messages and may be missing if the group header was never rendered.",
    "This snippet itself does not fetch Discord APIs, extract tokens, or bypass permissions.",
  ];

  return compactObject({
    schema: "discord_visible_dom_ai_export",
    exporter: {
      name: GLOBAL_NAME,
      mode: "browser_visible_dom_only",
    },
    exported_at: nowIso(),
    collection_started_at: state.startedAt,
    collection_updated_at: state.updatedAt,
    source,
    counts,
    time_range: snapshot.time_range,
    collection_status: collectionStatus,
    config: { ...state.config },
    audit: quality,
    caveats,
    messages,
    chunks,
  });
};
|
|
|
/**
 * Serialise the exported messages as JSON Lines, one record per message.
 *
 * Runs a collect pass first. Every record carries the current (redacted)
 * route as `source`. The AI text is resolved once per message and reused for
 * the token-estimate fallback, so a message without a stored `ai` block still
 * gets a real estimate.
 *
 * @returns {string} newline-terminated JSONL, or "" when there are no
 *   messages (rather than a lone blank line, which strict line-by-line
 *   parsers reject)
 */
const buildMessagesJsonl = () => {
  collect();
  const route = redactRouteForExport(getRouteInfo());

  const lines = exportMessages().map((message) => {
    const text = message.ai?.text || messageToAIText(message);

    return JSON.stringify(
      compactObject({
        schema: "discord_message_ai_jsonl",
        type: "discord_message",
        index: message.index,
        id: message.id,
        url: message.url,
        timestamp: message.timestamp || message.timestamp_from_snowflake,
        author_id: message.author?.id || null,
        author_name: message.author?.name || null,
        author_inferred: Boolean(message.author?.inferred),
        content: message.content?.normalized || message.content?.text || "",
        text,
        token_estimate: message.ai?.token_estimate || estimateTokens(text),
        links: message.links || [],
        attachments: message.attachments || [],
        reply_to: message.reply_to || null,
        source: route,
        metadata: {
          mentions: message.mentions || [],
          embeds: message.embeds || [],
          reactions: message.reactions || [],
          media: message.media || [],
          flags: message.flags || {},
          collection: message.collection || {},
        },
      })
    );
  });

  return lines.length ? `${lines.join("\n")}\n` : "";
};
|
|
|
/**
 * Serialise transcript chunks as JSON Lines, one chunk per line.
 *
 * Runs a collect pass first; chunks are built from the exported (possibly
 * redacted) messages and each is tagged with the current redacted route.
 *
 * @returns {string} newline-terminated JSONL, or "" when there are no chunks
 *   (rather than a lone blank line, which strict line-by-line parsers reject)
 */
const buildChunksJsonl = () => {
  collect();
  const route = redactRouteForExport(getRouteInfo());

  const lines = makeChunks(exportMessages()).map((chunk) =>
    JSON.stringify(
      compactObject({
        schema: "discord_chunk_ai_jsonl",
        type: "discord_transcript_chunk",
        ...chunk,
        source: route,
      })
    )
  );

  return lines.length ? `${lines.join("\n")}\n` : "";
};
|
|
|
/**
 * Render the export as a Markdown transcript: a short header (title, export
 * time, message count, time range, caveat) followed by each message's AI text
 * separated by blank lines.
 */
const buildMarkdown = () => {
  const exportObj = buildExport();
  const title = exportObj.source?.title || exportObj.source?.channel_id || "Discord Channel";
  const rangeStart = exportObj.time_range.start || "n/a";
  const rangeEnd = exportObj.time_range.end || "n/a";

  const header = [
    `# Discord export: ${title}`,
    "",
    `- Exported at: ${exportObj.exported_at}`,
    `- Messages: ${exportObj.counts.messages}`,
    `- Range: ${rangeStart} -> ${rangeEnd}`,
    "- Caveat: visible rendered DOM only",
    "",
    "---",
    "",
  ];

  const body = exportObj.messages.flatMap((message) => [message.ai.text, ""]);
  return [...header, ...body].join("\n");
};
|
|
|
/**
 * Turn an arbitrary value into a filename-safe token.
 * Runs of characters outside [A-Za-z0-9_.-] become "_", leading and trailing
 * underscores are stripped, and the result is capped at 160 characters.
 * An empty result falls back to "discord-channel".
 */
const safeFilename = (value) => {
  const fallback = "discord-channel";
  const collapsed = String(value || fallback).replace(/[^\w.-]+/g, "_");
  const trimmed = collapsed.replace(/^_+|_+$/g, "");
  const capped = trimmed.slice(0, 160);
  return capped || fallback;
};
|
|
|
/**
 * Trigger a browser download of `text`.
 *
 * The filename is built from the channel (or DM) id plus a timestamp, made
 * filename-safe, with `extension` appended. A temporary anchor is clicked to
 * start the download and the object URL is revoked 1.5s later.
 *
 * @param {string} text file contents
 * @param {string} extension extension without the leading dot
 * @param {string} mimeType MIME type for the Blob
 */
const downloadText = (text, extension, mimeType) => {
  const { channel_id: channelId, dm_id: dmId } = getRouteInfo();
  const stamp = nowIso().replace(/[:.]/g, "-");
  const base = safeFilename(`discord-${channelId || dmId || "channel"}-${stamp}`);

  const url = URL.createObjectURL(new Blob([text], { type: mimeType }));
  const anchor = Object.assign(document.createElement("a"), {
    href: url,
    download: `${base}.${extension}`,
  });

  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();

  window.setTimeout(() => URL.revokeObjectURL(url), 1500);
};
|
|
|
/** Download the full archival export as pretty-printed JSON. */
const downloadJson = () => {
  const body = JSON.stringify(buildExport(), null, 2);
  return downloadText(body, "json", "application/json");
};

/** Download one-message-per-line JSONL. */
const downloadMessagesJsonl = () => {
  const body = buildMessagesJsonl();
  return downloadText(body, "messages.jsonl", "application/x-ndjson");
};

/** Download one-chunk-per-line JSONL. */
const downloadChunksJsonl = () => {
  const body = buildChunksJsonl();
  return downloadText(body, "chunks.jsonl", "application/x-ndjson");
};

/** Download the Markdown transcript. */
const downloadMarkdown = () => {
  const body = buildMarkdown();
  return downloadText(body, "md", "text/markdown");
};

/** Download the audit report as pretty-printed JSON. */
const downloadAuditJson = () => {
  const body = JSON.stringify(audit(), null, 2);
  return downloadText(body, "audit.json", "application/json");
};
|
/**
 * Download every requested format, always in the fixed order
 * json, messagesJsonl, chunksJsonl, markdown, audit. Unknown names are ignored.
 *
 * @param {string|string[]} [formats] defaults to
 *   `state.config.autoCaptureDownloadFormats`
 * @returns {{downloaded: string[], count: number}}
 */
const downloadSelected = (formats = state.config.autoCaptureDownloadFormats) => {
  const wanted = new Set(toArray(formats));

  const producers = [
    ["json", () => downloadJson()],
    ["messagesJsonl", () => downloadMessagesJsonl()],
    ["chunksJsonl", () => downloadChunksJsonl()],
    ["markdown", () => downloadMarkdown()],
    ["audit", () => downloadAuditJson()],
  ];

  const downloaded = [];
  for (const [format, produce] of producers) {
    if (!wanted.has(format)) continue;
    produce();
    downloaded.push(format);
  }

  return { downloaded, count: downloaded.length };
};
|
|
|
/** Download every supported format: JSON, both JSONL files, Markdown and audit. */
const downloadAll = () => {
  const everyFormat = ["json", "messagesJsonl", "chunksJsonl", "markdown", "audit"];
  return downloadSelected(everyFormat);
};
|
|
|
/**
 * Copy `text` to the clipboard, degrading gracefully.
 *
 * 1. Try the async Clipboard API; a failure is recorded and the next step runs.
 * 2. Try `document.execCommand("copy")` on an off-screen textarea. The textarea
 *    is removed in a `finally`, so a throwing focus/select/execCommand can no
 *    longer leave it in the page; the error is recorded and the next step runs.
 * 3. Download the text as a file.
 *
 * @param {string} text text to copy
 * @returns {Promise<{copied:boolean, method:string, downloaded:boolean, chars:number}>}
 */
const copyText = async (text) => {
  if (navigator.clipboard?.writeText) {
    try {
      await navigator.clipboard.writeText(text);
      return { copied: true, method: "clipboard", downloaded: false, chars: text.length };
    } catch (error) {
      recordError("copyText.clipboard", error);
    }
  }

  const textarea = document.createElement("textarea");
  textarea.value = text;
  textarea.style.position = "fixed";
  textarea.style.left = "-9999px";

  let copied = false;
  try {
    document.body.appendChild(textarea);
    textarea.focus();
    textarea.select();
    copied = Boolean(document.execCommand?.("copy"));
  } catch (error) {
    recordError("copyText.execCommand", error);
  } finally {
    textarea.remove();
  }

  if (copied) return { copied: true, method: "execCommand", downloaded: false, chars: text.length };

  downloadText(text, "clipboard-fallback.txt", "text/plain");
  return { copied: false, method: "download_fallback", downloaded: true, chars: text.length };
};
|
|
|
/** Copy the current stats snapshot as pretty-printed JSON. */
const copyStats = () => {
  const body = JSON.stringify(stats(), null, 2);
  return copyText(body);
};

/** Copy the current audit report as pretty-printed JSON. */
const copyAudit = () => {
  const body = JSON.stringify(audit(), null, 2);
  return copyText(body);
};
|
|
|
/** sessionStorage key for the current route. */
const sessionKey = () => {
  const { route_key: routeKey } = getRouteInfo();
  return `discordAIDump:${routeKey}`;
};
|
|
|
/**
 * Persist the collected messages for the current route in sessionStorage.
 * Messages are stored as collected; no export-time redaction is applied here.
 *
 * @returns {{saved: boolean, chars: number, messages: number}}
 */
const saveSession = () => {
  const snapshot = {
    schema: "discord_visible_dom_session",
    saved_at: nowIso(),
    source: getRouteInfo(),
    messages: sortedMessages(),
  };
  const payload = JSON.stringify(snapshot);

  sessionStorage.setItem(sessionKey(), payload);

  return { saved: true, chars: payload.length, messages: state.messagesById.size };
};
|
|
|
/**
 * Restore messages saved for the current route from sessionStorage, merging
 * each one into whatever is already collected under the same id.
 * Entries without an id are ignored.
 *
 * @returns {{loaded: boolean, messages: number, saved_at?: string}}
 */
const loadSession = () => {
  const raw = sessionStorage.getItem(sessionKey());
  if (!raw) return { loaded: false, messages: 0 };

  const parsed = JSON.parse(raw);
  const restorable = toArray(parsed.messages).filter((message) => message?.id);

  restorable.forEach((message) => {
    const existing = state.messagesById.get(message.id);
    state.messagesById.set(message.id, mergeMessage(existing, message));
  });

  updateUiSafe();
  return { loaded: true, messages: restorable.length, saved_at: parsed.saved_at };
};
|
|
|
/** Remove the saved session for the current route from sessionStorage. */
const clearSession = () => {
  const key = sessionKey();
  sessionStorage.removeItem(key);
  return { cleared: true };
};
|
|
|
/**
 * Execute the panel action named by a button's `data-action`.
 *
 * Unknown actions are ignored. Only the capture, scroll and copy actions are
 * awaited; the rest are fire-and-forget, so their handlers deliberately
 * return nothing. Any error is recorded under `ui:<action>` rather than
 * thrown.
 *
 * @param {string} action action name
 * @param {Event} [event] originating click, used to relabel the redact toggle
 */
const runUiAction = async (action, event) => {
  const handlers = new Map([
    ["hide", () => { document.getElementById(APP_ID)?.remove(); }],
    ["collect", () => { collect(); }],
    ["autoCapture", () => autoCapture()],
    ["autoCaptureDownload", () => autoCapture({ download: true })],
    ["autoUp", () => autoScroll({ direction: "up" })],
    ["autoDown", () => autoScroll({ direction: "down" })],
    ["stopAuto", () => { stopCapture("manual_stop"); }],
    ["json", () => { downloadJson(); }],
    ["messagesJsonl", () => { downloadMessagesJsonl(); }],
    ["chunksJsonl", () => { downloadChunksJsonl(); }],
    ["md", () => { downloadMarkdown(); }],
    ["audit", () => { downloadAuditJson(); }],
    ["all", () => { downloadAll(); }],
    ["copyStats", () => copyStats()],
    ["saveSession", () => { saveSession(); }],
    ["loadSession", () => { loadSession(); }],
    [
      "toggleRedact",
      () => {
        configure({ redact: !state.config.redact });
        if (event?.target) event.target.textContent = `Redact: ${state.config.redact ? "on" : "off"}`;
      },
    ],
    ["reset", () => { reset(); }],
  ]);

  try {
    const handler = handlers.get(action);
    if (handler) await handler();
  } catch (error) {
    recordError(`ui:${action}`, error);
  }
};
|
|
|
/**
 * Create the floating control panel (and its stylesheet) if not yet present.
 *
 * The stylesheet is appended to <html>, the panel to <body>. Button clicks
 * are delegated from the panel root to `runUiAction` via `data-action`.
 * The redact toggle is labelled from the current `state.config.redact`
 * instead of a hard-coded "off", so a panel created after redaction was
 * enabled shows the right state.
 *
 * @returns {HTMLElement} the existing or newly created panel element
 */
const ensureUi = () => {
  if (!document.getElementById(STYLE_ID)) {
    const style = document.createElement("style");
    style.id = STYLE_ID;
    style.textContent = `
      #${APP_ID} {
        position: fixed;
        right: 16px;
        bottom: 16px;
        width: min(480px, calc(100vw - 32px));
        z-index: 2147483647;
        color: #f7f7fb;
        background: rgba(18, 18, 22, 0.96);
        border: 1px solid rgba(255,255,255,0.16);
        border-radius: 14px;
        box-shadow: 0 16px 48px rgba(0,0,0,0.42);
        font: 12px/1.4 -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
        overflow: hidden;
        backdrop-filter: blur(10px);
        user-select: none;
      }
      #${APP_ID} * { box-sizing: border-box; }
      #${APP_ID} .panel { padding: 12px; }
      #${APP_ID} .toprow { display: flex; align-items: center; justify-content: space-between; gap: 8px; margin-bottom: 8px; }
      #${APP_ID} .handle { cursor: move; }
      #${APP_ID} .title { font-size: 13px; font-weight: 750; letter-spacing: 0.01em; }
      #${APP_ID} .subtle { color: rgba(247,247,251,0.68); }
      #${APP_ID} .metrics { display: grid; grid-template-columns: repeat(4, 1fr); gap: 6px; margin: 8px 0; }
      #${APP_ID} .metric { border: 1px solid rgba(255,255,255,0.10); border-radius: 10px; padding: 7px; background: rgba(255,255,255,0.06); }
      #${APP_ID} .metric .value { display: block; font-size: 14px; font-weight: 750; }
      #${APP_ID} .metric .label { display: block; color: rgba(247,247,251,0.62); font-size: 11px; }
      #${APP_ID} .barwrap { margin: 8px 0 4px; }
      #${APP_ID} .barlabel { display:flex; justify-content:space-between; color: rgba(247,247,251,0.72); margin-bottom: 4px; }
      #${APP_ID} .progress { height: 9px; background: rgba(255,255,255,0.12); border-radius: 999px; overflow: hidden; }
      #${APP_ID} .bar { height: 100%; width: 0%; background: rgba(247,247,251,0.86); transition: width 180ms ease; }
      #${APP_ID} .status { color: rgba(247,247,251,0.78); overflow-wrap: anywhere; white-space: pre-line; max-height: 128px; overflow: auto; padding-right: 4px; user-select: text; }
      #${APP_ID} .buttons { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 10px; }
      #${APP_ID} button {
        appearance: none;
        cursor: pointer;
        border: 1px solid rgba(255,255,255,0.16);
        border-radius: 9px;
        background: rgba(255,255,255,0.08);
        color: #f7f7fb;
        padding: 6px 8px;
        font: inherit;
      }
      #${APP_ID} button:hover { background: rgba(255,255,255,0.15); }
      #${APP_ID} button.primary { background: rgba(255,255,255,0.17); }
      #${APP_ID} button.danger { color: #ffd1d1; }
      #${APP_ID} button.warn { color: #ffe3a3; }
    `;
    document.documentElement.appendChild(style);
  }

  let ui = document.getElementById(APP_ID);
  if (ui) return ui;

  // Only the fixed strings "on"/"off" are interpolated into the markup below.
  const redactLabel = `Redact: ${state.config.redact ? "on" : "off"}`;

  ui = document.createElement("div");
  ui.id = APP_ID;
  ui.innerHTML = `
    <div class="panel">
      <div class="toprow handle" data-action="dragHandle">
        <div>
          <div class="title">Discord AI Dump</div>
          <div class="subtle">Visible rendered DOM only</div>
        </div>
        <button data-action="hide" title="Hide panel, keep collector running">Hide</button>
      </div>
      <div class="metrics">
        <div class="metric"><span class="value" data-k="messages">0</span><span class="label">messages</span></div>
        <div class="metric"><span class="value" data-k="visible">0</span><span class="label">visible</span></div>
        <div class="metric"><span class="value" data-k="chunks">0</span><span class="label">chunks</span></div>
        <div class="metric"><span class="value" data-k="partial">0</span><span class="label">partial</span></div>
      </div>
      <div class="barwrap">
        <div class="barlabel"><span>Viewport position</span><span data-k="scrollPct">0%</span></div>
        <div class="progress"><div class="bar" data-k="scrollBar"></div></div>
      </div>
      <div class="status" data-k="status">Starting...</div>
      <div class="buttons">
        <button class="primary" data-action="collect">Collect</button>
        <button class="primary" data-action="autoCapture">Auto capture</button>
        <button class="warn" data-action="autoCaptureDownload">Auto + files</button>
        <button data-action="autoUp">Auto up</button>
        <button data-action="autoDown">Auto down</button>
        <button class="danger" data-action="stopAuto">Stop</button>
        <button class="primary" data-action="json">JSON</button>
        <button data-action="messagesJsonl">Messages JSONL</button>
        <button data-action="chunksJsonl">Chunks JSONL</button>
        <button data-action="md">MD</button>
        <button data-action="audit">Audit</button>
        <button data-action="all">All files</button>
        <button data-action="copyStats">Copy stats</button>
        <button data-action="saveSession">Save session</button>
        <button data-action="loadSession">Load session</button>
        <button data-action="toggleRedact">${redactLabel}</button>
        <button data-action="reset">Reset</button>
      </div>
    </div>
  `;

  // One delegated listener serves every button; the drag handle is not an action.
  ui.addEventListener("click", (event) => {
    const action = event.target?.dataset?.action;
    if (!action || action === "dragHandle") return;
    void runUiAction(action, event);
  });

  makeDraggable(ui);
  document.body.appendChild(ui);
  return ui;
};
|
|
|
/**
 * Makes the floating panel draggable by its `.handle` element.
 *
 * The in-progress drag is tracked in `state.drag` (pointer id, pointer start
 * position, and the panel's starting offset). While a drag is active the panel
 * is positioned with `left`/`top` and kept at least 8px inside the viewport.
 *
 * @param {HTMLElement} ui - Panel root element; it is the element that moves.
 * @returns {void} Returns early, wiring nothing, when the panel has no `.handle`.
 */
const makeDraggable = (ui) => {
  const handle = ui.querySelector(".handle");
  if (!handle) return;

  // Minimum gap kept between the panel and every viewport edge.
  const EDGE_MARGIN_PX = 8;
  const clampToViewport = (value, max) => Math.max(EDGE_MARGIN_PX, Math.min(max, value));

  // Ends the drag only for the pointer that started it.
  const endDrag = (event) => {
    if (state.drag?.pointerId === event.pointerId) state.drag = null;
  };

  handle.addEventListener("pointerdown", (event) => {
    // Buttons living inside the handle must keep their normal click behaviour.
    if (event.target?.tagName === "BUTTON") return;
    state.drag = {
      pointerId: event.pointerId,
      startX: event.clientX,
      startY: event.clientY,
      left: ui.offsetLeft,
      top: ui.offsetTop,
    };
    // Capture so pointermove keeps arriving even when the pointer leaves the panel.
    ui.setPointerCapture?.(event.pointerId);
  });

  ui.addEventListener("pointermove", (event) => {
    if (!state.drag || state.drag.pointerId !== event.pointerId) return;
    const nextLeft = clampToViewport(
      state.drag.left + event.clientX - state.drag.startX,
      window.innerWidth - ui.offsetWidth - EDGE_MARGIN_PX
    );
    const nextTop = clampToViewport(
      state.drag.top + event.clientY - state.drag.startY,
      window.innerHeight - ui.offsetHeight - EDGE_MARGIN_PX
    );
    ui.style.left = `${nextLeft}px`;
    ui.style.top = `${nextTop}px`;
    // Switch from any right/bottom anchoring to explicit left/top positioning.
    ui.style.right = "auto";
    ui.style.bottom = "auto";
  });

  ui.addEventListener("pointerup", endDrag);
  // A cancelled gesture or lost capture never delivers pointerup; without these
  // handlers state.drag would stay set and the panel would keep following the pointer.
  ui.addEventListener("pointercancel", endDrag);
  ui.addEventListener("lostpointercapture", endDrag);
};
|
|
|
/**
 * Obtains the panel via `ensureUi()`, refreshes its readouts, and hands the
 * panel element back to the caller.
 *
 * @returns {*} Whatever `ensureUi()` returned.
 */
const show = () => {
  const panel = ensureUi();
  updateUiSafe();
  return panel;
};
|
|
|
/**
 * Refreshes every readout in the panel from a fresh `stats()` snapshot.
 *
 * Does nothing when the panel element (id `APP_ID`) is not in the document.
 * Throws if an expected `data-k` element is missing; callers that must not
 * throw go through `updateUiSafe`.
 *
 * @returns {void}
 */
const updateUi = () => {
  const panel = document.getElementById(APP_ID);
  if (!panel) return;

  // Looks up one readout element by its data-k key.
  const field = (key) => panel.querySelector(`[data-k="${key}"]`);

  const snapshot = stats();
  const { scroll } = snapshot;
  const last = state.lastCollectStats || { visible_nodes: 0, added: 0, updated: 0, skipped: 0 };

  // An active capture takes precedence over a bare auto-scroll when labelling the mode.
  let mode = "manual";
  if (state.capture.active) {
    mode = `capture ${state.capture.step || "starting"}`;
  } else if (state.auto.active) {
    mode = `auto-${state.auto.direction} round ${state.auto.rounds}`;
  }

  field("messages").textContent = String(snapshot.counts.messages);
  field("visible").textContent = String(last.visible_nodes || 0);
  field("chunks").textContent = String(snapshot.counts.chunks_estimate);
  field("partial").textContent = String(snapshot.counts.partial_messages);
  field("scrollPct").textContent = `${scroll.pct}%`;
  field("scrollBar").style.width = `${scroll.pct}%`;

  const statusElement = field("status");
  const lines = [
    `${mode} | +${last.added || 0} / updated ${last.updated || 0} / skipped ${last.skipped || 0}`,
    `range ${snapshot.time_range.start || "n/a"} -> ${snapshot.time_range.end || "n/a"}`,
    `missing authors ${snapshot.counts.missing_authors} | inferred authors ${snapshot.counts.inferred_authors}`,
    `top=${scroll.at_top} bottom=${scroll.at_bottom} | redact=${state.config.redact} | scroller=${scroll.scroller_method || "n/a"}`,
  ];
  // Optional lines appear only when there is something to report.
  if (state.routeHistory.length) {
    lines.push(`route resets: ${state.routeHistory.length}`);
  }
  if (state.lastError) {
    lines.push(`last error: ${state.lastError.scope}: ${state.lastError.message}`);
  }
  lines.push("Progress is viewport position, not percent of total channel history.");

  statusElement.textContent = lines.join("\n");
};
|
|
|
/**
 * Non-throwing wrapper around `updateUi()`.
 *
 * A failure is not rethrown; it is stored in `state.lastError` with scope
 * "updateUi".
 *
 * @returns {void}
 */
const updateUiSafe = () => {
  try {
    updateUi();
  } catch (error) {
    const at = nowIso();
    const message = error?.message || String(error);
    const stack = error?.stack || null;
    state.lastError = { at, scope: "updateUi", message, stack };
  }
};
|
|
|
/**
 * Flags the current auto-scroll as stopped; the `autoScroll` loop checks the
 * flag and exits on its next test.
 *
 * @param {string} [reason="stopped"] - Stored in `state.auto.reason`.
 * @returns {void}
 */
const stopAutoScroll = (reason = "stopped") => {
  const { auto } = state;
  auto.active = false;
  auto.stoppedAt = nowIso();
  auto.reason = reason;
  updateUiSafe();
};
|
|
|
/**
 * Flags the current capture as stopped and also stops any auto-scroll with the
 * same reason.
 *
 * @param {string} [reason="stopped"] - Stored in `state.capture.reason` and
 *   forwarded to `stopAutoScroll`.
 * @returns {void}
 */
const stopCapture = (reason = "stopped") => {
  const { capture } = state;
  capture.active = false;
  capture.stoppedAt = nowIso();
  capture.reason = reason;

  stopAutoScroll(reason);
  updateUiSafe();
};
|
|
|
/**
 * Scrolls the message scroller step by step in one direction, calling
 * `collect()` before and after every step, until one of these happens:
 *   - `stopAutoScroll()` / `stopCapture()` is called,
 *   - `maxRounds` rounds have run (reason "max_rounds"),
 *   - `state.config.autoScrollIdleRounds` consecutive rounds made no progress
 *     (reason "idle_<n>_rounds").
 *
 * A round counts as idle when the message count and the scroll height are both
 * unchanged and the scroller either did not move or sits at the boundary for
 * the chosen direction.
 *
 * @param {object} [options]
 * @param {"up"|"down"} [options.direction="up"] - "up" scrolls toward the top;
 *   any other value scrolls down.
 * @param {number} [options.maxRounds=state.config.maxAutoScrollRounds] - Upper
 *   bound on scroll rounds.
 * @returns {Promise<object>} `{ started: false, reason }` when nothing was
 *   started ("already_active" or "no_scroller"); otherwise `{ started: true }`
 *   merged with the final fields of this run's `state.auto` record.
 * @throws Rethrows any error raised while scrolling/collecting, after marking
 *   the run inactive so a later call is not blocked by a stale `active` flag.
 */
const autoScroll = async ({ direction = "up", maxRounds = state.config.maxAutoScrollRounds } = {}) => {
  if (state.auto.active) return { started: false, reason: "already_active" };

  const scroller = findScroller({ force: true });
  // NOTE(review): findScroller is defined elsewhere; this guard assumes it can
  // return a falsy value when no scroller is found. Confirm against its definition.
  if (!scroller) return { started: false, reason: "no_scroller" };

  // This run owns `session`. If state.auto is later replaced (stop followed by a
  // new autoScroll while this run is sleeping), this run must exit instead of
  // driving the newer run's record.
  const session = {
    active: true,
    direction,
    rounds: 0,
    idleRounds: 0,
    startedAt: nowIso(),
    stoppedAt: null,
    reason: null,
  };
  state.auto = session;
  const isCurrent = () => session.active && state.auto === session;

  let lastCount = state.messagesById.size;
  let lastScrollHeight = scroller.scrollHeight;
  let lastTop = scroller.scrollTop;

  try {
    while (isCurrent() && session.rounds < maxRounds) {
      session.rounds += 1;
      collect();

      const beforeTop = scroller.scrollTop;
      const delta = direction === "up" ? -state.config.autoScrollStepPx : state.config.autoScrollStepPx;
      scroller.scrollTop = Math.max(0, Math.min(scroller.scrollHeight, beforeTop + delta));

      await sleep(state.config.autoScrollDelayMs);
      // Stopped or superseded while sleeping: leave the recorded stop reason untouched.
      if (!isCurrent()) break;
      collect();

      const scroll = getScrollInfo();
      const countChanged = state.messagesById.size !== lastCount;
      const heightChanged = scroller.scrollHeight !== lastScrollHeight;
      // 4px tolerance so sub-pixel jitter does not count as movement.
      const moved = Math.abs(scroller.scrollTop - lastTop) > 4 || Math.abs(scroller.scrollTop - beforeTop) > 4;
      const hitBoundary = direction === "up" ? scroll.at_top : scroll.at_bottom;

      if (!countChanged && !heightChanged && (!moved || hitBoundary)) session.idleRounds += 1;
      else session.idleRounds = 0;

      lastCount = state.messagesById.size;
      lastScrollHeight = scroller.scrollHeight;
      lastTop = scroller.scrollTop;
      updateUiSafe();

      if (session.idleRounds >= state.config.autoScrollIdleRounds) {
        stopAutoScroll(`idle_${session.idleRounds}_rounds`);
        break;
      }
    }

    if (isCurrent() && session.rounds >= maxRounds) stopAutoScroll("max_rounds");
    collect();
  } catch (error) {
    // Never leave a dead run flagged active; that would make every later call
    // return "already_active".
    if (isCurrent()) stopAutoScroll("error");
    else session.active = false;
    throw error;
  }

  return { started: true, ...session };
};
|
|
|
/**
 * Runs a full capture: collect, auto-scroll in each requested direction,
 * collect again, optionally restore the starting scroll position, and
 * optionally trigger downloads.
 *
 * @param {object} [options]
 * @param {string|string[]} [options.directions=state.config.autoCaptureDirections]
 *   Directions to scroll, in order. Entries other than "up"/"down" are skipped;
 *   an empty list falls back to ["up", "down"].
 * @param {boolean} [options.download=false] - Call `downloadSelected(formats)`
 *   when the capture finishes without having been stopped.
 * @param {*} [options.formats=state.config.autoCaptureDownloadFormats] - Passed
 *   through to `downloadSelected`.
 * @param {boolean} [options.restoreInitialScroll=false] - Scroll back to where
 *   the capture started once scrolling is done.
 * @returns {Promise<object>} `{ started: false, reason }` when nothing was
 *   started ("capture_already_active" or "no_scroller"); otherwise
 *   `{ started: true, reason, downloads, stats, audit }`, where `reason` is
 *   "complete" or the reason the capture was stopped with.
 * @throws Rethrows any error raised during the capture, after marking the run
 *   inactive so a later capture is not blocked by a stale `active` flag.
 */
const autoCapture = async ({
  directions = state.config.autoCaptureDirections,
  download = false,
  formats = state.config.autoCaptureDownloadFormats,
  restoreInitialScroll = false,
} = {}) => {
  if (state.capture.active) return { started: false, reason: "capture_already_active" };

  const scroller = findScroller({ force: true });
  // NOTE(review): findScroller is defined elsewhere; this guard assumes it can
  // return a falsy value when no scroller is found. Confirm against its definition.
  if (!scroller) return { started: false, reason: "no_scroller" };

  const initialTop = scroller.scrollTop;
  const requestedDirections = toArray(directions);
  const normalizedDirections = requestedDirections.length ? requestedDirections : ["up", "down"];

  // This run owns `session`. If state.capture is replaced while this run is
  // awaiting (stop followed by a new capture), this run must only finish its own
  // record and never mark the newer capture as done.
  const session = {
    active: true,
    step: "collect",
    startedAt: nowIso(),
    stoppedAt: null,
    reason: null,
    downloadOnDone: Boolean(download),
  };
  state.capture = session;
  const ownsState = () => state.capture === session;
  const isCurrent = () => session.active && ownsState();

  let downloads = null;

  try {
    collect();

    for (const direction of normalizedDirections) {
      if (!isCurrent()) break;
      if (!["up", "down"].includes(direction)) continue;
      session.step = direction;
      updateUiSafe();
      await autoScroll({ direction });
    }

    collect();

    // Skipped when superseded, so a newer capture's scrolling is not disturbed.
    if (ownsState() && restoreInitialScroll && isUsableScroller(scroller)) {
      scroller.scrollTop = Math.max(0, Math.min(scroller.scrollHeight, initialTop));
      await sleep(150);
      collect();
    }

    session.step = download ? "download" : "done";
    // A capture that was stopped (or superseded) does not trigger downloads.
    if (download && isCurrent()) downloads = downloadSelected(formats);

    session.reason = session.reason || "complete";
  } catch (error) {
    session.reason = session.reason || "error";
    throw error;
  } finally {
    // Runs on success and failure alike so `active` can never get stuck.
    session.active = false;
    session.stoppedAt = nowIso();
    updateUiSafe();
  }

  return {
    started: true,
    reason: session.reason,
    downloads,
    stats: stats(),
    audit: audit(),
  };
};
|
|
|
/**
 * Console-friendly alias for `autoCapture`.
 *
 * @param {object} [options={}] - Forwarded unchanged to `autoCapture`.
 * @returns {Promise<object>} The promise returned by `autoCapture`.
 */
const run = (options = {}) => {
  return autoCapture(options);
};
|
|
|
/**
 * Shallow-merges `partial` over the current configuration.
 *
 * While the collector is running, the periodic collect timer is recreated from
 * the merged `collectIntervalMs` (no timer when it is not > 0) and the
 * observers are rebound.
 *
 * @param {object} [partial={}] - Config keys to override.
 * @returns {object} A shallow copy of the merged configuration.
 */
const configure = (partial = {}) => {
  state.config = { ...state.config, ...partial };

  if (state.running) {
    if (state.intervalHandle) clearInterval(state.intervalHandle);

    const intervalMs = state.config.collectIntervalMs;
    let nextHandle = null;
    if (intervalMs > 0) nextHandle = setInterval(scheduleCollect, intervalMs);
    state.intervalHandle = nextHandle;

    bindObservers({ force: true });
  }

  updateUiSafe();

  const configCopy = { ...state.config };
  return configCopy;
};
|
|
|
/**
 * Stops any running capture, optionally empties the collected data, clears
 * route/error bookkeeping, and immediately collects again.
 *
 * @param {object} [options]
 * @param {boolean} [options.clearMessages=true] - Empty `state.messagesById`.
 * @param {boolean} [options.clearAliases=true] - Empty `state.aliasById`.
 * @returns {void}
 */
const reset = ({ clearMessages = true, clearAliases = true } = {}) => {
  stopCapture("reset");

  if (clearMessages) {
    state.messagesById.clear();
  }
  if (clearAliases) {
    state.aliasById.clear();
  }

  // Re-anchor to the current route and forget earlier route changes.
  const { route_key: currentRouteKey } = getRouteInfo();
  state.routeKey = currentRouteKey;
  state.routeHistory = [];

  // Drop collection and error bookkeeping from before the reset.
  state.lastCollectStats = null;
  state.lastError = null;
  state.errorLog = [];

  state.startedAt = nowIso();

  collect();
  updateUiSafe();
};
|
|
|
/**
 * Starts (or restarts) the collector: shows the panel, binds observers, does a
 * first collect, starts the periodic collect timer when
 * `config.collectIntervalMs > 0`, and kicks off an automatic capture when
 * `config.autoCaptureOnStart` is set.
 *
 * Warns on the console when the page is not discord.com or one of its
 * subdomains. The check is advisory only; the collector still starts.
 *
 * @returns {void}
 */
const start = () => {
  if (state.running) stop({ removeUi: false });
  state.running = true;
  state.startedAt = nowIso();
  state.routeKey = getRouteInfo().route_key;

  show();

  // Anchor on a label boundary: `\bdiscord\.com$` also accepted look-alike
  // hosts such as "evil-discord.com" because `\b` matches after "-".
  if (!/(^|\.)discord\.com$/i.test(location.hostname)) {
    console.warn("discordAIDump is intended for discord.com channel pages. Current hostname:", location.hostname);
  }

  bindObservers({ force: true });
  collect();
  if (state.config.collectIntervalMs > 0) state.intervalHandle = setInterval(scheduleCollect, state.config.collectIntervalMs);
  if (state.config.autoCaptureOnStart) {
    // Fire-and-forget, but a failure is recorded instead of surfacing as an
    // unhandled promise rejection.
    autoCapture({ download: state.config.autoCaptureDownloadOnDone }).catch((error) => {
      state.lastError = { at: nowIso(), scope: "autoCapture", message: error?.message || String(error), stack: error?.stack || null };
      console.warn("discordAIDump auto capture failed:", error);
      updateUiSafe();
    });
  }

  console.log(
    [
      "discordAIDump started.",
      "Run discordAIDump.run() for automatic capture or discordAIDump.run({ download: true }) for capture + files.",
      "Exports: downloadJson(), downloadMessagesJsonl(), downloadChunksJsonl(), downloadMarkdown(), downloadAuditJson(), downloadAll().",
      "Diagnostics: stats(), audit(), selfTest(), copyStats(), copyAudit().",
      "Visible DOM only. No token extraction, network requests, or hidden Discord API calls.",
    ].join("\n")
  );
};
|
|
|
/**
 * Stops the collector: clears the periodic collect timer, unbinds observers,
 * and stops any capture/auto-scroll with reason "stop".
 *
 * @param {object} [options]
 * @param {boolean} [options.removeUi=false] - Also remove every element whose
 *   id is listed in `LEGACY_UI_IDS`.
 *   NOTE(review): presumably that list includes the current panel id; verify
 *   against its definition.
 * @returns {void}
 */
const stop = ({ removeUi = false } = {}) => {
  const { intervalHandle } = state;
  if (intervalHandle) {
    clearInterval(intervalHandle);
  }
  state.intervalHandle = null;
  state.collectScheduled = false;
  state.running = false;

  unbindObservers();
  stopCapture("stop");

  if (!removeUi) return;

  for (const elementId of LEGACY_UI_IDS) {
    const element = document.getElementById(elementId);
    element?.remove();
  }
};
|
|
|
/**
 * Quick in-page diagnostics: runs a fixed list of named checks against the
 * script's own helpers, inside `safe("selfTest", ...)`.
 *
 * NOTE(review): the fallback object passed to `safe` is built before the
 * callback runs, so its `error` field holds whatever `state.lastError` was at
 * call time. Presumably `safe` returns this fallback when the callback throws;
 * confirm against its definition.
 *
 * @returns {*} From the callback: `{ ok, tests, stats }`, where `tests` is a
 *   list of `{ name, ok }` and `ok` is true only when every check passed.
 */
const selfTest = () =>
  safe("selfTest", { ok: false, error: state.lastError }, () => {
    // Each probe is evaluated lazily, in list order.
    const probes = [
      ["snowflake timestamp parses", () => Boolean(snowflakeTimestamp("175928847299117063"))],
      ["text normalization trims", () => normalizeForAI("  a \n\n\n b  ") === "a\n\nb"],
      ["attachment classified", () => classifyUrl("https://cdn.discordapp.com/attachments/1/2/file.png") === "attachment"],
      ["internal user link classified", () => classifyUrl("https://discord.com/users/123456789012345678") === "internal_user"],
      ["stats available", () => typeof stats().counts.messages === "number"],
      ["auto capture available", () => typeof autoCapture === "function"],
    ];

    const tests = [];
    for (const [name, probe] of probes) {
      tests.push({ name, ok: Boolean(probe()) });
    }

    const allPassed = tests.every(({ ok }) => ok);
    return {
      ok: allPassed,
      tests,
      stats: stats(),
    };
  });
|
|
|
// Public surface of the exporter. Key order is kept stable because it is what
// shows up when the object is inspected in the console.
const api = {
  // Live state and lifecycle.
  state, start, stop, show, reset, configure,

  // Collection and diagnostics.
  collect, stats, audit, selfTest,
  messages: sortedMessages,
  makeChunks,

  // Export builders.
  buildExport, buildMessagesJsonl, buildChunksJsonl, buildMarkdown,

  // File downloads.
  downloadJson, downloadMessagesJsonl, downloadChunksJsonl, downloadMarkdown,
  downloadAuditJson, downloadAll,

  // Clipboard helpers.
  copyStats, copyAudit,

  // Session persistence.
  saveSession, loadSession, clearSession,

  // Automatic capture and scrolling.
  run, autoCapture, stopCapture, downloadSelected, autoScroll,
  autoScrollUp: () => {
    return autoScroll({ direction: "up" });
  },
  autoScrollDown: () => {
    return autoScroll({ direction: "down" });
  },
  stopAutoScroll,
};
|
|
|
// Publish the API under the configured global name so it can be driven from
// the DevTools console.
window[GLOBAL_NAME] = api;

// Start immediately only when the configuration asks for it.
const { autoStart } = state.config;
if (autoStart) {
  start();
}
|
})(); |