Last active
September 3, 2025 20:11
-
-
Save Sdy603/d3115374f28542d05fd0d55cd69c749e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // rollup-duo-usage-autodetect.js | |
| // | |
| // Purpose | |
| // ------- | |
| // Pull per-user, per-day GitLab Duo usage **without ClickHouse**. | |
| // 1) Probe GraphQL for `group.aiUsageData.all`; if present, use it. | |
| // 2) Else fall back to `group.aiUsageData.codeSuggestionEvents`. | |
| // Counts are **always** computed; sizes are included when the schema exposes `suggestionSize`. | |
| // Any non–Code-Suggestions event types from `all` are summarized into `extras` with: | |
| // - count (per user/day) | |
| // - __typename | |
| // - fields[] (introspected once per typename, cached per run) | |
| // | |
| // Output CSV columns (matches table): | |
| // date,email,username,is_active, | |
| // total_accepts_size,total_rejects_size, | |
| // total_accepts_count,total_rejects_count, | |
| // extras,source | |
| // | |
| // Engagement rule: | |
| // is_active = (total_accepts_count + total_rejects_count) > 0 | |
| // | |
| // Usage | |
| // ----- | |
| // node rollup-duo-usage-autodetect.js \ | |
| // --baseUrl=https://gitlab.example.com \ | |
| // --groupPath=my-group/subgroup \ | |
| // --token=XXXXXXXX \ | |
| // --out=gitlab_duo_daily_usages.csv \ | |
| // [--resolveEmails=rest|none] # default: rest (on SaaS use 'none') \ | |
| // [--concurrency=6] # email lookup workers \ | |
| // [--logNewEvents=true|false] # default: false; prints event::typename seen | |
| // | |
| // Prerequisites | |
| // ------------- | |
| // - Node 18+ (global fetch). | |
| // - Personal Access Token with `read_api`. | |
| // - For email enrichment on self-managed, use an admin/read_user token | |
| // (GitLab.com usually won't return other users' emails). | |
| // | |
| // What endpoints are used? | |
| // ------------------------ | |
| // - `AiUsageData.all` (when present): unified AI event stream registered in the enum. | |
| // - `AiUsageData.codeSuggestionEvents`: raw Code Suggestions (shown/accepted/rejected) from Postgres. | |
| // - We do **not** call ClickHouse-only endpoints (e.g., `AiUserMetrics`, `AiMetrics`). | |
| // | |
| // How to expose new AI events via `all` (self-managed) | |
| // ---------------------------------------------------- | |
| // Server maintainers must: | |
| // 1) Add the event to the enum: ee/app/graphql/types/analytics/ai_usage/ai_usage_event_type_enum.rb | |
| // 2) Compile GraphQL docs: `bundle exec rake gitlab:graphql:compile_docs` | |
| // | |
| // Notes | |
| // ----- | |
| // - UTC day bucketing (YYYY-MM-DD). | |
| // - SHOWN is ignored for metrics/active. | |
| // - If `all` doesn’t expose `suggestionSize`, sizes are left blank; counts still populate. | |
| // - `extras` looks like: | |
| // { | |
| // "events": { | |
| // "duo.chat.message": { "count": 3, "typename": "DuoChatEvent", "fields": ["timestamp","user","tokenCount","model", ...] }, | |
| // "code.review.request": { "count": 1, "typename": "CodeReviewRequestEvent", "fields": ["timestamp","user","mergeRequestId", ...] } | |
| // } | |
| // } | |
| // | |
| // Load | |
| // ---- | |
| // COPY custom.gitlab_duo_daily_usages | |
| // (date, email, username, is_active, | |
| // total_accepts_size, total_rejects_size, | |
| // total_accepts_count, total_rejects_count, | |
| // extras, source) | |
| // FROM '/path/to/gitlab_duo_daily_usages.csv' | |
| // WITH (FORMAT csv, HEADER true, NULL ''); | |
| // | |
| // --------------------------------------------------------------------------- | |
| const fs = require("fs"); | |
| const os = require("os"); | |
// Read a `--name=value` flag from process.argv; returns `def` when the flag
// is absent. An empty value (`--name=`) returns "".
function getArg(name, def = undefined) {
  const prefix = `--${name}=`;
  for (const arg of process.argv) {
    if (arg.startsWith(prefix)) return arg.slice(prefix.length);
  }
  return def;
}
// ---------- CLI configuration ----------
const baseUrl = getArg("baseUrl");
const groupPath = getArg("groupPath");
const token = getArg("token");
const outFile = getArg("out", "gitlab_duo_daily_usages.csv");
// Email resolution strategy: "rest" (default) or "none" (recommended on SaaS).
const resolveEmails = (getArg("resolveEmails", "rest") || "rest").toLowerCase(); // rest|none
// Email-lookup worker count, clamped to [1, 16]. Fall back to 6 when the flag
// is not numeric: previously `Number("abc")` produced NaN, which propagated
// through Math.max/min and silently started zero workers.
const concurrencyRaw = Number(getArg("concurrency", "6"));
const concurrency = Math.max(1, Math.min(16, Number.isFinite(concurrencyRaw) ? concurrencyRaw : 6));
const logNewEvents = String(getArg("logNewEvents", "false")).toLowerCase() === "true";
if (!baseUrl || !groupPath || !token) {
  console.error("Missing required args.\nExample:");
  console.error("node rollup-duo-usage-autodetect.js --baseUrl=https://gitlab.example.com --groupPath=my-group --token=XXXX");
  process.exit(1);
}
// Normalize endpoints: strip trailing slashes from the base URL.
const base = baseUrl.replace(/\/+$/,"");
const gqlEndpoint = `${base}/api/graphql`;
const restRoot = `${base}/api/v4`;
| // ---------- Helpers ---------- | |
// Coerce a value to a finite number; NaN/Infinity/non-numeric inputs become 0.
function toNum(v) {
  const parsed = Number(v);
  if (Number.isFinite(parsed)) return parsed;
  return 0;
}
// CSV-escape a single field per RFC 4180: null/undefined -> "", and quote
// (doubling embedded quotes) when the value contains a comma, quote, CR, or
// LF. The original regex omitted \r, so a bare carriage return could split a
// row when the CSV was consumed on Windows-style input.
function escCSV(v) {
  if (v === null || v === undefined) return "";
  const s = String(v);
  return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}
// GitLab GraphQL enum values for Code Suggestions IDE events. ACCEPT/REJECT
// drive the counts and the is_active rule; SHOWN is recognized only so it can
// be explicitly ignored in the rollup loop.
const EV_ACCEPT = "CODE_SUGGESTION_ACCEPTED_IN_IDE";
const EV_REJECT = "CODE_SUGGESTION_REJECTED_IN_IDE";
const EV_SHOWN = "CODE_SUGGESTION_SHOWN_IN_IDE";
// Create an empty per-user/per-day accumulator. Size totals are numeric only
// when the active schema exposes `suggestionSize`; otherwise they stay null so
// the CSV emits blanks instead of zeros.
function initAgg(usingSize) {
  const agg = {
    acceptSize: null,
    rejectSize: null,
    acceptCount: 0,
    rejectCount: 0,
    extras: { events: {} }, // key -> { count, typename, fields[] }
    userId: null,
  };
  if (usingSize) {
    agg.acceptSize = 0;
    agg.rejectSize = 0;
  }
  return agg;
}
// Normalize an event identifier into a dotted lowercase key for the `extras`
// map, e.g. "DUO_CHAT_MESSAGE" -> "duo.chat.message". Falsy input yields "".
function extrasKeyFromEvent(ev) {
  const lowered = (ev || "").toLowerCase();
  const dotted = lowered.replace(/[^a-z0-9]+/g, ".");
  return dotted.replace(/^\.+|\.+$/g, "");
}
| // ---------- Simple retry for REST ---------- | |
// Fetch with exponential backoff on transient failures (HTTP 5xx / 429).
// Delays are 500ms, 1s, 2s, ... capped at 5s. Returns the first OK or
// non-transient response immediately; once `attempts` transient failures are
// exhausted, returns the LAST response. (The original issued one extra,
// un-counted request after the loop — attempts + 1 fetches total — and always
// slept before it.)
async function backoffFetch(url, opts, attempts = 5) {
  let delay = 500;
  let last = null;
  for (let i = 1; i <= attempts; i++) {
    last = await fetch(url, opts);
    if (last.ok) return last;
    const transient = last.status >= 500 || last.status === 429;
    if (!transient) return last;
    if (i < attempts) {
      // Sleep only when another attempt will actually be made.
      await new Promise(res => setTimeout(res, delay));
      delay = Math.min(5000, delay * 2);
    }
  }
  return last;
}
| // ---------- Email enrichment ---------- | |
// Look up a user's email via the REST `/users?username=...` endpoint
// (best effort). Returns the email string, or null when the request fails,
// the body is not a JSON array, or the token cannot see the user's email.
// Both Bearer and Private-Token headers are sent so either token style works.
async function restLookupEmail(username) {
  const url = `${restRoot}/users?username=${encodeURIComponent(username)}`;
  const headers = {
    "Accept": "application/json",
    "Authorization": `Bearer ${token}`,
    "Private-Token": token,
  };
  const res = await backoffFetch(url, { headers });
  if (!res.ok) return null;
  let users = null;
  try {
    users = await res.json();
  } catch {
    users = null;
  }
  if (!Array.isArray(users)) return null;
  const candidate = users[0];
  if (candidate && typeof candidate.email === "string") return candidate.email;
  return null;
}
// Resolve usernames to emails (best effort). When --resolveEmails is not
// "rest", every username maps to null without touching the network. Otherwise
// unique usernames are drained by a small pool of concurrent workers bounded
// by the --concurrency flag (clamped to at most 16). Lookup failures map to
// null rather than aborting the run.
async function mapUsernamesToEmails(usernames) {
  const emailByUser = new Map();
  if (resolveEmails !== "rest") {
    for (const name of usernames) emailByUser.set(name, null);
    return emailByUser;
  }
  const pending = [...new Set(usernames)];
  const workerCount = Math.max(1, Math.min(16, concurrency));
  const drain = async () => {
    while (pending.length > 0) {
      const name = pending.pop();
      let email = null;
      try {
        email = (await restLookupEmail(name)) || null;
      } catch {
        email = null;
      }
      emailByUser.set(name, email);
    }
  };
  const workers = [];
  for (let i = 0; i < workerCount; i++) workers.push(drain());
  await Promise.all(workers);
  return emailByUser;
}
| // ---------- Probe for `all` & suggestionSize ---------- | |
// Probe whether `group.aiUsageData.all` exists AND exposes `suggestionSize`,
// by issuing a single 1-item query and inspecting any schema errors:
//   - "Field 'all'" / 'Cannot query field "all"'  -> no `all` at all
//   - an error mentioning "suggestionSize"        -> `all` exists, no sizes
// On other errors (auth failures, unknown messages) the function previously
// fell through and reported hasAll=false with hasSize=true semantics reversed;
// it now still falls through to the data check, which is conservative: hasAll
// is true only when the response actually contains the connection.
async function probeAllSupportsSuggestionSize() {
  const q = `
    query($fullPath: ID!) {
      group(fullPath: $fullPath) {
        aiUsageData {
          all(first: 1) {
            nodes {
              __typename
              event
              timestamp
              user { id username }
              suggestionSize
            }
          }
        }
      }
    }
  `;
  const res = await fetch(gqlEndpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` },
    body: JSON.stringify({ query: q, variables: { fullPath: groupPath } })
  });
  const json = await res.json().catch(() => ({}));
  if (json?.errors) {
    const msg = JSON.stringify(json.errors);
    if (msg.includes("Field 'all'") || msg.includes('Cannot query field "all"')) {
      return { hasAll: false, hasSize: false };
    }
    if (msg.includes("suggestionSize")) {
      return { hasAll: true, hasSize: false };
    }
  }
  const hasAll = json?.data?.group?.aiUsageData?.all !== undefined;
  return { hasAll, hasSize: hasAll };
}
// Cheaper follow-up probe: does `group.aiUsageData.all` exist at all, when
// queried WITHOUT `suggestionSize`? Returns false on any GraphQL error or
// unparseable response; true only when the connection appears in the data.
async function probeAllExists() {
  const q = `
    query($fullPath: ID!) {
      group(fullPath: $fullPath) {
        aiUsageData {
          all(first: 1) { nodes { __typename event timestamp user { username } } }
        }
      }
    }
  `;
  const payload = { query: q, variables: { fullPath: groupPath } };
  const res = await fetch(gqlEndpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` },
    body: JSON.stringify(payload),
  });
  let json;
  try {
    json = await res.json();
  } catch {
    json = {};
  }
  if (json?.errors) return false;
  return json?.data?.group?.aiUsageData?.all !== undefined;
}
| // ---------- Introspection cache (typename -> fields[]) ---------- | |
// Cache of GraphQL typename -> field-name list, shared for the whole run so
// each typename is introspected at most once.
const typeFieldsCache = new Map();

// Introspect the field names of a GraphQL object type via `__type`.
// Failures (network errors, unknown types, malformed responses) are cached as
// an empty list so they are never retried within the run.
async function getTypeFields(typeName) {
  if (!typeName) return [];
  const cached = typeFieldsCache.get(typeName);
  if (cached !== undefined) return cached;
  const q = `
    query($t:String!) {
      __type(name:$t) {
        name
        fields { name }
      }
    }
  `;
  let fields = [];
  try {
    const r = await fetch(gqlEndpoint, {
      method: "POST",
      headers: { "Content-Type":"application/json", "Authorization": `Bearer ${token}` },
      body: JSON.stringify({ query: q, variables: { t: typeName } }),
    });
    let payload;
    try {
      payload = await r.json();
    } catch {
      payload = {};
    }
    fields = payload?.data?.__type?.fields?.map((f) => f.name) || [];
  } catch {
    fields = [];
  }
  typeFieldsCache.set(typeName, fields);
  return fields;
}
| // ---------- Pagination ---------- | |
// Async generator yielding pages (arrays of nodes) from `aiUsageData.all`,
// 100 events per request. `selectSize` controls whether `suggestionSize` is
// requested (only set when the probe confirmed the schema exposes it).
// Throws on HTTP or GraphQL errors. Fix: if the server reports hasNextPage
// with a missing endCursor, the original reset `after` to null and refetched
// page 1 forever; pagination now terminates instead.
async function* paginateAll(selectSize) {
  let after = null;
  const q = `
    query($fullPath: ID!, $after: String) {
      group(fullPath: $fullPath) {
        aiUsageData {
          all(first: 100, after: $after) {
            pageInfo { hasNextPage endCursor }
            nodes {
              __typename
              event
              timestamp
              user { id username }
              ${selectSize ? "suggestionSize" : ""}
            }
          }
        }
      }
    }
  `.trim();
  for (;;) {
    const res = await fetch(gqlEndpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` },
      body: JSON.stringify({ query: q, variables: { fullPath: groupPath, after } })
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`GraphQL HTTP ${res.status}: ${text || res.statusText}`);
    }
    const body = await res.json();
    if (body.errors) throw new Error(`GraphQL errors: ${JSON.stringify(body.errors)}`);
    const conn = body?.data?.group?.aiUsageData?.all;
    if (!conn) break;
    yield conn.nodes || [];
    // Guard: without a cursor we cannot advance; stop rather than loop on page 1.
    if (!conn.pageInfo?.hasNextPage || !conn.pageInfo.endCursor) break;
    after = conn.pageInfo.endCursor;
  }
}
// Async generator yielding pages from `aiUsageData.codeSuggestionEvents`
// (raw Postgres-backed Code Suggestions events), 100 per request; this
// connection always exposes `suggestionSize`. Throws on HTTP or GraphQL
// errors. Fix: as in paginateAll, a page reporting hasNextPage with no
// endCursor previously reset `after` to null and refetched page 1 forever;
// pagination now terminates instead.
async function* paginateCodeSuggestions() {
  let after = null;
  const q = `
    query($fullPath: ID!, $after: String) {
      group(fullPath: $fullPath) {
        aiUsageData {
          codeSuggestionEvents(first: 100, after: $after) {
            pageInfo { hasNextPage endCursor }
            nodes {
              event
              timestamp
              suggestionSize
              user { id username }
            }
          }
        }
      }
    }
  `;
  for (;;) {
    const res = await fetch(gqlEndpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` },
      body: JSON.stringify({ query: q, variables: { fullPath: groupPath, after } })
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`GraphQL HTTP ${res.status}: ${text || res.statusText}`);
    }
    const body = await res.json();
    if (body.errors) throw new Error(`GraphQL errors: ${JSON.stringify(body.errors)}`);
    const conn = body?.data?.group?.aiUsageData?.codeSuggestionEvents;
    if (!conn) break;
    yield conn.nodes || [];
    // Guard: without a cursor we cannot advance; stop rather than loop on page 1.
    if (!conn.pageInfo?.hasNextPage || !conn.pageInfo.endCursor) break;
    after = conn.pageInfo.endCursor;
  }
}
| // ---------- Main ---------- | |
// ---------- Main ----------
// Orchestration: probe the schema, stream events, roll them up per user/day,
// enrich with emails, and write the CSV.
// Fixes vs. original: the header was written with "\n" while rows used
// os.EOL (mixed line endings on Windows), and each row was a separate
// appendFileSync call; rows are now accumulated and written once with "\n"
// throughout (matching the documented `COPY ... FORMAT csv` loader).
async function main() {
  // Probe which endpoint is available; any probe failure falls back to
  // codeSuggestionEvents (which always exposes suggestionSize).
  let useAll = false;
  let allHasSize = false;
  try {
    const probe = await probeAllSupportsSuggestionSize();
    useAll = probe.hasAll;
    allHasSize = probe.hasSize;
    if (!useAll) {
      useAll = await probeAllExists();
      allHasSize = false;
    }
  } catch {
    useAll = false;
    allHasSize = false;
  }
  console.log(
    useAll
      ? `[aiUsageData.all] supported (suggestionSize=${allHasSize ? "yes" : "no"})`
      : `[aiUsageData.all] not available; falling back to codeSuggestionEvents`
  );
  const usingSize = useAll ? allHasSize : true; // codeSuggestionEvents always has size
  // rollups[date][username] -> aggregate (see initAgg). Null-prototype objects
  // avoid prototype-key collisions from attacker-controlled usernames.
  const rollups = Object.create(null);
  const seenUsers = new Set();
  const unknownEvents = new Set();
  const iter = useAll ? paginateAll(allHasSize) : paginateCodeSuggestions();
  for await (const nodes of iter) {
    for (const n of nodes) {
      const ts = n?.timestamp;
      const user = n?.user?.username;
      const ev = n?.event;
      if (!ts || !user || !ev) continue;
      const date = new Date(ts).toISOString().slice(0, 10); // YYYY-MM-DD, UTC bucketing
      if (!rollups[date]) rollups[date] = Object.create(null);
      if (!rollups[date][user]) rollups[date][user] = initAgg(usingSize);
      const a = rollups[date][user];
      a.userId = a.userId || n?.user?.id || null;
      if (ev === EV_ACCEPT || ev === EV_REJECT) {
        a[ev === EV_ACCEPT ? "acceptCount" : "rejectCount"] += 1;
        if (usingSize) {
          const s = toNum(n?.suggestionSize);
          if (s > 0) {
            if (ev === EV_ACCEPT) a.acceptSize += s;
            else a.rejectSize += s;
          }
        }
      } else if (ev === EV_SHOWN) {
        // SHOWN is deliberately excluded from metrics and the is_active rule.
      } else if (useAll) {
        // Non-Code-Suggestions event from `all`: summarize into extras with a
        // per-day count plus the typename's introspected field list.
        const key = extrasKeyFromEvent(ev);
        const slot = a.extras.events;
        if (!slot[key]) slot[key] = { count: 0, typename: n?.__typename || null, fields: [] };
        slot[key].count += 1;
        if (!slot[key].typename && n?.__typename) slot[key].typename = n.__typename;
        // Introspect fields once per typename; getTypeFields caches across the run.
        const t = n?.__typename;
        if (t && slot[key].fields.length === 0) {
          try {
            slot[key].fields = await getTypeFields(t);
          } catch { /* best-effort only */ }
        }
        if (logNewEvents) unknownEvents.add(`${ev}::${n?.__typename || "UnknownType"}`);
      }
      seenUsers.add(user);
    }
  }
  // Emails (best-effort; blanks when permissions/privacy hide them).
  const usernameToEmail = await mapUsernamesToEmails([...seenUsers]);
  // Build the CSV in memory, then write once.
  const lines = [[
    "date", "email", "username", "is_active",
    "total_accepts_size", "total_rejects_size",
    "total_accepts_count", "total_rejects_count",
    "extras", "source"
  ].join(",")];
  const source = useAll ? "all" : "codeSuggestionEvents";
  for (const date of Object.keys(rollups).sort()) {
    const byUser = rollups[date];
    for (const username of Object.keys(byUser).sort()) {
      const a = byUser[username];
      const email = usernameToEmail.get(username) || "";
      // Engagement rule: active iff any accepted or rejected suggestion.
      const isActive = (a.acceptCount + a.rejectCount) > 0;
      const accSize = usingSize ? a.acceptSize : "";
      const rejSize = usingSize ? a.rejectSize : "";
      lines.push([
        escCSV(date),
        escCSV(email),
        escCSV(username),
        escCSV(isActive),
        escCSV(accSize),
        escCSV(rejSize),
        escCSV(a.acceptCount),
        escCSV(a.rejectCount),
        escCSV(JSON.stringify(a.extras || {})),
        escCSV(source)
      ].join(","));
    }
  }
  fs.writeFileSync(outFile, lines.join("\n") + "\n", "utf8");
  const written = lines.length - 1;
  console.log(`Wrote ${written} rows to ${outFile}`);
  if (resolveEmails === "rest") {
    const missing = [...usernameToEmail.entries()].filter(([, e]) => !e).length;
    if (missing > 0) {
      console.warn(`Note: ${missing} user(s) returned no email (permissions/privacy). Rows have blank email.`);
    }
  }
  if (logNewEvents && unknownEvents.size) {
    console.log(`[new events observed via 'all'] ${[...unknownEvents].join(", ")}`);
  }
}
// Entry point: run the rollup and exit non-zero on any unhandled failure.
main().catch((error) => {
  const details = error.stack || String(error);
  console.error(details);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment