Last active
March 24, 2026 11:59
-
-
Save Sdy603/d3115374f28542d05fd0d55cd69c749e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // rollup-duo-usage-autodetect.js | |
| // | |
| // Purpose | |
| // ------- | |
| // Pull per-user, per-day GitLab Duo usage **without ClickHouse**. | |
| // 1) Probe GraphQL for `group.aiUsageData.all`; if present, use it. | |
| // 2) Else fall back to `group.aiUsageData.codeSuggestionEvents`. | |
| // Counts are **always** computed; sizes are included when the schema exposes `suggestionSize`. | |
| // Any non–Code-Suggestions event types from `all` are summarized into `extras` with: | |
| // - count (per user/day) | |
| // - __typename | |
| // - fields[] (introspected once per typename, cached per run) | |
| // | |
| // Output CSV columns (matches table): | |
| // date,email,username,is_active, | |
| // total_accepts_size,total_rejects_size, | |
| // total_accepts_count,total_rejects_count, | |
| // extras,source | |
| // | |
| // Engagement rule: | |
| // is_active = (total_accepts_count + total_rejects_count) > 0 | |
| // | |
| // Usage | |
| // ----- | |
| // node rollup-duo-usage-autodetect.js \ | |
| // --baseUrl=https://gitlab.example.com \ | |
| // --groupPath=my-group/subgroup \ | |
| // --token=XXXXXXXX \ | |
| // --out=gitlab_duo_daily_usages.csv \ | |
| // [--resolveEmails=rest|none] # default: rest (on SaaS use 'none') \ | |
| // [--concurrency=6] # email lookup workers \ | |
| // [--logNewEvents=true|false] # default: false; prints event::typename seen | |
| // [--startDate=2026-01-01] # optional; defaults to last 90 days | |
| // [--endDate=2026-03-24] # optional; defaults to today (UTC) | |
| // | |
| // Prerequisites | |
| // ------------- | |
| // - Node 18+ (global fetch). | |
| // - Personal Access Token with `read_api`. | |
| // - For email enrichment on self-managed, use an admin/read_user token | |
| // (GitLab.com usually won't return other users' emails). | |
| // | |
| // What endpoints are used? | |
| // ------------------------ | |
| // - `AiUsageData.all` (when present): unified AI event stream registered in the enum. | |
| // - `AiUsageData.codeSuggestionEvents`: raw Code Suggestions (shown/accepted/rejected) from Postgres. | |
| // - We do **not** call ClickHouse-only endpoints (e.g., `AiUserMetrics`, `AiMetrics`). | |
| // | |
| // Notes | |
| // ----- | |
| // - UTC day bucketing (YYYY-MM-DD). | |
| // - SHOWN is ignored for metrics/active. | |
| // - If `all` doesn’t expose `suggestionSize`, sizes are left blank; counts still populate. | |
| // - GitLab's `all` and `codeSuggestionEvents` endpoints support a maximum date range of one month per query, | |
| // so this script automatically chunks requests by month. :contentReference[oaicite:1]{index=1} | |
| // - GitLab retains 3 months of non-ClickHouse AI usage data, so the default date range is the last 90 days. :contentReference[oaicite:2]{index=2} | |
| // | |
| // Load | |
| // ---- | |
| // COPY custom.gitlab_duo_daily_usages | |
| // (date, email, username, is_active, | |
| // total_accepts_size, total_rejects_size, | |
| // total_accepts_count, total_rejects_count, | |
| // extras, source) | |
| // FROM '/path/to/gitlab_duo_daily_usages.csv' | |
| // WITH (FORMAT csv, HEADER true, NULL ''); | |
| // | |
| // --------------------------------------------------------------------------- | |
| const fs = require("fs"); | |
| const os = require("os"); | |
| function getArg(name, def = undefined) { | |
| const p = `--${name}=`; | |
| const raw = process.argv.find(a => a.startsWith(p)); | |
| return raw ? raw.slice(p.length) : def; | |
| } | |
| const baseUrl = getArg("baseUrl"); | |
| const groupPath = getArg("groupPath"); | |
| const token = getArg("token"); | |
| const outFile = getArg("out", "gitlab_duo_daily_usages.csv"); | |
| const resolveEmails = (getArg("resolveEmails", "rest") || "rest").toLowerCase(); // rest|none | |
| const concurrency = Math.max(1, Math.min(16, Number(getArg("concurrency", "6")))); | |
| const logNewEvents = String(getArg("logNewEvents", "false")).toLowerCase() === "true"; | |
| const startDateArg = getArg("startDate"); // YYYY-MM-DD | |
| const endDateArg = getArg("endDate"); // YYYY-MM-DD | |
| if (!baseUrl || !groupPath || !token) { | |
| console.error("Missing required args.\nExample:"); | |
| console.error("node rollup-duo-usage-autodetect.js --baseUrl=https://gitlab.example.com --groupPath=my-group --token=XXXX"); | |
| process.exit(1); | |
| } | |
| const base = baseUrl.replace(/\/+$/,""); | |
| const gqlEndpoint = `${base}/api/graphql`; | |
| const restRoot = `${base}/api/v4`; | |
| // ---------- Helpers ---------- | |
| function toNum(v) { const n = Number(v); return Number.isFinite(n) ? n : 0; } | |
| function escCSV(v) { | |
| if (v === null || v === undefined) return ""; | |
| const s = String(v); | |
| return /[",\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s; | |
| } | |
| const EV_ACCEPT = "CODE_SUGGESTION_ACCEPTED_IN_IDE"; | |
| const EV_REJECT = "CODE_SUGGESTION_REJECTED_IN_IDE"; | |
| const EV_SHOWN = "CODE_SUGGESTION_SHOWN_IN_IDE"; | |
| function initAgg(usingSize) { | |
| return { | |
| acceptSize: usingSize ? 0 : null, | |
| rejectSize: usingSize ? 0 : null, | |
| acceptCount: 0, | |
| rejectCount: 0, | |
| extras: { events: {} }, // key -> { count, typename, fields[] } | |
| userId: null, | |
| }; | |
| } | |
| function extrasKeyFromEvent(ev) { | |
| return (ev || "") | |
| .toLowerCase() | |
| .replace(/[^a-z0-9]+/g, ".") | |
| .replace(/^\.+|\.+$/g, ""); | |
| } | |
| function toIsoDate(d) { | |
| return new Date(d).toISOString().slice(0, 10); | |
| } | |
| function startOfUtcDay(dateLike) { | |
| const d = new Date(dateLike); | |
| return new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate())); | |
| } | |
| function endOfUtcDay(dateLike) { | |
| const d = new Date(dateLike); | |
| return new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate(), 23, 59, 59, 999)); | |
| } | |
| // Generates inclusive monthly ranges, each no larger than one calendar month. | |
| // Example: | |
| // 2026-01-15 .. 2026-03-24 | |
| // becomes | |
| // 2026-01-15 .. 2026-01-31 | |
| // 2026-02-01 .. 2026-02-28 | |
| // 2026-03-01 .. 2026-03-24 | |
| function generateMonthlyRanges(startDate, endDate) { | |
| const start = startOfUtcDay(startDate); | |
| const end = endOfUtcDay(endDate); | |
| if (start > end) { | |
| throw new Error(`Invalid date range: startDate ${toIsoDate(start)} is after endDate ${toIsoDate(end)}`); | |
| } | |
| const ranges = []; | |
| let cursor = new Date(start); | |
| while (cursor <= end) { | |
| const rangeStart = new Date(cursor); | |
| const monthEnd = new Date(Date.UTC( | |
| rangeStart.getUTCFullYear(), | |
| rangeStart.getUTCMonth() + 1, | |
| 0, | |
| 23, 59, 59, 999 | |
| )); | |
| const rangeEnd = monthEnd < end ? monthEnd : new Date(end); | |
| ranges.push({ | |
| start: toIsoDate(rangeStart), | |
| end: toIsoDate(rangeEnd), | |
| }); | |
| cursor = new Date(Date.UTC( | |
| rangeStart.getUTCFullYear(), | |
| rangeStart.getUTCMonth() + 1, | |
| 1 | |
| )); | |
| } | |
| return ranges; | |
| } | |
| // ---------- Simple retry for REST ---------- | |
| async function backoffFetch(url, opts, attempts = 5) { | |
| let delay = 500; | |
| for (let i = 1; i <= attempts; i++) { | |
| const r = await fetch(url, opts); | |
| if (r.ok) return r; | |
| if (r.status >= 500 || r.status === 429) { | |
| await new Promise(res => setTimeout(res, delay)); | |
| delay = Math.min(5000, delay * 2); | |
| continue; | |
| } | |
| return r; | |
| } | |
| return fetch(url, opts); | |
| } | |
| // ---------- Email enrichment ---------- | |
| async function restLookupEmail(username) { | |
| const url = `${restRoot}/users?username=${encodeURIComponent(username)}`; | |
| const res = await backoffFetch(url, { | |
| headers: { | |
| "Accept": "application/json", | |
| "Authorization": `Bearer ${token}`, | |
| "Private-Token": token | |
| } | |
| }); | |
| if (!res.ok) return null; | |
| const arr = await res.json().catch(() => null); | |
| const u = Array.isArray(arr) ? arr[0] : null; | |
| return (u && typeof u.email === "string") ? u.email : null; | |
| } | |
| async function mapUsernamesToEmails(usernames) { | |
| const map = new Map(); | |
| if (resolveEmails !== "rest") { | |
| for (const u of usernames) map.set(u, null); | |
| return map; | |
| } | |
| const uniq = [...new Set(usernames)]; | |
| const queue = uniq.slice(); | |
| const workers = Math.max(1, Math.min(16, concurrency)); | |
| async function worker() { | |
| while (queue.length) { | |
| const u = queue.pop(); | |
| try { | |
| const email = await restLookupEmail(u); | |
| map.set(u, email || null); | |
| } catch { | |
| map.set(u, null); | |
| } | |
| } | |
| } | |
| await Promise.all(Array.from({ length: workers }, worker)); | |
| return map; | |
| } | |
| // ---------- Probe for `all` & suggestionSize ---------- | |
| async function probeAllSupportsSuggestionSize() { | |
| const q = ` | |
| query($fullPath: ID!, $startDate: Date, $endDate: Date) { | |
| group(fullPath: $fullPath) { | |
| aiUsageData { | |
| all(first: 1, startDate: $startDate, endDate: $endDate) { | |
| nodes { | |
| __typename | |
| event | |
| timestamp | |
| user { id username } | |
| suggestionSize | |
| } | |
| } | |
| } | |
| } | |
| } | |
| `; | |
| const res = await fetch(gqlEndpoint, { | |
| method: "POST", | |
| headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` }, | |
| body: JSON.stringify({ | |
| query: q, | |
| variables: { | |
| fullPath: groupPath, | |
| startDate: startDateArg || null, | |
| endDate: endDateArg || null | |
| } | |
| }) | |
| }); | |
| const json = await res.json().catch(() => ({})); | |
| if (json?.errors) { | |
| const msg = JSON.stringify(json.errors); | |
| if (msg.includes("Field 'all'") || msg.includes('Cannot query field "all"')) { | |
| return { hasAll: false, hasSize: false }; | |
| } | |
| if (msg.includes("suggestionSize")) { | |
| return { hasAll: true, hasSize: false }; | |
| } | |
| } | |
| const hasAll = json?.data?.group?.aiUsageData?.all !== undefined; | |
| return { hasAll, hasSize: hasAll }; | |
| } | |
| async function probeAllExists() { | |
| const q = ` | |
| query($fullPath: ID!, $startDate: Date, $endDate: Date) { | |
| group(fullPath: $fullPath) { | |
| aiUsageData { | |
| all(first: 1, startDate: $startDate, endDate: $endDate) { | |
| nodes { | |
| __typename | |
| event | |
| timestamp | |
| user { username } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| `; | |
| const res = await fetch(gqlEndpoint, { | |
| method: "POST", | |
| headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` }, | |
| body: JSON.stringify({ | |
| query: q, | |
| variables: { | |
| fullPath: groupPath, | |
| startDate: startDateArg || null, | |
| endDate: endDateArg || null | |
| } | |
| }) | |
| }); | |
| const json = await res.json().catch(() => ({})); | |
| if (json?.errors) return false; | |
| return json?.data?.group?.aiUsageData?.all !== undefined; | |
| } | |
| // ---------- Introspection cache (typename -> fields[]) ---------- | |
| const typeFieldsCache = new Map(); | |
| async function getTypeFields(typeName) { | |
| if (!typeName) return []; | |
| if (typeFieldsCache.has(typeName)) return typeFieldsCache.get(typeName); | |
| const q = ` | |
| query($t:String!) { | |
| __type(name:$t) { | |
| name | |
| fields { name } | |
| } | |
| } | |
| `; | |
| try { | |
| const r = await fetch(gqlEndpoint, { | |
| method: "POST", | |
| headers: { "Content-Type":"application/json", "Authorization": `Bearer ${token}` }, | |
| body: JSON.stringify({ query: q, variables: { t: typeName } }) | |
| }); | |
| const j = await r.json().catch(() => ({})); | |
| const fields = j?.data?.__type?.fields?.map(f => f.name) || []; | |
| typeFieldsCache.set(typeName, fields); | |
| return fields; | |
| } catch { | |
| typeFieldsCache.set(typeName, []); | |
| return []; | |
| } | |
| } | |
| // ---------- Pagination ---------- | |
| async function* paginateAll(selectSize, startDate, endDate) { | |
| let after = null; | |
| const q = ` | |
| query($fullPath: ID!, $after: String, $startDate: Date!, $endDate: Date!) { | |
| group(fullPath: $fullPath) { | |
| aiUsageData { | |
| all(first: 100, after: $after, startDate: $startDate, endDate: $endDate) { | |
| pageInfo { hasNextPage endCursor } | |
| nodes { | |
| __typename | |
| event | |
| timestamp | |
| user { id username } | |
| ${selectSize ? "suggestionSize" : ""} | |
| } | |
| } | |
| } | |
| } | |
| } | |
| `.trim(); | |
| for (;;) { | |
| const res = await fetch(gqlEndpoint, { | |
| method: "POST", | |
| headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` }, | |
| body: JSON.stringify({ | |
| query: q, | |
| variables: { fullPath: groupPath, after, startDate, endDate } | |
| }) | |
| }); | |
| if (!res.ok) { | |
| const text = await res.text().catch(() => ""); | |
| throw new Error(`GraphQL HTTP ${res.status}: ${text || res.statusText}`); | |
| } | |
| const body = await res.json(); | |
| if (body.errors) throw new Error(`GraphQL errors: ${JSON.stringify(body.errors)}`); | |
| const conn = body?.data?.group?.aiUsageData?.all; | |
| if (!conn) break; | |
| yield conn.nodes || []; | |
| if (!conn.pageInfo?.hasNextPage) break; | |
| after = conn.pageInfo.endCursor || null; | |
| } | |
| } | |
| async function* paginateCodeSuggestions(startDate, endDate) { | |
| let after = null; | |
| const q = ` | |
| query($fullPath: ID!, $after: String, $startDate: Date!, $endDate: Date!) { | |
| group(fullPath: $fullPath) { | |
| aiUsageData { | |
| codeSuggestionEvents(first: 100, after: $after, startDate: $startDate, endDate: $endDate) { | |
| pageInfo { hasNextPage endCursor } | |
| nodes { | |
| event | |
| timestamp | |
| suggestionSize | |
| user { id username } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| `; | |
| for (;;) { | |
| const res = await fetch(gqlEndpoint, { | |
| method: "POST", | |
| headers: { "Content-Type": "application/json", "Authorization": `Bearer ${token}` }, | |
| body: JSON.stringify({ | |
| query: q, | |
| variables: { fullPath: groupPath, after, startDate, endDate } | |
| }) | |
| }); | |
| if (!res.ok) { | |
| const text = await res.text().catch(() => ""); | |
| throw new Error(`GraphQL HTTP ${res.status}: ${text || res.statusText}`); | |
| } | |
| const body = await res.json(); | |
| if (body.errors) throw new Error(`GraphQL errors: ${JSON.stringify(body.errors)}`); | |
| const conn = body?.data?.group?.aiUsageData?.codeSuggestionEvents; | |
| if (!conn) break; | |
| yield conn.nodes || []; | |
| if (!conn.pageInfo?.hasNextPage) break; | |
| after = conn.pageInfo.endCursor || null; | |
| } | |
| } | |
| // ---------- Main ---------- | |
| async function main() { | |
| const today = new Date(); | |
| const defaultEnd = toIsoDate(today); | |
| const defaultStartDateObj = new Date(today); | |
| defaultStartDateObj.setUTCDate(defaultStartDateObj.getUTCDate() - 90); | |
| const defaultStart = toIsoDate(defaultStartDateObj); | |
| const startDate = startDateArg || defaultStart; | |
| const endDate = endDateArg || defaultEnd; | |
| const dateRanges = generateMonthlyRanges(startDate, endDate); | |
| // Probe | |
| let useAll = false; | |
| let allHasSize = false; | |
| try { | |
| const probe = await probeAllSupportsSuggestionSize(); | |
| useAll = probe.hasAll; | |
| allHasSize = probe.hasSize; | |
| if (!useAll) { | |
| useAll = await probeAllExists(); | |
| allHasSize = false; | |
| } | |
| } catch { | |
| useAll = false; | |
| allHasSize = false; | |
| } | |
| console.log( | |
| useAll | |
| ? `[aiUsageData.all] supported (suggestionSize=${allHasSize ? "yes" : "no"})` | |
| : `[aiUsageData.all] not available; falling back to codeSuggestionEvents` | |
| ); | |
| console.log(`Date range: ${startDate} -> ${endDate}`); | |
| console.log(`Monthly windows: ${dateRanges.length}`); | |
| const usingSize = useAll ? allHasSize : true; | |
| const rollups = Object.create(null); | |
| const seenUsers = new Set(); | |
| const unknownEvents = new Set(); | |
| for (const range of dateRanges) { | |
| console.log(`Processing range ${range.start} -> ${range.end}`); | |
| const iter = useAll | |
| ? paginateAll(allHasSize, range.start, range.end) | |
| : paginateCodeSuggestions(range.start, range.end); | |
| for await (const nodes of iter) { | |
| for (const n of nodes) { | |
| const ts = n?.timestamp; | |
| const user = n?.user?.username; | |
| const ev = n?.event; | |
| if (!ts || !user || !ev) continue; | |
| const date = new Date(ts).toISOString().slice(0,10); | |
| if (!rollups[date]) rollups[date] = Object.create(null); | |
| if (!rollups[date][user]) rollups[date][user] = initAgg(usingSize); | |
| const a = rollups[date][user]; | |
| a.userId = a.userId || n?.user?.id || null; | |
| if (ev === EV_ACCEPT || ev === EV_REJECT) { | |
| a[ev === EV_ACCEPT ? "acceptCount" : "rejectCount"] += 1; | |
| if (usingSize) { | |
| const s = toNum(n?.suggestionSize); | |
| if (s > 0) { | |
| if (ev === EV_ACCEPT) a.acceptSize += s; | |
| else a.rejectSize += s; | |
| } | |
| } | |
| } else if (ev === EV_SHOWN) { | |
| // ignore shown | |
| } else if (useAll) { | |
| const key = extrasKeyFromEvent(ev); | |
| const slot = a.extras.events; | |
| if (!slot[key]) slot[key] = { count: 0, typename: n?.__typename || null, fields: [] }; | |
| slot[key].count += 1; | |
| if (!slot[key].typename && n?.__typename) slot[key].typename = n.__typename; | |
| const t = n?.__typename; | |
| if (t && slot[key].fields.length === 0) { | |
| try { | |
| slot[key].fields = await getTypeFields(t); | |
| } catch { | |
| // ignore | |
| } | |
| } | |
| if (logNewEvents) unknownEvents.add(`${ev}::${n?.__typename || "UnknownType"}`); | |
| } | |
| seenUsers.add(user); | |
| } | |
| } | |
| } | |
| // Emails (best-effort) | |
| const usernameToEmail = await mapUsernamesToEmails([...seenUsers]); | |
| // Write CSV | |
| const header = [ | |
| "date","email","username","is_active", | |
| "total_accepts_size","total_rejects_size", | |
| "total_accepts_count","total_rejects_count", | |
| "extras","source" | |
| ].join(",") + "\n"; | |
| fs.writeFileSync(outFile, header, "utf8"); | |
| let written = 0; | |
| const source = useAll ? "all" : "codeSuggestionEvents"; | |
| for (const date of Object.keys(rollups).sort()) { | |
| const byUser = rollups[date]; | |
| for (const username of Object.keys(byUser).sort()) { | |
| const a = byUser[username]; | |
| const email = usernameToEmail.get(username) || ""; | |
| const isActive = (a.acceptCount + a.rejectCount) > 0; | |
| const accSize = usingSize ? a.acceptSize : ""; | |
| const rejSize = usingSize ? a.rejectSize : ""; | |
| const row = [ | |
| escCSV(date), | |
| escCSV(email), | |
| escCSV(username), | |
| escCSV(isActive), | |
| escCSV(accSize), | |
| escCSV(rejSize), | |
| escCSV(a.acceptCount), | |
| escCSV(a.rejectCount), | |
| escCSV(JSON.stringify(a.extras || {})), | |
| escCSV(source) | |
| ].join(","); | |
| fs.appendFileSync(outFile, row + os.EOL, "utf8"); | |
| written++; | |
| } | |
| } | |
| console.log(`Wrote ${written} rows to ${outFile}`); | |
| if (resolveEmails === "rest") { | |
| const missing = [...usernameToEmail.entries()].filter(([, e]) => !e).length; | |
| if (missing > 0) { | |
| console.warn(`Note: ${missing} user(s) returned no email (permissions/privacy). Rows have blank email.`); | |
| } | |
| } | |
| if (logNewEvents && unknownEvents.size) { | |
| console.log(`[new events observed via 'all'] ${[...unknownEvents].join(", ")}`); | |
| } | |
| } | |
| main().catch(err => { | |
| console.error(err.stack || String(err)); | |
| process.exit(1); | |
| }); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment