Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Created May 19, 2025 22:45
Show Gist options
  • Save Sdy603/e5c8710bbe9bbd1a0a8db6f1f7d8660d to your computer and use it in GitHub Desktop.
Save Sdy603/e5c8710bbe9bbd1a0a8db6f1f7d8660d to your computer and use it in GitHub Desktop.
import fs from 'fs';
import path from 'path';
import dotenv from 'dotenv';
import { Client } from 'pg';
import csvParser from 'csv-parser';
dotenv.config();
const validateEnvVars = () => {
const requiredVars = ["DX_DB_CONNECTION"];
requiredVars.forEach((key) => {
if (!process.env[key]) {
console.error(`Missing required environment variable: ${key}`);
process.exit(1);
}
});
};
validateEnvVars();
const normalizePostgresURL = (url) =>
url.startsWith("postgres://") ? url.replace("postgres://", "postgresql://") : url;
const DX_DB_CONNECTION = normalizePostgresURL(process.env.DX_DB_CONNECTION);
const CSV_FILE_PATH = process.env.CSV_FILE_PATH || './cursor.csv';
const DRY_RUN = process.env.DRY_RUN === 'true';
const CHUNK_SIZE = parseInt(process.env.CHUNK_SIZE || '100', 10);
const REQUIRED_COLUMNS = [
'date',
'source_id',
'email',
'is_active',
'chat_suggested_lines_added',
'chat_suggested_lines_deleted',
'chat_accepted_lines_added',
'chat_accepted_lines_deleted',
'chat_total_applies',
'chat_total_accepts',
'chat_total_rejects',
'chat_tabs_shown',
'tabs_accepted',
'edit_requests',
'ask_requests',
'agent_requests',
'cmd_k_usages',
'subscription_included_reqs',
'api_key_reqs',
'usage_based_reqs',
'bugbot_usages',
'most_used_model',
'most_used_apply_extension',
'most_used_tab_extension',
'client_version'
];
function renameHeaders(row, headerMap) {
const newRow = {};
for (const key in row) {
const newKey = headerMap[key] || key;
newRow[newKey] = row[key];
}
return newRow;
}
const HEADER_MAP = {
"Date": "date",
"User ID": "source_id",
"Email": "email",
"Is Active": "is_active",
"Chat Suggested Lines Added": "chat_suggested_lines_added",
"Chat Suggested Lines Deleted": "chat_suggested_lines_deleted",
"Chat Accepted Lines Added": "chat_accepted_lines_added",
"Chat Accepted Lines Deleted": "chat_accepted_lines_deleted",
"Chat Total Applies": "chat_total_applies",
"Chat Total Accepts": "chat_total_accepts",
"Chat Total Rejects": "chat_total_rejects",
"Chat Tabs Shown": "chat_tabs_shown",
"Tabs Accepted": "tabs_accepted",
"Edit Requests": "edit_requests",
"Ask Requests": "ask_requests",
"Agent Requests": "agent_requests",
"Cmd+K Usages": "cmd_k_usages",
"Subscription Included Reqs": "subscription_included_reqs",
"API Key Reqs": "api_key_reqs",
"Usage Based Reqs": "usage_based_reqs",
"Bugbot Usages": "bugbot_usages",
"Most Used Model": "most_used_model",
"Most Used Apply Extension": "most_used_apply_extension",
"Most Used Tab Extension": "most_used_tab_extension",
"Client Version": "client_version",
};
const client = new Client({
connectionString: DX_DB_CONNECTION,
ssl: { rejectUnauthorized: false }
});
const yargs = require('yargs/yargs');
const { hideBin } = require('yargs/helpers');
const argv = yargs(hideBin(process.argv))
.option('file', {
alias: 'f',
type: 'string',
description: 'Path to the CSV file',
default: './cursor.csv'
})
.option('dry-run', {
alias: 'd',
type: 'boolean',
description: 'Run in dry mode without inserting into the DB',
default: false
})
.option('chunk-size', {
alias: 'c',
type: 'number',
description: 'Number of rows to insert per chunk',
default: 100
})
.help()
.argv;
const CSV_FILE_PATH = argv.file;
const DRY_RUN = argv.dryRun;
const CHUNK_SIZE = argv.chunkSize;
const REQUIRED_COLUMNS = [
'date', 'source_id', 'email', 'is_active',
'chat_suggested_lines_added', 'chat_suggested_lines_deleted',
'chat_accepted_lines_added', 'chat_accepted_lines_deleted',
'chat_total_applies', 'chat_total_accepts', 'chat_total_rejects',
'chat_tabs_shown', 'tabs_accepted', 'edit_requests', 'ask_requests',
'agent_requests', 'cmd_k_usages', 'subscription_included_reqs',
'api_key_reqs', 'usage_based_reqs', 'bugbot_usages',
'most_used_model', 'most_used_apply_extension',
'most_used_tab_extension', 'client_version'
];
function renameHeaders(row, headerMap) {
const newRow = {};
for (const key in row) {
const newKey = headerMap[key] || key;
newRow[newKey] = row[key];
}
return newRow;
}
const HEADER_MAP = {
"Date": "date",
"User ID": "source_id",
"Email": "email",
"Is Active": "is_active",
"Chat Suggested Lines Added": "chat_suggested_lines_added",
"Chat Suggested Lines Deleted": "chat_suggested_lines_deleted",
"Chat Accepted Lines Added": "chat_accepted_lines_added",
"Chat Accepted Lines Deleted": "chat_accepted_lines_deleted",
"Chat Total Applies": "chat_total_applies",
"Chat Total Accepts": "chat_total_accepts",
"Chat Total Rejects": "chat_total_rejects",
"Chat Tabs Shown": "chat_tabs_shown",
"Tabs Accepted": "tabs_accepted",
"Edit Requests": "edit_requests",
"Ask Requests": "ask_requests",
"Agent Requests": "agent_requests",
"Cmd+K Usages": "cmd_k_usages",
"Subscription Included Reqs": "subscription_included_reqs",
"API Key Reqs": "api_key_reqs",
"Usage Based Reqs": "usage_based_reqs",
"Bugbot Usages": "bugbot_usages",
"Most Used Model": "most_used_model",
"Most Used Apply Extension": "most_used_apply_extension",
"Most Used Tab Extension": "most_used_tab_extension",
"Client Version": "client_version",
};
async function validateHeaders(headers) {
for (const col of REQUIRED_COLUMNS) {
if (!headers.includes(col)) {
throw new Error(`Missing required column: ${col}`);
}
}
}
let totalInserted = 0;
let totalChunks = 0;
async function insertChunk(rows) {
if (rows.length === 0) return;
const placeholders = rows.map(
(_, rowIndex) => `(${REQUIRED_COLUMNS.map((_, colIndex) => `$${rowIndex * REQUIRED_COLUMNS.length + colIndex + 1}`).join(', ')})`
).join(', ');
const values = rows.flatMap(row => REQUIRED_COLUMNS.map(col => row[col] === '' ? null : row[col]));
const query = `
INSERT INTO custom.cursor_daily_usages (${REQUIRED_COLUMNS.join(', ')})
VALUES ${placeholders}
ON CONFLICT DO NOTHING;
`;
if (DRY_RUN) {
console.log('🟑 Dry run: would insert chunk of', rows.length, 'rows');
} else {
await client.query(query, values);
console.log('βœ… Inserted chunk of', rows.length, 'rows');
totalInserted += rows.length;
totalChunks++;
}
}
async function main() {
try {
await client.connect();
await client.query('SELECT 1');
console.log('βœ… DB connection verified.');
} catch (err) {
console.error('❌ Failed to connect to database:', err.message);
process.exit(1);
}
let chunk = [];
let headersValidated = false;
const stream = fs.createReadStream(CSV_FILE_PATH).pipe(csvParser());
stream.on('headers', async (headers) => {
await validateHeaders(headers);
headersValidated = true;
console.log('βœ… CSV headers validated');
});
stream.on('data', (row) => {
chunk.push(renameHeaders(row, HEADER_MAP));
if (chunk.length >= CHUNK_SIZE) {
stream.pause();
insertChunk(chunk)
.then(() => {
chunk = [];
stream.resume();
})
.catch((err) => {
console.error('❌ Insert error during chunk:', err);
process.exit(1);
});
}
});
stream.on('end', async () => {
if (!headersValidated) {
console.error('❌ CSV headers were not validated.');
process.exit(1);
}
if (chunk.length > 0) {
try {
await insertChunk(chunk);
} catch (err) {
console.error('❌ Final insert error:', err);
process.exit(1);
}
}
console.log('πŸŽ‰ All rows processed.');
if (!DRY_RUN) {
console.log(`πŸ“Š Summary: ${totalInserted} rows inserted across ${totalChunks} chunks.`);
}
await client.end();
});
}
main().catch((err) => {
console.error('πŸ’₯ Fatal error:', err);
process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment