Skip to content

Instantly share code, notes, and snippets.

@Sdy603
Created July 17, 2025 17:55
Show Gist options
  • Select an option

  • Save Sdy603/927bd8ad88bdfb1b9fe3ecb60c0c145c to your computer and use it in GitHub Desktop.

Select an option

Save Sdy603/927bd8ad88bdfb1b9fe3ecb60c0c145c to your computer and use it in GitHub Desktop.
// proposed table schema:
// sha TEXT PRIMARY KEY,
// source_url TEXT,
// author_id TEXT, -- remains TEXT to match source_id in bitbucket_server_users
// created TIMESTAMP,
// repository_source_id INTEGER NOT NULL, -- used to map to bitbucket_server_repos
// row_created_at TIMESTAMP DEFAULT NOW()
const fs = require('fs');
const csv = require('csv-parser');
const { createObjectCsvWriter } = require('csv-writer');
const axios = require('axios');
const { Pool } = require('pg');
const pLimit = require('p-limit');
const limit = pLimit(5);
// --- Config ---
const BITBUCKET_BASE_URL = '<baseURL>'; // Do not include /rest/api/latest
const BITBUCKET_USER = 'BITBUCKET_USERNAME'; // Replace with actual username
const BITBUCKET_PASSWORD = 'BITBUCKET_PASSWORD'; // Replace with actual password

// Postgres connection pool used for live-mode inserts.
const pool = new Pool({
  connectionString: 'DB_CONNECTION_STRING', // Replace with actual DB connection string -- https://app.getdx.com/datacloud/dbusers
  ssl: { rejectUnauthorized: false },
});

// --- CSV Writer for Dry Run ---
// Output columns mirror the proposed custom.bitbucket_repo_commits schema
// (minus row_created_at, which the DB defaults).
const DRY_RUN_COLUMNS = ['sha', 'source_url', 'author_id', 'created', 'repository_source_id'];
const dryRunCsvWriter = createObjectCsvWriter({
  path: 'dry_run_output.csv',
  header: DRY_RUN_COLUMNS.map((field) => ({ id: field, title: field })),
  append: false,
});
// --- Fetch all commits for a repo with pagination ---
/**
 * Fetches every commit for a Bitbucket Server repository, following the
 * paged commits endpoint until the server reports the last page.
 *
 * @param {string} projectKey - Bitbucket project key.
 * @param {string} repoSlug - Repository slug within the project.
 * @returns {Promise<object[]>} All commit objects retrieved. On a request
 *   error the partial result collected so far is returned (error is logged).
 */
async function fetchAllCommits(projectKey, repoSlug) {
  const commits = [];
  let start = 0;
  let isLastPage = false;
  let page = 1;
  while (!isLastPage) {
    const url = `${BITBUCKET_BASE_URL}/rest/api/latest/projects/${projectKey}/repos/${repoSlug}/commits?start=${start}`;
    console.log(`[PAGE ${page}] Fetching: ${url}`);
    try {
      const response = await axios.get(url, {
        auth: {
          username: BITBUCKET_USER,
          password: BITBUCKET_PASSWORD,
        },
      });
      const data = response.data;
      if (data.values) {
        commits.push(...data.values);
        console.log(`Page ${page}: ${data.values.length} commits (total: ${commits.length})`);
      }
      // Treat a missing isLastPage/nextPageStart as "done": a malformed
      // response must not cause an infinite loop re-requesting
      // start=undefined forever.
      isLastPage = data.isLastPage ?? true;
      start = data.nextPageStart ?? start;
      page++;
    } catch (error) {
      console.error(`Error fetching commits for ${projectKey}/${repoSlug}:`, error.message);
      break;
    }
  }
  return commits;
}
// --- Insert commit into Postgres ---
/**
 * Inserts one commit row into custom.bitbucket_repo_commits.
 * Duplicate SHAs are skipped via ON CONFLICT DO NOTHING.
 *
 * @param {object} commit - Raw Bitbucket commit object.
 * @param {string|number} repositorySourceId - Maps to bitbucket_server_repos.
 * @returns {Promise<void>}
 */
async function insertCommit(commit, repositorySourceId) {
  const { id: sha, author, authorTimestamp } = commit;
  const source_url = commit?.links?.self?.[0]?.href || null;
  // Guard a missing timestamp: new Date(undefined) is an Invalid Date,
  // which the pg driver cannot serialize for a TIMESTAMP column.
  const created = authorTimestamp != null ? new Date(authorTimestamp) : null;
  const sql = `
INSERT INTO custom.bitbucket_repo_commits
(sha, source_url, author_id, created, repository_source_id, row_created_at)
VALUES ($1, $2, $3, $4, $5, NOW())
ON CONFLICT DO NOTHING
`;
  await pool.query(sql, [
    sha,
    source_url,
    author?.id || null,
    created,
    repositorySourceId,
  ]);
}
// --- Write to CSV in dry-run mode ---
/**
 * Appends one commit as a row to the dry-run CSV instead of writing to
 * Postgres.
 *
 * @param {object} commit - Raw Bitbucket commit object.
 * @param {string|number} repositorySourceId - Maps to bitbucket_server_repos.
 * @returns {Promise<void>}
 */
async function writeCommitToCsv(commit, repositorySourceId) {
  const { id: sha, author, authorTimestamp } = commit;
  const source_url = commit?.links?.self?.[0]?.href || null;
  // new Date(undefined).toISOString() throws RangeError ("Invalid time
  // value"), which would abort the whole dry run on one bad commit —
  // emit an empty cell instead.
  const created = authorTimestamp != null ? new Date(authorTimestamp).toISOString() : '';
  await dryRunCsvWriter.writeRecords([
    {
      sha,
      source_url,
      author_id: author?.id || '',
      created,
      repository_source_id: repositorySourceId,
    },
  ]);
}
// --- Process the input CSV ---
/**
 * Reads repo rows from csvPath and, per repo, either previews commits to
 * the dry-run CSV or imports them into Postgres.
 *
 * @param {string} csvPath - Input CSV with project_key, repository_slug and
 *   bitbucket_repo_id columns.
 * @param {boolean} [dryRun=false] - When true, write to CSV and keep the
 *   DB pool open (no connection is used).
 * @returns {Promise<void>}
 */
async function processCSV(csvPath, dryRun = false) {
  const rows = [];
  await new Promise((resolve, reject) => {
    fs.createReadStream(csvPath)
      .pipe(csv())
      .on('data', (row) => rows.push(row))
      .on('end', resolve)
      .on('error', reject);
  });
  if (dryRun) {
    // Truncate any previous dry-run output before appending records.
    fs.writeFileSync('dry_run_output.csv', '');
  }
  // Queue every repo through the limiter first, then await them together.
  // Awaiting limit() inside the loop would run repos strictly one at a
  // time, defeating the pLimit(5) concurrency cap. Note: output/log order
  // may interleave across repos; each record is written atomically per
  // writeRecords call.
  const tasks = rows.map((row) => {
    const {
      project_key,
      repository_slug,
      bitbucket_repo_id, // this is the repository_source_id
    } = row;
    return limit(async () => {
      console.log(`[INFO] Fetching commits for ${project_key}/${repository_slug}`);
      const commits = await fetchAllCommits(project_key, repository_slug);
      console.log(`[INFO] ${commits.length} commits retrieved`);
      for (const commit of commits) {
        if (dryRun) {
          await writeCommitToCsv(commit, bitbucket_repo_id);
        } else {
          await insertCommit(commit, bitbucket_repo_id);
        }
      }
      console.log(`[DONE] ${dryRun ? 'Previewed' : 'Imported'} ${commits.length} commits`);
    });
  });
  await Promise.all(tasks);
  if (!dryRun) {
    await pool.end();
  }
}
// --- Entry Point ---
const args = process.argv.slice(2);
// The CSV path is the first NON-flag argument. Using args[0] directly
// would treat "--dry-run" as the path when no path is supplied
// (e.g. `node script.js --dry-run`).
const CSV_PATH = args.find((arg) => !arg.startsWith('--')) || './temp.csv';
const DRY_RUN = args.includes('--dry-run');
processCSV(CSV_PATH, DRY_RUN)
  .then(() => console.log(`Complete: ${DRY_RUN ? 'Dry run' : 'Live mode'}`))
  .catch((err) => {
    console.error('Unhandled error:', err);
    process.exit(1);
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment