Created
July 17, 2025 17:55
-
-
Save Sdy603/927bd8ad88bdfb1b9fe3ecb60c0c145c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // proposed table schema: | |
| // sha TEXT PRIMARY KEY, | |
| // source_url TEXT, | |
| // author_id TEXT, -- remains TEXT to match source_id in bitbucket_server_users | |
| // created TIMESTAMP, | |
| // repository_source_id INTEGER NOT NULL, -- used to map to bitbucket_server_repos | |
| // row_created_at TIMESTAMP DEFAULT NOW() | |
| const fs = require('fs'); | |
| const csv = require('csv-parser'); | |
| const { createObjectCsvWriter } = require('csv-writer'); | |
| const axios = require('axios'); | |
| const { Pool } = require('pg'); | |
| const pLimit = require('p-limit'); | |
// --- Config ---
// Root URL of the Bitbucket Server instance. fetchAllCommits appends the
// `/rest/api/latest/...` path itself, so do not include /rest/api/latest here.
const BITBUCKET_BASE_URL = '<baseURL>';

// Basic-auth credentials sent with every Bitbucket request.
// Replace both placeholders with the actual username and password.
const BITBUCKET_USER = 'BITBUCKET_USERNAME';
const BITBUCKET_PASSWORD = 'BITBUCKET_PASSWORD';

// Settings for the shared Postgres connection pool.
const poolOptions = {
  // Replace with actual DB connection string -- https://app.getdx.com/datacloud/dbusers
  connectionString: 'DB_CONNECTION_STRING',
  // NOTE(review): TLS is on, but server certificate verification is disabled.
  // Confirm this is required by the target database before relying on it.
  ssl: { rejectUnauthorized: false },
};
const pool = new Pool(poolOptions);

// Limiter created by p-limit with a concurrency of 5; processCSV wraps each
// repository's work in it.
const limit = pLimit(5);
// --- CSV Writer for Dry Run ---
// Columns of the dry-run preview file, in output order. Each name is used both
// as the record property to read and as the header title to print.
const dryRunColumns = [
  'sha',
  'source_url',
  'author_id',
  'created',
  'repository_source_id',
];

// Writer for dry_run_output.csv, created with `append: false`.
const dryRunCsvWriter = createObjectCsvWriter({
  path: 'dry_run_output.csv',
  header: dryRunColumns.map((column) => ({ id: column, title: column })),
  append: false,
});
// --- Fetch all commits for a repo with pagination ---
/**
 * Collect every commit of one Bitbucket Server repository by walking the paged
 * `/commits` REST endpoint with HTTP basic auth.
 *
 * Fetching is best-effort: when a request fails, the error is logged and the
 * commits gathered so far are returned instead of throwing.
 *
 * @param {string} projectKey - Bitbucket project key (URL-encoded before use).
 * @param {string} repoSlug - Repository slug (URL-encoded before use).
 * @returns {Promise<object[]>} Commit objects exactly as they appear in the
 *   `values` arrays of the responses, in the order received.
 */
async function fetchAllCommits(projectKey, repoSlug) {
  const commits = [];
  const commitsEndpoint = [
    BITBUCKET_BASE_URL,
    'rest/api/latest/projects',
    encodeURIComponent(projectKey),
    'repos',
    encodeURIComponent(repoSlug),
    'commits',
  ].join('/');

  let start = 0;
  let page = 1;
  let hasMorePages = true;

  while (hasMorePages) {
    const url = `${commitsEndpoint}?start=${start}`;
    console.log(`[PAGE ${page}] Fetching: ${url}`);
    try {
      const { data } = await axios.get(url, {
        auth: {
          username: BITBUCKET_USER,
          password: BITBUCKET_PASSWORD,
        },
      });

      if (Array.isArray(data.values)) {
        commits.push(...data.values);
        console.log(`Page ${page}: ${data.values.length} commits (total: ${commits.length})`);
      }

      // Continue only when the server explicitly reports another page AND gives
      // a cursor that moves forward. A body without paging metadata (or with a
      // stuck cursor) would otherwise re-request the same URL forever.
      const { nextPageStart } = data;
      hasMorePages =
        data.isLastPage === false &&
        Number.isInteger(nextPageStart) &&
        nextPageStart > start;
      start = nextPageStart;
      page++;
    } catch (error) {
      console.error(`Error fetching commits for ${projectKey}/${repoSlug}:`, error.message);
      break;
    }
  }

  return commits;
}
// --- Insert commit into Postgres ---
/**
 * Insert one Bitbucket commit into `custom.bitbucket_repo_commits`.
 *
 * The statement uses `ON CONFLICT DO NOTHING`, so a row that collides with an
 * existing one is skipped rather than raising an error.
 *
 * @param {object} commit - Commit object from the Bitbucket API. Reads `id`,
 *   `author.id`, `authorTimestamp` and `links.self[0].href`.
 * @param {string|number} repositorySourceId - Value stored in
 *   `repository_source_id`.
 * @returns {Promise<void>} Resolves once the query has completed.
 */
async function insertCommit(commit, repositorySourceId) {
  const { id: sha, author, authorTimestamp } = commit;
  const source_url = commit?.links?.self?.[0]?.href || null;

  // A missing or unparseable timestamp is stored as NULL. Passing an Invalid
  // Date to the driver would fail the query, and `new Date(null)` would
  // silently record 1970-01-01.
  const authoredAt = authorTimestamp == null ? null : new Date(authorTimestamp);
  const created =
    authoredAt !== null && !Number.isNaN(authoredAt.getTime()) ? authoredAt : null;

  const sql = `
    INSERT INTO custom.bitbucket_repo_commits
      (sha, source_url, author_id, created, repository_source_id, row_created_at)
    VALUES ($1, $2, $3, $4, $5, NOW())
    ON CONFLICT DO NOTHING
  `;

  await pool.query(sql, [
    sha,
    source_url,
    author?.id || null,
    created,
    repositorySourceId,
  ]);
}
// --- Write to CSV in dry-run mode ---
/**
 * Append one commit to the dry-run preview CSV through `dryRunCsvWriter`.
 *
 * @param {object} commit - Commit object from the Bitbucket API. Reads `id`,
 *   `author.id`, `authorTimestamp` and `links.self[0].href`.
 * @param {string|number} repositorySourceId - Value written to the
 *   `repository_source_id` column.
 * @returns {Promise<void>} Resolves once the record has been written.
 */
async function writeCommitToCsv(commit, repositorySourceId) {
  const { id: sha, author, authorTimestamp } = commit;

  // `toISOString()` throws a RangeError on an Invalid Date. A commit without a
  // usable timestamp gets an empty `created` cell instead of aborting the
  // whole preview.
  const authoredAt = authorTimestamp == null ? null : new Date(authorTimestamp);
  const hasValidTimestamp = authoredAt !== null && !Number.isNaN(authoredAt.getTime());

  const record = {
    sha,
    source_url: commit?.links?.self?.[0]?.href || null,
    author_id: author?.id || '',
    created: hasValidTimestamp ? authoredAt.toISOString() : '',
    repository_source_id: repositorySourceId,
  };

  await dryRunCsvWriter.writeRecords([record]);
}
// --- Process the input CSV ---
/**
 * Read the repository list from `csvPath` and import (or, in dry-run mode,
 * preview) the commits of every listed repository.
 *
 * Each CSV row must provide `project_key`, `repository_slug` and
 * `bitbucket_repo_id` (stored as `repository_source_id`).
 *
 * Repositories are processed concurrently, bounded by the module-level `limit`.
 * After the first failure no further repositories are started; repositories
 * already in flight run to completion and the first error is then rethrown.
 *
 * @param {string} csvPath - Path of the input CSV file.
 * @param {boolean} [dryRun=false] - When true, commits are written to
 *   `dry_run_output.csv` instead of Postgres and the pool is left untouched.
 * @returns {Promise<void>} Resolves when every repository has been handled.
 * @throws Rejects with the read/parse error of the input file, or with the
 *   first error raised while persisting commits.
 */
async function processCSV(csvPath, dryRun = false) {
  try {
    const rows = [];
    await new Promise((resolve, reject) => {
      fs.createReadStream(csvPath)
        // pipe() does not forward source errors, so a missing or unreadable
        // file has to be caught on the read stream itself.
        .on('error', reject)
        .pipe(csv())
        .on('data', (row) => rows.push(row))
        .on('end', resolve)
        .on('error', reject);
    });

    if (dryRun) {
      fs.writeFileSync('dry_run_output.csv', '');
    }

    // All tasks share one CSV writer. Writing a repository's commits under a
    // one-at-a-time lock keeps its rows contiguous and avoids overlapping
    // writes to the same file.
    const csvWriteLock = pLimit(1);

    const importRepository = async ({ project_key, repository_slug, bitbucket_repo_id }) => {
      console.log(`[INFO] Fetching commits for ${project_key}/${repository_slug}`);
      const commits = await fetchAllCommits(project_key, repository_slug);
      console.log(`[INFO] ${commits.length} commits retrieved`);

      if (dryRun) {
        await csvWriteLock(async () => {
          for (const commit of commits) {
            await writeCommitToCsv(commit, bitbucket_repo_id);
          }
        });
      } else {
        for (const commit of commits) {
          await insertCommit(commit, bitbucket_repo_id);
        }
      }

      console.log(`[DONE] ${dryRun ? 'Previewed' : 'Imported'} ${commits.length} commits`);
    };

    let failed = false;
    let failure;

    // Schedule every row up front; `limit` decides how many run at once.
    // (Awaiting each `limit(...)` call inside a loop would serialize the work
    // and make the concurrency cap meaningless.)
    await Promise.all(
      rows.map((row) =>
        limit(async () => {
          if (failed) {
            return; // an earlier repository failed: do not start new work
          }
          try {
            await importRepository(row);
          } catch (err) {
            if (!failed) {
              failed = true;
              failure = err;
            }
          }
        }),
      ),
    );

    if (failed) {
      throw failure;
    }
  } finally {
    // Close the pool on success and on failure alike.
    if (!dryRun) {
      await pool.end();
    }
  }
}
// --- Entry Point ---
// Usage: node <script> [csvPath] [--dry-run]
const args = process.argv.slice(2);

// The CSV path is the first argument that is not a `--flag`. Taking `args[0]`
// blindly would treat `--dry-run` as the file name when no path is given.
const CSV_PATH = args.find((arg) => !arg.startsWith('--')) || './temp.csv';
const DRY_RUN = args.includes('--dry-run');

processCSV(CSV_PATH, DRY_RUN)
  .then(() => console.log(`Complete: ${DRY_RUN ? 'Dry run' : 'Live mode'}`))
  .catch((err) => {
    // Report the failure and exit non-zero so callers can detect it.
    console.error('Unhandled error:', err);
    process.exit(1);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment