Skip to content

Instantly share code, notes, and snippets.

@sjelfull
Created July 4, 2025 21:51
Show Gist options
  • Save sjelfull/ee2d5064842a36363b78cf49af9b24ed to your computer and use it in GitHub Desktop.
Save sjelfull/ee2d5064842a36363b78cf49af9b24ed to your computer and use it in GitHub Desktop.
Migration script from a Sanity dataset to the Media Library. Pulled from https://www.sanity.io/docs/media-library/media-migration#k7772445e6b09
import * as fs from 'node:fs'
import * as path from 'node:path'
import {config} from 'dotenv'
import ndjson from 'ndjson'
// Options accepted by migrateMedia(), whether called programmatically or via CLI.
export interface MigrateMediaOptions {
  // Sanity project ID of the source dataset.
  projectId: string
  // Name of the source dataset.
  dataset: string
  // ID of the target Media Library.
  mediaLibraryId: string
  // Auth token with access to both the project and the Media Library.
  sanityToken: string
  // Directory containing the exported image files.
  imagesDir: string
  // Directory containing the exported (non-image) files.
  filesDir: string
  // Path to the exported ndjson document data file.
  dataFilePath: string
  // Path to the exported assets JSON metadata file.
  assetsFilePath: string
  // Simulate the migration without uploading, linking or patching.
  isDryRun?: boolean
  // Enable verbose per-item logging.
  isVerbose?: boolean
  // Restrict the run to a single named image (skips files entirely).
  testImageName?: string
  // Copy legacy asset metadata into a deployed 'metadata' aspect.
  includeAspects?: boolean
}
// Load environment variables from .env file when running as CLI.
// Silently skipped when no .env file exists in the working directory.
if (fs.existsSync('./.env')) {
  config({path: './.env'})
}
// Minimal shape of a document parsed from the exported ndjson data file.
interface Document {
  _id: string
  _type: string
  // Arbitrary additional fields from the dataset export.
  [key: string]: any
}
// IDs returned by the Media Library upload endpoint.
interface UploadResult {
  // Media Library asset _id (used in globalDocumentReference patches).
  assetId: string
  // Asset instance _id (used when linking the asset to the dataset).
  assetInstanceId: string
}
// Outcome of processing a single image file.
interface ImageProcessingResult {
  filename: string
  // Content-hash prefix of the filename, used to match asset references.
  hash: string
  success: boolean
  // Present when success is false.
  error?: string
}
// Outcome of processing a single (non-image) file.
interface FileProcessingResult {
  filename: string
  // Content-hash prefix of the filename, used to match asset references.
  hash: string
  success: boolean
  // Present when success is false.
  error?: string
}
// Parse CLI flags into a plain options object.
// Supported flags: --dry-run, --verbose, --include-aspects, --test-image <name>.
function parseCliArgs() {
  const args = process.argv.slice(2)
  const testImageFlagAt = args.indexOf('--test-image')
  return {
    isDryRun: args.includes('--dry-run'),
    isVerbose: args.includes('--verbose'),
    // Value following the flag, if the flag is present at all.
    testImageName: testImageFlagAt === -1 ? undefined : args[testImageFlagAt + 1],
    // Migrate legacy metadata to aspects.
    includeAspects: args.includes('--include-aspects'),
    // Number of images to process in parallel.
    BATCH_SIZE: 20,
  }
}
// Forward arguments to console.log, but only when verbose output is enabled.
function logVerbose(isVerbose: boolean, ...args: any[]) {
  if (!isVerbose) return
  console.log(...args)
}
// Always print a message. In non-verbose mode, first blank out any
// in-place progress line by overwriting the terminal row with spaces.
function logStatus(isVerbose: boolean, message: string) {
  if (!isVerbose) {
    const width = process.stdout.columns || 80
    process.stdout.write('\r'.padEnd(width) + '\r')
  }
  // The status itself is always logged.
  console.log(message)
}
// Validate a single path segment used to address a field inside a document.
// A segment is either a whole non-negative integer (array index) or a plain
// identifier. Throws with a descriptive message otherwise.
function validatePathSegment(segment: string, index: number): void {
  if (segment === undefined || segment === null || segment === '') {
    throw new Error(`Invalid path segment at index ${index}: segment is empty or undefined`)
  }
  // Only whole non-negative integers count as array indices. The previous
  // Number()-based check also accepted " ", "1.5" and "1e3", which
  // buildFieldPath would then serialize into an invalid path.
  if (/^\d+$/.test(segment)) {
    return // Valid array index
  }
  // Check for valid property name format
  if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(segment)) {
    throw new Error(
      `Invalid property name at index ${index}: "${segment}" - must start with a letter or underscore and contain only alphanumeric characters and underscores`,
    )
  }
}
// Function to build field path. The path is used to patch documents that contain the image.
// Example: ['images', '0', 'asset'] -> 'images[0].asset'
// Throws if the path is empty or any segment is invalid.
function buildFieldPath(fieldPath: string[]): string {
  if (!fieldPath || fieldPath.length === 0) {
    throw new Error('Field path cannot be empty')
  }
  // Validate all segments first
  fieldPath.forEach((segment, index) => validatePathSegment(segment, index))
  let path = fieldPath[0]
  for (let i = 1; i < fieldPath.length; i++) {
    const segment = fieldPath[i]
    // Numeric segments are array indices and get bracket notation.
    // (Segments starting with '[' are rejected by validation above, so the
    // old startsWith('[') special case was unreachable and has been removed.)
    if (/^\d+$/.test(segment)) {
      path += `[${segment}]`
    } else {
      path += `.${segment}`
    }
  }
  return path
}
// Function to find image references in a document.
// Recursively walks the document and collects every node whose _sanityAsset
// points at a local image export (image@file://./images/...). Returns the
// relative image path plus the field path needed to patch the reference.
// Array indices appear in fieldPath as numeric strings (Object.entries keys).
async function findImageReferences(
  doc: Document,
): Promise<{docId: string; path: string; fieldPath: string[]}[]> {
  const results: {docId: string; path: string; fieldPath: string[]}[] = []
  const prefix = 'image@file://./images/'
  function traverse(obj: any, currentPath: string[] = []) {
    if (!obj || typeof obj !== 'object') return
    // typeof guard: a non-string _sanityAsset must not crash the walk.
    if (typeof obj._sanityAsset === 'string' && obj._sanityAsset.startsWith(prefix)) {
      results.push({
        docId: doc._id,
        path: obj._sanityAsset.slice(prefix.length),
        fieldPath: [...currentPath],
      })
    }
    Object.entries(obj).forEach(([key, value]) => {
      if (typeof value === 'object' && value !== null) {
        traverse(value, [...currentPath, key])
      }
    })
  }
  traverse(doc)
  return results
}
// Function to find file references in a document.
// Recursively walks the document and collects every node whose _sanityAsset
// points at a local file export (file@file://./files/...). Returns the
// relative file path plus the field path needed to patch the reference.
// Array indices appear in fieldPath as numeric strings (Object.entries keys).
async function findFileReferences(
  doc: Document,
): Promise<{docId: string; path: string; fieldPath: string[]}[]> {
  const results: {docId: string; path: string; fieldPath: string[]}[] = []
  const prefix = 'file@file://./files/'
  function traverse(obj: any, currentPath: string[] = []) {
    if (!obj || typeof obj !== 'object') return
    // typeof guard: a non-string _sanityAsset must not crash the walk.
    if (typeof obj._sanityAsset === 'string' && obj._sanityAsset.startsWith(prefix)) {
      results.push({
        docId: doc._id,
        path: obj._sanityAsset.slice(prefix.length),
        fieldPath: [...currentPath],
      })
    }
    Object.entries(obj).forEach(([key, value]) => {
      if (typeof value === 'object' && value !== null) {
        traverse(value, [...currentPath, key])
      }
    })
  }
  traverse(doc)
  return results
}
// Check that an image exists on disk and report its byte size.
// Never throws: missing files and stat failures both yield
// {exists: false, size: 0} so callers decide how to react.
function verifyImageFile(filePath: string): {exists: boolean; size: number} {
  try {
    const present = fs.existsSync(filePath)
    if (!present) {
      return {exists: false, size: 0}
    }
    const {size} = fs.statSync(filePath)
    return {exists: true, size}
  } catch (error) {
    console.error(`Error verifying file ${filePath}: ${error.message || error}`)
    return {exists: false, size: 0}
  }
}
// Function to verify file before upload.
// Never throws: missing files and stat failures both yield
// {exists: false, size: 0} so callers decide how to react.
function verifyFile(filePath: string): {exists: boolean; size: number} {
  try {
    if (!fs.existsSync(filePath)) {
      return {exists: false, size: 0}
    }
    const stats = fs.statSync(filePath)
    return {exists: true, size: stats.size}
  } catch (error) {
    console.error(`Error verifying file ${filePath}: ${error.message || error}`)
    return {exists: false, size: 0}
  }
}
// Function to upload asset to media library.
// Returns an object with assetId and assetInstanceId or throws error.
// Throws if the file is missing/empty, the upload request fails, the
// response is not JSON, or the expected IDs are absent from the response.
async function uploadAsset(
  mediaLibraryId: string,
  token: string,
  assetPath: string,
  isVerbose: boolean,
): Promise<UploadResult> {
  const baseUrl = `https://api.sanity.io/v2024-06-24/media-libraries/${mediaLibraryId}/upload`
  const parts = assetPath.split('/')
  const filename = parts[parts.length - 1]
  const fileInfo = verifyFile(assetPath)
  if (!fileInfo.exists) {
    throw new Error(`File not found: ${assetPath}`)
  }
  if (fileInfo.size === 0) {
    throw new Error(`File is empty: ${assetPath}`)
  }
  const params: Record<string, string> = {
    filename,
    autoGenerateTitle: 'true',
  }
  const queryParams = new URLSearchParams(params).toString()
  const url = `${baseUrl}?${queryParams}`
  try {
    logVerbose(isVerbose, 'Uploading asset with:', {
      mediaLibraryId,
      tokenLength: token.length,
      assetPath,
      url,
    })
    const body = fs.readFileSync(assetPath)
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        Accept: 'application/json',
        'Content-Type': 'application/x-www-form-urlencoded',
        Authorization: `Bearer ${token}`,
      },
      body,
    })
    const responseText = await response.text()
    if (!response.ok) {
      throw new Error(
        `Upload HTTP Error: ${response.status} ${response.statusText} - ${responseText}`,
      )
    }
    // Parse in its own try so only genuine JSON failures are reported as
    // parse errors. Previously the missing-ID validation error below was
    // thrown inside this try and got mislabeled as a parse failure.
    let parsedResponse: any
    try {
      parsedResponse = JSON.parse(responseText)
    } catch (parseError) {
      throw new Error(
        `Failed to parse upload server response: ${parseError.message} - Response: ${responseText}`,
      )
    }
    const assetId = parsedResponse?.asset?._id
    const assetInstanceId = parsedResponse?.assetInstance?._id
    if (!assetId || !assetInstanceId) {
      throw new Error(
        `Required ID(s) not found in parsed response (assetId: ${assetId}, assetInstanceId: ${assetInstanceId}). Structure logged above. Response: ${responseText}`,
      )
    }
    logVerbose(
      isVerbose,
      `Upload successful for ${filename}, received assetId: ${assetId}, assetInstanceId: ${assetInstanceId}`,
    )
    return {assetId, assetInstanceId}
  } catch (error) {
    // Wrap every failure with the filename for batch-level reporting.
    throw new Error(`Upload failed for ${filename}: ${error.message}`)
  }
}
// Resolve after `ms` milliseconds; used for retry backoff and batch pacing.
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms)
  })
}
// Function to link media library asset with exponential backoff retry.
// Retries while the Media Library reports the asset is "not ready" and on
// network-level errors, up to maxRetries attempts or maxTimeout total.
// Returns the parsed link response, or throws a wrapped error.
async function linkMediaLibraryAsset(
  token: string,
  projectId: string,
  dataset: string,
  mediaLibraryId: string,
  assetInstanceId: string,
  assetId: string,
  isVerbose: boolean,
) {
  const baseUrl = `https://${projectId}.api.sanity.io/v2025-01-04/assets/media-library-link/${dataset}`
  const maxRetries = 6 // Will give us roughly 1 minute total (1+2+4+8+16+32 = 63 seconds)
  const baseDelay = 1000 // Start with 1 second
  const maxTimeout = 60000 // 1 minute total timeout
  const startTime = Date.now()
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    // Check if we've exceeded the total timeout
    if (Date.now() - startTime > maxTimeout) {
      throw new Error(`Failed to link Media Library asset (ID: ${assetId}): Timeout after ${maxTimeout}ms`)
    }
    try {
      logVerbose(isVerbose, `Linking media library asset (attempt ${attempt + 1}/${maxRetries + 1}):`, {
        url: baseUrl,
        mediaLibraryId,
        assetId,
        assetInstanceId,
      })
      const response = await fetch(baseUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${token}`,
        },
        body: JSON.stringify({
          mediaLibraryId,
          assetInstanceId,
          assetId,
        }),
      })
      if (!response.ok) {
        // Read the body exactly once. The previous implementation called
        // response.json() and then response.text() in its catch — but the
        // stream is already consumed by then, so the second read threw and
        // masked the real error.
        let errorText = ''
        try {
          errorText = await response.text()
        } catch {
          errorText = 'Failed to read error response body'
        }
        // Normalize JSON bodies so the message matches the previous
        // JSON.stringify-based format; keep raw text otherwise.
        try {
          errorText = JSON.stringify(JSON.parse(errorText))
        } catch {
          // Not JSON — keep the raw text.
        }
        // Check if this is a "Media library asset is not ready" error
        if (errorText.includes('Media library asset is not ready')) {
          if (attempt < maxRetries) {
            const delay = Math.min(baseDelay * Math.pow(2, attempt), maxTimeout - (Date.now() - startTime))
            logVerbose(isVerbose, `Media library asset not ready, retrying in ${delay}ms (attempt ${attempt + 1}/${maxRetries + 1})`)
            await sleep(delay)
            continue
          }
        }
        throw new Error(
          `Link HTTP Error: ${response.status} ${response.statusText} - ${errorText}`,
        )
      }
      const result = await response.json()
      logVerbose(isVerbose, `Media Library asset linked successfully on attempt ${attempt + 1}:`, result)
      return result
    } catch (error) {
      // If it's a network error or other non-HTTP error, retry as well
      if (attempt < maxRetries && (error.message.includes('fetch') || error.message.includes('network'))) {
        const delay = Math.min(baseDelay * Math.pow(2, attempt), maxTimeout - (Date.now() - startTime))
        logVerbose(isVerbose, `Network error, retrying in ${delay}ms (attempt ${attempt + 1}/${maxRetries + 1}): ${error.message}`)
        await sleep(delay)
        continue
      }
      // If it's the final attempt or a non-retryable error, throw
      throw new Error(`Failed to link Media Library asset (ID: ${assetId}): ${error.message}`)
    }
  }
  // This should never be reached, but just in case
  throw new Error(`Failed to link Media Library asset (ID: ${assetId}): Maximum retry attempts exceeded`)
}
// Process a single image through all steps:
// 1. upload it to the Media Library,
// 2. link the uploaded asset back to the source dataset,
// 3. patch every document that references the image with a
//    globalDocumentReference, and
// 4. optionally copy legacy metadata into a deployed 'metadata' aspect.
// Returns a result record instead of throwing so batch callers can tally
// successes and failures. (This also restores the `${filename}` log
// interpolations that had been corrupted to `$(unknown)`.)
async function processImage(
  filename: string,
  index: number,
  totalImagesInThisRun: number,
  imagesDir: string,
  dataFilePath: string,
  mediaLibraryId: string,
  sanityToken: string,
  projectId: string,
  dataset: string,
  documentUpdateLimiter: RateLimiter,
  mediaLibraryLimiter: RateLimiter,
  aspectUpdateLimiter: RateLimiter,
  parsedDocuments: Document[],
  parsedAssets: any,
  tags: Map<string, string>,
  uploadedAssetIds: Map<string, UploadResult>,
  isDryRun: boolean,
  isVerbose: boolean,
  includeAspects: boolean,
): Promise<ImageProcessingResult> {
  // Exported filenames look like '<hash>-<dimensions>.<ext>'; the hash prefix
  // is what document references are matched against.
  const hash = filename.split('-')[0]
  const result: ImageProcessingResult = {
    filename,
    hash,
    success: false,
  }
  try {
    // Only log detailed progress in verbose mode
    const imageProgress = `(${index + 1}/${totalImagesInThisRun})`
    logVerbose(isVerbose, `\nProcessing ${filename} ${imageProgress}`)
    // Upload step
    // Uploads the image to the media library,
    // adds the actualAssetId to a map of asset IDs.
    const filepath = path.resolve(imagesDir, filename)
    if (isDryRun) {
      logVerbose(
        isVerbose,
        `DRY RUN ${imageProgress}: Would upload ${filename} with auto-generated title`,
      )
    } else {
      logVerbose(isVerbose, `Uploading ${imageProgress} ${filename}...`)
      const actualAssetId = await mediaLibraryLimiter.enqueue(() =>
        uploadAsset(mediaLibraryId, sanityToken, filepath, isVerbose),
      )
      uploadedAssetIds.set(hash, actualAssetId)
      logVerbose(isVerbose, `Upload ${imageProgress}: ${filename} uploaded successfully.`)
    }
    // Link step: Take the assetId and asset instance ID from the upload step,
    // and link them to the source dataset.
    // Pull the asset details from the upload map.
    const uploadResult = uploadedAssetIds.get(hash)
    if (!isDryRun && !uploadResult) {
      throw new Error(
        `Cannot link, upload result for hash ${hash} not found (upload likely failed).`,
      )
    }
    if (isDryRun) {
      const dryAssetId = uploadResult?.assetId || `mock-asset-id-${hash}`
      logVerbose(
        isVerbose,
        `DRY RUN ${imageProgress}: Would link ${filename} (using Asset ID: ${dryAssetId})`,
      )
    } else {
      const {assetId, assetInstanceId} = uploadResult!
      logVerbose(isVerbose, `Linking ${imageProgress} ${filename}...`)
      await mediaLibraryLimiter.enqueue(() =>
        linkMediaLibraryAsset(
          sanityToken,
          projectId,
          dataset,
          mediaLibraryId,
          assetInstanceId,
          assetId,
          isVerbose,
        ),
      )
      logVerbose(isVerbose, `Link ${imageProgress}: ${filename} linked.`)
    }
    // Documents step: Find all documents that contain the image.
    // This is done by searching through the parsed documents from the data file.
    // The field path is used to patch the document.
    const matchingDocs: {docId: string; fieldPath: string[]}[] = []
    logVerbose(isVerbose, `Docs ${imageProgress}: Reading references for ${filename}...`)
    try {
      for (const doc of parsedDocuments) {
        const refs = await findImageReferences(doc)
        const matches = refs.filter((ref) => ref.path.startsWith(hash))
        if (matches.length > 0) {
          for (const match of matches) {
            matchingDocs.push({docId: doc._id, fieldPath: match.fieldPath})
          }
        }
      }
    } catch (readError) {
      throw new Error(`Failed reading/parsing ${dataFilePath}: ${readError.message}`)
    }
    logVerbose(isVerbose, `Found ${matchingDocs.length} document references for ${filename}.`)
    if (isDryRun) {
      logVerbose(
        isVerbose,
        `DRY RUN ${imageProgress}: Would update ${matchingDocs.length} documents for ${filename}`,
      )
    } else {
      const uploadResult = uploadedAssetIds.get(hash)
      if (!uploadResult) {
        throw new Error(`Cannot update docs, asset ID for hash ${hash} not found.`)
      }
      const {assetId} = uploadResult
      if (matchingDocs.length > 0) {
        logVerbose(
          isVerbose,
          `Docs ${imageProgress}: Updating ${matchingDocs.length} refs for ${filename}...`,
        )
        const docBatchSize = 10
        let failedBatchCount = 0
        // Patch documents in batches with the GDR.
        for (let i = 0; i < matchingDocs.length; i += docBatchSize) {
          const batch = matchingDocs.slice(i, i + docBatchSize)
          const mutations = batch.map((doc) => ({
            patch: {
              id: doc.docId,
              set: {
                [`${buildFieldPath(doc.fieldPath)}.media`]: {
                  _type: 'globalDocumentReference',
                  _ref: `media-library:${mediaLibraryId}:${assetId}`,
                  _weak: true,
                },
              },
            },
          }))
          const url = `https://${projectId}.api.sanity.io/vX/data/mutate/${dataset}`
          await documentUpdateLimiter.enqueue(async () => {
            try {
              const response = await fetch(url, {
                method: 'POST',
                headers: {
                  'Content-Type': 'application/json',
                  Authorization: `Bearer ${sanityToken}`,
                },
                body: JSON.stringify({mutations}),
              })
              if (!response.ok) {
                const error = await response.json()
                logVerbose(isVerbose, 'Failed to update document batch:', error)
                failedBatchCount++
                return
              }
              logVerbose(
                isVerbose,
                `Successfully updated batch of ${batch.length} documents for ${filename}`,
              )
            } catch (fetchError) {
              logVerbose(isVerbose, 'Error during document update fetch:', fetchError)
              failedBatchCount++
            }
          })
        }
        if (failedBatchCount > 0) {
          throw new Error(`${failedBatchCount} batch(es) failed to update for ${filename}.`)
        }
        logVerbose(isVerbose, `Docs ${imageProgress}: ${filename} refs updated.`)
      } else {
        logVerbose(
          isVerbose,
          `Docs ${imageProgress}: No documents needed updating for ${filename}.`,
        )
      }
    }
    // Copy metadata to aspect.
    // use --include-aspects to enable.
    // Requires a deployed aspect named 'metadata' that matches the aspect shape.
    // Edit the shape in the patch below to match the desired shape in Media Library.
    if (isDryRun && includeAspects) {
      logVerbose(isVerbose, 'DRY RUN: Would update aspect with metadata.')
    } else if (includeAspects) {
      logVerbose(isVerbose, `Updating aspect data for ${imageProgress} ${filename}...`)
      // Assert non-null as we checked above
      const {assetId, assetInstanceId} = uploadResult!
      // We have to build the ID to match the key format found in assets.json
      const idParts = assetInstanceId.split('-')
      const assetDataId = `${idParts[0]}-${idParts[1]}`
      const asset = parsedAssets[assetDataId]
      // Pull the tags from the media plugin.
      const assetTags = asset?.opt?.media?.tags?.map((tag: any) => tags.get(tag._ref))
      const mutations = JSON.stringify({
        mutations: [
          {
            patch: {
              id: assetId,
              // Create an empty aspects object if it doesn't exist.
              setIfMissing: {aspects: {}},
              set: {
                'aspects.metadata': {
                  title: asset?.title || '',
                  description: asset?.description || '',
                  tags: assetTags || [],
                  creditLine: asset?.creditLine || '',
                  altText: asset?.altText || '',
                  originalFilename: asset?.originalFilename || '',
                },
              },
            },
          },
        ],
      })
      // Update the aspect by patching the asset in Media Library.
      await aspectUpdateLimiter.enqueue(async () => {
        const url = `https://api.sanity.io/v2024-06-24/media-libraries/${mediaLibraryId}/mutate`
        try {
          const response = await fetch(url, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
              Authorization: `Bearer ${sanityToken}`,
            },
            body: mutations,
          })
          if (!response.ok) {
            const error = await response.json()
            logVerbose(isVerbose, 'Failed to update aspect:', error)
            return
          }
          logVerbose(isVerbose, `Successfully updated aspect for ${filename}`)
        } catch (error) {
          // Aspect failures are logged but do not fail the image.
          logVerbose(isVerbose, 'Error updating aspect with metadata:', error)
        }
      })
    }
    // Mark as successful
    result.success = true
    return result
  } catch (error) {
    result.error = error.message || String(error)
    logVerbose(isVerbose, `Error processing ${filename}: ${result.error}`)
    return result
  }
}
// Process a single (non-image) file through all steps:
// 1. upload it to the Media Library,
// 2. link the uploaded asset back to the source dataset,
// 3. patch every document that references the file with a
//    globalDocumentReference, and
// 4. optionally copy legacy metadata into a deployed 'metadata' aspect.
// Returns a result record instead of throwing so batch callers can tally
// successes and failures. (This also restores the `${filename}` log
// interpolations that had been corrupted to `$(unknown)`.)
async function processFile(
  filename: string,
  index: number,
  totalFilesInThisRun: number,
  filesDir: string,
  dataFilePath: string,
  mediaLibraryId: string,
  sanityToken: string,
  projectId: string,
  dataset: string,
  documentUpdateLimiter: RateLimiter,
  mediaLibraryLimiter: RateLimiter,
  aspectUpdateLimiter: RateLimiter,
  parsedDocuments: Document[],
  parsedAssets: any,
  tags: Map<string, string>,
  uploadedAssetIds: Map<string, UploadResult>,
  isDryRun: boolean,
  isVerbose: boolean,
  includeAspects: boolean,
): Promise<FileProcessingResult> {
  // Exported filenames look like '<hash>-...'; the hash prefix is what
  // document references are matched against.
  const hash = filename.split('-')[0]
  const result: FileProcessingResult = {
    filename,
    hash,
    success: false,
  }
  try {
    // Only log detailed progress in verbose mode
    const fileProgress = `(${index + 1}/${totalFilesInThisRun})`
    logVerbose(isVerbose, `\nProcessing ${filename} ${fileProgress}`)
    // Upload step
    // Uploads the file to the media library,
    // adds the actualAssetId to a map of asset IDs.
    const filepath = path.resolve(filesDir, filename)
    if (isDryRun) {
      logVerbose(
        isVerbose,
        `DRY RUN ${fileProgress}: Would upload ${filename} with auto-generated title`,
      )
    } else {
      logVerbose(isVerbose, `Uploading ${fileProgress} ${filename}...`)
      const actualAssetId = await mediaLibraryLimiter.enqueue(() =>
        uploadAsset(mediaLibraryId, sanityToken, filepath, isVerbose),
      )
      uploadedAssetIds.set(hash, actualAssetId)
      logVerbose(isVerbose, `Upload ${fileProgress}: ${filename} uploaded successfully.`)
    }
    // Link step: Take the assetId and asset instance ID from the upload step,
    // and link them to the source dataset.
    // Pull the asset details from the upload map.
    const uploadResult = uploadedAssetIds.get(hash)
    if (!isDryRun && !uploadResult) {
      throw new Error(
        `Cannot link, upload result for hash ${hash} not found (upload likely failed).`,
      )
    }
    if (isDryRun) {
      const dryAssetId = uploadResult?.assetId || `mock-asset-id-${hash}`
      logVerbose(
        isVerbose,
        `DRY RUN ${fileProgress}: Would link ${filename} (using Asset ID: ${dryAssetId})`,
      )
    } else {
      const {assetId, assetInstanceId} = uploadResult!
      logVerbose(isVerbose, `Linking ${fileProgress} ${filename}...`)
      await mediaLibraryLimiter.enqueue(() =>
        linkMediaLibraryAsset(
          sanityToken,
          projectId,
          dataset,
          mediaLibraryId,
          assetInstanceId,
          assetId,
          isVerbose,
        ),
      )
      logVerbose(isVerbose, `Link ${fileProgress}: ${filename} linked.`)
    }
    // Documents step: Find all documents that contain the file.
    // This is done by searching through the parsed documents from the data file.
    // The field path is used to patch the document.
    const matchingDocs: {docId: string; fieldPath: string[]}[] = []
    logVerbose(isVerbose, `Docs ${fileProgress}: Reading references for ${filename}...`)
    try {
      for (const doc of parsedDocuments) {
        const refs = await findFileReferences(doc)
        const matches = refs.filter((ref) => ref.path.startsWith(hash))
        if (matches.length > 0) {
          for (const match of matches) {
            matchingDocs.push({docId: doc._id, fieldPath: match.fieldPath})
          }
        }
      }
    } catch (readError) {
      throw new Error(`Failed reading/parsing ${dataFilePath}: ${readError.message}`)
    }
    logVerbose(isVerbose, `Found ${matchingDocs.length} document references for ${filename}.`)
    if (isDryRun) {
      logVerbose(
        isVerbose,
        `DRY RUN ${fileProgress}: Would update ${matchingDocs.length} documents for ${filename}`,
      )
    } else {
      const uploadResult = uploadedAssetIds.get(hash)
      if (!uploadResult) {
        throw new Error(`Cannot update docs, asset ID for hash ${hash} not found.`)
      }
      const {assetId} = uploadResult
      if (matchingDocs.length > 0) {
        logVerbose(
          isVerbose,
          `Docs ${fileProgress}: Updating ${matchingDocs.length} refs for ${filename}...`,
        )
        const docBatchSize = 10
        let failedBatchCount = 0
        // Patch documents in batches with the GDR.
        for (let i = 0; i < matchingDocs.length; i += docBatchSize) {
          const batch = matchingDocs.slice(i, i + docBatchSize)
          const mutations = batch.map((doc) => ({
            patch: {
              id: doc.docId,
              set: {
                [`${buildFieldPath(doc.fieldPath)}.media`]: {
                  _type: 'globalDocumentReference',
                  _ref: `media-library:${mediaLibraryId}:${assetId}`,
                  _weak: true,
                },
              },
            },
          }))
          const url = `https://${projectId}.api.sanity.io/vX/data/mutate/${dataset}`
          await documentUpdateLimiter.enqueue(async () => {
            try {
              const response = await fetch(url, {
                method: 'POST',
                headers: {
                  'Content-Type': 'application/json',
                  Authorization: `Bearer ${sanityToken}`,
                },
                body: JSON.stringify({mutations}),
              })
              if (!response.ok) {
                const error = await response.json()
                logVerbose(isVerbose, 'Failed to update document batch:', error)
                failedBatchCount++
                return
              }
              logVerbose(
                isVerbose,
                `Successfully updated batch of ${batch.length} documents for ${filename}`,
              )
            } catch (fetchError) {
              logVerbose(isVerbose, 'Error during document update fetch:', fetchError)
              failedBatchCount++
            }
          })
        }
        if (failedBatchCount > 0) {
          throw new Error(`${failedBatchCount} batch(es) failed to update for ${filename}.`)
        }
        logVerbose(isVerbose, `Docs ${fileProgress}: ${filename} refs updated.`)
      } else {
        logVerbose(
          isVerbose,
          `Docs ${fileProgress}: No documents needed updating for ${filename}.`,
        )
      }
    }
    // Copy metadata to aspect.
    // use --include-aspects to enable.
    // Requires a deployed aspect named 'metadata' that matches the aspect shape.
    // Edit the shape in the patch below to match the desired shape in Media Library.
    if (isDryRun && includeAspects) {
      logVerbose(isVerbose, 'DRY RUN: Would update aspect with metadata.')
    } else if (includeAspects) {
      logVerbose(isVerbose, `Updating aspect data for ${fileProgress} ${filename}...`)
      // Assert non-null as we checked above
      const {assetId, assetInstanceId} = uploadResult!
      // We have to build the ID to match the key format found in assets.json
      const idParts = assetInstanceId.split('-')
      const assetDataId = `${idParts[0]}-${idParts[1]}`
      const asset = parsedAssets[assetDataId]
      // Pull the tags from the media plugin.
      const assetTags = asset?.opt?.media?.tags?.map((tag: any) => tags.get(tag._ref))
      const mutations = JSON.stringify({
        mutations: [
          {
            patch: {
              id: assetId,
              // Create an empty aspects object if it doesn't exist.
              setIfMissing: {aspects: {}},
              set: {
                'aspects.metadata': {
                  title: asset?.title || '',
                  description: asset?.description || '',
                  tags: assetTags || [],
                  creditLine: asset?.creditLine || '',
                  altText: asset?.altText || '',
                  originalFilename: asset?.originalFilename || '',
                },
              },
            },
          },
        ],
      })
      // Update the aspect by patching the asset in Media Library.
      await aspectUpdateLimiter.enqueue(async () => {
        const url = `https://api.sanity.io/v2024-06-24/media-libraries/${mediaLibraryId}/mutate`
        try {
          const response = await fetch(url, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
              Authorization: `Bearer ${sanityToken}`,
            },
            body: mutations,
          })
          if (!response.ok) {
            const error = await response.json()
            logVerbose(isVerbose, 'Failed to update aspect:', error)
            return
          }
          logVerbose(isVerbose, `Successfully updated aspect for ${filename}`)
        } catch (error) {
          // Aspect failures are logged but do not fail the file.
          logVerbose(isVerbose, 'Error updating aspect with metadata:', error)
        }
      })
    }
    // Mark as successful
    result.success = true
    return result
  } catch (error) {
    result.error = error.message || String(error)
    logVerbose(isVerbose, `Error processing ${filename}: ${result.error}`)
    return result
  }
}
// Rate limiter class to help avoid rate limiting errors.
// Queues async jobs so that at most `maxConcurrent` run at once and batch
// starts are spaced according to `requestsPerSecond`. Jobs are drained in
// FIFO order; note that spacing is measured from the completion of the
// previous batch (the batch is awaited before lastRequestTime is updated).
class RateLimiter {
  // Pending wrapped jobs, drained by processQueue.
  private queue: (() => Promise<any>)[] = []
  // True while processQueue's drain loop is running (prevents re-entry).
  private processing = false
  // Timestamp of the most recent batch completion.
  private lastRequestTime = 0
  // Number of jobs currently executing.
  private activeRequests = 0
  constructor(
    private requestsPerSecond: number,
    private maxConcurrent: number = 25,
  ) {}
  // Queue fn for execution; resolves/rejects with fn's result once it runs.
  // The wrapper catches fn's failure and rejects the returned promise, so
  // the drain loop itself never sees a rejection.
  async enqueue<T>(fn: () => Promise<T>): Promise<T> {
    return new Promise((resolve, reject) => {
      this.queue.push(async () => {
        try {
          this.activeRequests++
          const result = await fn()
          this.activeRequests--
          resolve(result)
        } catch (error) {
          this.activeRequests--
          reject(error)
        }
      })
      this.processQueue()
    })
  }
  // Drain the queue in batches, respecting both the concurrency cap and
  // the minimum spacing derived from requestsPerSecond.
  private async processQueue() {
    if (this.processing || this.queue.length === 0) return
    this.processing = true
    while (this.queue.length > 0) {
      const now = Date.now()
      const timeSinceLastRequest = now - this.lastRequestTime
      const minTimeBetweenRequests = 1000 / this.requestsPerSecond
      // Wait if we've hit the concurrent request limit
      if (this.activeRequests >= this.maxConcurrent) {
        await new Promise((resolve) => setTimeout(resolve, 100))
        continue
      }
      // Wait if we need to respect the rate limit
      if (timeSinceLastRequest < minTimeBetweenRequests) {
        await new Promise((resolve) =>
          setTimeout(resolve, minTimeBetweenRequests - timeSinceLastRequest),
        )
      }
      // Take as many jobs as remaining concurrency allows and run them;
      // the whole batch is awaited before the next batch starts.
      const batch = this.queue.splice(0, this.maxConcurrent - this.activeRequests)
      await Promise.all(batch.map((fn) => fn()))
      this.lastRequestTime = Date.now()
    }
    this.processing = false
  }
}
// Main function that can be imported or run from CLI
export async function migrateMedia(options: MigrateMediaOptions): Promise<void> {
const {
projectId,
dataset,
mediaLibraryId,
sanityToken,
imagesDir,
filesDir,
dataFilePath,
assetsFilePath,
isDryRun = false,
isVerbose = false,
testImageName = null,
includeAspects = false,
} = options
if (!sanityToken || sanityToken.trim() === '') {
throw new Error(
"Sanity auth token not provided. Please run 'sanity debug --secrets' to find it, then provide it using --token argument.",
)
}
if (!mediaLibraryId || mediaLibraryId.trim() === '') {
throw new Error(
'MEDIA_LIBRARY_ID is not set. Ensure it is defined in the environment variables.',
)
}
if (!projectId || projectId.trim() === '') {
throw new Error('Project ID not provided.')
}
if (!dataset || dataset.trim() === '') {
throw new Error('Dataset name not provided.')
}
if (!imagesDir || !fs.existsSync(imagesDir)) {
throw new Error(`Images directory not found: ${imagesDir}`)
}
logStatus(isVerbose, `Using images directory: ${imagesDir}`)
if (!filesDir || !fs.existsSync(filesDir)) {
throw new Error(`Files directory not found: ${filesDir}`)
}
logStatus(isVerbose, `Using files directory: ${filesDir}`)
// --- End Determine Images Directory ---
const mediaLibraryLimiter = new RateLimiter(25, 25)
const documentUpdateLimiter = new RateLimiter(25, 25)
const aspectUpdateLimiter = new RateLimiter(25, 25)
const uploadedAssetIds = new Map<string, UploadResult>()
let allImageFiles: string[]
let allFileFiles: string[]
const parsedDocuments: Document[] = []
let parsedAssets: any
const tags = new Map<string, string>() // store tags from media plugin
// load ndjson data file
try {
const dataFilePath = process.env.DATA_FILE_PATH || 'data.ndjson'
const fileStream = fs.createReadStream(dataFilePath)
const parsedData = fileStream.pipe(ndjson.parse())
for await (const doc of parsedData) {
parsedDocuments.push(doc)
}
} catch (error) {
console.error(`Error reading data file: ${error}`)
throw error
}
// load assets data file
try {
parsedAssets = JSON.parse(fs.readFileSync(assetsFilePath, 'utf8'))
} catch (error) {
console.error(`Error reading assets file: ${error}`)
throw error
}
// parse tags from media plugin
try {
for await (const doc of parsedDocuments) {
if (doc._type === 'media.tag') {
tags.set(doc._id, doc.name.current)
}
}
} catch (error) {
console.error(`Error reading tags from data file: ${error}`)
throw error
}
// read images directory
try {
allImageFiles = fs.readdirSync(imagesDir).filter((filename) => !filename.startsWith('.'))
if (allImageFiles.length === 0) {
throw new Error(`No non-hidden images found in directory: ${imagesDir}`)
}
// Use logStatus
logStatus(
isVerbose,
`Found ${allImageFiles.length} total images in directory (excluding hidden).`,
)
} catch (error) {
console.error(`Error reading images directory: ${error}`) // Keep console.error for errors
throw error
}
// read files directory
try {
allFileFiles = fs.readdirSync(filesDir).filter((filename) => !filename.startsWith('.'))
if (allFileFiles.length === 0) {
throw new Error(`No non-hidden files found in directory: ${filesDir}`)
}
// Use logStatus
logStatus(
isVerbose,
`Found ${allFileFiles.length} total files in directory (excluding hidden).`,
)
} catch (error) {
console.error(`Error reading files directory: ${error}`) // Keep console.error for errors
throw error
}
let imageFilesToProcess: string[]
let fileFilesToProcess: string[]
if (testImageName) {
const testImage = allImageFiles.find((file) => file === testImageName)
if (!testImage) {
console.error(`Error: Image "${testImageName}" not found in images directory`)
throw new Error(`Test image "${testImageName}" not found`)
}
imageFilesToProcess = [testImage]
fileFilesToProcess = [] // Skip files when testing specific image
logStatus(isVerbose, `--test-image specified. Processing only: ${testImageName}`)
} else {
imageFilesToProcess = allImageFiles
fileFilesToProcess = allFileFiles
if (isDryRun && !isVerbose) {
const limit = 5
if (imageFilesToProcess.length > limit) {
logStatus(
isVerbose,
`\nLimiting dry run to the first ${limit} *unprocessed* image files found...`,
)
imageFilesToProcess = imageFilesToProcess.slice(0, limit)
}
if (fileFilesToProcess.length > limit) {
logStatus(
isVerbose,
`\nLimiting dry run to the first ${limit} *unprocessed* files found...`,
)
fileFilesToProcess = fileFilesToProcess.slice(0, limit)
}
}
}
const totalImagesInThisRun = imageFilesToProcess.length
const totalFilesInThisRun = fileFilesToProcess.length
let overallProcessedCount = 0
let overallErrorCount = 0
logStatus(isVerbose, `Starting migration process for ${totalImagesInThisRun} image(s) and ${totalFilesInThisRun} file(s)...`)
  // Number of assets uploaded concurrently per batch.
  // NOTE(review): parseCliArgs() also returns a BATCH_SIZE constant which is not
  // used here — this local constant is what actually governs batching. Confirm
  // which one is intended to be authoritative.
  const BATCH_SIZE = 20
  // Process images in batches of BATCH_SIZE, each batch fully in parallel.
  const totalBatches = Math.ceil(imageFilesToProcess.length / BATCH_SIZE)
  for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
    const batchStart = batchIndex * BATCH_SIZE
    const batchEnd = Math.min(batchStart + BATCH_SIZE, imageFilesToProcess.length)
    const currentBatch = imageFilesToProcess.slice(batchStart, batchEnd)
    const batchNumber = batchIndex + 1
    try {
      // Kick off processing for every image in this batch in parallel; the
      // limiters passed through handle per-API rate limiting internally.
      const batchPromises = currentBatch.map((filename, index) =>
        processImage(
          filename,
          batchStart + index,
          totalImagesInThisRun,
          imagesDir,
          dataFilePath,
          mediaLibraryId,
          sanityToken,
          projectId,
          dataset,
          documentUpdateLimiter,
          mediaLibraryLimiter,
          aspectUpdateLimiter,
          parsedDocuments,
          parsedAssets,
          tags,
          uploadedAssetIds,
          isDryRun,
          isVerbose,
          includeAspects,
        ),
      )
      // Wait for all images in the batch to complete, then tally outcomes.
      // NOTE(review): Promise.all would reject if processImage ever threw; the
      // success/failure split below assumes processImage always resolves with a
      // { success, filename, error? } result — confirm against its implementation.
      const results = await Promise.all(batchPromises)
      const successfulResults = results.filter((r) => r.success)
      overallProcessedCount += successfulResults.length
      overallErrorCount += results.length - successfulResults.length
      // Report batch results (wording differs between dry-run and live mode).
      const batchMessage = isDryRun
        ? `Batch ${batchNumber}/${totalBatches} simulated: ${successfulResults.length}/${currentBatch.length} processed`
        : `Batch ${batchNumber}/${totalBatches} completed: ${successfulResults.length}/${currentBatch.length} processed`
      // NOTE(review): the original comment said "always show" the batch message,
      // but it is only emitted when isVerbose is true — confirm whether this
      // should call logStatus unconditionally instead.
      if (isVerbose) {
        logStatus(isVerbose, batchMessage)
      }
      if (!isVerbose) {
        // Show any errors in non-verbose mode (they're already shown in verbose mode)
        const failedResults = results.filter((r) => !r.success)
        if (failedResults.length > 0) {
          logStatus(isVerbose, ` ${failedResults.length} images failed in this batch:`)
          failedResults.forEach((result) => {
            logStatus(isVerbose, ` - ${result.filename}: ${result.error}`)
          })
        }
      }
      // Small delay between batches to allow rate limiting to catch up
      if (batchIndex < totalBatches - 1) {
        await sleep(500) // Half second between batches
      }
    } catch (batchError) {
      // A failure at the batch level is logged but does not abort later batches.
      console.error(`Error processing batch ${batchNumber}:`, batchError)
    }
  }
  // Process non-image files in batches, mirroring the image loop above but
  // routing each entry through processFile and filesDir instead.
  const totalFileBatches = Math.ceil(fileFilesToProcess.length / BATCH_SIZE)
  for (let batchIndex = 0; batchIndex < totalFileBatches; batchIndex++) {
    const batchStart = batchIndex * BATCH_SIZE
    const batchEnd = Math.min(batchStart + BATCH_SIZE, fileFilesToProcess.length)
    const currentBatch = fileFilesToProcess.slice(batchStart, batchEnd)
    const batchNumber = batchIndex + 1
    try {
      // Kick off processing for every file in this batch in parallel.
      const batchPromises = currentBatch.map((filename, index) =>
        processFile(
          filename,
          batchStart + index,
          totalFilesInThisRun,
          filesDir,
          dataFilePath,
          mediaLibraryId,
          sanityToken,
          projectId,
          dataset,
          documentUpdateLimiter,
          mediaLibraryLimiter,
          aspectUpdateLimiter,
          parsedDocuments,
          parsedAssets,
          tags,
          uploadedAssetIds,
          isDryRun,
          isVerbose,
          includeAspects,
        ),
      )
      // Wait for all files in the batch to complete, then tally outcomes into
      // the same overall counters used by the image loop.
      const results = await Promise.all(batchPromises)
      const successfulResults = results.filter((r) => r.success)
      overallProcessedCount += successfulResults.length
      overallErrorCount += results.length - successfulResults.length
      // Report batch results (wording differs between dry-run and live mode).
      const batchMessage = isDryRun
        ? `File Batch ${batchNumber}/${totalFileBatches} simulated: ${successfulResults.length}/${currentBatch.length} processed`
        : `File Batch ${batchNumber}/${totalFileBatches} completed: ${successfulResults.length}/${currentBatch.length} processed`
      // NOTE(review): as in the image loop, the batch message is only emitted
      // when isVerbose is true despite the original "always show" comment —
      // confirm whether this should be unconditional.
      if (isVerbose) {
        logStatus(isVerbose, batchMessage)
      }
      if (!isVerbose) {
        // Show any errors in non-verbose mode (they're already shown in verbose mode)
        const failedResults = results.filter((r) => !r.success)
        if (failedResults.length > 0) {
          logStatus(isVerbose, ` ${failedResults.length} files failed in this batch:`)
          failedResults.forEach((result) => {
            logStatus(isVerbose, ` - ${result.filename}: ${result.error}`)
          })
        }
      }
      // Small delay between batches to allow rate limiting to catch up
      if (batchIndex < totalFileBatches - 1) {
        await sleep(500) // Half second between batches
      }
    } catch (batchError) {
      // A failure at the batch level is logged but does not abort later batches.
      console.error(`Error processing file batch ${batchNumber}:`, batchError)
    }
  }
  // --- Final Summary ---
  // Report directory totals, what was attempted this run, and success/error
  // counts accumulated across both batch loops.
  logStatus(isVerbose, '\n--- Migration Summary ---')
  logStatus(
    isVerbose,
    `Total images found in directory (excluding hidden): ${allImageFiles.length}`,
  )
  logStatus(
    isVerbose,
    `Total files found in directory (excluding hidden): ${allFileFiles.length}`,
  )
  logStatus(isVerbose, `Images attempted this run: ${totalImagesInThisRun}`)
  logStatus(isVerbose, `Files attempted this run: ${totalFilesInThisRun}`)
  logStatus(isVerbose, `Successfully processed assets this run: ${overallProcessedCount}`)
  // Only mention errors when there were any.
  if (overallErrorCount > 0) {
    logStatus(isVerbose, `Assets with errors during this run: ${overallErrorCount}`)
  }
  // Extra confirmation line for live (non-dry-run) runs that did real work.
  if (!isDryRun && overallProcessedCount > 0) {
    logStatus(isVerbose, `Successfully processed ${overallProcessedCount} asset(s).`)
  }
  logStatus(isVerbose, 'Processing complete!')
}
// CLI entry point when script is run directly.
// Reads connection settings and paths from environment variables (with
// warnings and relative-path fallbacks for the asset directories), prints
// mode banners, and kicks off migrateMedia. Exits non-zero on unhandled errors.
if (require.main === module) {
  const {isDryRun, isVerbose, testImageName, includeAspects} = parseCliArgs()

  if (isDryRun) {
    logStatus(isVerbose, '\n=== DRY RUN MODE ===')
    logStatus(isVerbose, 'No actual changes will be made to the media library or documents')
    logStatus(isVerbose, 'This will simulate the entire process and show what would be done\n')
  }
  if (testImageName) {
    logStatus(isVerbose, `\n=== TESTING SINGLE IMAGE: ${testImageName} ===`)
    logStatus(isVerbose, 'This will process only the specified image through all steps\n')
  }

  // Read Sanity connection settings from environment variables when run as CLI.
  const sanityToken = process.env.SANITY_TOKEN || ''
  const projectId = process.env.SANITY_PROJECT_ID || ''
  const dataset = process.env.SANITY_SOURCE_DATASET || ''
  const mediaLibraryId = process.env.SANITY_MEDIA_LIBRARY_ID || ''

  // Resolve an asset directory from an environment variable; when the variable
  // is unset (or empty), warn and fall back to ./export/<segment> relative to
  // this script.
  const resolveDirFromEnv = (envName: string, fallbackSegment: string): string => {
    const configured = process.env[envName]
    if (configured) {
      return path.resolve(configured)
    }
    logStatus(
      isVerbose,
      `Warning: ${envName} environment variable not set. Falling back to relative path.`,
    )
    return path.resolve(__dirname, 'export', fallbackSegment)
  }

  const imagesDir = resolveDirFromEnv('IMAGES_DIR', 'images')
  const filesDir = resolveDirFromEnv('FILES_DIR', 'files')

  // Export data files, defaulting to the standard export layout next to this script.
  const dataFilePath = process.env.DATA_FILE_PATH
    ? path.resolve(process.env.DATA_FILE_PATH)
    : path.resolve(__dirname, 'export', 'data.ndjson')
  const assetsFilePath = process.env.ASSETS_FILE_PATH
    ? path.resolve(process.env.ASSETS_FILE_PATH)
    : path.resolve(__dirname, 'export', 'assets.ndjson')

  migrateMedia({
    projectId,
    dataset,
    mediaLibraryId,
    sanityToken,
    imagesDir,
    filesDir,
    dataFilePath,
    assetsFilePath,
    isDryRun,
    isVerbose,
    testImageName,
    includeAspects,
  }).catch((error) => {
    logStatus(isVerbose, '\n--- UNHANDLED SCRIPT ERROR ---')
    console.error('Error:', error.message || error)
    process.exit(1)
  })
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment