Translate all locale strings with missing translations in a Sanity dataset - https://sanity.io
/* eslint-disable no-console */
import client from 'part:@sanity/base/client'
import reduce from 'json-reduce'
// see https://github.com/sanity-io/sanity/blob/next/packages/example-studio/schemas/languages.js for an example
import {SUPPORTED_LANGUAGES} from '../schemas/languages'
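// Note: the shape below is an assumption based on the fields this script reads from each
// language entry – adjust it to match your own languages.js (ids/titles are placeholders):
// SUPPORTED_LANGUAGES = [
//   {id: 'en', title: 'English', googleTranslateCode: 'en'},
//   {id: 'nb', title: 'Norwegian', googleTranslateCode: 'no'}
// ]
// `id` is the field name used on localeString objects, and `googleTranslateCode` is the
// target language code passed to the Google Translate API.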
import Translate from '@google-cloud/translate'
const translateAPI = new Translate({
  projectId: '<your google cloud project id>',
  key: '<your google cloud api key>'
})
// This example shows how you may write a migration script that recursively collects all values of a certain type.
// In this case it collects all localeStrings that have missing translations, calls the Google Translate API and
// patches these localeStrings.
// NOTE: We're using the export API, which fetches and traverses *all* documents in the dataset. This is fine
// as long as the dataset contains a relatively limited number of documents, but if the dataset is large, some
// kind of streaming with backpressure should be applied.
// This script will exit if any of the mutations fail due to a revision mismatch, which means the document was
// edited between fetch and update (see the ifRevisionID key in each patch).
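// The script assumes it is run from inside a Sanity Studio project (it imports the
// part:@sanity/base/client part), e.g. with `sanity exec <path-to-this-file> --with-user-token`.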
// Fetch all documents in the dataset
const fetchDocuments = () =>
  client
    .request({
      url: `/data/export/${client.config().dataset}`
    })
    .then(res =>
      res
        .trim()
        .split('\n')
        .map(line => JSON.parse(line))
    )
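// (the export endpoint responds with NDJSON – one JSON-encoded document per line – hence the
// trim/split/parse above)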
const serializePath = path => {
  return path.reduce((target, part, i) => {
    const isIndex = typeof part === 'number'
    const separator = i === 0 ? '' : '.'
    const add = isIndex ? `[${part}]` : `${separator}${part}`
    return `${target}${add}`
  }, '')
}
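// e.g. serializePath(['sections', 0, 'heading']) => 'sections[0].heading'
// (a made-up keypath, just to illustrate the output format)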
// For a given document, collect a list of all locale strings and their keypath
const collectLocaleStringNodes = document =>
  reduce(
    document,
    (prev, value, keyPath) =>
      value && value._type === 'localeString'
        ? [
            ...prev,
            {
              path: serializePath(keyPath),
              current: value
            }
          ]
        : prev,
    []
  )
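// Example (field and language names are made up): a document containing
//   {title: {_type: 'localeString', en: 'Hello'}}
// yields
//   [{path: 'title', current: {_type: 'localeString', en: 'Hello'}}]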
const hasMissingTranslations = localeString =>
  SUPPORTED_LANGUAGES.some(lang => lang.googleTranslateCode && !(lang.id in localeString))
const hasPossibleSourceLang = localeString =>
  SUPPORTED_LANGUAGES.some(lang => lang.googleTranslateCode && lang.id in localeString)
const isFixable = localeString =>
  hasMissingTranslations(localeString) && hasPossibleSourceLang(localeString)
const translateLang = async (text, targetLang) => {
  return (await translateAPI.translate(text, targetLang.googleTranslateCode))[0]
}
const translateLocaleString = async localeString => {
  const missingLangs = SUPPORTED_LANGUAGES.filter(
    lang => lang.googleTranslateCode && !(lang.id in localeString)
  )
  const sourceLang = SUPPORTED_LANGUAGES.find(lang => (localeString[lang.id] || '').trim())
  if (!sourceLang) {
    // no source to translate from
    return localeString
  }
  const sourceText = localeString[sourceLang.id]
  const translated = await Promise.all(
    missingLangs.map(async lang => {
      try {
        return {[lang.id]: await translateLang(sourceText, lang)}
      } catch (err) {
        console.log(`Could not translate "${sourceText}" to ${lang.title}: ${err.message}`)
      }
      return {}
    })
  )
  return Object.assign({}, localeString, ...translated)
}
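// Example (language ids are placeholders): given {_type: 'localeString', en: 'Hello'} with a
// missing 'nb' translation, this resolves to {_type: 'localeString', en: 'Hello', nb: '<translated text>'}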
const translateAll = fixableDocs => {
  return Promise.all(
    fixableDocs.map(async fixable => ({
      ...fixable,
      localeStringNodes: await Promise.all(
        fixable.localeStringNodes.map(async node => ({
          ...node,
          translated: await translateLocaleString(node.current)
        }))
      )
    }))
  )
}
const batch = (array, batchSize) => {
  if (array.length === 0) {
    return array
  }
  if (array.length <= batchSize) {
    return [array]
  }
  return [array.slice(0, batchSize), ...batch(array.slice(batchSize), batchSize)]
}
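// e.g. batch([1, 2, 3, 4, 5], 2) => [[1, 2], [3, 4], [5]]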
const createSetPatches = localeStrings =>
  localeStrings.reduce(
    (patches, node) => ({
      ...patches,
      [node.path]: node.translated
    }),
    {}
  )
const createPatch = doc => ({
  id: doc._id,
  ifRevisionID: doc._rev,
  set: createSetPatches(doc.localeStringNodes)
})
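// A resulting patch looks roughly like this (ids, revisions and paths are placeholders):
// {id: 'doc-id', ifRevisionID: 'rev-id', set: {title: {_type: 'localeString', en: 'Hello', nb: '...'}}}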
const createTransaction = patches =>
  patches.reduce((tx, patch) => {
    const {id, ...rest} = patch
    return tx.patch(id, rest)
  }, client.transaction())
const logTodo = fixable => {
  const stringsCount = fixable.reduce(
    (count, fixableItem) => count + fixableItem.localeStringNodes.length,
    0
  )
  console.log(`Fixing ${stringsCount} localeString field(s) in ${fixable.length} document(s)`)
}
const logResults = results => {
  const count = results.reduce((total, res) => total + res.documentIds.length, 0)
  console.log(`Translated localeStrings in ${count} document(s) (${results.length} transaction(s))`)
}
const runMigration = async () => {
  const fixableDocs = (await fetchDocuments())
    .map(document => ({
      // for each document we collect all locale strings recursively, together with
      // their keypath (i.e. the location in the document)
      _id: document._id,
      _rev: document._rev,
      localeStringNodes: collectLocaleStringNodes(document)
    }))
    .filter(document => document.localeStringNodes.some(node => isFixable(node.current)))
  // Now fixableDocs is an array of
  // {_id: string, _rev: string, localeStringNodes: [{path: string, current: LocaleString}]}
  logTodo(fixableDocs)
  // Translate all fixable docs
  const fixedDocs = await translateAll(fixableDocs)
  // Now fixedDocs is an array of
  // {_id: string, _rev: string, localeStringNodes: [{path: string, current: LocaleString, translated: LocaleString}]}
  // Convert to Sanity patches
  const patches = fixedDocs.reduce((acc, fixedDoc) => acc.concat(createPatch(fixedDoc)), [])
  // Create transactions from patches in batches of n
  const transactions = batch(patches, 10).map(createTransaction)
  // Commit each batch in sequence
  const results = await transactions.reduce(
    async (prev, tx) => [...(await prev), await tx.commit()],
    Promise.resolve([])
  )
  logResults(results)
}
runMigration().catch(err => {
  console.error(err)
  process.exit(1)
})