Created
December 16, 2024 19:37
-
-
Save joncardasis/67b1aa7c6dbfe10ad2e731d2153d651c to your computer and use it in GitHub Desktop.
Pseudolocalization inspired by Netflix (https://netflixtechblog.com/pseudo-localization-netflix-12fff76fbcbe)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {pseudoLocalizeString} from 'utils/pseudoLocalize' | |
describe('pseudoLocalizeString', () => {
  // Each row pairs a source string with the exact pseudo-localized output
  // expected for it (accented letters plus duplicated vowels).
  const cases: Array<[string, string]> = [
    ['Hello, world!', 'Ħḗḗḗŀŀǿǿǿ, ẇǿǿřŀḓ!'],
    ['Get help online.', 'Ɠḗḗḗŧ ħḗḗŀƥ ǿǿƞŀīīƞḗḗ.'],
    [
      'The quick brown fox jumps over the lazy dog',
      'Ŧħḗḗḗ ɋŭŭŭīīīƈķ ƀřǿǿǿẇƞ ƒǿǿǿẋ ĵŭŭḿƥş ǿǿṽḗḗř ŧħḗḗ ŀȧȧẑẏ ḓǿǿɠ',
    ],
  ]

  it.each(cases)(
    'should pseudo-localize strings with +35% longer text',
    (source, pseudoLocalized) => {
      const actual = pseudoLocalizeString(source)
      const minimumLength = source.length * 1.35

      // The output must match character-for-character...
      expect(actual).toBe(pseudoLocalized)
      // ...and be at least 35% longer than the source string.
      expect(actual.length).toBeGreaterThanOrEqual(minimumLength)
    }
  )
})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable id-length */ | |
// Lookup table from each ASCII letter to an accented look-alike.
// Lower- and upper-case entries are listed side by side; characters that are
// not keys of this table have no replacement here.
const ACCENTED_MAP = {
  a: 'ȧ', A: 'Ȧ',
  b: 'ƀ', B: 'Ɓ',
  c: 'ƈ', C: 'Ƈ',
  d: 'ḓ', D: 'Ḓ',
  e: 'ḗ', E: 'Ḗ',
  f: 'ƒ', F: 'Ƒ',
  g: 'ɠ', G: 'Ɠ',
  h: 'ħ', H: 'Ħ',
  i: 'ī', I: 'Ī',
  j: 'ĵ', J: 'Ĵ',
  k: 'ķ', K: 'Ķ',
  l: 'ŀ', L: 'Ŀ',
  m: 'ḿ', M: 'Ḿ',
  n: 'ƞ', N: 'Ƞ',
  o: 'ǿ', O: 'Ǿ',
  p: 'ƥ', P: 'Ƥ',
  q: 'ɋ', Q: 'Ɋ',
  r: 'ř', R: 'Ř',
  s: 'ş', S: 'Ş',
  t: 'ŧ', T: 'Ŧ',
  v: 'ṽ', V: 'Ṽ',
  u: 'ŭ', U: 'Ŭ',
  w: 'ẇ', W: 'Ẇ',
  x: 'ẋ', X: 'Ẋ',
  y: 'ẏ', Y: 'Ẏ',
  z: 'ẑ', Z: 'Ẑ',
}
/**
 * Pseudo-localizes a string: every character that has an entry in
 * `ACCENTED_MAP` is replaced by its accented look-alike, and ASCII vowels are
 * duplicated to emulate text that is roughly 35% longer.
 *
 * Inspired by pseudo-localization from Netflix
 * (https://netflixtechblog.com/pseudo-localization-netflix-12fff76fbcbe)
 *
 * The extra length is produced only by repeating vowels, so a string that
 * contains no ASCII vowel is accented but not lengthened.
 *
 * @param string - The source text to pseudo-localize.
 * @returns The accented text with vowels repeated to approach the target length.
 */
export const pseudoLocalizeString = (string: string): string => {
  const ASCII_VOWELS = 'aeiouAEIOU'
  const characters = string.split('')

  // Vowels are detected on the ORIGINAL characters. Lower-casing the input
  // first is unsafe because it can change the string's length (e.g. U+0130
  // lower-cases to two code units), which would misalign every later position.
  const vowelCount = characters.filter(char =>
    ASCII_VOWELS.includes(char)
  ).length

  // Emulate 35% longer text by duplicating vowels
  const targetLength = Math.ceil(string.length * 1.35)
  const distribution = distribute(vowelCount, targetLength - string.length)

  // Build the result in a single pass so no index/offset bookkeeping is needed.
  let vowelIndex = 0
  return characters
    .map(char => {
      const accented = ACCENTED_MAP[char as keyof typeof ACCENTED_MAP] ?? char
      if (!ASCII_VOWELS.includes(char)) {
        return accented
      }
      // The n-th vowel receives the n-th share of the extra characters.
      const extraCopies = distribution[vowelIndex] ?? 0
      vowelIndex += 1
      return accented.repeat(extraCopies + 1)
    })
    .join('')
}
/**
 * Splits `spend` into `length` whole-number shares, front-loaded so that the
 * earliest slots absorb the remainder.
 * E.g., a length of 5 and a spend of 12 gives [3, 3, 2, 2, 2].
 *
 * @param length - Number of slots to produce.
 * @param spend - Total amount to spread across the slots.
 * @returns An array of `length` shares.
 */
function distribute(length: number, spend: number): number[] {
  const share = Math.floor(spend / length)
  const extras = spend % length
  const slots = new Array<number>(length)
  for (let slot = 0; slot < length; slot++) {
    // The first `extras` slots each take one more than the base share.
    slots[slot] = slot < extras ? share + 1 : share
  }
  return slots
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment