Created
February 1, 2023 00:44
-
-
Save qgustavor/3c9e14fdb80f14a5267f5a0477f01933 to your computer and use it in GitHub Desktop.
A customizable script that fixes common issues in anime subtitles such as wrong romanization, bad line breaking, basic time shifts and font scaling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { parseFlags } from 'https://deno.land/x/[email protected]/flags/mod.ts' | |
import parse from 'https://cdn.skypack.dev/pin/@qgustavor/[email protected]/mode=imports/optimized/@qgustavor/ass-parser.js' | |
import stringify from 'https://cdn.skypack.dev/pin/@qgustavor/[email protected]/mode=imports/optimized/@qgustavor/ass-stringify.js' | |
import JSON5 from 'https://deno.land/x/[email protected]/mod.ts' | |
// Built-in defaults. Precedence (lowest to highest): defaults, config.json, CLI flags.
const defaultConfig = {
  targetDir: '..',
  filenameReplacement: null,
  handleLineBreaks: false,
  shiftTimes: false,
  fontScale: 1,
  replacements: []
}

// A missing config.json simply means "use the defaults". Any other failure
// (for example a denied read permission) is rethrown instead of being hidden.
const storedConfig = JSON5.parse(
  await Deno.readTextFile('config.json').catch(error => {
    if (error instanceof Deno.errors.NotFound || error.code === 'ENOENT') return '{}'
    throw error
  })
)

const { flags } = parseFlags(Deno.args, {
  flags: [{
    name: 'targetDir',
    aliases: ['target-dir'],
    // Explicit type so the flag always consumes a value instead of acting as a switch.
    type: 'string'
  }, {
    name: 'animeId',
    aliases: ['anime-id'],
    type: 'number'
  }, {
    name: 'handleLineBreaks',
    aliases: ['handle-line-breaks'],
    type: 'boolean'
  }, {
    name: 'shiftTimes',
    aliases: ['shift-times'],
    type: 'number'
  }, {
    name: 'fontScale',
    aliases: ['font-scale'],
    // Deliberately no `default` here: a flag default ends up in `flags` even when
    // the flag is not passed, which would always override `fontScale` from
    // config.json. The fallback of 1 already lives in `defaultConfig`.
    type: 'number'
  }]
})

// Later spreads win, so only flags that were actually passed override the file.
const config = {
  ...defaultConfig,
  ...storedConfig,
  ...flags
}
// Character list for the anime: read from the local cache (characters.json) when
// present, otherwise downloaded from the Jikan API and cached for later runs.
const hasCharacterList = payload => Array.isArray(payload?.data)

let characterData
try {
  characterData = JSON5.parse(await Deno.readTextFile('characters.json'))
} catch (e) {
  // Only a missing cache file triggers the download; everything else is a real error.
  if (e.code !== 'ENOENT') throw e
  if (!config.animeId) config.animeId = Number(prompt('Enter MyAnimeList Anime ID:'))
  if (!config.animeId) throw Error('Invalid or missing anime ID')
  const response = await fetch(`https://api.jikan.moe/v4/anime/${config.animeId}/characters`)
  // Never cache an error payload (rate limit, unknown ID, ...): it would be
  // reused on every later run and break the script until deleted by hand.
  if (!response.ok) {
    throw new Error(`Fetching characters for anime ${config.animeId} failed with HTTP ${response.status}`)
  }
  characterData = await response.json()
  if (!hasCharacterList(characterData)) {
    throw new Error(`Unexpected response for anime ${config.animeId}: missing "data" array`)
  }
  await Deno.writeTextFile('characters.json', JSON.stringify(characterData))
}

// Also guards against a stale or hand-edited cache file.
if (!hasCharacterList(characterData)) {
  throw new Error('characters.json has no "data" array; delete it to download it again')
}
// Escapes regex metacharacters so a character name is always matched literally.
const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

// Builds /a|b|c/ from literal words. An empty list (or empty words) would give an
// empty pattern that matches at every position, so fall back to a pattern that
// never matches.
const buildAlternation = (words, flags) => {
  const alternatives = words.filter(Boolean).map(escapeRegExp)
  return new RegExp(alternatives.length > 0 ? alternatives.join('|') : '(?!)', flags)
}

// Full character names with "Family, Given" turned into "Family Given";
// names carrying a parenthesised part are skipped.
const names = characterData.data
  .map(e => e.character.name.replace(', ', ' '))
  .filter(e => !e.match(/\(.*\)/))

// Unique individual words taken from all names.
const isolatedNames = Array.from(new Set(names.join(' ').split(' ')))

// One entry per word whose simplified romanization differs from the full one:
// [regex finding the simplified form, full form to write, simplified form].
const replacementMap = isolatedNames.map(name => {
  // Simplified spelling as often found in subtitles: "ou" -> "o", doubled vowels collapsed.
  const replacement = name
    .replace(/ou/g, 'o')
    .replace(/([aiueo])\1/g, '$1')
  if (replacement === name) return null
  // The prefix group also accepts the ASS escapes \N, \n and \h right before the word.
  const regex = new RegExp('(^|\\b|\\\\[Nnh])(' + escapeRegExp(replacement) + ')(\\b|$)', 'gi')
  return [regex, name, replacement]
}).filter(e => e)

// Cheap pre-check before running every per-word regex. Case-insensitive like the
// per-word regexes, otherwise all-caps lines would never reach them.
const matcher = buildAlternation(replacementMap.map(e => e[2]), 'i')

// Matches words made only of romanized Japanese syllables.
const japaneseRegex = /^([kstnhmyrgzpbp]?[aiueo]|([sc]h|[knhmrgbp]y)[auo]|[sc]hi|tsu|[fz]u|wa|n|ji|d[aeo])+$/i

// Same order as `names`: two-word, fully Japanese names have their words swapped,
// every other name is kept as is. Index i here corresponds to names[i].
const invertedNames = names.map(e => {
  const parts = e.split(' ')
  if (parts.length !== 2 || !parts[0].match(japaneseRegex) || !parts[1].match(japaneseRegex)) {
    return e
  }
  return parts.reverse().join(' ')
})

// Finds any swapped name so it can be mapped back to the order used in `names`.
const invertedMatcher = buildAlternation(invertedNames, 'g')
// String.prototype.replaceAll throws a TypeError for a RegExp without the `g`
// flag, so promote such regexes to global ones. Plain strings pass through.
const toGlobalSearch = search =>
  search instanceof RegExp && !search.global
    ? new RegExp(search.source, search.flags + 'g')
    : search

// Parse the configured replacements once instead of once per dialogue line.
const compiledReplacements = config.replacements.map(([searchStr, replacement]) => [
  toGlobalSearch(parseRegex(searchStr)),
  replacement
])

const filenameSearch = config.filenameReplacement
  ? toGlobalSearch(parseRegex(config.filenameReplacement[0]))
  : null

// Output directory, resolved relative to this script. The trailing slash matters:
// without it URL resolution treats the last segment as a file and drops it, so
// files would land next to the intended folder instead of inside it.
const targetDirSpec = String(config.targetDir)
const targetDirUrl = new URL(
  targetDirSpec.endsWith('/') ? targetDirSpec : `${targetDirSpec}/`,
  import.meta.url
)

// Process every .ass file of the current working directory.
for await (const file of Deno.readDir('.')) {
  if (!file.name.endsWith('.ass')) continue

  const data = await Deno.readTextFile(file.name)
  const parsed = parse(data, { comments: true })
  // A file without an Events section has no dialogue to fix.
  const events = parsed.find(e => e.section === 'Events')?.body ?? []

  for (const event of events) {
    if (event.key !== 'Dialogue') continue
    let text = event.value.Text

    // Restore the full romanization of character names, keeping all-caps lines all-caps.
    if (text.match(matcher)) {
      for (const [replacement, name] of replacementMap) {
        text = text.replaceAll(replacement, (all, prefix, oldWord) => {
          const allCaps = !oldWord.match(/[a-z]/)
          return prefix + (allCaps ? name.toUpperCase() : name)
        })
      }
    }

    // Put swapped names back into the word order used by the character list.
    text = text.replaceAll(invertedMatcher, e => {
      const index = invertedNames.indexOf(e)
      if (index === -1) {
        console.log('Inverted name matching error with', e)
        return e
      }
      return names[index]
    })

    // Positioned, moving or clipped lines are left alone.
    if (config.handleLineBreaks && !text.match(/\{.*\\(pos|move|clip).*\}/)) {
      text = text
        // Removes line breaks before single words
        .replace(/\s*\\N\s*(\S+\s*$)/m, ' $1')
        // Moves line breaks close to punctuation
        .replace(/([,.?!]|-(?!\w))([^,.?!-]{1,4})\s*\\N\s*/m, '$1\\N$2 ')
        // Removes repeated spaces and trim
        .replace(/ +/g, ' ').trim()

      // Length is measured without override tags ({...}).
      if (text.replaceAll(/\{.*?\}/g, '').length < 45) {
        text = text
          // Removes line breaks from short lines
          .replace(/([^?!.]*)\\N(.*?)$/, '$1 $2')
          // Removes repeated spaces
          .replace(/ +/g, ' ')
      }
    }

    // Handle replacements
    for (const [search, replacement] of compiledReplacements) {
      text = text.replaceAll(search, replacement)
    }
    event.value.Text = text

    // Shift timings
    if (config.shiftTimes) {
      event.value.Start = handleShift(event.value.Start, config.shiftTimes)
      event.value.End = handleShift(event.value.End, config.shiftTimes)
    }
  }

  // Handle font sizes
  if (config.fontScale !== 1) {
    parsed.find(e => e.section.includes('Styles'))?.body.forEach(style => {
      if (style.key !== 'Style') return
      style.value.Fontsize = Math.round(style.value.Fontsize * config.fontScale)
    })
  }

  const filename = filenameSearch
    ? file.name.replaceAll(filenameSearch, config.filenameReplacement[1])
    : file.name
  // Percent-encode the name so '#', '?' and '%' stay part of the file name
  // instead of being read as URL fragment, query or escape sequences.
  const targetPath = new URL(encodeURIComponent(filename), targetDirUrl)
  await Deno.writeTextFile(targetPath, stringify(parsed))
}
/**
 * Shifts an ASS timestamp by a number of seconds.
 *
 * The result is clamped at zero and rounded to whole centiseconds, the precision
 * of the `H:MM:SS.CC` format. The hour is written in full, so timestamps of ten
 * hours or more keep all their digits.
 *
 * @param {string} time - Timestamp made of colon-separated numbers, e.g. `0:01:02.50`.
 * @param {number} delta - Seconds to add; negative values shift earlier.
 * @returns {string} The shifted timestamp as `H:MM:SS.CC`.
 * @throws {RangeError} When `time` or `delta` does not yield a finite number.
 */
function handleShift (time, delta) {
  const seconds = time.split(':').reduce((sum, e) => sum * 60 + Number(e), 0) + delta
  if (!Number.isFinite(seconds)) {
    throw new RangeError(`Invalid time value: ${time}`)
  }

  // Work in integer centiseconds: rounding here absorbs floating point error
  // such as 61.15 * 100 coming out slightly below 6115.
  const totalCentiseconds = Math.round(Math.max(0, seconds) * 100)
  const totalSeconds = Math.floor(totalCentiseconds / 100)

  const pad = value => String(value).padStart(2, '0')
  const hours = Math.floor(totalSeconds / 3600)
  const minutes = Math.floor(totalSeconds / 60) % 60

  return `${hours}:${pad(minutes)}:${pad(totalSeconds % 60)}.${pad(totalCentiseconds % 100)}`
}
// Edited from the regex-parser npm package
/**
 * Turns a search term from the config into a value usable with
 * String.prototype.replace / replaceAll.
 *
 * A term written as `/pattern/flags` with at least one flag becomes a RegExp.
 * Anything else is returned unchanged and is therefore searched literally. That
 * includes plain text, a pattern without flags, repeated flags, and flags that
 * JavaScript does not support.
 *
 * @param {string} input - The search term.
 * @returns {RegExp|string} A RegExp for `/pattern/flags` input, otherwise `input` itself.
 * @throws {Error} When `input` is not a string.
 * @throws {SyntaxError} When the pattern itself is not a valid regular expression.
 */
function parseRegex (input) {
  // Validate input
  if (typeof input !== 'string') {
    throw new Error('Invalid input. Input must be a string')
  }

  // Parse input: optional leading slash, pattern, the same delimiter again, flags
  const m = input.match(/(\/?)(.+)\1([a-z]*)/i)

  // Nothing to parse ('' or only line terminators): keep it as a literal
  if (!m) return input

  // Require valid flags: only flags JavaScript knows, each at most once.
  // Other flavours' flags (x, U, A, J, ...) would make `new RegExp` throw.
  if (!m[3] || !/^(?!.*?(.).*?\1)[dgimsuvy]+$/.test(m[3])) {
    return input
  }

  // Create the regular expression
  return new RegExp(m[2], m[3])
}
/* | |
Example config.json: | |
{ | |
replacements: [ | |
// NO sempai | |
['/SEMPAI/g', 'SENPAI'], | |
['/([Ss])empai/g', '$1enpai'], | |
// Shin'ichi | |
['Shinichi', "Shin'ichi"], | |
// Missing spaces | |
['/(\\.\\.\\.|…|[.?!])([A-Za-z0-9])/g', '$1 $2'], | |
// Remove honorifics (mostly are wrong, add the correct ones using honorifics-fixer) | |
['/-(senpai|sama|kun|chan|sensei)(\\b|$)/g', ''] | |
] | |
} | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment