Skip to content

Instantly share code, notes, and snippets.

@flodolo
Created March 16, 2015 15:57
Show Gist options
  • Save flodolo/ae0b111711ab09ad1229 to your computer and use it in GitHub Desktop.
Save flodolo/ae0b111711ab09ad1229 to your computer and use it in GitHub Desktop.
Clean XLIFF (php test)
#!/usr/bin/env php
<?php
// User-facing messages. $command_help is printed for -h/--help;
// $missing_parameter is printed when fewer than two file paths are given.
$command_help = "clean_xliff - Clean localized XLIFF file\n" .
    "Usage: clean_xliff [reference_file] [localized file]\n\n" .
    "Example: clean_xliff en-US/firefox-ios.xliff it/firefox-ios.xliff\n" .
    "(remove obsolete strings from it/firefox-ios.xliff)\n";
// The hint must name this command (it previously pointed at "mark_active").
$missing_parameter = "This command needs more parameters, please check clean_xliff --help.\n";

// The script reads $argv and rewrites a file on disk, so refuse to run
// under any SAPI other than the command line.
if (php_sapi_name() !== 'cli') {
    die('This command can only be used in CLI mode.');
}

// Help request: print usage and stop.
if (isset($argv[1]) && in_array($argv[1], ['-h', '--help'], true)) {
    die($command_help);
}

// $argv[0] is the script name, so two file paths mean at least 3 entries.
if (count($argv) < 3) {
    die($missing_parameter);
}
/**
 * Load an XLIFF file and bind its default namespace to the "x" prefix so the
 * document can be queried with XPath expressions such as "//x:file".
 *
 * Terminates the script with exit status 1 and a message on STDERR when the
 * file cannot be read/parsed or when its root element has no default
 * namespace (in which case the "x:" queries used by this script cannot work).
 *
 * @param string $path Path of the XLIFF file to load.
 *
 * @return SimpleXMLElement Root element with the "x" prefix registered.
 */
function load_xliff_root($path)
{
    $root = simplexml_load_file($path);
    if ($root === false) {
        // simplexml_load_file() returns false on failure; calling methods on
        // it would be a fatal error, so stop with a readable message instead.
        fwrite(STDERR, "Unable to read or parse XML file: {$path}\n");
        exit(1);
    }

    // The default namespace is stored under the empty-string key.
    $namespaces = $root->getNamespaces();
    if (! isset($namespaces[''])) {
        fwrite(STDERR, "No default XML namespace found in file: {$path}\n");
        exit(1);
    }
    $root->registerXPathNamespace('x', $namespaces['']);

    return $root;
}

$ref_root = load_xliff_root($argv[1]);
$l10n_root = load_xliff_root($argv[2]);
// Set to true as soon as something is removed from the localized document.
$file_changed = false;

// Index the reference document:
// - $ref_files lists the "original" attribute of every <file> element;
// - $ref_strings maps each of those names to the list of trans-unit IDs it
//   contains (IDs are grouped per file because the same ID can appear in
//   more than one <file>).
$ref_files = [];
$ref_strings = [];
if (count($ref_root)) {
    foreach ($ref_root->xpath('//x:file') as $ref_file) {
        $original = (string) $ref_file['original'];
        $ref_files[] = $original;

        $unit_ids = [];
        foreach ($ref_file->body->{'trans-unit'} as $unit) {
            $unit_ids[] = (string) $unit['id'];
        }
        $ref_strings[$original] = $unit_ids;
    }
}
// Walk the localized document, removing <file> elements and trans-units that
// no longer exist in the reference, and record what the localized file has:
// - $l10n_files lists the "original" attribute of every <file> found;
// - $l10n_strings maps each non-obsolete file name to its trans-unit IDs.
$l10n_files = [];
$l10n_strings = [];
if (count($l10n_root)) {
    $files = $l10n_root->xpath('//x:file');
    foreach ($files as $file_element) {
        $filename = (string) $file_element['original'];
        $l10n_files[] = $filename;

        if (! in_array($filename, $ref_files, true)) {
            echo "Removing obsolete <file> element ({$filename})\n";
            // unset($element[0]) detaches the element itself from the document.
            unset($file_element[0]);
            $file_changed = true;
            continue;
        }

        // File is not obsolete, check strings inside it.
        $l10n_strings[$filename] = [];

        // Nodes can't be unset while iterating without breaking the
        // iterator, so obsolete nodes are collected first and removed after
        // the loop. Holding the node objects also avoids building an XPath
        // query from the file name and string ID, which fails when either
        // value contains an apostrophe.
        $obsolete_units = [];
        foreach ($file_element->body->{'trans-unit'} as $trans_unit) {
            $string_id = (string) $trans_unit['id'];
            $l10n_strings[$filename][] = $string_id;
            // Strict comparison: IDs are strings and must match exactly.
            if (! in_array($string_id, $ref_strings[$filename], true)) {
                echo "Removing obsolete string from {$filename}. String ID: {$string_id}.\n";
                $obsolete_units[] = $trans_unit;
            }
        }

        foreach ($obsolete_units as $obsolete_unit) {
            unset($obsolete_unit[0]);
            $file_changed = true;
        }
    }
}
if ($file_changed) {
    // Save the XML back to the localized file, dropping the empty lines left
    // behind where nodes were removed.
    $xml_output = $l10n_root->asXML();
    if (! is_string($xml_output)) {
        fwrite(STDERR, "Unable to serialize the cleaned XML document.\n");
        exit(1);
    }

    $kept_lines = [];
    foreach (preg_split('/$\R?^/m', $xml_output) as $line) {
        if (trim($line) !== '') {
            $kept_lines[] = $line;
        }
    }

    // Separator first: the legacy implode($array, $separator) argument order
    // is rejected by PHP 8.
    $new_output = implode("\n", $kept_lines);
    if (file_put_contents($argv[2], $new_output) === false) {
        fwrite(STDERR, "Unable to write file: {$argv[2]}\n");
        exit(1);
    }
} else {
    echo "\nNo obsolete strings found.\n";
}
// Warn about <file> elements that exist in the reference but not in the
// localized file.
$missing_files = array_diff($ref_files, $l10n_files);
if (count($missing_files) > 0) {
    echo "\nMissing <file> elements in localized file:\n";
    foreach ($missing_files as $missing_file) {
        echo "* {$missing_file}\n";
    }
}

// Warn about strings that exist in the reference but not in the localized
// file, grouped by <file> element.
foreach ($ref_strings as $filename => $strings) {
    // No string list was recorded for this file on the localized side
    // (e.g. the whole <file> is missing, which is already reported above).
    // Skip it rather than handing an undefined value to array_diff().
    if (! isset($l10n_strings[$filename])) {
        continue;
    }

    $missing_strings = array_diff($strings, $l10n_strings[$filename]);
    if (count($missing_strings) > 0) {
        echo "\nMissing strings in localized file {$filename}:\n";
        foreach ($missing_strings as $string_id) {
            echo "* {$string_id}\n";
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment