Created
February 4, 2012 19:59
-
-
Save zaach/1739769 to your computer and use it in GitHub Desktop.
PO parser from http://jsgettext.berlios.de/lib/Gettext.js adapted for Node.js and modified to be more like po2json.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node
/*
PO parser from http://jsgettext.berlios.de/lib/Gettext.js
adapted for Node.js and modified to be more like po2json.pl
- Zach Carter <[email protected]>
*/
/*
Pure Javascript implementation of Uniforum message translation.
Copyright (C) 2008 Joshua I. Miller <[email protected]>, all rights reserved
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published
by the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
var fs = require('fs');
var path = require('path');
// Raw process arguments: [node binary, script path, ...user arguments].
var argv = process.argv;
// True when the -p (pretty-print) flag appears anywhere on the command line.
var pretty = argv.indexOf('-p') !== -1;
/**
 * Read a PO file, convert it with parse_po and print the JSON to stdout.
 *
 * In pretty mode (module-level `pretty` is truthy) the parsed data is wrapped
 * in an object keyed by the file's base name without the ".po" extension and
 * indented; otherwise the parsed data is printed compactly and unwrapped.
 *
 * @param {string} file - Path of the PO file to convert.
 * @throws Rethrows any error reported by fs.readFile (from inside the
 *         callback), and whatever fs.realpathSync throws for a bad path.
 */
function parse (file) {
  fs.readFile(fs.realpathSync(file), 'utf8', function (err, data) {
    if (err) throw err;
    var parsed = parse_po(data);
    if (!pretty) {
      console.log(JSON.stringify(parsed));
      return;
    }
    var result = {};
    result[path.basename(file, '.po')] = parsed;
    // perl JSON encoder uses three spaces, so indent with three to match it
    console.log(JSON.stringify(result, null, '   '));
  });
}
// Separator inserted between msgctxt and msgid when building a lookup key
// (the EOT character, U+0004). Written as \x04 because the legacy octal
// escape "\004" is a SyntaxError in strict mode and in ES modules.
var context_glue = "\x04";
/**
 * Parse the text of a gettext PO file into a plain object.
 *
 * Each translated entry becomes a key/value pair:
 *   key   - the msgid, or msgctxt + context_glue + msgid when a non-empty
 *           msgctxt is present;
 *   value - an array [msgid_plural or null, msgstr[0], msgstr[1], ...].
 * Entries with no msgstr at all are dropped (this does not conform to the
 * msgfmt specs). The "" key always holds an object of header fields taken
 * from the entry whose msgid is empty, or {} when there is none. Obsolete
 * entries prefixed with "#~ " are parsed like normal ones.
 *
 * Strings are only dequoted by parse_po_dequote; escape sequences such as
 * a backslash followed by "n" are kept as written in the file.
 *
 * Problems are not fatal: they are collected and printed with console.warn.
 *
 * @param {string} data - Complete contents of a PO file.
 * @returns {Object} Map of message keys to translation arrays, plus the
 *                   header object under "".
 */
var parse_po = function(data) {
  var rv = {};
  var buffer = {};
  var lastbuffer = "";
  var errors = [];
  var has_own = Object.prototype.hasOwnProperty;

  // Store value as an own property. Plain assignment would hit the
  // prototype setter for a key such as "__proto__" and lose the entry.
  function set_own(obj, key, value) {
    Object.defineProperty(obj, key, {
      value: value,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }

  // Move the entry collected in buffer into rv and start a fresh buffer.
  // Does nothing (and keeps the buffer) while no msgid has been seen.
  function flush_entry() {
    if (typeof buffer['msgid'] === 'undefined') return;

    var has_ctxt = typeof buffer['msgctxt'] !== 'undefined' &&
      buffer['msgctxt'].length;
    var msg_ctxt_id = has_ctxt ?
      buffer['msgctxt'] + context_glue + buffer['msgid'] :
      buffer['msgid'];

    var has_plural = typeof buffer['msgid_plural'] !== 'undefined' &&
      buffer['msgid_plural'].length;
    var msgid_plural = has_plural ? buffer['msgid_plural'] : null;

    // find msgstr_* translations and place them at their plural index
    var trans = [];
    for (var str in buffer) {
      var m = str.match(/^msgstr_(\d+)/);
      if (m) trans[parseInt(m[1], 10)] = buffer[str];
    }
    trans.unshift(msgid_plural);

    // only add it if we've got a translation
    // NOTE: this doesn't conform to msgfmt specs
    if (trans.length > 1) set_own(rv, msg_ctxt_id, trans);

    buffer = {};
    lastbuffer = "";
  }

  // Begin collecting a keyword's string; continuation lines append to it.
  function start_field(name, raw) {
    lastbuffer = name;
    buffer[name] = parse_po_dequote(raw);
  }

  var lines = data.split("\n");
  for (var i = 0; i < lines.length; i++) {
    // chomp
    var line = lines[i].replace(/(\n|\r)+$/, '');
    var match;

    if (/^\s*$/.test(line)) {
      // Blank (or whitespace-only) line / end of an entry.
      flush_entry();
    } else if (/^(#[^~]|#$)/.test(line)) {
      // Comment: ignored. "#~" (obsolete entry) lines fall through below.
    } else if (match = line.match(/^(?:#~ )?msgctxt\s+(.*)/)) {
      // A new entry may start without a separating blank line.
      flush_entry();
      start_field('msgctxt', match[1]);
    } else if (match = line.match(/^(?:#~ )?msgid\s+(.*)/)) {
      // Only flushes when a msgid was already collected, so a msgctxt that
      // precedes this msgid stays attached to it.
      flush_entry();
      start_field('msgid', match[1]);
    } else if (match = line.match(/^(?:#~ )?msgid_plural\s+(.*)/)) {
      start_field('msgid_plural', match[1]);
    } else if (match = line.match(/^(?:#~ )?msgstr\s+(.*)/)) {
      // plain msgstr is treated like msgstr[0]
      start_field('msgstr_0', match[1]);
    } else if (match = line.match(/^(?:#~ )?msgstr\[(\d+)\]\s+(.*)/)) {
      start_field('msgstr_' + match[1], match[2]);
    } else if (/^(?:#~ )?"/.test(line) && lastbuffer !== "") {
      // continued string
      buffer[lastbuffer] += parse_po_dequote(line);
    } else {
      // something strange (includes a continuation with nothing to continue);
      // i is the zero-based line index
      errors.push("Strange line ["+i+"] : "+line);
    }
  }

  // handle the final entry (file may not end with a blank line)
  flush_entry();

  // parse out the header: the msgstr of the entry with the empty msgid
  if (rv[""] && rv[""][1]) {
    var cur = {};
    // fields are separated by the literal two characters backslash + n
    var hlines = rv[""][1].split(/\\n/);
    for (var h = 0; h < hlines.length; h++) {
      var hline = hlines[h];
      if (!hline.length) continue;

      var pos = hline.indexOf(':');
      if (pos === -1) {
        errors.push("PROBLEM LINE IN HEADER: "+hline);
        set_own(cur, hline, '');
        continue;
      }

      var key = hline.substring(0, pos);
      // leading spaces of the value are kept on purpose (the perl script
      // keeps them)
      var val = hline.substring(pos + 1);
      if (has_own.call(cur, key) && cur[key].length) {
        errors.push("SKIPPING DUPLICATE HEADER LINE: "+hline);
      } else if (/#-#-#-#-#/.test(key)) {
        errors.push("SKIPPING ERROR MARKER IN HEADER: "+hline);
      } else {
        set_own(cur, key, val);
      }
    }
    // replace header string with assoc array
    rv[""] = cur;
  } else {
    rv[""] = {};
  }

  // GNU Gettext silently ignores errors; here they are reported on stderr
  // but never abort the conversion.
  if (errors.length) console.warn(errors.join("\n"));

  return rv;
};
/**
 * Strip the surrounding double quotes from a PO string token.
 *
 * An optional leading "#~ " (obsolete-entry marker) is skipped. When the
 * text contains a quoted section, everything between the first and the last
 * double quote is kept; otherwise the text is used unchanged. Escaped quotes
 * (backslash + quote) are then turned into plain quotes. No other escape
 * sequence is decoded.
 *
 * @param {string} str - Raw text following a keyword, or a continuation line.
 * @returns {string} The dequoted string.
 */
var parse_po_dequote = function(str) {
  var match = str.match(/^(?:#~ )?"(.*)"/);
  if (match) {
    str = match[1];
  }
  return str.replace(/\\"/g, '"');
};
// Command-line entry point. The PO file is the first argument that is not a
// flag, so "-p" may come before or after it. Usage is printed when -h is
// given or when no file is named (e.g. only "-p" was passed), instead of
// handing an undefined path to parse().
var po_files = argv.slice(2).filter(function (arg) {
  return arg !== '-p' && arg !== '-h';
});
if (argv.indexOf('-h') >= 0 || po_files.length === 0) {
  console.log(path.basename(argv[1])+" {-p} {file.po} > {outputfile.json}\n -p : do pretty-printing of json data\n");
} else {
  parse(po_files[0]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This was extremely helpful. Thanks... I forked it and added some code to let you use it in a node runtime environment. It's a bit rough... doesn't use pretty option, but here it is: https://gist.github.com/2846424