Created
October 7, 2020 11:19
-
-
Save sebres/1a87b68b8d82071985c1359a2e31c999 to your computer and use it in GitHub Desktop.
poparse.tcl -- PO files (Portable Object) (GNU gettext format) parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#**************************************************************************
# parse
#   Internally used proc to parse PO files (Portable Object) (GNU gettext
#   format).
# Parameters:
#   f_ -- channel opened for reading; it is read up to EOF and is NOT
#         closed here.
# Returns:
#   A dict mapping msgid -> msgstr. A msgid of the form "[group].rest" is
#   stored nested as  group -> rest -> msgstr.
#   The dict always contains the empty key {} with an empty value, which
#   marks the catalog as loaded (this also replaces the PO header entry,
#   whose msgid is "").
# Notes:
#   msgctxt and plural forms (msgid_plural / msgstr[N]) get no special
#   handling.
#**************************************************************************
proc parse { f_ } {
  ## TODO [SB]: file should be read partially, possible with other RE (non greedy ".*?" - could be slow by big files) :
  ## Pad the text with a newline on both sides: the RE below requires a
  ## newline in front of "msgid" and after the last msgstr line, so without
  ## padding an entry on the very first line, or a last entry without a
  ## trailing newline, would be silently dropped.
  set text_ "\n[read $f_]\n"
  set dcat [dict create]
  ## PO (C-style) escapes. [string map] tries the keys in list order at each
  ## position, so the escaped backslash must come first - otherwise "\\n"
  ## (a literal backslash followed by "n") would be decoded as a newline.
  set unescape [list \\\\ \\ \\n \n \\r \r \\t \t \\\" \"]
  set st 0
  while { [regexp -start $st -indices {\nmsgid\M(.*?)\nmsgstr\M(.*?)\n(?:\n|$)} $text_ wh idSpan strSpan] } {
    ## indices are inclusive, so the next search starts ON the final newline
    ## of this match, which lets it serve as the leading "\n" of the next entry :
    set st [lindex $wh 1]
    ## join the quoted fragments of (possibly multi-line) msgid / msgstr :
    foreach {name span} [list id $idSpan str $strSpan] {
      set raw [string range $text_ [lindex $span 0] [lindex $span 1]]
      set val ""
      foreach line [split [string trim $raw " \n"] "\n"] {
        set t {}
        regexp {"(.*)"} [string trim $line] -> t
        append val $t
      }
      set $name [string map $unescape $val]
    }
    ## check group syntax "[group].id" (on match, id becomes the part after the group) :
    set grouped [regexp {^\[([^\[\]]+)\]\.(.*)$} $id -> grp id]
    ## save memory by equal id/str messages (share one value).
    ## Must be a string comparison: "==" would compare numerically and
    ## e.g. treat "1" and "1.0" as equal, destroying the translation.
    if { $str eq $id } {
      set str $id
    }
    if { $grouped } {
      ## add to group :
      dict set dcat $grp $id $str
    } else {
      dict set dcat $id $str
    }
  }
  ## create one empty elm, cause dcat should be not empty - mark as already loaded :
  dict set dcat {} {}
  return $dcat
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment