Created
September 2, 2013 23:23
-
-
Save seriyps/6418131 to your computer and use it in GitHub Desktop.
Gettext .mo format parser for Erlang
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%% @author Sergey Prokhorov <[email protected]> | |
%%% @copyright (C) 2013, Sergey Prokhorov | |
%%% @doc | |
%%% Simple gettext .mo file format parser for Erlang. | |
%%% | |
%%% Produces a [{KeyPlurals::[binary()], TransPlurals::[binary()]}] orddict as
%%% output.
%%% E.g., for a .po file (converted to .mo)
%%% <pre> | |
%%% msgid "Download" | |
%%% msgstr "Скачать" | |
%%% | |
%%% msgid "Stone" | |
%%% msgid_plural "Stones" | |
%%% msgstr[0] "Камень" | |
%%% msgstr[1] "Камня" | |
%%% msgstr[2] "Камней" | |
%%% </pre> | |
%%% it will produce | |
%%% <pre><code> | |
%%% [{[<<"Download">>], [<<"Скачать">>]}, | |
%%% {[<<"Stone">>, <<"Stones">>], [<<"Камень">>, <<"Камня">>, <<"Камней">>]}] | |
%%% </code></pre> | |
%%% TODO: simple MIME parser (for key "" - translation metadata) | |
%%% TODO: plural form expression interpreter | |
%%% @end | |
%%% Created : 3 Sep 2013 by Sergey Prokhorov <[email protected]> | |
-module(mo_parser). | |
-export([parse/1, to_dict/1]). | |
%% Parser state threaded through every parsing step.
%%
%% parse/1 creates the record with only `bin' and `obin' set; the remaining
%% header fields are filled in later by parse_magick/1 and parse_meta/1.
%% Record field types do not implicitly include `undefined' (OTP 19+), so the
%% fields that start out unset declare it explicitly to keep the record
%% construction in parse/1 consistent with the declared types.
-record(st, {
    %% Not-yet-consumed remainder of the input; header parsers strip what
    %% they have read from the front.
    bin :: binary() | undefined,
    %% The complete, untouched input; table and string offsets are resolved
    %% against this binary.
    obin :: binary() | undefined,
    %% Accumulated {OriginalForms, TranslatedForms} pairs.
    catalog = [] :: [{[binary()], [binary()]}],
    %% Byte order detected from the magic number.
    bo :: little | big | undefined,
    %% 32-bit word that follows the magic number.
    version :: integer() | undefined,
    %% Number of entries in each of the two string tables.
    msg_cnt :: integer() | undefined,
    %% Byte offset of the original-strings table within `obin'.
    orig_tab_offset :: integer() | undefined,
    %% Byte offset of the translated-strings table within `obin'.
    trans_tab_offset :: integer() | undefined
}).
%% @doc Parse a gettext .mo catalogue.
%%
%% Accepts either a file name (as a string), whose contents are read with
%% file:read_file/1, or the raw contents as a binary. Returns the final
%% parser state; pass it to to_dict/1 to obtain the list of entries.
%% Crashes (badmatch / function_clause) if the file cannot be read or the
%% data is not a well-formed .mo file.
-spec parse(file:filename() | binary()) -> #st{}.
parse(Path) when is_list(Path) ->
    {ok, Contents} = file:read_file(Path),
    parse(Contents);
parse(Contents) when is_binary(Contents) ->
    %% `obin' keeps the whole input because table offsets are absolute.
    Initial = #st{bin = Contents, obin = Contents},
    WithHeader = parse_meta(parse_magick(Initial)),
    parse_catalog(WithHeader, 0).
%% @doc Extract the list of {OriginalForms, TranslatedForms} pairs from the
%% parser state returned by parse/1.
-spec to_dict(#st{}) -> [{[binary()], [binary()]}].
to_dict(#st{} = State) ->
    State#st.catalog.
%% Detect the byte order from the 4-byte magic number 16#950412de and read
%% the 32-bit word that follows it in that same byte order.
%%
%% The magic is written out byte by byte: DE 12 04 95 is its little-endian
%% encoding, 95 04 12 DE its big-endian one. Input that starts with neither
%% sequence (or is too short) fails with function_clause.
parse_magick(#st{bin = <<16#de, 16#12, 16#04, 16#95,
                         Version:32/little, Tail/binary>>} = State) ->
    State#st{bin = Tail, bo = little, version = Version};
parse_magick(#st{bin = <<16#95, 16#04, 16#12, 16#de,
                         Version:32/big, Tail/binary>>} = State) ->
    State#st{bin = Tail, bo = big, version = Version}.
%% Read the three 32-bit header words that follow the version: the message
%% count, the offset of the original-strings table and the offset of the
%% translated-strings table. They are decoded using the byte order that
%% parse_magick/1 stored in `bo', and consumed from `bin'.
parse_meta(#st{bo = little,
               bin = <<Count:32/little,
                       OrigOffset:32/little,
                       TransOffset:32/little,
                       Tail/binary>>} = State) ->
    State#st{bin = Tail,
             msg_cnt = Count,
             orig_tab_offset = OrigOffset,
             trans_tab_offset = TransOffset};
parse_meta(#st{bo = big,
               bin = <<Count:32/big,
                       OrigOffset:32/big,
                       TransOffset:32/big,
                       Tail/binary>>} = State) ->
    State#st{bin = Tail,
             msg_cnt = Count,
             orig_tab_offset = OrigOffset,
             trans_tab_offset = TransOffset}.
%% Collect catalogue entries starting at index Index until Index reaches
%% `msg_cnt'.
%%
%% For each index the original and the translated string are looked up at the
%% same position in their respective tables and stored as one pair. Entries
%% are prepended while iterating and reversed once at the end, so the final
%% list is in table order.
parse_catalog(#st{msg_cnt = Count, catalog = Collected} = State, Count) ->
    State#st{catalog = lists:reverse(Collected)};
parse_catalog(#st{} = State, Index) ->
    Data = State#st.obin,
    ByteOrder = State#st.bo,
    Original = get_string(Index, State#st.orig_tab_offset, Data, ByteOrder),
    Translated = get_string(Index, State#st.trans_tab_offset, Data, ByteOrder),
    Entry = {Original, Translated},
    parse_catalog(State#st{catalog = [Entry | State#st.catalog]}, Index + 1).
%% Fetch the Index-th string (as a list of its NUL-separated forms) from the
%% table that starts at byte TableOffset of Data.
%%
%% Each table entry is 8 bytes: a 32-bit length followed by a 32-bit offset
%% of the string data, both in the given byte order. Fails with badmatch if
%% the entry lies outside Data.
get_string(Index, TableOffset, Data, little) ->
    EntryOffset = TableOffset + Index * 8,
    <<_:EntryOffset/binary, Descriptor:8/binary, _/binary>> = Data,
    <<Length:32/little, Position:32/little>> = Descriptor,
    get_strings(Position, Length, Data);
get_string(Index, TableOffset, Data, big) ->
    EntryOffset = TableOffset + Index * 8,
    <<_:EntryOffset/binary, Descriptor:8/binary, _/binary>> = Data,
    <<Length:32/big, Position:32/big>> = Descriptor,
    get_strings(Position, Length, Data).
%% Cut Length bytes starting at byte Position out of Data and split them on
%% NUL bytes, yielding one binary per plural form. A string without NUL
%% bytes yields a single-element list. Fails with badmatch if the requested
%% range does not fit inside Data.
get_strings(Position, Length, Data) ->
    <<_:Position/binary, Forms:Length/binary, _/binary>> = Data,
    binary:split(Forms, <<0>>, [global]).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This module was later included in https://github.com/seriyps/gettexter