Skip to content

Instantly share code, notes, and snippets.

@jokea
Created June 19, 2012 08:50
Show Gist options
  • Save jokea/2953077 to your computer and use it in GitHub Desktop.
Save jokea/2953077 to your computer and use it in GitHub Desktop.
Erlang programming ex3_9
-module(ex3_9).
-export([convert_to_rawdoc/1, convert_raw/1, make_index/1, print/1]).
%%% Convert a file into a raw document.
%%
%% Opens File in raw mode with a 1024-byte read-ahead buffer and passes the
%% handle to do_read/2 together with an empty list of lines; the result of
%% do_read/2 is returned.
%% If the file cannot be opened, a message naming File and the reason is
%% written with io:format/2 and the result of that call is returned instead.
convert_to_rawdoc(File) ->
    OpenModes = [raw, {read_ahead, 1024}],
    case file:open(File, OpenModes) of
        {ok, Handle} ->
            NoLinesYet = [],
            do_read(Handle, NoLinesYet);
        {error, Reason} ->
            io:format("Error opening ~p: ~p~n", [File, Reason])
    end.
%% Reads every remaining line from Handle and returns List followed by one
%% {Line} tuple per line read, in file order. A single trailing line feed is
%% removed from each line.
%%
%% Handle is closed before returning, both at end of file and on a read error.
%% On a read error the reason is printed and [] is returned, so lines
%% collected so far (including List) are discarded.
do_read(Handle, List) ->
    %% Lines are collected newest-first and reversed once at the end, so the
    %% cost is linear in the number of lines instead of re-copying the whole
    %% result for every line read.
    read_lines(Handle, lists:reverse(List)).

%% Accumulating loop behind do_read/2; Acc holds the lines newest-first.
read_lines(Handle, Acc) ->
    case file:read_line(Handle) of
        {ok, Data} ->
            read_lines(Handle, [{strip_newline(Data)} | Acc]);
        eof ->
            file:close(Handle),
            lists:reverse(Acc);
        {error, Reason} ->
            io:format("Error reading file: ~p~n", [Reason]),
            file:close(Handle),
            []
    end.

%% Drops one trailing LF if present; any other data (including an empty
%% list) is returned unchanged.
strip_newline(Data) ->
    case lists:reverse(Data) of
        [$\n | RevLine] -> lists:reverse(RevLine);
        _ -> Data
    end.
%%% Convert a raw document to a document.
%%
%% Raw is a list of {Content} tuples; the conversion itself is done by
%% do_convert/2, started with an empty result.
convert_raw(Raw) ->
    EmptyDoc = [],
    do_convert(Raw, EmptyDoc).
%% Threads the document R through parse/2 once per {Content} entry of the
%% raw document, in order, and returns the final document.
do_convert(RawLines, R) ->
    AddLine = fun({Content}, Doc) -> parse(Content, Doc) end,
    lists:foldl(AddLine, R, RawLines).
%% Splits one line into words and returns the document R with a {Word}
%% tuple appended for each word, in order of appearance.
%% Words are delimited by the space character only; runs of spaces produce
%% no empty words.
parse(Line, R) ->
    %% Build this line's words first and append to R a single time, rather
    %% than re-copying the whole document for every word found.
    R ++ line_words(Line).

%% Returns the {Word} tuples of a line, using getword/2 to cut each word.
line_words([]) ->
    [];
line_words([$\s | Rest]) ->
    %% Skip separators that precede a word.
    line_words(Rest);
line_words(Line) ->
    {Word, Rest} = getword(Line, []),
    [{Word} | line_words(Rest)].
%% Cuts the first word off a character list.
%%
%% Returns {Word, Rest} where Word is R followed by every character up to
%% (not including) the first space, and Rest is what follows that space.
%% When there is no space, Rest is []. Only the single separating space is
%% consumed; further spaces stay in Rest.
getword(Chars, R) ->
    %% splitwith/2 walks the word once, avoiding an append per character.
    {Word, Tail} = lists:splitwith(fun(C) -> C =/= $\s end, Chars),
    Rest =
        case Tail of
            [$\s | AfterSpace] -> AfterSpace;
            [] -> []
        end,
    {R ++ Word, Rest}.
%%% Indexing a document.
%%
%% Delegates to do_index/3, starting from an empty index and numbering the
%% first entry of Doc as position 0.
make_index(Doc) ->
    EmptyIndex = [],
    FirstPos = 0,
    do_index(Doc, EmptyIndex, FirstPos).
%% Walks the document, recording for each {Word} entry the position at which
%% it occurs. Pos is the position of the first remaining entry and grows by
%% one per entry. Returns the index R extended with the new occurrences:
%% a list of {Word, Positions} in order of first occurrence, each Positions
%% list in order of occurrence.
do_index([], Index, _Pos) ->
    Index;
do_index([{Word} | Rest], Index, Pos) ->
    do_index(Rest, record_position(Word, Pos, Index), Pos + 1).

%% Adds Pos to the entry for Word, creating the entry at the end of the
%% index when the word has not been seen before.
record_position(Word, Pos, Index) ->
    case lists:keyfind(Word, 1, Index) of
        {Key, Positions} ->
            Updated = {Key, lists:append(Positions, [Pos])},
            lists:keyreplace(Word, 1, Index, Updated);
        false ->
            lists:append(Index, [{Word, [Pos]}])
    end.
%%% Pretty print of index.
%%
%% Converts index entries of the form {Word, Positions} into
%% {Word, Ranges}, where Ranges is produced by combine_list/1.
%%
%% Returns:
%%   ok                  for an empty index,
%%   {Word, Ranges}      for an index with exactly one entry,
%%   [{Word, Ranges}]    (one per entry, in order) for two or more entries.
%%
%% Indexes with more than one entry used to fail with function_clause; the
%% results for empty and single-entry indexes are unchanged.
%% NOTE(review): despite the name, nothing is written to standard output;
%% confirm whether the entries were meant to be printed.
print([]) ->
    ok;
print([Entry]) ->
    compress_entry(Entry);
print([_, _ | _] = Index) ->
    lists:map(fun compress_entry/1, Index).

%% Replaces the position list of one index entry by its combined ranges.
compress_entry({Word, Positions}) ->
    {Word, combine_list(Positions)}.
%% Turns a list of positions into a list of ranges: the positions are
%% sorted, passed through remove_redundant/1, and then handed to
%% do_combine/2 with an empty result.
combine_list(List) ->
    Unique = remove_redundant(lists:sort(List)),
    do_combine(Unique, []).
%% Runs dedup/2 over List, starting with nothing kept.
remove_redundant(List) ->
    NothingKept = [],
    dedup(List, NothingKept).
%% Appends the elements of List to R, skipping every element that compares
%% equal (==) to the element kept immediately before it. Only adjacent
%% duplicates are removed, so the input must be sorted for a full dedup.
dedup(List, R) ->
    %% The kept elements are held newest-first so the previous element is
    %% the head of the accumulator; this avoids walking and copying the
    %% whole result for every input element.
    lists:reverse(dedup_rev(List, lists:reverse(R))).

%% Accumulating loop behind dedup/2; Acc holds kept elements newest-first.
dedup_rev([], Acc) ->
    Acc;
dedup_rev([H | T], [Prev | _] = Acc) when H == Prev ->
    dedup_rev(T, Acc);
dedup_rev([H | T], Acc) ->
    dedup_rev(T, [H | Acc]).
%% Folds a list of numbers into {Min, Max} ranges appended to R.
%%
%% Each number either extends the most recent range (when it equals that
%% range's Max + 1) or starts a new {N, N} range. Runs are only detected
%% between consecutive elements, so the input is expected to be sorted.
%%
%% The most recent range is always the one that gets extended, even when an
%% equal range occurs earlier in the result; earlier ranges are never moved
%% or removed.
do_combine(List, R) ->
    %% Ranges are held newest-first so the range to extend is the head of
    %% the accumulator; the result is put back in order once at the end.
    lists:reverse(combine_rev(List, lists:reverse(R))).

%% Accumulating loop behind do_combine/2; Acc holds ranges newest-first.
combine_rev([], Acc) ->
    Acc;
combine_rev([H | T], []) ->
    combine_rev(T, [{H, H}]);
combine_rev([H | T], [{Min, Max} | Older] = Acc) ->
    case H == Max + 1 of
        true -> combine_rev(T, [{Min, H} | Older]);
        false -> combine_rev(T, [{H, H} | Acc])
    end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment