mareknowak · May 23, 2020 17:49
diff --git a/index.erl b/index.erl
 %% FP in Erlang 3.3
 %%
 %% Programming challenge: indexing a file

 -module(index).
 -export(
   [ get_file_contents/1
   , show_file_contents/1
   , split_index/2
   , split_index_bin/2
   , write_list/2
   , traverse/3
   , main/2 % main function
   , test/0
   ]).

 %% To generate index from a text file use main/2:
 %% Example:
 %% index:main("dickens-christmas.txt", "index.txt")
 %%
 %% Output of the main function contains also times of execution of reading from
 %% file, generating index and writing to file.

 % Used to read a file into a list of lines.
 % Example files available in:
 %   gettysburg-address.txt (short)
 %   dickens-christmas.txt  (long)


 % Get the contents of a text file into a list of lines.
 % Each line has its trailing newline removed.

 get_file_contents(Name) ->
    {ok,File} = file:open(Name,[read]),
    Rev = get_all_lines(File,[]),
    lists:reverse(Rev).

 % Auxiliary function for get_file_contents.
 % Not exported.

 get_all_lines(File,Partial) ->
    case io:get_line(File,"") of
        eof -> file:close(File),
               Partial;
        Line -> {Strip,_} = lists:split(length(Line)-1,Line),
                get_all_lines(File,[Strip|Partial])
    end.

 % Show the contents of a list of strings.
 % Can be used to check the results of calling get_file_contents.

 show_file_contents([L|Ls]) ->
    io:format("~s~n",[L]),
    show_file_contents(Ls);
 show_file_contents([]) ->
    ok.


 %% Auxiliary functions


 %% @doc Test whether first argument is a member of the second argument (list)
 %%

 member(_X, []) -> false;
 member(X, [X | _Ys]) -> true;
 member(X, [_Y | Ys]) -> member(X, Ys).

 test_member() ->
    true = member(2,[2,0,0,1]),
    false = member(20,[2,0,0,1]),
    ok.

 %% @doc Is a word common?

 is_common(Word) ->
    Common =
        [ "about"
        , "after"
        , "again"
        , "all"
        , "and"
        , "any"
        , "are"
        , "been"
        , "before"
        , "but"
        , "came"
        , "can"
        , "come"
        , "could"
        , "couldn't"
        , "did"
        , "didn't"
        , "don't"
        , "done"
        , "every"
        , "for"
        , "from"
        , "had"
        , "has"
        , "have"
        , "having"
        , "her"
        , "him"
        , "himself"
        , "his"
        , "i'd"
        , "i'll"
        , "i'm"
        , "i've"
        , "into"
        , "isn't"
        , "it's"
        , "its"
        , "knew"
        , "know"
        , "like"
        , "man"
        , "man's"
        , "men"
        , "more"
        , "mrs"
        , "much"
        , "not"
        , "now"
        , "out"
        , "said"
        , "she"
        , "should"
        , "than"
        , "that"
        , "that's"
        , "the"
        , "their"
        , "them"
        , "then"
        , "there"
        , "there's"
        , "these"
        , "they"
        , "they'd"
        , "this"
        , "those"
        , "upon"
        , "very"
        , "was"
        , "wasn't"
        , "were"
        , "what"
        , "when"
        , "will"
        , "with"
        , "would"
        , "wouldn't"
        , "you"
        , "you'd"
        , "you'll"
        , "you're"
        , "your"
        ],
    member(Word, Common).

 %% @doc Input: a list of words.
 %%      Output: list of words not shorter then 3 chars and not common

 clean([], Cleaned) -> Cleaned;
 clean([Word|Words], Cleaned) ->
    case length(Word) < 3 of
        true -> clean(Words, Cleaned);
        false ->
            %% drop common words
            case is_common(Word) of
                true -> clean(Words, Cleaned);
                false -> clean(Words, [Word|Cleaned])
            end
    end.

 clean(Words) ->
    clean(Words, []).

 %% @doc Get words from a line

 line_to_words(Line) ->
    %% Let's be case-agnostic
    CF = string:casefold(Line),
    %% return strings separated by a char
    Words = string:lexemes(CF, "\s\t.,:;-+()[]{}~*=%&#\"!-_\\`?"),
    clean(Words).

 %% Auxiliary functions that relate list of integers and list of ranges
 %%
 %% @doc Convert ranges to list of ordered integers

 range_to_list(Range) ->
    {From, To} = Range,
    lists:seq(From, To).

 test_range_to_list() ->
    [1, 2, 3] = range_to_list({1, 3}),
    [11, 12, 13] = range_to_list({11, 13}),
    [8] = range_to_list({8, 8}),
    ok.

 ranges_to_list([], Out) -> Out;
 ranges_to_list([R|Rs], Out) ->
    L = range_to_list(R),
    ranges_to_list(Rs, Out ++ L).

 ranges_to_list(Ranges) ->
    ranges_to_list(Ranges, []).

 test_ranges_to_list() ->
    [3, 4] = ranges_to_list([{3,4}]),
    [3, 4, 5, 7, 11, 12, 13] = ranges_to_list([{3,5},{7,7},{11,13}]),
    ok.

 %% @doc Input: an integer and a list of integers
 %%      Output Range for given integer and the rest of list of integers

 get_range_aux({From, To}, []) ->
    {{From, To}, []};
 get_range_aux({From, To}, [N| Ints]) when N =< To->
    get_range_aux({From, To}, Ints);
 get_range_aux({From, To}, [N| Ints]) ->
    case N == To + 1 of
        true -> get_range_aux({From, N}, Ints);
        false -> {{From, To}, [N| Ints]}
    end.

 get_range(N, List) ->
    get_range_aux({N, N}, List).

 test_get_range() ->
    {{1,3}, [5, 6]} = get_range(1, [1,2,3,5,6]),
    {{4,6}, [8, 9]} = get_range(4, [1,2,3,4,5,6, 8, 9]),
    ok.

 %% @doc Convert list of integers to list of ranges

 list_to_ranges(Ranges, []) -> lists:reverse(Ranges);
 list_to_ranges(Ranges, [N| Ints]) ->
    {Range, Rest} = get_range(N, Ints),
    list_to_ranges([Range|Ranges], Rest).

 list_to_ranges(List) ->
    list_to_ranges([], List).

 test_list_to_ranges() ->
    [{3,5},{7,7},{11,13}] = list_to_ranges([3, 4, 5, 7, 11, 12, 13]),
    [{3,5}] = list_to_ranges([3, 4, 4, 4, 5]),
    ok.

 %% @doc Return list with inserted element in the correct place

 insert(Xs, [], Z) ->
    lists:reverse(Xs) ++ [Z];
 insert(Xs, [Y| Ys], Z) when Z =< Y ->
    lists:reverse(Xs) ++ [Z, Y| Ys];
 insert(Xs, [Y| Ys], Z) ->
    insert([Y| Xs], Ys, Z).

 insert(List, Z) ->
    insert([], List, Z).

 test_insert() ->
    [1, 2, 3, 4] = insert([1, 2, 4], 3),
    [1, 2, 2, 3, 4] = insert([1, 2, 3, 4], 2),
    [1, 2, 3, 4] = insert([1, 2, 3], 4),
    ok.

 %% @doc Add N to ranges

 update_ranges(Ranges, N) ->
    L = ranges_to_list(Ranges),
    NewL = insert(L, N),
    list_to_ranges(NewL).

 test_update_ranges() ->
    [{3,5}] = update_ranges([{3,4}], 5),
   [{3,4}, {7,7}, {19,23}] = update_ranges([{3,4}, {7,7}, {19,23}], 20),
    ok.

 %% @doc Split index (linear version)
 %% Left contains elements with words less than Word
 %% Right - not less than Word

 split_index(Word, Index) ->
    IsLess =
        fun (Ind) ->
                {W, _} = Ind,
                W < Word
        end,
    lists:partition(IsLess, Index).

 test_split_index() ->
    Index = [{"a", [{1,1}]}, {"b", [{2,3}]}],
    Left = [{"a", [{1,1}]}],
    Right = [{"b", [{2,3}]}],
    {Left, Right} = split_index("b", Index),
    ok.

 %% @doc Add Left list to Right

 add([], Sum) -> Sum;
 add([L| Left], Right) ->
    add(Left, [L| Right]).

 %% @doc Split index (binary version)
 %% Left contains elements with words less than Word
 %% Right - not less than Word
 %% ToSplit - list to be splitted into Left and Right
 %%
 %% To be tail recursive I avoid using ++ so lists must be reversed back and
 %% forth
 %%
 %% Binary split is faster than linear one. For "dickens-christmat.txt" it takes
 %% about 1s (vs. 2s in linear case).

 split_index_bin(Left, Right, _Word, []) -> {lists:reverse(Left), Right};
 split_index_bin(Left, Right, Word, [Element]) ->
    {W, _} = Element,
    case W < Word of
        true -> split_index_bin([Element| Left], Right, Word, []);
        false -> split_index_bin(Left, [Element| Right], Word, [])
    end;
 split_index_bin(Left, Right, Word, ToSplit) ->
    N = length(ToSplit),
    {L, R} = lists:split(N div 2, ToSplit),
    {W, _} = hd(R),
    case W < Word of
        %% all words in L are less then Word so add L
        %% to Left
        true ->
            split_index_bin(add(L, Left), Right, Word, R);
        %% Word is not greater than a word in R so
        %% R is joined with Right
        false ->
            split_index_bin(Left, add(lists:reverse(R), Right), Word, L)
    end.

 split_index_bin(_Word, []) -> {[], []};
 split_index_bin(Word, Index) ->
    split_index_bin([], [], Word, Index).

 test_split_index_bin() ->
    Index = [{"a", [{1,1}]}, {"b", [{2,3}]}],
    Left = [{"a", [{1,1}]}],
    Right = [{"b", [{2,3}]}],
    {Left, Right} = split_index_bin("b", Index),
    {Index, []} = split_index_bin("c", Index),
    {[], Index} = split_index_bin("a", Index),
    ok.

 update_index({Word, Line}, []) -> [{Word, [{Line, Line}]}];
 update_index({Word, Line}, Index) ->
    %% Split Index
    %% Left contains elements less than Word
    %%
    %% Binary split for "dickens-christmas.txt" is 2x faster than
    %% linear split. Function main/2 measures those times.
    %%
    %% Uncomment to switch between different flavours of split index
    %%
    %% {Left, Right} = split_index(Word, Index), % linear split
    {Left, Right} = split_index_bin(Word, Index), % binary split
    case length(Right) == 0 of
        %% Word is the largest element in the index
        true -> Left ++ [{Word, [{Line, Line}]}];
        false ->
            %% take an element from Right
            %% and compare to Word
            [{W, Ranges}| Rest] = Right,
            case Word == W of
                true ->
                    NewRanges = update_ranges(Ranges, Line),
                    Left ++ [{W, NewRanges}| Rest];
                false ->
                    %% Add element in front of Right
                    NewRight = [{Word, [{Line, Line}]}, {W, Ranges}| Rest],
                    Left ++ NewRight
            end
    end.

 test_update_index() ->
    [{"elephant", [{8, 8}]}] = update_index({"elephant", 8}, []),
    [{"elephant", [{8, 10}]}, {"home", [{1, 2}]}] =
        update_index(
          %% new word
          {"elephant", 8},
          %% index
          [{"elephant", [{9,10}] }, {"home", [{1, 2}]}]),
    ok.

 %% @doc Write a list to file

 write_list(_File, []) -> ok;
 write_list(File, [Word| Words]) ->
    file:write_file(File, io_lib:fwrite("~p\n", [Word]), [append]),
    write_list(File, Words).

 add_line_number(_N, [], Out) -> Out;
 add_line_number(N, [L|Ls], Out) ->
    add_line_number(N, Ls, [{L, N}|Out]).

 words_to_index([], Index) -> Index;
 words_to_index([L|Ls], Index) ->
    NewIndex = update_index(L, Index),
    words_to_index(Ls, NewIndex).

 traverse(_N, [], Index) -> Index;
 traverse(N, [Line| Lines], Index) ->
    Words = line_to_words(Line),
    Inds = add_line_number(N, Words, []),
    NewIndex = words_to_index(Inds, Index),
    traverse(N + 1, Lines, NewIndex).

 %% @doc Main function
 %% Input, Output - names of files

 main(Input, Output) ->
    %% Read file
    %% Timer measures execution time of a function
    {Time1, Lines} = timer:tc(index, get_file_contents, [Input]),
    {Time2, Index} = timer:tc(index, traverse, [1, Lines, []]),
    file:write_file(Output, io_lib:fwrite("Reading from file takes ~p ms.\n\n", [Time1])),
    %% Generating index for "dickens-christmas.txt" takes about 1s for if
    %% update_index uses binary split and 2s for linear split
    file:write_file(Output, io_lib:fwrite("Generating index takes ~p ms.\n\n", [Time2]), [append]),
    {Time3, _} = timer:tc(index, write_list, [Output, Index]),
    %% Writing to file takes about 4s.
    file:write_file(Output, io_lib:fwrite("\nWriting to file takes ~p ms.\n", [Time3]), [append]),
    ok.

 test() ->
    ok = test_range_to_list(),
    ok = test_ranges_to_list(),
    ok = test_member(),
    ok = test_get_range(),
    ok = test_list_to_ranges(),
    ok = test_insert(),
    ok = test_update_ranges(),
    ok = test_split_index(),
    ok = test_split_index_bin(),
    ok = test_update_index(),
    ok.
	%% FP in Erlang 3.3
	%%
	%% Programming challenge: indexing a file

	-module(index).
	-export(
	[ get_file_contents/1
	, show_file_contents/1
	, split_index/2
	, split_index_bin/2
	, write_list/2
	, traverse/3
	, main/2 % main function
	, test/0
	]).

	%% To generate index from a text file use main/2:
	%% Example:
	%% index:main("dickens-christmas.txt", "index.txt")
	%%
	%% Output of the main function contains also times of execution of reading from
	%% file, generating index and writing to file.

	% Used to read a file into a list of lines.
	% Example files available in:
	% gettysburg-address.txt (short)
	% dickens-christmas.txt (long)


	% Get the contents of a text file into a list of lines.
	% Each line has its trailing newline removed.

	get_file_contents(Name) ->
	{ok,File} = file:open(Name,[read]),
	Rev = get_all_lines(File,[]),
	lists:reverse(Rev).

	% Auxiliary function for get_file_contents.
	% Not exported.

	get_all_lines(File,Partial) ->
	case io:get_line(File,"") of
	eof -> file:close(File),
	Partial;
	Line -> {Strip,_} = lists:split(length(Line)-1,Line),
	get_all_lines(File,[Strip\|Partial])
	end.

	% Show the contents of a list of strings.
	% Can be used to check the results of calling get_file_contents.

	show_file_contents([L\|Ls]) ->
	io:format("~s~n",[L]),
	show_file_contents(Ls);
	show_file_contents([]) ->
	ok.


	%% Auxiliary functions


	%% @doc Test whether first argument is a member of the second argument (list)
	%%

	member(_X, []) -> false;
	member(X, [X \| _Ys]) -> true;
	member(X, [_Y \| Ys]) -> member(X, Ys).

	test_member() ->
	true = member(2,[2,0,0,1]),
	false = member(20,[2,0,0,1]),
	ok.

	%% @doc Is a word common?

	is_common(Word) ->
	Common =
	[ "about"
	, "after"
	, "again"
	, "all"
	, "and"
	, "any"
	, "are"
	, "been"
	, "before"
	, "but"
	, "came"
	, "can"
	, "come"
	, "could"
	, "couldn't"
	, "did"
	, "didn't"
	, "don't"
	, "done"
	, "every"
	, "for"
	, "from"
	, "had"
	, "has"
	, "have"
	, "having"
	, "her"
	, "him"
	, "himself"
	, "his"
	, "i'd"
	, "i'll"
	, "i'm"
	, "i've"
	, "into"
	, "isn't"
	, "it's"
	, "its"
	, "knew"
	, "know"
	, "like"
	, "man"
	, "man's"
	, "men"
	, "more"
	, "mrs"
	, "much"
	, "not"
	, "now"
	, "out"
	, "said"
	, "she"
	, "should"
	, "than"
	, "that"
	, "that's"
	, "the"
	, "their"
	, "them"
	, "then"
	, "there"
	, "there's"
	, "these"
	, "they"
	, "they'd"
	, "this"
	, "those"
	, "upon"
	, "very"
	, "was"
	, "wasn't"
	, "were"
	, "what"
	, "when"
	, "will"
	, "with"
	, "would"
	, "wouldn't"
	, "you"
	, "you'd"
	, "you'll"
	, "you're"
	, "your"
	],
	member(Word, Common).

	%% @doc Input: a list of words.
	%% Output: list of words not shorter then 3 chars and not common

	clean([], Cleaned) -> Cleaned;
	clean([Word\|Words], Cleaned) ->
	case length(Word) < 3 of
	true -> clean(Words, Cleaned);
	false ->
	%% drop common words
	case is_common(Word) of
	true -> clean(Words, Cleaned);
	false -> clean(Words, [Word\|Cleaned])
	end
	end.

	clean(Words) ->
	clean(Words, []).

	%% @doc Get words from a line

	line_to_words(Line) ->
	%% Let's be case-agnostic
	CF = string:casefold(Line),
	%% return strings separated by a char
	Words = string:lexemes(CF, "\s\t.,:;-+()[]{}~*=%&#\"!-_\\`?"),
	clean(Words).

	%% Auxiliary functions that relate list of integers and list of ranges
	%%
	%% @doc Convert ranges to list of ordered integers

	range_to_list(Range) ->
	{From, To} = Range,
	lists:seq(From, To).

	test_range_to_list() ->
	[1, 2, 3] = range_to_list({1, 3}),
	[11, 12, 13] = range_to_list({11, 13}),
	[8] = range_to_list({8, 8}),
	ok.

	ranges_to_list([], Out) -> Out;
	ranges_to_list([R\|Rs], Out) ->
	L = range_to_list(R),
	ranges_to_list(Rs, Out ++ L).

	ranges_to_list(Ranges) ->
	ranges_to_list(Ranges, []).

	test_ranges_to_list() ->
	[3, 4] = ranges_to_list([{3,4}]),
	[3, 4, 5, 7, 11, 12, 13] = ranges_to_list([{3,5},{7,7},{11,13}]),
	ok.

	%% @doc Input: an integer and a list of integers
	%% Output Range for given integer and the rest of list of integers

	get_range_aux({From, To}, []) ->
	{{From, To}, []};
	get_range_aux({From, To}, [N\| Ints]) when N =< To->
	get_range_aux({From, To}, Ints);
	get_range_aux({From, To}, [N\| Ints]) ->
	case N == To + 1 of
	true -> get_range_aux({From, N}, Ints);
	false -> {{From, To}, [N\| Ints]}
	end.

	get_range(N, List) ->
	get_range_aux({N, N}, List).

	test_get_range() ->
	{{1,3}, [5, 6]} = get_range(1, [1,2,3,5,6]),
	{{4,6}, [8, 9]} = get_range(4, [1,2,3,4,5,6, 8, 9]),
	ok.

	%% @doc Convert list of integers to list of ranges

	list_to_ranges(Ranges, []) -> lists:reverse(Ranges);
	list_to_ranges(Ranges, [N\| Ints]) ->
	{Range, Rest} = get_range(N, Ints),
	list_to_ranges([Range\|Ranges], Rest).

	list_to_ranges(List) ->
	list_to_ranges([], List).

	test_list_to_ranges() ->
	[{3,5},{7,7},{11,13}] = list_to_ranges([3, 4, 5, 7, 11, 12, 13]),
	[{3,5}] = list_to_ranges([3, 4, 4, 4, 5]),
	ok.

	%% @doc Return list with inserted element in the correct place

	insert(Xs, [], Z) ->
	lists:reverse(Xs) ++ [Z];
	insert(Xs, [Y\| Ys], Z) when Z =< Y ->
	lists:reverse(Xs) ++ [Z, Y\| Ys];
	insert(Xs, [Y\| Ys], Z) ->
	insert([Y\| Xs], Ys, Z).

	insert(List, Z) ->
	insert([], List, Z).

	test_insert() ->
	[1, 2, 3, 4] = insert([1, 2, 4], 3),
	[1, 2, 2, 3, 4] = insert([1, 2, 3, 4], 2),
	[1, 2, 3, 4] = insert([1, 2, 3], 4),
	ok.

	%% @doc Add N to ranges

	update_ranges(Ranges, N) ->
	L = ranges_to_list(Ranges),
	NewL = insert(L, N),
	list_to_ranges(NewL).

	test_update_ranges() ->
	[{3,5}] = update_ranges([{3,4}], 5),
	[{3,4}, {7,7}, {19,23}] = update_ranges([{3,4}, {7,7}, {19,23}], 20),
	ok.

	%% @doc Split index (linear version)
	%% Left contains elements with words less than Word
	%% Right - not less than Word

	split_index(Word, Index) ->
	IsLess =
	fun (Ind) ->
	{W, _} = Ind,
	W < Word
	end,
	lists:partition(IsLess, Index).

	test_split_index() ->
	Index = [{"a", [{1,1}]}, {"b", [{2,3}]}],
	Left = [{"a", [{1,1}]}],
	Right = [{"b", [{2,3}]}],
	{Left, Right} = split_index("b", Index),
	ok.

	%% @doc Add Left list to Right

	add([], Sum) -> Sum;
	add([L\| Left], Right) ->
	add(Left, [L\| Right]).

	%% @doc Split index (binary version)
	%% Left contains elements with words less than Word
	%% Right - not less than Word
	%% ToSplit - list to be splitted into Left and Right
	%%
	%% To be tail recursive I avoid using ++ so lists must be reversed back and
	%% forth
	%%
	%% Binary split is faster than linear one. For "dickens-christmat.txt" it takes
	%% about 1s (vs. 2s in linear case).

	split_index_bin(Left, Right, _Word, []) -> {lists:reverse(Left), Right};
	split_index_bin(Left, Right, Word, [Element]) ->
	{W, _} = Element,
	case W < Word of
	true -> split_index_bin([Element\| Left], Right, Word, []);
	false -> split_index_bin(Left, [Element\| Right], Word, [])
	end;
	split_index_bin(Left, Right, Word, ToSplit) ->
	N = length(ToSplit),
	{L, R} = lists:split(N div 2, ToSplit),
	{W, _} = hd(R),
	case W < Word of
	%% all words in L are less then Word so add L
	%% to Left
	true ->
	split_index_bin(add(L, Left), Right, Word, R);
	%% Word is not greater than a word in R so
	%% R is joined with Right
	false ->
	split_index_bin(Left, add(lists:reverse(R), Right), Word, L)
	end.

	split_index_bin(_Word, []) -> {[], []};
	split_index_bin(Word, Index) ->
	split_index_bin([], [], Word, Index).

	test_split_index_bin() ->
	Index = [{"a", [{1,1}]}, {"b", [{2,3}]}],
	Left = [{"a", [{1,1}]}],
	Right = [{"b", [{2,3}]}],
	{Left, Right} = split_index_bin("b", Index),
	{Index, []} = split_index_bin("c", Index),
	{[], Index} = split_index_bin("a", Index),
	ok.

	update_index({Word, Line}, []) -> [{Word, [{Line, Line}]}];
	update_index({Word, Line}, Index) ->
	%% Split Index
	%% Left contains elements less than Word
	%%
	%% Binary split for "dickens-christmas.txt" is 2x faster than
	%% linear split. Function main/2 measures those times.
	%%
	%% Uncomment to switch between different flavours of split index
	%%
	%% {Left, Right} = split_index(Word, Index), % linear split
	{Left, Right} = split_index_bin(Word, Index), % binary split
	case length(Right) == 0 of
	%% Word is the largest element in the index
	true -> Left ++ [{Word, [{Line, Line}]}];
	false ->
	%% take an element from Right
	%% and compare to Word
	[{W, Ranges}\| Rest] = Right,
	case Word == W of
	true ->
	NewRanges = update_ranges(Ranges, Line),
	Left ++ [{W, NewRanges}\| Rest];
	false ->
	%% Add element in front of Right
	NewRight = [{Word, [{Line, Line}]}, {W, Ranges}\| Rest],
	Left ++ NewRight
	end
	end.

	test_update_index() ->
	[{"elephant", [{8, 8}]}] = update_index({"elephant", 8}, []),
	[{"elephant", [{8, 10}]}, {"home", [{1, 2}]}] =
	update_index(
	%% new word
	{"elephant", 8},
	%% index
	[{"elephant", [{9,10}] }, {"home", [{1, 2}]}]),
	ok.

	%% @doc Write a list to file

	write_list(_File, []) -> ok;
	write_list(File, [Word\| Words]) ->
	file:write_file(File, io_lib:fwrite("~p\n", [Word]), [append]),
	write_list(File, Words).

	add_line_number(_N, [], Out) -> Out;
	add_line_number(N, [L\|Ls], Out) ->
	add_line_number(N, Ls, [{L, N}\|Out]).

	words_to_index([], Index) -> Index;
	words_to_index([L\|Ls], Index) ->
	NewIndex = update_index(L, Index),
	words_to_index(Ls, NewIndex).

	traverse(_N, [], Index) -> Index;
	traverse(N, [Line\| Lines], Index) ->
	Words = line_to_words(Line),
	Inds = add_line_number(N, Words, []),
	NewIndex = words_to_index(Inds, Index),
	traverse(N + 1, Lines, NewIndex).

	%% @doc Main function
	%% Input, Output - names of files

	main(Input, Output) ->
	%% Read file
	%% Timer measures execution time of a function
	{Time1, Lines} = timer:tc(index, get_file_contents, [Input]),
	{Time2, Index} = timer:tc(index, traverse, [1, Lines, []]),
	file:write_file(Output, io_lib:fwrite("Reading from file takes ~p ms.\n\n", [Time1])),
	%% Generating index for "dickens-christmas.txt" takes about 1s for if
	%% update_index uses binary split and 2s for linear split
	file:write_file(Output, io_lib:fwrite("Generating index takes ~p ms.\n\n", [Time2]), [append]),
	{Time3, _} = timer:tc(index, write_list, [Output, Index]),
	%% Writing to file takes about 4s.
	file:write_file(Output, io_lib:fwrite("\nWriting to file takes ~p ms.\n", [Time3]), [append]),
	ok.

	test() ->
	ok = test_range_to_list(),
	ok = test_ranges_to_list(),
	ok = test_member(),
	ok = test_get_range(),
	ok = test_list_to_ranges(),
	ok = test_insert(),
	ok = test_update_ranges(),
	ok = test_split_index(),
	ok = test_split_index_bin(),
	ok = test_update_index(),
	ok.