arthurmolina · March 10, 2017 02:03
diff --git a/index.erl b/index.erl
 -module(index).
 -export([get_dickens/0,get_gettysburg/0]).

 % Used to read a file into a list of lines.
 % Example files available in:
 %   gettysburg-address.txt (short)
 %   dickens-christmas.txt  (long)
  

 % Read the text file Name and return its lines as a list, in file order.
 % The file is opened read-only; a failed open crashes with badmatch.
 % get_all_lines/2 does the reading (and the line-end stripping) and
 % returns the lines last-first, so its result is reversed here.
 get_file_contents(Name) ->
     {ok, Handle} = file:open(Name, [read]),
     lists:reverse(get_all_lines(Handle, [])).

 % Auxiliary function for get_file_contents. Not exported.
 % Reads File line by line until eof, pushing each line onto Partial,
 % so the result is in reverse file order. The file is closed at eof.
 % Only a trailing "\n" is stripped from a line: a final line that does
 % not end in a newline keeps its last character.
 % Anything other than eof or a character list (for example an
 % {error, Reason} tuple) crashes with case_clause.
 get_all_lines(File, Partial) ->
     case io:get_line(File, "") of
         eof ->
             file:close(File),
             Partial;
         Line when is_list(Line) ->
             get_all_lines(File, [strip_newline(Line) | Partial])
     end.

 % Drop a single trailing newline from Line, if there is one.
 strip_newline(Line) ->
     case lists:suffix("\n", Line) of
         true -> lists:droplast(Line);
         false -> Line
     end.

 % Print every string of Lines on a line of its own and return ok.
 % Handy for checking the result of get_file_contents.
 show_file_contents(Lines) ->
     lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
     
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 % Rearrange a sorted list of {Word, Line} pairs into the index format:
 % one entry per word, with its line numbers collapsed into inclusive
 % ranges, e.g. { "foo" , [{3,5},{7,7},{11,13}] }
 % An empty input gives an empty index.
 rearrange([]) -> [];
 rearrange([X | Xs]) -> rearrange(Xs, X, X, []).

 % First and Last are the pairs that open and close the range being
 % built; Acc holds the finished ranges of the current word, oldest
 % first. The work is done by group_ranges/4, which keeps the finished
 % ranges newest-first so that adding one is a cons, not an append.
 rearrange(Pairs, First, Last, Acc) ->
     group_ranges(Pairs, First, Last, lists:reverse(Acc)).

 % End of input: close the open range and emit the last word.
 group_ranges([], {Word, From}, {_, To}, Done) ->
     [{Word, lists:reverse([{From, To} | Done])}];
 % Same word on the line already covered: a repeat, nothing to add.
 group_ranges([{Word, Line} | Rest], {Word, _} = First, {_, Line} = Last, Done) ->
     group_ranges(Rest, First, Last, Done);
 % Same word on the following line: extend the open range.
 group_ranges([{Word, Line} = X | Rest], {Word, _} = First, {_, Prev}, Done)
   when Line =:= Prev + 1 ->
     group_ranges(Rest, First, X, Done);
 % Same word after a gap: close the open range and start a new one.
 group_ranges([{Word, _} = X | Rest], {Word, From}, {_, To}, Done) ->
     group_ranges(Rest, X, X, [{From, To} | Done]);
 % Different word: emit the finished entry and start over with X.
 group_ranges([X | Rest], {Word, From}, {_, To}, Done) ->
     [{Word, lists:reverse([{From, To} | Done])} | group_ranges(Rest, X, X, [])].

 % Build one flat list of {Word, LineNumber} pairs covering every line
 % of Lines, with the lines numbered from 1.
 indexator(Lines) -> indexator(Lines, 1).

 % Same, but the first line of Lines is numbered Pos.
 indexator(Lines, Pos) ->
     Tag = fun(Line, N) -> {word_per_line(split_words(Line), N), N + 1} end,
     {PerLine, _Next} = lists:mapfoldl(Tag, Pos, Lines),
     lists:append(PerLine).

 % Tag every word of Words with the line number Pos, giving a list of
 % {Word, Pos} pairs in the same order as the words.
 word_per_line(Words, Pos) ->
     [{Word, Pos} || Word <- Words].


 % Break Str into its words: clean it up with trans/1, split the result
 % on spaces, then discard every word shorter than 3 characters.
 split_words(Str) ->
     Cleaned = trans(Str),
     Words = string:tokens(Cleaned, " "),
     remove_short_words(Words, 3).


 % Delete punctuation, tabs and newlines from Str and pass every other
 % character through nocap/1. Removed characters are dropped, not
 % replaced by a space, so the two sides of a hyphen are joined.
 trans(Str) ->
     Dropped = "()!@#$%*_-+='`[{]}^\~?<>.,;:\t\n\'\"",
     [nocap(Ch) || Ch <- Str, not lists:member(Ch, Dropped)].

 % Lower-case a single character: codes from $A to $Z are shifted up
 % by 32; anything else is returned unchanged.
 nocap(X) when $A =< X, X =< $Z -> X + 32;
 nocap(X) -> X.

 % Keep only the words that are at least Length characters long,
 % preserving their order.
 remove_short_words(Words, Length) ->
     LongEnough = fun(Word) -> string:len(Word) >= Length end,
     lists:filter(LongEnough, Words).

 % Build the word index of File by running the whole pipeline in order:
 % read the lines, pair every word with its line number, sort the
 % pairs, then group them with rearrange/1.
 get_list(File) ->
     Lines = get_file_contents(File),
     Pairs = lists:sort(indexator(Lines)),
     rearrange(Pairs).


 % Word index of the sample file "dickens-christmas.txt".
 get_dickens() ->
     get_list("dickens-christmas.txt").
 % Word index of the sample file "gettysburg-address.txt".
 get_gettysburg() ->
     get_list("gettysburg-address.txt").
	-module(index).
	-export([get_dickens/0,get_gettysburg/0]).

	% Used to read a file into a list of lines.
	% Example files available in:
	% gettysburg-address.txt (short)
	% dickens-christmas.txt (long)


	% Read the text file Name and return its lines as a list, in file order.
	% The file is opened read-only; a failed open crashes with badmatch.
	% get_all_lines/2 does the reading (and the line-end stripping) and
	% returns the lines last-first, so its result is reversed here.
	get_file_contents(Name) ->
	    {ok, Handle} = file:open(Name, [read]),
	    lists:reverse(get_all_lines(Handle, [])).

	% Auxiliary function for get_file_contents. Not exported.
	% Reads File line by line until eof, pushing each line onto Partial,
	% so the result is in reverse file order. The file is closed at eof.
	% Only a trailing "\n" is stripped from a line: a final line that does
	% not end in a newline keeps its last character.
	% Anything other than eof or a character list (for example an
	% {error, Reason} tuple) crashes with case_clause.
	get_all_lines(File, Partial) ->
	    case io:get_line(File, "") of
	        eof ->
	            file:close(File),
	            Partial;
	        Line when is_list(Line) ->
	            get_all_lines(File, [strip_newline(Line) | Partial])
	    end.

	% Drop a single trailing newline from Line, if there is one.
	strip_newline(Line) ->
	    case lists:suffix("\n", Line) of
	        true -> lists:droplast(Line);
	        false -> Line
	    end.

	% Print every string of Lines on a line of its own and return ok.
	% Handy for checking the result of get_file_contents.
	show_file_contents(Lines) ->
	    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).

	%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

	% Rearrange a sorted list of {Word, Line} pairs into the index format:
	% one entry per word, with its line numbers collapsed into inclusive
	% ranges, e.g. { "foo" , [{3,5},{7,7},{11,13}] }
	% An empty input gives an empty index.
	rearrange([]) -> [];
	rearrange([X | Xs]) -> rearrange(Xs, X, X, []).

	% First and Last are the pairs that open and close the range being
	% built; Acc holds the finished ranges of the current word, oldest
	% first. The work is done by group_ranges/4, which keeps the finished
	% ranges newest-first so that adding one is a cons, not an append.
	rearrange(Pairs, First, Last, Acc) ->
	    group_ranges(Pairs, First, Last, lists:reverse(Acc)).

	% End of input: close the open range and emit the last word.
	group_ranges([], {Word, From}, {_, To}, Done) ->
	    [{Word, lists:reverse([{From, To} | Done])}];
	% Same word on the line already covered: a repeat, nothing to add.
	group_ranges([{Word, Line} | Rest], {Word, _} = First, {_, Line} = Last, Done) ->
	    group_ranges(Rest, First, Last, Done);
	% Same word on the following line: extend the open range.
	group_ranges([{Word, Line} = X | Rest], {Word, _} = First, {_, Prev}, Done)
	  when Line =:= Prev + 1 ->
	    group_ranges(Rest, First, X, Done);
	% Same word after a gap: close the open range and start a new one.
	group_ranges([{Word, _} = X | Rest], {Word, From}, {_, To}, Done) ->
	    group_ranges(Rest, X, X, [{From, To} | Done]);
	% Different word: emit the finished entry and start over with X.
	group_ranges([X | Rest], {Word, From}, {_, To}, Done) ->
	    [{Word, lists:reverse([{From, To} | Done])} | group_ranges(Rest, X, X, [])].

	% Build one flat list of {Word, LineNumber} pairs covering every line
	% of Lines, with the lines numbered from 1.
	indexator(Lines) -> indexator(Lines, 1).

	% Same, but the first line of Lines is numbered Pos.
	indexator(Lines, Pos) ->
	    Tag = fun(Line, N) -> {word_per_line(split_words(Line), N), N + 1} end,
	    {PerLine, _Next} = lists:mapfoldl(Tag, Pos, Lines),
	    lists:append(PerLine).

	% Tag every word of Words with the line number Pos, giving a list of
	% {Word, Pos} pairs in the same order as the words.
	word_per_line(Words, Pos) ->
	    [{Word, Pos} || Word <- Words].


	% Break Str into its words: clean it up with trans/1, split the result
	% on spaces, then discard every word shorter than 3 characters.
	split_words(Str) ->
	    Cleaned = trans(Str),
	    Words = string:tokens(Cleaned, " "),
	    remove_short_words(Words, 3).


	% Delete punctuation, tabs and newlines from Str and pass every other
	% character through nocap/1. Removed characters are dropped, not
	% replaced by a space, so the two sides of a hyphen are joined.
	trans(Str) ->
	    Dropped = "()!@#$%*_-+='`[{]}^\~?<>.,;:\t\n\'\"",
	    [nocap(Ch) || Ch <- Str, not lists:member(Ch, Dropped)].

	% Lower-case a single character: codes from $A to $Z are shifted up
	% by 32; anything else is returned unchanged.
	nocap(X) when $A =< X, X =< $Z -> X + 32;
	nocap(X) -> X.

	% Keep only the words that are at least Length characters long,
	% preserving their order.
	remove_short_words(Words, Length) ->
	    LongEnough = fun(Word) -> string:len(Word) >= Length end,
	    lists:filter(LongEnough, Words).

	% Build the word index of File by running the whole pipeline in order:
	% read the lines, pair every word with its line number, sort the
	% pairs, then group them with rearrange/1.
	get_list(File) ->
	    Lines = get_file_contents(File),
	    Pairs = lists:sort(indexator(Lines)),
	    rearrange(Pairs).


	% Word index of the sample file "dickens-christmas.txt".
	get_dickens() ->
	    get_list("dickens-christmas.txt").
	% Word index of the sample file "gettysburg-address.txt".
	get_gettysburg() ->
	    get_list("gettysburg-address.txt").
No results found