Created
March 16, 2017 07:00
-
-
Save mrc/8b1eaa27112196fe297ea8e2867575e0 to your computer and use it in GitHub Desktop.
week 2 assignment from https://www.futurelearn.com/courses/functional-programming-erlang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(index). | |
-export([get_file_contents/1,show_file_contents/1, index/1, index_map/1, index_helper/3, add_line_number/2]). | |
% Utilities for reading a text file into a list of lines.
% Example input files:
%   gettysburg-address.txt (short)
%   dickens-christmas.txt (long)
% Read the text file Name and return its lines as a list, in file order.
% Crashes with a badmatch if the file cannot be opened.
% Reading, line-terminator stripping and closing of the device are all
% delegated to get_all_lines/2, which returns the lines last-first; hence
% the reverse.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    lists:reverse(get_all_lines(Device, [])).
% Auxiliary function for get_file_contents.
% Not exported.
% Reads File line by line until eof, pushing each line (without its
% trailing newline) onto Partial, so the result is in reverse file order.
% The device is closed when eof is reached or when a read error occurs;
% a read error is raised as error({read_failed, Reason}).
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        {error, Reason} ->
            % Release the device before crashing so the handle is not leaked.
            file:close(File),
            error({read_failed, Reason});
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

% Remove a single trailing newline from Line, if there is one.
% The last line of a file may lack a newline; in that case the line is
% returned unchanged instead of losing its final character.
strip_newline(Line) ->
    case lists:reverse(Line) of
        [$\n | Rest] -> lists:reverse(Rest);
        _ -> Line
    end.
% Print each string of a list on its own line of standard output.
% Handy for eyeballing the result of get_file_contents/1.
% Returns ok.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
% Split a line of text into words, ignoring non-alpha characters, and
% downcasing all alpha characters.
% After downcasing, every character other than a-z or space acts as a
% separator, so digits, punctuation and accented letters split words.
% Returns a list of non-empty lowercase strings; a line with no a-z
% characters yields [].
words(L) ->
    Lower = string:to_lower(L),
    % The unicode option makes re accept code points above 255; without it
    % re:replace/4 raises badarg on such input. Results for Latin-1 text
    % are unchanged.
    Alpha = re:replace(Lower, "[^a-z ]", " ", [global, unicode, {return, list}]),
    string:tokens(Alpha, " ").
% An index is a map of words each to a list of ranges of lines that
% the word occurs on.
% add_line_number(N, Ranges) records line N in Ranges, a list of
% {Min, Max} tuples whose head is the most recent range:
%   - empty list            -> start the first range {N, N}
%   - N equals head's Max   -> already recorded, list returned unchanged
%   - N is head's Max + 1   -> head range is extended to end at N
%   - anything else         -> a new range {N, N} is pushed on the front
add_line_number(N, []) ->
    [{N, N}];
add_line_number(N, [{_First, Last} | _] = Ranges) when N == Last ->
    Ranges;
add_line_number(N, [{First, Last} | Older]) when N == Last + 1 ->
    [{First, N} | Older];
add_line_number(N, [{_First, _Last} | _] = Ranges) ->
    [{N, N} | Ranges].
% Record LineNumber against every word in the given list of words.
% Acc is a map from word to its list of ranges; a word not yet present
% starts from an empty range list. Returns the updated map (Acc itself
% when the word list is empty).
update_ranges_with_words(LineNumber, Words, Acc) ->
    lists:foldl(
        fun(Word, Index) ->
            Ranges = maps:get(Word, Index, []),
            Index#{Word => add_line_number(LineNumber, Ranges)}
        end,
        Acc,
        Words
    ).
% Walk a list of lines, numbering them consecutively from LineNumber, and
% fold the words of each line into the index map Acc.
% Returns the updated index; range lists are left in the order built by
% update_ranges_with_words/3.
index_helper(LineNumber, Lines, Acc) ->
    {_NextLineNumber, Index} =
        lists:foldl(
            fun(Line, {N, Current}) ->
                Updated = update_ranges_with_words(N, words(Line), Current),
                {N + 1, Updated}
            end,
            {LineNumber, Acc},
            Lines
        ),
    Index.
% Build an index (a map from word to list of line ranges) from a list of
% lines, numbering the lines from 1.
% index_helper/3 accumulates each word's ranges most-recent-first, so every
% range list is reversed before the map is returned.
index_map(L) ->
    NewestFirst = index_helper(1, L, #{}),
    maps:from_list(
        [{Word, lists:reverse(Ranges)} || {Word, Ranges} <- maps:to_list(NewestFirst)]
    ).
% Build the index for a list of lines in the representation required for
% the exercise: a list of {Word, Ranges} tuples, with Ranges as produced
% by index_map/1.
% The list is sorted by word so that the result is deterministic.
index(L) ->
    % maps:to_list/1 makes no ordering guarantee, so sort explicitly on the
    % word (element 1 of each tuple).
    lists:keysort(1, maps:to_list(index_map(L))).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment