jameswalton · May 22, 2020 19:42
diff --git a/gettysburg-address.txt b/gettysburg-address.txt
 Four score and seven years ago our fathers brought
 forth on this continent, a new nation, conceived in Liberty,
 and dedicated to the proposition that all men are created equal.

 Now we are engaged in a great civil war, testing whether
 that nation, or any nation so conceived and dedicated,
 can long endure. We are met on a great battle-field of that war.
 We have come to dedicate a portion of that field, as a final
 resting place for those who here gave their lives that 
 that nation might live. It is altogether fitting and proper
 that we should do this.

 But, in a larger sense, we can not dedicate --
 we can not consecrate -- we can not hallow -- this ground.
 The brave men, living and dead, who struggled here, have
 \consecrated it, far above our poor power to add or detract.
 The world will little note, nor long remember what we say here, 
 but it can never forget what they did here. It is for us the
 living, rather, to be dedicated here to the unfinished work
 which they who fought here have thus far so nobly advanced.
 It is rather for us to be here dedicated to the great task
 remaining before us -- that from these honored dead we take
 increased devotion to that cause for which they gave the
 last full measure of devotion -- that we here highly
 resolve that these dead shall not have died in vain --
 that this nation, under God, shall have a new birth 
 of freedom -- and that government of the people,
 by the people, for the people, shall not perish from the earth.
diff --git a/index.erl b/index.erl
 -module(index).
 -export([get_file_contents/1, show_file_contents/1, index/1, test_all/0]).

 %% Helper functions which can be exported for testing individually
 -export([add_line_numbers/1, index_line/1, index_words/2, aggregate_words/1, remove_word/2, find_occurrences/2, partition_ranges/1]).

 -define(MinWordLength, 3).

 % Used to read a file into a list of lines.
 % Example files available in:
 %   gettysburg-address.txt (short)
 %   dickens-christmas.txt  (long)


 % Get the contents of a text file into a list of lines.
 % Each line has its trailing newline removed.

 get_file_contents(Name) ->
  {ok, File} = file:open(Name, [read]),
  Rev = get_all_lines(File, []),
  lists:reverse(Rev).

 % Auxiliary function for get_file_contents.
 % Not exported.

 get_all_lines(File, Partial) ->
  case io:get_line(File, "") of
    eof -> file:close(File),
      Partial;
    Line -> {Strip, _} = lists:split(length(Line) - 1, Line),
      get_all_lines(File, [Strip | Partial])
  end.

 % Show the contents of a list of strings.
 % Can be used to check the results of calling get_file_contents.

 show_file_contents([L | Ls]) ->
  io:format("~s~n", [L]),
  show_file_contents(Ls);
 show_file_contents([]) ->
  ok.
     
 %% Main function to perform the indexing of a file given a FileName.
 index(FileName) ->
  aggregate_words(lists:flatten(index_line(add_line_numbers(get_file_contents(FileName))))).

 %% Add line number to each line
 %% [ {1, ["List", "of", "Words"]} ]
 add_line_numbers(L) -> add_line_numbers(L, 1).
 add_line_numbers([], _) -> [];
 add_line_numbers([H|T], N) ->
  [ {N, string:split(H, " ", all)} | add_line_numbers(T, N + 1) ].

 %% Higher level function to index the words
 index_line([]) ->
  [];
 index_line([{LineNumber, Line}|T]) ->
  [ index_words(LineNumber, Line) | index_line(T) ].

 %% Build a list of { Word, LineNumber } tuples of words with a minimum length of MinWordLength
 %% This function also normalizes (lowercases) the word and cleans any unwanted symbols
 index_words(_, []) -> [];
 index_words(LineNumber, [Word|Words]) when length(Word) >= ?MinWordLength ->
  Normalized = string:to_lower(Word),
  Cleaned = string:trim(Normalized, both, ".'\\`,;:!"),
  [ {Cleaned, LineNumber} | index_words(LineNumber, Words)];
 index_words(LineNumber, [_Word|Words]) -> index_words(LineNumber, Words).

 %% Find all occurrences
 %% Create list of [1, 1, 2, 4, 6]
 %% Partition list of lines into [{1, 1}, {3, 5}]
 aggregate_words(L) -> aggregate_words(L, {}).
 aggregate_words([], _Index) -> [];
 aggregate_words([{Word, _LineNumber}|Words] = List, _Index) ->
  [ {Word, partition_ranges(find_occurrences(Word, List))} | aggregate_words(remove_word(Word, Words)) ].

 %% Select all occurrences of a given word
 find_occurrences(_Word, []) -> [];
 find_occurrences(Word, [{Word, LineNumber} | Words]) ->
  [ LineNumber | find_occurrences(Word, Words) ];
 find_occurrences(Word, [_|Words]) ->
  find_occurrences(Word, Words).

 %% Remove all occurrences of a given word
 remove_word(_Word, []) -> [];
 remove_word(Word, [{Word, _}|Words]) -> remove_word(Word, Words);
 remove_word(Word, [{_NotWord, _} = Keep |T]) -> [ Keep | remove_word(Word, T) ].

 %% Higher level function to partition a list of lines into a list of tuple ranges
 partition_ranges([]) -> [];
 partition_ranges([X|Lines]) -> partition_ranges(Lines, {X, X}, []).

 %% Base case, we've run out of lines so the last range is complete so
 %% add it to the list of ranges
 partition_ranges([], Range, Ranges) -> Ranges ++ [Range];
 %% With the tuple {1, 2} and the next line is 2 iterate again
 partition_ranges([Y|Lines], {X, Y}, Ranges) ->
  partition_ranges(Lines, {X, Y}, Ranges);
 %% With the tuple {1, 2} and the next line is 3 (2 + 1) then set the
 %% second element to of the tuple to 3
 partition_ranges([Z|Lines], {X, Y}, Ranges) when Y + 1 =:= Z ->
  partition_ranges(Lines, {X, Z}, Ranges);
 %% With the tuple {1, 2} and the next line is 4 then the range is
 %% broken and we add the previous range to the list and initially
 %% set the new range tuple to {4, 4}
 partition_ranges([Z|Lines], {_X, _Y} = Range, Ranges) ->
  partition_ranges(Lines, {Z, Z}, Ranges ++ [Range]).

 %% Tests
 test_all() ->
  test_add_line_numbers(),
  test_index_line(),
  test_partition_ranges(),
  test_index(),
  ok.

 test_add_line_numbers() ->
  [] = add_line_numbers([]),
  WithNumbers =
     [{1, ["Four", "score", "and"]},
      {2, ["forth", "on", "this", "continent,"]},
      {3, ["and", "dedicated", "to."]}],
  Input = ["Four score and",  "forth on this continent,", "and dedicated to."],
  WithNumbers = add_line_numbers(Input).

 test_index_line() ->
  [] = index_line([]),
  NumberedWords = [[{"four",1},{"score",1},{"and",1}],
    [{"forth",2},{"this",2},{"continent",2}],
    [{"and",3},{"dedicated",3},{"to",3}]],
  Input = [{1, ["Four", "score", "and"]},
    {2, ["forth", "on", "this", "continent,"]},
    {3, ["and", "dedicated", "to."]}],
  NumberedWords = index_line(Input).

 test_partition_ranges() ->
  [] = partition_ranges([]),
  [{3, 3}] = partition_ranges([3]),
  [{1, 1}, {3, 6}, {9, 9}] = partition_ranges([1, 3, 4, 5, 6, 9, 9, 9]).

 test_index() ->
  [{"four",[{1,1}]},
    {"score",[{1,1}]},
    {"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]},
    {"seven",[{1,1}]},
    {"years",[{1,1}]},
    {"ago",[{1,1}]},
    {"our",[{1,1},{16,16}]},
    {"fathers",[{1,1}]},
    {"brought",[{1,1}]},
    {"forth",[{2,2}]},
    {"this",[{2,2},{11,11},{14,14},{26,26}]},
    {"continent",[{2,2}]},
    {"new",[{2,2},{26,26}]},
    {"nation",[{2,2},{6,6},{10,10},{26,26}]},
    {"conceived",[{2,2},{6,6}]},
    {"liberty",[{2,2}]},
    {"dedicated",[{3,3},{6,6},{19,19},{21,21}]},
    {"the",[{3,3},{15,15},{17,19},{21,21},{23,23},{27,28}]},
    {"proposition",[{3,3}]},
    {"that",[{3,3},{6,11},{22,27}]},
    {"all",[{3,3}]},
    {"men",[{3,3},{15,15}]},
    {"are",[{3,3},{5,5},{7,7}]},
    {"created",[{3,3}]},
    {"equal",[{3,3}]},
    {"now",[{5,5}]},
    {"engaged",[{5,5}]},
    {"great",[{5,5},{7,7},{21,21}]},
    {"civil",[{5,5}]},
    {"war",[{5,5},{7,7}]},
    {"testing",[{5,5}]},
    {"whether",[{5,5}]},
    {"any",[{6,6}]},
    {"can",[{7,7},{13,14},{18,18}]},
    {"long",[{7,7},{17,17}]},
    {"endure",[{7,7}]},
    {"met",[{7,7}]},
    {"battle-field",[{7,7}]},
    {"have",[{8,8},{15,15},{20,20},{25,26}]},
    {"come",[{8,8}]},
    {"dedicate",[{8,8},{13,13}]},
    {"portion",[{8,8}]},
    {"field",[{8,8}]},
    {"final",[{8,8}]},
    {"resting",[{9,9}]},
    {"place",[{9,9}]},
    {"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]},
    {"those",[{9,9}]},
    {"who",[{9,9},{15,15},{20,20}]},
    {"here",[{9,9},{15,15},{17,21},{24,24}]},
    {"gave",[{9,9},{23,23}]},
    {"their",[{9,9}]},
    {"lives",[{9,9}]},
    {"might",[{10,10}]},
    {"live",[{10,10}]},
    {"altogether",[{10,10}]},
    {"fitting",[{10,10}]},
    {"proper",[{10,10}]},
    {"should",[{11,11}]},
    {"but",[{13,13},{18,18}]},
    {"larger",[{13,13}]},
    {"sense",[{13,13}]},
    {"not",[{13,14},{25,25},{28,28}]},
    {"consecrate",[{14,14}]},
    {"hallow",[{14,14}]},
    {"ground",[{14,14}]},
    {"brave",[{15,15}]},
    {"living",[{15,15},{19,19}]},
    {"dead",[{15,15},{22,22},{25,25}]},
    {"struggled",[{15,15}]},
    {"consecrated",[{16,16}]},
    {"it",[{16,16}]},
    {"far",[{16,16},{20,20}]},
    {"above",[{16,16}]},
    {"poor",[{16,16}]},
    {"power",[{16,16}]},
    {"add",[{16,16}]},
    {"detract",[{16,16}]},
    {"world",[{17,17}]},
    {"will",[{17,17}]},
    {"little",[{17,17}]},
    {"note",[{17,17}]},
    {"nor",[{17,17}]},
    {"remember",[{17,17}]},
    {"what",[{17,18}]},
    {"say",[{17,17}]},
    {"never",[{18,18}]},
    {"forget",[{18,18}]},
    {"they",[{18,18},{20,20},{23,23}]},
    {"did",[{18,18}]},
    {"rather",[{19,19},{21,21}]},
    {"unfinished",[{19,19}]},
    {"work",[{19,19}]},
    {"which",[{20,20},{23,23}]},
    {"fought",[{20,20}]},
    {"thus",[{20,20}]},
    {"nobly",[{20,20}]},
    {"advanced",[{20,20}]},
    {"task",[{21,21}]},
    {"remaining",[{22,22}]},
    {"before",[{22,22}]},
    {"from",[{22,22},{28,28}]},
    {"these",[{22,22},{25,25}]},
    {"honored",[{22,22}]},
    {"take",[{22,22}]},
    {"increased",[{23,23}]},
    {"devotion",[{23,24}]},
    {"cause",[{23,23}]},
    {"last",[{24,24}]},
    {"full",[{24,24}]},
    {"measure",[{24,24}]},
    {"highly",[{24,24}]},
    {"resolve",[{25,25}]},
    {"shall",[{25,26},{28,28}]},
    {"died",[{25,25}]},
    {"vain",[{25,25}]},
    {"under",[{26,26}]},
    {"god",[{26,26}]},
    {"birth",[{26,26}]},
    {"freedom",[{27,27}]},
    {"government",[{27,27}]},
    {"people",[{27,28}]},
    {"perish",[{28,28}]},
    {"earth",[{28,28}]}] = index("gettysburg-address.txt").
diff --git a/summary.txt b/summary.txt
 The code implements the basic indexing as suggested in the assignment, along with cleaning and normalising the words.
 Here is how I approached the assignment and the individual functions that make up the overall indexer.
 The indexer can be run with index:index(Filename) and the tests can be run with index:test_all/0.


 Given the input:
 ["Four score and seven years ago our fathers brought",
 "forth on this continent, a new nation, conceived in Liberty,",
 "and dedicated to the proposition that all men are created equal."]

 Give lines a number:

 [{1,"Four score and seven years ago our fathers brought"},
 {2,"forth on this continent, a new nation, conceived in Liberty,"},
 {3,"and dedicated to the proposition that all men are created equal."}]

 Split the line in the same function using string:split based on spaces

 [{1,["Four", "score", "and", "seven", "years", "ago", "our", "fathers", "brought"]}]

 Index each word in the line. Create a tuple for each word in the line in the form {Word, Line}.
 This is where I decided to do the cleaning and normalising since I am already accessing each word here.

 [{"four",1},{"score",1},{"forth",2},{"continent",2}]

 Reduce the {Word,Number} entries by creating {Word, LineNumber} for each line the word occurs on.

 Take the first entry then look in the remaining list for occurrences of the word.
 Add these occurrences to the word entry as a list of lines that the word appears on and remove further occurrences
 of the word from the remaining list.

 Create ranges in the form [{X,X}, {X,Y}] to denote the inclusive start and end lines of occurrences

 Note on performance
 The word list is traversed several times but the same traversal is repeated to both find occurrences of a given word
 and remove occurrences of a given word. Having studied the `lists` module docs I could have used the lists:partition
 function to achieve this, no doubt more efficiently.
	Four score and seven years ago our fathers brought
	forth on this continent, a new nation, conceived in Liberty,
	and dedicated to the proposition that all men are created equal.

	Now we are engaged in a great civil war, testing whether
	that nation, or any nation so conceived and dedicated,
	can long endure. We are met on a great battle-field of that war.
	We have come to dedicate a portion of that field, as a final
	resting place for those who here gave their lives that
	that nation might live. It is altogether fitting and proper
	that we should do this.

	But, in a larger sense, we can not dedicate --
	we can not consecrate -- we can not hallow -- this ground.
	The brave men, living and dead, who struggled here, have
	\consecrated it, far above our poor power to add or detract.
	The world will little note, nor long remember what we say here,
	but it can never forget what they did here. It is for us the
	living, rather, to be dedicated here to the unfinished work
	which they who fought here have thus far so nobly advanced.
	It is rather for us to be here dedicated to the great task
	remaining before us -- that from these honored dead we take
	increased devotion to that cause for which they gave the
	last full measure of devotion -- that we here highly
	resolve that these dead shall not have died in vain --
	that this nation, under God, shall have a new birth
	of freedom -- and that government of the people,
	by the people, for the people, shall not perish from the earth.
	-module(index).
	-export([get_file_contents/1, show_file_contents/1, index/1, test_all/0]).

	%% Helper functions which can be exported for testing individually
	-export([add_line_numbers/1, index_line/1, index_words/2, aggregate_words/1, remove_word/2, find_occurrences/2, partition_ranges/1]).

	-define(MinWordLength, 3).

	% Used to read a file into a list of lines.
	% Example files available in:
	% gettysburg-address.txt (short)
	% dickens-christmas.txt (long)


	% Get the contents of a text file into a list of lines.
	% Each line has its trailing newline removed.

	get_file_contents(Name) ->
	{ok, File} = file:open(Name, [read]),
	Rev = get_all_lines(File, []),
	lists:reverse(Rev).

	% Auxiliary function for get_file_contents.
	% Not exported.

	get_all_lines(File, Partial) ->
	case io:get_line(File, "") of
	eof -> file:close(File),
	Partial;
	Line -> {Strip, _} = lists:split(length(Line) - 1, Line),
	get_all_lines(File, [Strip \| Partial])
	end.

	% Show the contents of a list of strings.
	% Can be used to check the results of calling get_file_contents.

	show_file_contents([L \| Ls]) ->
	io:format("~s~n", [L]),
	show_file_contents(Ls);
	show_file_contents([]) ->
	ok.

	%% Main function to perform the indexing of a file given a FileName.
	index(FileName) ->
	aggregate_words(lists:flatten(index_line(add_line_numbers(get_file_contents(FileName))))).

	%% Add line number to each line
	%% [ {1, ["List", "of", "Words"]} ]
	add_line_numbers(L) -> add_line_numbers(L, 1).
	add_line_numbers([], _) -> [];
	add_line_numbers([H\|T], N) ->
	[ {N, string:split(H, " ", all)} \| add_line_numbers(T, N + 1) ].

	%% Higher level function to index the words
	index_line([]) ->
	[];
	index_line([{LineNumber, Line}\|T]) ->
	[ index_words(LineNumber, Line) \| index_line(T) ].

	%% Build a list of { Word, LineNumber } tuples of words with a minimum length of MinWordLength
	%% This function also normalizes (lowercases) the word and cleans any unwanted symbols
	index_words(_, []) -> [];
	index_words(LineNumber, [Word\|Words]) when length(Word) >= ?MinWordLength ->
	Normalized = string:to_lower(Word),
	Cleaned = string:trim(Normalized, both, ".'\\`,;:!"),
	[ {Cleaned, LineNumber} \| index_words(LineNumber, Words)];
	index_words(LineNumber, [_Word\|Words]) -> index_words(LineNumber, Words).

	%% Find all occurrences
	%% Create list of [1, 1, 2, 4, 6]
	%% Partition list of lines into [{1, 1}, {3, 5}]
	aggregate_words(L) -> aggregate_words(L, {}).
	aggregate_words([], _Index) -> [];
	aggregate_words([{Word, _LineNumber}\|Words] = List, _Index) ->
	[ {Word, partition_ranges(find_occurrences(Word, List))} \| aggregate_words(remove_word(Word, Words)) ].

	%% Select all occurrences of a given word
	find_occurrences(_Word, []) -> [];
	find_occurrences(Word, [{Word, LineNumber} \| Words]) ->
	[ LineNumber \| find_occurrences(Word, Words) ];
	find_occurrences(Word, [_\|Words]) ->
	find_occurrences(Word, Words).

	%% Remove all occurrences of a given word
	remove_word(_Word, []) -> [];
	remove_word(Word, [{Word, _}\|Words]) -> remove_word(Word, Words);
	remove_word(Word, [{_NotWord, _} = Keep \|T]) -> [ Keep \| remove_word(Word, T) ].

	%% Higher level function to partition a list of lines into a list of tuple ranges
	partition_ranges([]) -> [];
	partition_ranges([X\|Lines]) -> partition_ranges(Lines, {X, X}, []).

	%% Base case, we've run out of lines so the last range is complete so
	%% add it to the list of ranges
	partition_ranges([], Range, Ranges) -> Ranges ++ [Range];
	%% With the tuple {1, 2} and the next line is 2 iterate again
	partition_ranges([Y\|Lines], {X, Y}, Ranges) ->
	partition_ranges(Lines, {X, Y}, Ranges);
	%% With the tuple {1, 2} and the next line is 3 (2 + 1) then set the
	%% second element to of the tuple to 3
	partition_ranges([Z\|Lines], {X, Y}, Ranges) when Y + 1 =:= Z ->
	partition_ranges(Lines, {X, Z}, Ranges);
	%% With the tuple {1, 2} and the next line is 4 then the range is
	%% broken and we add the previous range to the list and initially
	%% set the new range tuple to {4, 4}
	partition_ranges([Z\|Lines], {_X, _Y} = Range, Ranges) ->
	partition_ranges(Lines, {Z, Z}, Ranges ++ [Range]).

	%% Tests
	test_all() ->
	test_add_line_numbers(),
	test_index_line(),
	test_partition_ranges(),
	test_index(),
	ok.

	test_add_line_numbers() ->
	[] = add_line_numbers([]),
	WithNumbers =
	[{1, ["Four", "score", "and"]},
	{2, ["forth", "on", "this", "continent,"]},
	{3, ["and", "dedicated", "to."]}],
	Input = ["Four score and", "forth on this continent,", "and dedicated to."],
	WithNumbers = add_line_numbers(Input).

	test_index_line() ->
	[] = index_line([]),
	NumberedWords = [[{"four",1},{"score",1},{"and",1}],
	[{"forth",2},{"this",2},{"continent",2}],
	[{"and",3},{"dedicated",3},{"to",3}]],
	Input = [{1, ["Four", "score", "and"]},
	{2, ["forth", "on", "this", "continent,"]},
	{3, ["and", "dedicated", "to."]}],
	NumberedWords = index_line(Input).

	test_partition_ranges() ->
	[] = partition_ranges([]),
	[{3, 3}] = partition_ranges([3]),
	[{1, 1}, {3, 6}, {9, 9}] = partition_ranges([1, 3, 4, 5, 6, 9, 9, 9]).

	test_index() ->
	[{"four",[{1,1}]},
	{"score",[{1,1}]},
	{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]},
	{"seven",[{1,1}]},
	{"years",[{1,1}]},
	{"ago",[{1,1}]},
	{"our",[{1,1},{16,16}]},
	{"fathers",[{1,1}]},
	{"brought",[{1,1}]},
	{"forth",[{2,2}]},
	{"this",[{2,2},{11,11},{14,14},{26,26}]},
	{"continent",[{2,2}]},
	{"new",[{2,2},{26,26}]},
	{"nation",[{2,2},{6,6},{10,10},{26,26}]},
	{"conceived",[{2,2},{6,6}]},
	{"liberty",[{2,2}]},
	{"dedicated",[{3,3},{6,6},{19,19},{21,21}]},
	{"the",[{3,3},{15,15},{17,19},{21,21},{23,23},{27,28}]},
	{"proposition",[{3,3}]},
	{"that",[{3,3},{6,11},{22,27}]},
	{"all",[{3,3}]},
	{"men",[{3,3},{15,15}]},
	{"are",[{3,3},{5,5},{7,7}]},
	{"created",[{3,3}]},
	{"equal",[{3,3}]},
	{"now",[{5,5}]},
	{"engaged",[{5,5}]},
	{"great",[{5,5},{7,7},{21,21}]},
	{"civil",[{5,5}]},
	{"war",[{5,5},{7,7}]},
	{"testing",[{5,5}]},
	{"whether",[{5,5}]},
	{"any",[{6,6}]},
	{"can",[{7,7},{13,14},{18,18}]},
	{"long",[{7,7},{17,17}]},
	{"endure",[{7,7}]},
	{"met",[{7,7}]},
	{"battle-field",[{7,7}]},
	{"have",[{8,8},{15,15},{20,20},{25,26}]},
	{"come",[{8,8}]},
	{"dedicate",[{8,8},{13,13}]},
	{"portion",[{8,8}]},
	{"field",[{8,8}]},
	{"final",[{8,8}]},
	{"resting",[{9,9}]},
	{"place",[{9,9}]},
	{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]},
	{"those",[{9,9}]},
	{"who",[{9,9},{15,15},{20,20}]},
	{"here",[{9,9},{15,15},{17,21},{24,24}]},
	{"gave",[{9,9},{23,23}]},
	{"their",[{9,9}]},
	{"lives",[{9,9}]},
	{"might",[{10,10}]},
	{"live",[{10,10}]},
	{"altogether",[{10,10}]},
	{"fitting",[{10,10}]},
	{"proper",[{10,10}]},
	{"should",[{11,11}]},
	{"but",[{13,13},{18,18}]},
	{"larger",[{13,13}]},
	{"sense",[{13,13}]},
	{"not",[{13,14},{25,25},{28,28}]},
	{"consecrate",[{14,14}]},
	{"hallow",[{14,14}]},
	{"ground",[{14,14}]},
	{"brave",[{15,15}]},
	{"living",[{15,15},{19,19}]},
	{"dead",[{15,15},{22,22},{25,25}]},
	{"struggled",[{15,15}]},
	{"consecrated",[{16,16}]},
	{"it",[{16,16}]},
	{"far",[{16,16},{20,20}]},
	{"above",[{16,16}]},
	{"poor",[{16,16}]},
	{"power",[{16,16}]},
	{"add",[{16,16}]},
	{"detract",[{16,16}]},
	{"world",[{17,17}]},
	{"will",[{17,17}]},
	{"little",[{17,17}]},
	{"note",[{17,17}]},
	{"nor",[{17,17}]},
	{"remember",[{17,17}]},
	{"what",[{17,18}]},
	{"say",[{17,17}]},
	{"never",[{18,18}]},
	{"forget",[{18,18}]},
	{"they",[{18,18},{20,20},{23,23}]},
	{"did",[{18,18}]},
	{"rather",[{19,19},{21,21}]},
	{"unfinished",[{19,19}]},
	{"work",[{19,19}]},
	{"which",[{20,20},{23,23}]},
	{"fought",[{20,20}]},
	{"thus",[{20,20}]},
	{"nobly",[{20,20}]},
	{"advanced",[{20,20}]},
	{"task",[{21,21}]},
	{"remaining",[{22,22}]},
	{"before",[{22,22}]},
	{"from",[{22,22},{28,28}]},
	{"these",[{22,22},{25,25}]},
	{"honored",[{22,22}]},
	{"take",[{22,22}]},
	{"increased",[{23,23}]},
	{"devotion",[{23,24}]},
	{"cause",[{23,23}]},
	{"last",[{24,24}]},
	{"full",[{24,24}]},
	{"measure",[{24,24}]},
	{"highly",[{24,24}]},
	{"resolve",[{25,25}]},
	{"shall",[{25,26},{28,28}]},
	{"died",[{25,25}]},
	{"vain",[{25,25}]},
	{"under",[{26,26}]},
	{"god",[{26,26}]},
	{"birth",[{26,26}]},
	{"freedom",[{27,27}]},
	{"government",[{27,27}]},
	{"people",[{27,28}]},
	{"perish",[{28,28}]},
	{"earth",[{28,28}]}] = index("gettysburg-address.txt").
	The code implements the basic indexing as suggested in the assignment, along with cleaning and normalising the words.
	Here is how I approached the assignment and the individual functions that make up the overall indexer.
	The indexer can be run with index:index(Filename) and the tests can be run with index:test_all/0.


	Given the input:
	["Four score and seven years ago our fathers brought",
	"forth on this continent, a new nation, conceived in Liberty,",
	"and dedicated to the proposition that all men are created equal."]

	Give lines a number:

	[{1,"Four score and seven years ago our fathers brought"},
	{2,"forth on this continent, a new nation, conceived in Liberty,"},
	{3,"and dedicated to the proposition that all men are created equal."}]

	Split the line in the same function using string:split based on spaces

	[{1,["Four", "score", "and", "seven", "years", "ago", "our", "fathers", "brought"]}]

	Index each word in the line. Create a tuple for each word in the line in the form {Word, Line}.
	This is where I decided to do the cleaning and normalising since I am already accessing each word here.

	[{"four",1},{"score",1},{"forth",2},{"continent",2}]

	Reduce the {Word,Number} entries by creating {Word, LineNumber} for each line the word occurs on.

	Take the first entry then look in the remaining list for occurrences of the word.
	Add these occurrences to the word entry as a list of lines that the word appears on and remove further occurrences
	of the word from the remaining list.

	Create ranges in the form [{X,X}, {X,Y}] to denote the inclusive start and end lines of occurrences

	Note on performance
	The word list is traversed several times but the same traversal is repeated to both find occurrences of a given word
	and remove occurrences of a given word. Having studied the `lists` module docs I could have used the lists:partition
	function to achieve this, no doubt more efficiently.