Created
May 25, 2020 08:53
-
-
Save Joakineee/8912ab3b495c47021e6bf6f3286c302b to your computer and use it in GitHub Desktop.
index file exercise
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(index). | |
-export([get_file_contents/1,show_file_contents/1,index_words/1,test/0]). | |
% Used to read a file into a list of lines. | |
% Example files available in: | |
% gettysburg-address.txt (short) | |
% dickens-christmas.txt (long) | |
% Read the text file Name and return its lines as a list of strings,
% in the order they appear in the file.
% The line terminator is stripped by get_all_lines/2, which also closes the
% file once the end is reached.
% Crashes with badmatch if the file cannot be opened.
get_file_contents(Name) ->
    {ok,Device} = file:open(Name,[read]),
    lists:reverse(get_all_lines(Device,[])).
% Auxiliary function for get_file_contents.
% Not exported.
% Reads File line by line, pushing every line onto Partial, so the result is
% in reverse file order. The file is closed before returning.
% Only a trailing "\n" is removed: a last line that is not newline-terminated
% keeps all of its characters.
% A read error closes the file and raises error({get_line_failed,Reason}).
get_all_lines(File,Partial) ->
    case io:get_line(File,"") of
        eof ->
            file:close(File),
            Partial;
        {error,Reason} ->
            % Release the descriptor before reporting the failure.
            file:close(File),
            error({get_line_failed,Reason});
        Line ->
            get_all_lines(File,[strip_newline(Line)|Partial])
    end.

% Drops one trailing newline character, if present.
strip_newline(Line) ->
    case lists:suffix("\n",Line) of
        true -> lists:droplast(Line);
        false -> Line
    end.
% Print every string of the list on its own line of standard output.
% Handy for eyeballing the result of get_file_contents/1.
% Returns ok.
show_file_contents(Lines) when is_list(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n",[Line]) end,Lines).
% | |
% Splits every sentence into its words.
% The characters '.', ',', '-' and ' ' act as separators and are discarded.
% Acc holds already produced word lists in reverse order; the top-level call
% passes [].
% Example:
% Input:
% ["Four score and seven years ago our fathers brought",
%  "forth on this continent, a new nation, conceived in Liberty,"]
% Output:
% [["Four","score","and","seven","years","ago","our","fathers","brought"],
%  ["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]]
-spec get_tokens([list()],[T]) -> [T].
get_tokens(Sentences,Acc) ->
    lists:reverse(Acc,[string:tokens(Sentence, ".,- ") || Sentence <- Sentences]).
% | |
% Pairs every element of the list with a consecutive number, starting at Index.
% Acc holds already numbered elements in reverse order; the top-level call
% passes [].
% Example (Index = 1):
% Input:
% [["Four","score","and","seven","years","ago","our","fathers","brought"],
%  ["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]]
% Output:
% [{1,["Four","score","and","seven","years","ago","our","fathers","brought"]},
%  {2,["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]}]
-spec add_index([T],[{integer(),[T]}],integer()) ->
    [{integer(),[T]}].
add_index(Items,Acc,Index) ->
    {Numbered,_Next} =
        lists:mapfoldl(fun(Item,N) -> {{N,Item},N + 1} end,Index,Items),
    lists:reverse(Acc,Numbered).
% | |
% Main function: builds the word index of the text file File.
% Steps:
%  1. read the file into lines and split every line into words;
%  2. number the lines from 1, giving e.g.
%     [{1,["Four","score","and","seven","years","ago","our","fathers","brought"]},
%      {2,["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]}]
%  3. collect the distinct words of the whole text;
%  4. look every word up in the numbered lines.
% Returns one {Word,Ranges} entry per distinct word, sorted by word.
% The word list is built with lists:usort/1: the per-line word lists are in
% text order, not sorted, so lists:umerge/1 (which requires sorted sublists)
% left duplicate words in the index.
% NOTE: test/0 pins a result that contains those duplicates; its expected term
% has to be regenerated for the de-duplicated, sorted output.
-spec index_words(string()) -> [{string(),list()}].
index_words(File) ->
    Lines = get_file_contents(File),
    Tokens = get_tokens(Lines,[]),
    Indexed = add_index(Tokens,[],1),
    Words = lists:usort(lists:append(Tokens)),
    lists:sort(search_words(Words,Indexed,[])).
% Looks up every word of the first list in the numbered lines M.
% Each word W contributes the entry {W,text_coincidences(W,M,[])}.
% Entries are pushed onto Acc, so they come out in reverse input order.
-spec search_words(list(),list(),[T]) -> [T].
search_words(Words,M,Acc) ->
    lists:foldl(fun(Word,Found) -> [{Word,text_coincidences(Word,M,[])}|Found] end,
                Acc,Words).
% Builds the list of line ranges in which the word H occurs.
% The second argument is the numbered line list [{LineNo,Words}]; Acc collects
% the matching line numbers in reverse order (the top-level call passes []).
% For example, a word found on lines [1,2,3,6,7] yields [{1,3},{6,7}].
% A word found on no line yields [].
% The range builder is seeded with the FIRST matching line; seeding it with the
% last one lost the start of a trailing run ([1,2] became [{2,2}]).
% NOTE: test/0 pins results computed with the old seeding, so its expected term
% has to be regenerated.
-spec text_coincidences(term(),[{integer(),list()}],[integer()]) ->
    [{integer(),integer()}].
text_coincidences(_,[],[]) ->
    [];
text_coincidences(_,[],Acc) ->
    [First|Rest] = lists:reverse(Acc),
    tuple_list(Rest,First,First,[]);
text_coincidences(H,[{X,L}|T],Acc) ->
    case lists:member(H,L) of
        true -> text_coincidences(H,T,[X|Acc]);
        false -> text_coincidences(H,T,Acc)
    end.

% Collapses line numbers into inclusive ranges.
% {X,Y} is the range currently being extended and the list holds the line
% numbers still to be processed; Acc collects the finished ranges.
% Example: tuple_list([2,3,6,7],1,1,[]) returns [{1,3},{6,7}].
% A number equal to the current end Y is ignored, so passing the seed line
% again at the head of the list is harmless.
-spec tuple_list([integer()],integer(),integer(),[{integer(),integer()}]) ->
    [{integer(),integer()}].
tuple_list([],X,Y,Acc) ->
    % Emit the range that is still open before sorting by range start.
    lists:keysort(1,[{X,Y}|Acc]);
tuple_list([Y|T],X,Y,Acc) ->
    tuple_list(T,X,Y,Acc);
tuple_list([H|T],X,Y,Acc) when H =:= Y + 1 ->
    tuple_list(T,X,H,Acc);
tuple_list([H|T],X,Y,Acc) ->
    tuple_list(T,H,H,[{X,Y}|Acc]).
% Self-check for index_words/1.
% Matches the result of index:index_words("getisburg.txt") against the literal
% term below and returns ok; any difference raises a badmatch error.
% The file "getisburg.txt" is opened by relative path, so it must be present
% in the current working directory.
% NOTE(review): the expected term repeats several entries (e.g. "here", "we"),
% so it looks like a snapshot of the function's output rather than an
% independently derived index -- confirm, and regenerate it whenever the
% indexing logic changes.
test() -> | |
[{"whether",[{5,5}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"testing",[{5,5}]}, | |
{"war",[{5,5},{7,7}]}, | |
{"lives",[{9,9}]}, | |
{"their",[{9,9}]}, | |
{"gave",[{9,9},{23,23}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"who",[{9,9},{15,15},{20,20}]}, | |
{"final",[{8,8}]}, | |
{"civil",[{5,5}]}, | |
{"great",[{5,5},{7,7},{21,21}]}, | |
{"brought",[{1,1}]}, | |
{"fathers",[{1,1}]}, | |
{"our",[{1,1},{16,16}]}, | |
{"ago",[{1,1}]}, | |
{"years",[{1,1}]}, | |
{"work",[{19,19}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"say",[{17,17}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"what",[{18,18}]}, | |
{"remember",[{17,17}]}, | |
{"long",[{7,7},{17,17}]}, | |
{"nor",[{17,17}]}, | |
{"note",[{17,17}]}, | |
{"little",[{17,17}]}, | |
{"will",[{17,17}]}, | |
{"world",[{17,17}]}, | |
{"have",[{8,8},{15,15},{20,20},{26,26}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"struggled",[{15,15}]}, | |
{"who",[{9,9},{15,15},{20,20}]}, | |
{"ground",[{14,14}]}, | |
{"this",[{2,2},{11,11},{14,14},{26,26}]}, | |
{"hallow",[{14,14}]}, | |
{"not",[{13,14},{25,25},{28,28}]}, | |
{"detract",[{16,16}]}, | |
{"or",[{6,6},{16,16}]}, | |
{"can",[{7,7},{13,14},{18,18}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"consecrate",[{14,14}]}, | |
{"not",[{13,14},{25,25},{28,28}]}, | |
{"can",[{7,7},{13,14},{18,18}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"dedicate",[{8,8},{13,13}]}, | |
{"not",[{13,14},{25,25},{28,28}]}, | |
{"can",[{7,7},{13,14},{18,18}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"this",[{2,2},{11,11},{14,14},{26,26}]}, | |
{"do",[{11,11}]}, | |
{"should",[{11,11}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"sense",[{13,13}]}, | |
{"larger",[{13,13}]}, | |
{"advanced",[{20,20}]}, | |
{"nobly",[{20,20}]}, | |
{"so",[{6,6},{20,20}]}, | |
{"far",[{16,16},{20,20}]}, | |
{"thus",[{20,20}]}, | |
{"have",[{8,8},{15,15},{20,20},{26,26}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"fought",[{20,20}]}, | |
{"who",[{9,9},{15,15},{20,20}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"highly",[{24,24}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"vain",[{25,25}]}, | |
{"gave",[{9,9},{23,23}]}, | |
{"they",[{18,18},{20,20},{23,23}]}, | |
{"which",[{20,20},{23,23}]}, | |
{"they",[{18,18},{20,20},{23,23}]}, | |
{"which",[{20,20},{23,23}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"us",[{18,18},{22,22}]}, | |
{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]}, | |
{"is",[{10,10},{18,18},{21,21}]}, | |
{"add",[{16,16}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"power",[{16,16}]}, | |
{"poor",[{16,16}]}, | |
{"our",[{1,1},{16,16}]}, | |
{"dead",[{15,15},{22,22},{25,25}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"living",[{15,15},{19,19}]}, | |
{"men",[{3,3},{15,15}]}, | |
{"above",[{16,16}]}, | |
{"far",[{16,16},{20,20}]}, | |
{"it",[{16,16},{18,18}]}, | |
{"brave",[{15,15}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"in",[{2,2},{5,5},{13,13},{25,25}]}, | |
{"\\consecrated",[{16,16}]}, | |
{"The",[{15,15},{17,17}]}, | |
{"It",[{10,10},{18,18},{21,21}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"did",[{18,18}]}, | |
{"they",[{18,18},{20,20},{23,23}]}, | |
{"what",[{18,18}]}, | |
{"take",[{22,22}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"equal",[{3,3}]}, | |
{"created",[{3,3}]}, | |
{"are",[{3,3},{5,5},{7,7}]}, | |
{"men",[{3,3},{15,15}]}, | |
{"all",[{3,3}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"proposition",[{3,3}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"as",[{8,8}]}, | |
{"field",[{8,8}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"of",[{7,8},{24,24},{27,27}]}, | |
{"portion",[{8,8}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"dedicate",[{8,8},{13,13}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"those",[{9,9}]}, | |
{"proper",[{10,10}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"fitting",[{10,10}]}, | |
{"altogether",[{10,10}]}, | |
{"is",[{10,10},{18,18},{21,21}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"in",[{2,2},{5,5},{13,13},{25,25}]}, | |
{"engaged",[{5,5}]}, | |
{"are",[{3,3},{5,5},{7,7}]}, | |
{"we",[{5,5},{11,11},{13,14},{17,17},{22,22},{24,24}]}, | |
{"war",[{5,5},{7,7}]}, | |
{"earth",[{28,28}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"from",[{22,22},{28,28}]}, | |
{"perish",[{28,28}]}, | |
{"not",[{13,14},{25,25},{28,28}]}, | |
{"shall",[{25,26},{28,28}]}, | |
{"people",[{28,28}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]}, | |
{"people",[{28,28}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"people",[{28,28}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"of",[{7,8},{24,24},{27,27}]}, | |
{"government",[{27,27}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"dead",[{15,15},{22,22},{25,25}]}, | |
{"honored",[{22,22}]}, | |
{"these",[{22,22},{25,25}]}, | |
{"from",[{22,22},{28,28}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"us",[{18,18},{22,22}]}, | |
{"task",[{21,21}]}, | |
{"great",[{5,5},{7,7},{21,21}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"dedicated",[{3,3},{6,6},{19,19},{21,21}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"birth",[{26,26}]}, | |
{"new",[{2,2},{26,26}]}, | |
{"be",[{19,19},{21,21}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"us",[{18,18},{22,22}]}, | |
{"unfinished",[{19,19}]}, | |
{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]}, | |
{"cause",[{23,23}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"before",[{22,22}]}, | |
{"remaining",[{22,22}]}, | |
{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]}, | |
{"rather",[{19,19},{21,21}]}, | |
{"is",[{10,10},{18,18},{21,21}]}, | |
{"devotion",[{24,24}]}, | |
{"increased",[{23,23}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"freedom",[{27,27}]}, | |
{"of",[{7,8},{24,24},{27,27}]}, | |
{"by",[{28,28}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"have",[{8,8},{15,15},{20,20},{26,26}]}, | |
{"shall",[{25,26},{28,28}]}, | |
{"It",[{10,10},{18,18},{21,21}]}, | |
{"God",[{26,26}]}, | |
{"under",[{26,26}]}, | |
{"the",[{3,3},{18,19},{21,21},{23,23},{28,28}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"here",[{9,9},{15,15},{17,21},{24,24}]}, | |
{"dedicated",[{3,3},{6,6},{19,19},{21,21}]}, | |
{"be",[{19,19},{21,21}]}, | |
{"to",[{3,3},{8,8},{16,16},{19,19},{21,21},{23,23}]}, | |
{"nation",[{2,2},{6,6},{10,10},{26,26}]}, | |
{"this",[{2,2},{11,11},{14,14},{26,26}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"dedicated",[{3,3},{6,6},{19,19},{21,21}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"conceived",[{2,2},{6,6}]}, | |
{"so",[{6,6},{20,20}]}, | |
{"nation",[{2,2},{6,6},{10,10},{26,26}]}, | |
{"any",[{6,6}]}, | |
{"or",[{6,6},{16,16}]}, | |
{"nation",[{2,2},{6,6},{10,10},{26,26}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"of",[{7,8},{24,24},{27,27}]}, | |
{"field",[{8,8}]}, | |
{"battle",[{7,7}]}, | |
{"great",[{5,5},{7,7},{21,21}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"on",[{2,2},{7,7}]}, | |
{"met",[{7,7}]}, | |
{"are",[{3,3},{5,5},{7,7}]}, | |
{"We",[{8,8}]}, | |
{"endure",[{7,7}]}, | |
{"long",[{7,7},{17,17}]}, | |
{"can",[{7,7},{13,14},{18,18}]}, | |
{"Now",[{5,5}]}, | |
{"Liberty",[{2,2}]}, | |
{"in",[{2,2},{5,5},{13,13},{25,25}]}, | |
{"conceived",[{2,2},{6,6}]}, | |
{"nation",[{2,2},{6,6},{10,10},{26,26}]}, | |
{"new",[{2,2},{26,26}]}, | |
{"a",[{2,2},{5,5},{7,8},{13,13},{26,26}]}, | |
{"continent",[{2,2}]}, | |
{"this",[{2,2},{11,11},{14,14},{26,26}]}, | |
{"in",[{2,2},{5,5},{13,13},{25,25}]}, | |
{"died",[{25,25}]}, | |
{"have",[{8,8},{15,15},{20,20},{26,26}]}, | |
{"not",[{13,14},{25,25},{28,28}]}, | |
{"shall",[{25,26},{28,28}]}, | |
{"dead",[{15,15},{22,22},{25,25}]}, | |
{"these",[{22,22},{25,25}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"seven",[{1,1}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"score",[{1,1}]}, | |
{"on",[{2,2},{7,7}]}, | |
{"forth",[{2,2}]}, | |
{"dedicated",[{3,3},{6,6},{19,19},{21,21}]}, | |
{"and",[{1,1},{3,3},{6,6},{10,10},{15,15},{27,27}]}, | |
{"It",[{10,10},{18,18},{21,21}]}, | |
{"live",[{10,10}]}, | |
{"might",[{10,10}]}, | |
{"nation",[{2,2},{6,6},{10,10},{26,26}]}, | |
{"that",[{3,3},{6,11},{27,27}]}, | |
{"for",[{9,9},{18,18},{21,21},{23,23},{28,28}]}, | |
{"place",[{9,9}]}, | |
{"resting",[{9,9}]}, | |
{"resolve",[{25,25}]}, | |
{"rather",[{19,19},{21,21}]}, | |
{"devotion",[{24,24}]}, | |
{"of",[{7,8},{24,24},{27,27}]}, | |
{"forget",[{18,18}]}, | |
{"never",[{18,18}]}, | |
{"measure",[{24,24}]}, | |
{"living",[{15,15},{19,19}]}, | |
{"full",[{24,24}]}, | |
{"last",[{24,24}]}, | |
{"can",[{7,7},{13,14},{18,18}]}, | |
{"it",[{16,16},{18,18}]}, | |
{"come",[{8,8}]}, | |
{"have",[{8,8},{15,15},{20,20},{26,26}]}, | |
{"but",[{18,18}]}, | |
{"We",[{8,8}]}, | |
{"Four",[{1,1}]}, | |
{"But",[{13,13}]}] = index:index_words("getisburg.txt"), | |
ok. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment