mtao · March 28, 2018 18:21
diff --git a/get_five.sh b/get_five.sh
 if [ ! -e words.txt ]; then
    wget https://raw.githubusercontent.com/dwyl/english-words/master/words.txt > /dev/null 2&>1
    cat words.txt | tr '[:upper:]' '[:lower:]'| sort -u | grep -v "-" | grep -v "\." | grep -v "'" | grep -v "/" | grep -v "&" > /tmp/words_sorted.txt

 fi
 rm /tmp/my_dict.txt

 prefix="$1"
 grep "^$1" words.txt | head -n 5
 charset=$( cat /tmp/words_sorted.txt | grep -o . | sort -u | tr -d "\n" )

 charset_size=${#charset}

 return_size=5

 function oracle() {
    prefix="$1"
    grep "^$1" /tmp/words_sorted.txt | head -n $return_size
 }

 function get_char() {
    echo ${charset:$1:1}
 }

 function char_index() {
    expr $( expr index "$charset" "$1" ) - 1
 }

 #max similar substring from the beginning
 function max_similar_substring_len() {
    s1="$1"
    s2="$2"
    if [ ${#s1} -lt ${#s2} ]; then
        len=${#s1}
    else
        len=${#s2}
    fi
    for m in $( seq 0 $( expr ${len} - 1 ) )
    do
        if [ ${s1:${m}:1} != ${s2:${m}:1} ];
        then
            echo $m
            return
        fi
    done
    echo $len
 }

 #current prefix starts with the first character
 current_prefix=${charset:0:1}

 #last character in current prefix
 function prefix_end() {
    prefix_last_index=$( expr ${#current_prefix} - 1 )
    #last character of prefix
    echo ${current_prefix:${prefix_last_index}:1}
 }

 #remove last character in prefix
 function pop_prefix() {
    prefix_last_index=$( expr ${#current_prefix} - 1 )
    #Pop a prefix
    new_current_prefix="${current_prefix:0:${prefix_last_index}}"
    current_prefix="${new_current_prefix}"
 }


 #lexicographical increment
 function increment_prefix() {
    start_prefix=${current_prefix}
    if [ "${#current_prefix}" -eq 0 ] 
    then return
    fi
    tail=$( prefix_end )
    pop_prefix
    charidx=$( char_index ${tail} )
    nextidx=$( expr ${charidx} + 1 )
    if [ ${nextidx} -eq ${charset_size} ];
    then
        increment_prefix
    else
        nextchar=$( get_char ${nextidx} )
        current_prefix="${current_prefix}${nextchar}"
    fi
    #>&2 echo "${start_prefix} -> ${current_prefix}"

 }


 function oracle_last() {
    prefix="$1"
    res=$( oracle $prefix )
    >&2 echo $prefix: $res
    echo "$res" | tail -n 1

 }


 function next_prefix() {
    echo "Current prefix: ${current_prefix}"
    res=$( oracle ${current_prefix} )
    #if [ -n "${res}" ]
    #then >&2 echo "current:" ${res}
    #fi
    len=$( echo "$res" | wc -l )
    last2=$( echo "$res" | tail -n 2 )
    lastnd=$( echo "$last2" | head -n 1 )
    last=$( echo "$last2" | tail -n 1 )
    if [ $len -eq $return_size ];
    then
        #write to file
        echo "$res" | head -n $(expr ${return_size} - 1) >> /tmp/my_dict.txt
        
        #new prefix is the smallest prefix not shared by the last two elements
        new_prefix_len=$( expr $( max_similar_substring_len $last $lastnd ) + 1 )
        new_current_prefix="${last:0:${new_prefix_len}}"
        >&2 echo "Digging down: ${current_prefix} -> ${new_current_prefix}"
        current_prefix="${new_current_prefix}"
    else
        #write to file but beware of empty newlines by empty newwords
        newwords=${res}
        if [ -n "${newwords}" ]; then 
            echo "${newwords}" >> /tmp/my_dict.txt
        fi
        #jump up until the word-graph until there's something to the right and start searching to that right
        myprefix=${current_prefix}
        while [ "$( oracle_last ${myprefix} )" = "$last" ]
        do
            prefix_last_index=$( expr ${#myprefix} - 1 )
            #Pop a prefix
            new_myprefix="${myprefix:0:${prefix_last_index}}"
            myprefix="${new_myprefix}"
        done
        current_prefix=${current_prefix:0:$( expr ${#myprefix} + 1 )}
        increment_prefix
    fi


 }



 #char_loop_str=$( seq 0 $( expr ${charset_size} - 1 )  )
 #
 #for m in $char_loop_str;
 #do echo "$m) $( get_char $m ) $( char_index $( get_char $m ) )"
 #done

 while [ ${#current_prefix} -gt 0 ]
 do next_prefix
 done
	if [ ! -e words.txt ]; then
	wget https://raw.githubusercontent.com/dwyl/english-words/master/words.txt > /dev/null 2&>1
	cat words.txt \| tr '[:upper:]' '[:lower:]'\| sort -u \| grep -v "-" \| grep -v "\." \| grep -v "'" \| grep -v "/" \| grep -v "&" > /tmp/words_sorted.txt

	fi
	rm /tmp/my_dict.txt

	prefix="$1"
	grep "^$1" words.txt \| head -n 5
	charset=$( cat /tmp/words_sorted.txt \| grep -o . \| sort -u \| tr -d "\n" )

	charset_size=${#charset}

	return_size=5

	function oracle() {
	prefix="$1"
	grep "^$1" /tmp/words_sorted.txt \| head -n $return_size
	}

	function get_char() {
	echo ${charset:$1:1}
	}

	function char_index() {
	expr $( expr index "$charset" "$1" ) - 1
	}

	#max similar substring from the beginning
	function max_similar_substring_len() {
	s1="$1"
	s2="$2"
	if [ ${#s1} -lt ${#s2} ]; then
	len=${#s1}
	else
	len=${#s2}
	fi
	for m in $( seq 0 $( expr ${len} - 1 ) )
	do
	if [ ${s1:${m}:1} != ${s2:${m}:1} ];
	then
	echo $m
	return
	fi
	done
	echo $len
	}

	#current prefix starts with the first character
	current_prefix=${charset:0:1}

	#last character in current prefix
	function prefix_end() {
	prefix_last_index=$( expr ${#current_prefix} - 1 )
	#last character of prefix
	echo ${current_prefix:${prefix_last_index}:1}
	}

	#remove last character in prefix
	function pop_prefix() {
	prefix_last_index=$( expr ${#current_prefix} - 1 )
	#Pop a prefix
	new_current_prefix="${current_prefix:0:${prefix_last_index}}"
	current_prefix="${new_current_prefix}"
	}


	#lexicographical increment
	function increment_prefix() {
	start_prefix=${current_prefix}
	if [ "${#current_prefix}" -eq 0 ]
	then return
	fi
	tail=$( prefix_end )
	pop_prefix
	charidx=$( char_index ${tail} )
	nextidx=$( expr ${charidx} + 1 )
	if [ ${nextidx} -eq ${charset_size} ];
	then
	increment_prefix
	else
	nextchar=$( get_char ${nextidx} )
	current_prefix="${current_prefix}${nextchar}"
	fi
	#>&2 echo "${start_prefix} -> ${current_prefix}"

	}


	function oracle_last() {
	prefix="$1"
	res=$( oracle $prefix )
	>&2 echo $prefix: $res
	echo "$res" \| tail -n 1

	}


	function next_prefix() {
	echo "Current prefix: ${current_prefix}"
	res=$( oracle ${current_prefix} )
	#if [ -n "${res}" ]
	#then >&2 echo "current:" ${res}
	#fi
	len=$( echo "$res" \| wc -l )
	last2=$( echo "$res" \| tail -n 2 )
	lastnd=$( echo "$last2" \| head -n 1 )
	last=$( echo "$last2" \| tail -n 1 )
	if [ $len -eq $return_size ];
	then
	#write to file
	echo "$res" \| head -n $(expr ${return_size} - 1) >> /tmp/my_dict.txt

	#new prefix is the smallest prefix not shared by the last two elements
	new_prefix_len=$( expr $( max_similar_substring_len $last $lastnd ) + 1 )
	new_current_prefix="${last:0:${new_prefix_len}}"
	>&2 echo "Digging down: ${current_prefix} -> ${new_current_prefix}"
	current_prefix="${new_current_prefix}"
	else
	#write to file but beware of empty newlines by empty newwords
	newwords=${res}
	if [ -n "${newwords}" ]; then
	echo "${newwords}" >> /tmp/my_dict.txt
	fi
	#jump up until the word-graph until there's something to the right and start searching to that right
	myprefix=${current_prefix}
	while [ "$( oracle_last ${myprefix} )" = "$last" ]
	do
	prefix_last_index=$( expr ${#myprefix} - 1 )
	#Pop a prefix
	new_myprefix="${myprefix:0:${prefix_last_index}}"
	myprefix="${new_myprefix}"
	done
	current_prefix=${current_prefix:0:$( expr ${#myprefix} + 1 )}
	increment_prefix
	fi


	}



	#char_loop_str=$( seq 0 $( expr ${charset_size} - 1 ) )
	#
	#for m in $char_loop_str;
	#do echo "$m) $( get_char $m ) $( char_index $( get_char $m ) )"
	#done

	while [ ${#current_prefix} -gt 0 ]
	do next_prefix
	done