Created
September 9, 2022 15:00
-
-
Save glickmac/385a16e7de92c7cbe793c699b85a3bf6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def suffix_array(string): | |
return(list(sorted(range(len(string)), key=lambda i:string[i:]))) | |
def bwt_from_suffix(string, s_array=None): | |
if s_array is None: | |
s_array = suffix_array(string) | |
return("".join(string[idx - 1] for idx in s_array)) | |
def lf_mapping(bwt, letters=None): | |
if letters is None: | |
letters = set(bwt) | |
result = {letter:[0] for letter in letters} | |
result[bwt[0]] = [1] | |
for letter in bwt[1:]: | |
for i, j in result.items(): | |
j.append(j[-1] + (i == letter)) | |
return(result) | |
from collections import Counter | |
def count_occurences(string, letters=None): | |
count = 0 | |
result = {} | |
if letters is None: | |
letters = set(s) | |
c = Counter(string) | |
for letter in sorted(letters): | |
result[letter] = count | |
count += c[letter] | |
return result | |
def update(begin, end, letter, lf_map, counts, string_length): | |
beginning = counts[letter] + lf_map[letter][begin - 1] + 1 | |
ending = counts[letter] + lf_map[letter][end] | |
return(beginning,ending) | |
def generate_all(input_string, s_array=None, eos="$"): | |
letters = set(input_string) | |
try: | |
assert eos not in letters | |
counts = count_occurences(input_string, letters) | |
input_string = "".join([input_string, eos]) | |
if s_array is None: | |
s_array = suffix_array(input_string) | |
bwt = bwt_from_suffix(input_string, s_array) | |
lf_map = lf_mapping(bwt, letters | set([eos])) | |
for i, j in lf_map.items(): | |
j.extend([j[-1], 0]) | |
return letters, bwt, lf_map, counts, s_array | |
except: | |
print("End of string character found in text, deleted EOS from input string") | |
input_string = input_string.replace(eos, "") | |
letters = set(input_string) | |
counts = count_occurences(input_string, letters) | |
input_string = "".join([input_string, eos]) | |
if s_array is None: | |
s_array = suffix_array(input_string) | |
bwt = bwt_from_suffix(input_string, s_array) | |
lf_map = lf_mapping(bwt, letters | set([eos])) | |
for i, j in lf_map.items(): | |
j.extend([j[-1], 0]) | |
return letters, bwt, lf_map, counts, s_array |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment