Skip to content

Instantly share code, notes, and snippets.

@leongkui
Last active July 6, 2023 23:44
Show Gist options
  • Save leongkui/c69c840eda526243017cdb19d1551ed2 to your computer and use it in GitHub Desktop.
Save leongkui/c69c840eda526243017cdb19d1551ed2 to your computer and use it in GitHub Desktop.
Fuzzy String Matching and Removal
# fuzzy_removal scans the text word by word and replaces each run of words that is similar to the target with the replacement string
# the default replacement is <Patient Name>
# the default minimum similarity score is 80; a run of words must score strictly above it to be replaced
def fuzzy_removal(
    text: str,
    target: str,
    replacement: str = '<Patient Name>',
    min_score: int = 80,
    debug: bool = False,
) -> str:
    """Replace runs of words in ``text`` that fuzzily match ``target``.

    ``text`` is split on single spaces. Starting at each word, windows of
    1 up to ``len(target.split(' ')) + 1`` words are joined back with spaces
    and scored against ``target`` using ``fuzz.ratio``. The first window
    whose score is strictly greater than ``min_score`` is replaced by
    ``replacement`` and scanning resumes right after that window.

    Only the matched window itself is replaced: other occurrences of the
    same characters (inside longer words, or inside an earlier
    ``replacement``) are left untouched.

    Args:
        text: The text to scan.
        target: The string to look for (for example a patient name).
        replacement: Text substituted for every matching window.
        min_score: Exclusive lower bound on the similarity score for a
            window to count as a match.
        debug: When true, print every comparison and every match.

    Returns:
        ``text`` with each matching window replaced. Spacing of the
        unmatched words is preserved. A blank ``target`` returns ``text``
        unchanged.

    Note:
        Relies on a ``fuzz`` object with a ``ratio(a, b)`` function being
        available at module level (presumably fuzzywuzzy/thefuzz, whose
        scores range from 0 to 100 -- confirm against the file's imports).
    """
    words = text.split(' ')
    full_length = len(words)
    if debug:
        print("text=%s" % text)
        print("Full length = %d" % full_length)

    # A blank target cannot meaningfully match anything; bail out before
    # empty or whitespace-only windows get a chance to score against it.
    if not target.strip():
        return text

    # Allow one word more than the target has, so a name with an extra
    # token (e.g. a middle initial) can still be caught.
    max_window = len(target.split(' ')) + 1

    output = []
    i = 0
    while i < full_length:
        matched_end = None
        # Windows always contain at least one word, so a match always
        # advances ``i`` and the scan is guaranteed to terminate.
        last_end = min(i + max_window, full_length)
        for final_pos in range(i + 1, last_end + 1):
            candidate = ' '.join(words[i:final_pos])
            score = fuzz.ratio(candidate, target)
            if debug:
                print("comparing (%s) with (%s), score = %d" % (candidate, target, score))
            if score > min_score:
                if debug:
                    print('Match: %s' % candidate)
                matched_end = final_pos
                break

        if matched_end is None:
            # No window starting here matched: keep the word as-is.
            output.append(words[i])
            i += 1
        else:
            # Replace exactly the matched window and skip past it.
            output.append(replacement)
            i = matched_end
    return ' '.join(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment