Last active
July 24, 2021 19:36
-
-
Save amustaque97/0dc27cf18e28199f5a0d950d3fe44e64 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Online Python compiler (interpreter) to run Python online. | |
# Write Python 3 code in this online editor and run it. | |
from heapq import * | |
def minimum_cost_get_pattern(string, pattern): | |
n = len(string)+1 | |
m = len(pattern)+1 | |
distance = [[0 for x in range(n)] for x in range(m)] | |
for x in range(n): | |
distance[0][x] = x | |
for x in range(m): | |
distance[x][0] = x | |
for x in range(1, m): | |
for y in range(1, n): | |
cost = 1 if pattern[x-1] != string[y-1] else 0 | |
distance[x][y] = min([distance[x-1][y] + 1, distance[x][y-1] + 1, distance[x-1][y-1] + cost]) | |
return distance[m-1][n-1] | |
# list of string for matching | |
string_list = set(['Lorem', 'Ipsum', 'is', 'simply', 'dummy', 'text', 'of', 'the', 'printing', 'and', 'typesetting', 'industry.', 'Lorem', 'Ipsum', 'has', 'been', 'the', 'industrys', 'standard', 'dummy', 'text', 'ever', 'since', 'the', '1500s', 'when', 'an', 'unknown', 'printer', 'took', 'a', 'galley', 'of', 'type', 'and', 'scrambled', 'it', 'to', 'make', 'a', 'type', 'specimen', 'book.', 'It', 'has', 'survived', 'not', 'only', 'five', 'centuries', 'but', 'also', 'the', 'leap', 'into', 'electronic', 'typesetting', 'remaining', 'essentially', 'unchanged.', 'It', 'was', 'popularised', 'in', 'the', '1960s', 'with', 'the', 'release', 'of', 'Letraset', 'sheets', 'containing', 'Lorem', 'Ipsum', 'passages', 'and', 'more', 'recently', 'with', 'desktop', 'publishing', 'software', 'like', 'Aldus', 'PageMaker', 'including', 'versions', 'of', 'Lorem', 'Ipsum', 'lrem', 'rem', 'lreom']) | |
pattern = 'rem' | |
# since we need to find an approximate pattern where cost is minimum. | |
# Min heap/Min priority queue will work best in our case. | |
heap = [] | |
for string in string_list: | |
cost = minimum_cost_get_pattern(string, pattern) | |
heappush(heap, (cost, string)) | |
# print most relevant 5 results | |
for _ in range(5): | |
print(heappop(heap)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment