Skip to content

Instantly share code, notes, and snippets.

@pcarrier
Created July 30, 2009 02:15
Show Gist options
  • Save pcarrier/158505 to your computer and use it in GitHub Desktop.
Save pcarrier/158505 to your computer and use it in GitHub Desktop.
function pack(...)
return arg
end --function
function as_table(s)
return pack(s:byte(1, #s))
end --function
function min(t)
local min = t[1]
for _, v in ipairs(t) do
if(v < min) then min = v end
end --for
return min
end --function
-- Levenshtein distance
function distance_between_words(t1,t2)
local l1 = #t1
local l2 = #t2
local mt = {}
local cost = 0
-- Initial matrix
for i = 0, l1 do
mt[i] = {}
mt[i][0] = i
end --for
for j = 0, l2 do
mt[0][j] = j
end --for
for j = 1, l2 do
for i = 1, l1 do
if(t1[i] == t2[j])
then cost = 0
else cost = 1
end --if
mt[i][j] = min{
mt[i-1][j] + 1, -- insertion
mt[i][j-1] + 1, -- deletion
mt[i-1][j-1] + cost -- substitution
}
end --for i
end --for j
return mt[l1][l2]
end --function
-- Loads an Unix-like dictionary in words
f = io.open(arg[1],"r")
local words = {}
for line in f:lines() do
words[#words + 1] = line
end --for
-- KISS
local r = {}
local mine = arg[2]
local mine_as_table = as_table(mine)
for k, w in ipairs(words) do
r[k] = {w, distance_between_words(mine_as_table, as_table(w))}
end --for
function r_order(a, b)
return (a[2] < b[2])
end --function
table.sort(r, r_order)
for _, wc in ipairs(r) do
print(wc[1], wc[2])
end --for
--[[
-- For each word, this evaluates its distance from every other word
local total = #words
local dist = {}
local t1
local t2
for counter, w1 in pairs(words) do
print("Done "..(counter/total))
dist[w1] = {}
t1 = as_table(w1)
for _, w2 in pairs(words) do
if(w2 ~= w1) then
if(dist[w2]) then -- already computed distances for w2
dist[w1][w2] = dist[w2][w1]
else
t2 = as_table(w2)
dist[w1][w2] = dist_between_words(t1,t2)
end --if
end --if
end --for w2
end --for w1
--]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment