Created
February 5, 2020 13:13
-
-
Save NielsLiisberg/7881bd320eb281b566dab44c7ceeb50e to your computer and use it in GitHub Desktop.
SQL implementation of the ”Longest common subsequence” algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This implements the ”Longest common subsequence” algorithm by | |
-- loading two source physical file members into arrays. This | |
-- showcases the use of arrays in SQL but also the performance. | |
-- I.e. 1000 lines of code will compare up to 1.000.000 times, | |
-- hoewever, this runs quite fast because of the feature | |
-- to use memory in memory to do the magic. The LCS algorithm is i.e. used | |
-- in GIT for tracking changes. | |
-- | |
-- Read more here: | |
-- https://en.wikipedia.org/wiki/Longest_common_subsequence_problem | |
------------------------------------------------------------------- | |
-- qgpl is just used here. Use your own library | |
cl:addlible qgpl; | |
create type qgpl.lcsSrcArr as char(200) array[]; | |
create type qgpl.lcsSmallintArr as smallint array[]; | |
create or replace procedure qgpl.Longest_common_subsequence () | |
no external action | |
set option dbgview = *source, commit=*none | |
begin | |
declare arrNew lcsSrcArr; | |
declare arrOld lcssrcArr; | |
declare arrLenNew int; | |
declare arrLenOld int; | |
declare relNew lcsSmallintArr; | |
declare relOld lcsSmallintArr; | |
declare ixNew int; | |
declare ixOld int; | |
declare ixCntNew int; | |
declare ixCntOld int; | |
declare cnt int; | |
declare topOld int; | |
declare topNew int; | |
declare maxcnt int default 0; | |
declare i int; | |
declare j int; | |
-- load the source files into the dynamic arrays | |
set arrNew = (select array_agg(srcdta) from xnew ); | |
set arrOld = (select array_agg(srcdta) from xold ); | |
-- Get the length of each array | |
set arrLenNew = cardinality(arrNew); | |
set arrLenOld = cardinality(arrOld); | |
-- Initialization is required, since random update | |
-- is unfortunately not allowed in dynamic arrays | |
set i =1; | |
while i <= arrLenNew do | |
set relNew [i] = null; | |
set i = i + 1; | |
end while; | |
set i =1; | |
while i <= arrLenOld do | |
set relOld [i] = null; | |
set i = i + 1; | |
end while; | |
-- Keep on until all sequences are found | |
-- and sorted with longest sequence first | |
repeat | |
set ixNew = 1; | |
set maxcnt =0; | |
while ixNew <= arrLenNew do | |
if relNew[ixNew] is null then | |
set ixOld =1; | |
while ixOld <= arrLenOld do | |
if relOld[ixOld] is null and arrNew[ixNew] = arrOld[ixOld] then | |
set cnt = 0; | |
repeat | |
set cnt = cnt + 1; | |
set ixNew = ixNew +1; | |
set ixOld = ixOld +1; | |
until ixNew > arrLenNew or ixOld > arrLenOld or arrNew[ixNew] <> arrOld[ixOld] | |
end repeat; | |
if cnt > maxcnt then | |
set maxcnt = cnt; | |
set topNew = ixNew - cnt; | |
set topOld = ixOld - cnt; | |
--insert into trace (values('max ' || maxcnt)); | |
end if; | |
set ixNew = ixNew -1; | |
set ixOld = ixOld -1; | |
end if; | |
set ixOld = ixOld +1; | |
end while; | |
end if; | |
set ixNew = ixNew +1; | |
end while; | |
-- flag all compared values, so we dont test/usem them again | |
set i = 0; | |
while i < maxcnt do | |
set relNew [topNew + i] = topOld + i; | |
set relOld [topOld + i] = topNew + i; | |
set i = i + 1; | |
end while; | |
--insert into trace (text) select 'new' || t.n from unnest (relNew) as T(n); | |
--insert into trace (text) select 'old' || t.n from unnest (relOld) as T(n); | |
until maxcnt =0 | |
end repeat; | |
-- This the result | |
insert into trace (text) select t.n from unnest (relNew) as T(n); | |
end; | |
-- Build test case: | |
-- Swap the sequence of a,b,c and d,e and insert an X in between | |
cl: crtsrcpf file(qtemp/xold) mbr(*file); | |
insert into xold (srcdta) values ('a'), ('b'), ('c'), ('d'), ('e'); | |
cl:crtsrcpf file(qtemp/xnew) mbr(*file); | |
insert into xnew (srcdta) values ('d'), ('e'),('X'),('a'), ('b'), ('c'); | |
-- Simple trace file | |
create or replace table qgpl.trace ( | |
text varchar(4096) | |
); | |
-- Run the test | |
call qgpl.Longest_common_subsequence(); | |
-- Show the result: | |
-- This is the rownumbers in "old" where the exists in "new" | |
select * from trace; | |
-- reset test: | |
cl: dltf qgpl/trace; | |
cl: dltf qtemp/xold; | |
cl: dltf qtemp/xnew; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment