Created
November 29, 2012 05:50
-
-
Save Benjit87/4167054 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Original code from SAS Documents */
/*
 * Builds WORK.TEST: one row per sample pair of strings, with the generalized
 * edit distance between them.
 *
 * The INPUT statement is column-based, so every data line must keep this layout:
 *   cols  1-8   String1   ($char8.)
 *   col   9     skipped   (+1)
 *   cols 10-17  String2   ($char8.)
 *   col  18     skipped   (+1)
 *   cols 19-58  Operation ($40.) - free-text label for the row
 * Data lines therefore start in column 1 and must not be re-indented or have
 * their internal spacing collapsed.
 */
data test;
   infile datalines missover;
   input String1 $char8. +1 String2 $char8. +1 Operation $40.;
   /* Distance is computed once per row, from String1 to String2. */
   GED = compged(String1, String2);
   datalines;
baboon   baboon   match
baXboon  baboon   insert
baoon    baboon   delete
baXoon   baboon   replace
baboonX  baboon   append
baboo    baboon   truncate
babboon  baboon   double
babon    baboon   single
baobon   baboon   swap
bab oon  baboon   blank
bab,oon  baboon   punctuation
bXaoon   baboon   insert+delete
bXaYoon  baboon   insert+replace
bXoon    baboon   delete+replace
Xbaboon  baboon   finsert
aboon    baboon   trick question: swap+delete
Xaboon   baboon   freplace
axoon    baboon   fdelete+replace
axoo     baboon   fdelete+replace+truncate
axon     baboon   fdelete+replace+single
baby     baboon   replace+truncate*2
balloon  baboon   replace+insert
;
/* Edit Distance Matrix */
/*
 * Builds WORK.MATRIX1: for every row of TEST (String1), one output row per
 * row of TEST (String3), including the row paired with itself, with
 * GED = compged(String1, String3).
 */
data matrix1(keep=String1 String3 GED);
   /* Outer read: sequential pass over TEST supplying String1. */
   set test(keep=String1);

   do i = 1 to RowCount;
      /*
       * Inner read: direct access to observation i, with String1 renamed to
       * String3 so it does not overwrite the outer value.
       *   nobs=RowCount - total number of observations in TEST
       *   point=i       - fetch the observation by number
       * keep= limits the read to the one column that is needed.
       */
      set test(keep=String1 rename=(String1=String3)) nobs=RowCount point=i;
      GED = compged(String1, String3);
      output;
   end;
run;
/* Edit Distance Matrix (Symmetrical) */
/*
 * Builds WORK.MATRIX2: each row of TEST (String1) is paired only with the
 * rows that precede it (String3), so every unordered pair appears once and
 * no row is paired with itself.
 */
data matrix2(keep=String1 String3 GED);
   set test;

   /*
    * Make sure the cost functions are the same for insert/delete,
    * finsert/fdelete, append/truncate and double/single, so the distance
    * does not depend on argument order.
    * NOTE(review): 50 is presumably chosen to equal the APPEND cost - confirm
    * against the COMPGED cost table.
    */
   if _n_ = 1 then call compcost('truncate=',50);

   /* Visit only observations numbered below the current one. */
   do i = 1 to _n_ - 1;
      /* Direct access by observation number; String1 arrives as String3. */
      set test(rename=(String1=String3)) point=i;
      GED = compged(String1, String3);
      output;
   end;
run;
/* Edit Distance Matrix SQL */
/*
 * Builds WORK.MATRIXSQL: every row of WORK.TEST paired with every row
 * (including itself), with the edit distance for the pair in GEDSCORE.
 * The second string is aliased to String3; two output columns both named
 * String1 cannot coexist in a created table.
 */
proc sql;
   create table matrixSQL as
   select
      a.String1,
      b.String1 as String3,
      compged(a.String1, b.String1) as GEDSCORE
   from work.test as a
   cross join work.test as b;
quit;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment