Created
March 14, 2010 15:40
-
-
Save kennytm/332041 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.List | |
import System | |
-- Encode a character range as regex. | |
bs :: Char -> Char -> String | |
bs '0' '9' = "\\d" | |
bs 'A' y = bs '1' y | |
bs x y | y == x = [x] | |
| y == succ x = ['[', x, y, ']'] | |
| True = ['[', x, '-', y, ']'] | |
-- The regex for a rule repeated some number of times. | |
nDigs2 :: Int -> Int -> String -> String | |
nDigs2 1 1 z = z | |
nDigs2 1 2 z = z ++ z ++ "?" | |
nDigs2 2 2 z = z ++ z | |
nDigs2 x y z | y == x = z ++ '{' : show x ++ "}" | |
| True = z ++ '{' : show x ++ ',' : show y ++ "}" | |
nDigs :: String -> String | |
nDigs xs = nDigs2 (length xs) (length xs) "\\d" | |
nDigsN :: (String -> String -> [String]) -> String -> String -> String -> [String] | |
nDigsN f px (x:xs) rep = (px ++ nDigs2 (1+length right) (1+length xs) rep) : f px right | |
where right = dropWhile (x==) xs | |
-- Integer part encoder -------------------------------------------------------- | |
-- 3xxx ~ 9999 = 3(xxx ~ 999) | [4-9]\d{3} | |
iPartL :: String -> String -> [String] | |
iPartL px [x] = [px ++ bs x '9'] | |
iPartL px y@('@':xs) = nDigsN iPartL px y "\\d" | |
iPartL px ( x :xs) = (px ++ bs (succ x) '9' ++ nDigs xs) : iPartL (px ++ [x]) xs | |
-- 0000 ~ 4xxx = 4(000 ~ xxx) | [0-3]?\d{3} | |
iPartR :: String -> String -> [String] | |
iPartR px [x] = [px ++ bs '0' x] | |
iPartR px ('0':xs) = iPartR (px ++ "0") xs | |
iPartR px ( x :xs) = (px ++ bs '0' (pred x) ++ nDigs xs) : iPartR (px ++ [x]) xs | |
-- 2xxx ~ 7yyy = 2xxx ~ 2999 | [3-6]\d{3} | 7000 ~ 7yyy | |
iPartA :: String -> String -> String -> [String] | |
iPartA px [x] [y] = [px ++ bs x y] | |
iPartA px (x:xs) (y:ys) | y == x = iPartA (px ++ [x]) xs ys | |
| y == succ x = boundary | |
| True = (px ++ bs (succ x) (pred y) ++ nDigs xs) : boundary | |
where boundary = iPartL (if x /= '@' then px ++ [x] else px) xs ++ iPartR (px ++ [y]) ys | |
-- Fractional part encoder ----------------------------------------------------- | |
-- 1.3xx ~ 1.999... = 1.3(xx ~ 99...) | 1.[4-9] | |
fPartL :: String -> String -> [String] | |
fPartL px "" = [px] | |
fPartL px [x] = [px ++ bs x '9'] | |
fPartL px (x:xs) = (px ++ bs (succ x) '9') : fPartL (px ++ [x]) xs | |
-- 1.000... ~ 1.3xx = 1.[0-2] | 1.3(00... ~ xx) | |
fPartR :: String -> String -> [String] | |
fPartR px "" = [] | |
fPartR px [x] = [px ++ '(': bs '0' (pred x) ++ "\\d*)?"] | |
fPartR px y@('0':xs) = nDigsN fPartR px y "0" | |
fPartR px (x:xs) = fPartR px [x] ++ fPartR (px ++ [x]) xs | |
-- 1.2xx ~ 1.7yy = 1.2xx ~ 1.299... | 1.[3-6] | 1.700 ~ 1.7yy | |
fPartA :: String -> String -> String -> [String] | |
fPartA px "" ys = fPartR px ys | |
fPartA px (x:xs) (y:ys) | y == x = fPartA (px ++ [x]) xs ys | |
| y == succ x = boundary | |
| True = (px ++ bs (succ x) (pred y)) : boundary | |
where boundary = fPartL (px ++ [x]) xs ++ fPartR (px ++ [y]) ys | |
-- Main encoder ---------------------------------------------------------------- | |
-- Create a regex that has a required integer and (maybe) optional fractional part. | |
matchDec :: String -> String -> String -> String | |
matchDec "" yi "" = yi ++ "\\.?" | |
matchDec "" yi yf = yi ++ "(\\." ++ yf ++ ")?" | |
matchDec _ yi yf = yi ++ "\\." ++ yf | |
-- Match multiple rules by or-ing them. | |
collect :: [String] -> String | |
collect [] = "" | |
collect [xs] = xs | |
collect xss = '(' : intercalate "|" xss ++ ")" | |
-- Regex to match number given integer and fraction parts of the bounds. | |
range' :: (String, String) -> (String, String) -> String | |
range' (xi, xf) (yi, yf) | yi == xi && xf == yf = matchDec xf xi $ xf++"0*" | |
| yi == xi = xi ++ "(\\." ++ collect (fPartA "" xf yf) ++ "\\d*|\\." ++ yf ++ "0*)" ++ if xf /= "" then "" else "?" | |
| yi == x1 = boundary | |
| True = boundary ++ '|' : collect (iPartA "" (replicate (length y1 - length x1) '@' ++ x1) y1) ++ "(\\.\\d*)?" | |
where boundary = matchDec "?" xi (collect (fPartL "" xf)) ++ "\\d*|" ++ | |
matchDec "" yi (collect ((yf ++ "0*") : fPartR "" yf)) | |
x1 = show $ read xi + 1 | |
y1 = show $ read yi - 1 | |
range :: String -> String -> String | |
range xs ys = range' (splitByDec xs) (splitByDec ys) | |
where splitByDec = (\(x,y)->(x,tail y)) . span(/='.') | |
main :: IO() | |
main = do | |
args <- getArgs | |
putStrLn $ if length args >= 2 then | |
range (args!!0) (args!!1) | |
else | |
"Usage: float-matching-regex <lower-bound> <upper-bound>" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 1,279 chars | |
import Data.List | |
import System | |
infixr 5& | |
b=True | |
(&)=(++) | |
d=length | |
j=show | |
a="\\d" | |
g="\\." | |
h=(==).succ | |
'0'?'9'=a | |
'A'?y='1'?y | |
x?y|y==x=[x]|h y x=['[',x,y,']']|b=['[',x,'-',y,']'] | |
x~<y=succ x?pred y | |
c 1 1z=z | |
c 1 2z=z&z&"?" | |
c 2 2z=z&z | |
c x y z|y==x=z&'{':j x&"}"|b=z&'{':j x&',':j y&"}" | |
e x=c(d x)(d x)a | |
f v w(x:y)z=(w&c(1+d u)(1+d y)z):v w u where u=dropWhile(x==)y | |
i z[x]=[z&x?'9'] | |
i z x@('@':y)=f i z x a | |
i z(x:y)=(z&x~<':'&e y):i(z&[x])y | |
k z[x]=[z&'0'?x] | |
k z('0':y)=k(z&"0")y | |
k z(x:y)=(z&'/'~<x&e y):k(z&[x])y | |
l z[x][y]=[z&x?y] | |
l z(x:u)(y:v)|y==x=l(z&[x])u v|h y x=w|b=(z&x~<y&e u):w where w=i(if x/='@'then z&[x]else z)u&k(z&[y])v | |
m z""=[z] | |
m z[x]=[z&x?'9'] | |
m z(x:u)=(z&x~<':'):m(z&[x])u | |
z%""=[] | |
z%[x]=[z&'(':'/'~<x&a&"*)?"] | |
z%x@('0':u)=f(%)z x"0" | |
z%(x:u)=z%[x]&(z&[x])%u | |
o z""ys=z%ys | |
o z(x:u)(y:v)|y==x=o(z&[x])u v|h y x=w|b=(z&x~<y):w where w=m(z&[x])u&(z&[y])%v | |
p""x""=x&g&"?" | |
p""x y=x&'(':g&y&")?" | |
p _ x y=x&g&y | |
q[]="" | |
q[x]=x | |
q x='(':intercalate"|"x&")" | |
r(x,u)(y,v)|y==x&&u==v=p u x$u&"0*"|y==x=x&'(':g&q(o""u v)&a&"*|"&g&v&"0*)"&if u/=""then""else"?"|y==w=z|b=z&'|':q(l""(replicate(d t-d w)'@'&w)t)&'(':g&a&"*)?"where z=p"?"x(q$m""u)&a&"*|"&p""y(q$(v&"0*"):""%v);w=j$read x+1;t=j$read y-1 | |
main=getArgs>>=(\x->putStrLn$r(w$x!!0)$w$x!!1)where w=(\(x,y)->(x,tail y)).span(/='.') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment