-
-
Save iArnold/e75b3ae233fad0cb73dd3fc44fc8caa2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [ | |
Title: "Partial grep implementation" | |
Purpose: {To search the input for lines containing a match | |
to the given pattern, specified as a regular expression} | |
Author: "Rudolf W. MEIJER" | |
File: %grep.red | |
Version: 0.4.0 | |
Date: "24-Nov-2016" | |
Rights: "(c) Copyright 2016 Rudolf W. MEIJER" | |
History: [ | |
[0.0.0 "2-Nov-2017" {Start of project}] | |
[0.1.0 "9-Nov-2017" {First incomplete working version}] | |
[0.2.0 "22-Nov-2016" {Ranges and repetition implemented}] | |
[0.3.0 "23-Nov-2016" {Refactoring}] | |
[0.4.0 "24-Nov-2017" {Added /quiet option}] | |
] | |
Notes: {see GNU Grep 2.26, http://www.gnu.org/software/grep/manual/grep, | |
also the grep(1) Linux man page, e.g. https://linux.die.net/man/1/grep, | |
and https://en.wikipedia.org/wiki/Regular_expression | |
} | |
Language: 'English | |
] | |
;---|----1----|----2----|----3----|----4----|----5----|----6----|----7----|- | |
grep-impl: context [ | |
; charset constants | |
_ASCII: charset [#"^(00)" - #"^(7F)"] | |
_upper: charset [#"A" - #"Z"] | |
_lower: charset [#"a" - #"z"] | |
_alpha: union _upper _lower | |
_digit: charset [#"0" - #"9"] | |
_xdigit: union _digit charset [#"A" - #"F" #"a" - #"f"] | |
_alnum: union _alpha _digit | |
_blank: charset " ^-" | |
_space: union _blank charset [#"^(0A)" - #"^(0D)"] | |
_cntrl: charset [#"^(00)" - #"^(1F)" #"^(7F)"] | |
_punct: charset {!"#$%&'()*+,-./:;<=>?@[\]^^_`{|}~} | |
_graph: union _alnum _punct | |
_print: union _graph charset " " | |
_meta: charset "|()?*+.[]{}\" | |
_escape: charset "|()?*+.[{\" | |
_atom: intersect _ASCII complement _meta | |
char-classes: reduce [ | |
"upper" _upper | |
"lower" _lower | |
"alpha" _alpha | |
"digit" _digit | |
"xdigit" _xdigit | |
"alnum" _alnum | |
"blank" _blank | |
"space" _space | |
"cntrl" _cntrl | |
"punct" _punct | |
"graph" _graph | |
"print" _print | |
] | |
rules: none | |
options: context [ | |
case: none | |
invert: none | |
count: none | |
quiet: none | |
] | |
stats: context [ | |
files: 0 | |
lines: 0 | |
matched: 0 | |
time: 0 | |
] | |
make-rules: func [ | |
{takes a regex and constructs the corresponding Red parser rules | |
which will do the matching; returns the rules} | |
regex [string!] "the pattern (regex)" | |
/local res pattern rule-stk rng-nr limits ll lo hi fch frst rng r qf | |
ch nr cr el lc rst class clb chset escaped compl brexp | |
][ | |
unless parse regex [some _ASCII] [ | |
print "pattern error, non-ASCII character(s) found" | |
return none | |
] | |
; set up | |
pattern: copy regex | |
rule-stk: copy [] | |
rng-nr: 0 | |
res: copy [] | |
insert/only rule-stk res | |
escaped: false | |
; main loop | |
while [not tail? pattern][ | |
cr: first rule-stk ; current rule | |
ch: take pattern | |
case [ | |
any [ | |
escaped | |
find _atom ch | |
all [#")" = ch 1 = length? rule-stk] | |
all [#"\" = ch tail? pattern] | |
][ | |
case [ | |
any [ | |
empty? cr | |
not string? last cr | |
][ | |
insert tail cr form ch | |
] | |
string? last cr [ | |
insert tail last cr ch | |
] | |
] | |
escaped: false | |
] | |
#"\" = ch [ | |
unless find _escape first pattern [ | |
print ["pattern error: meta character expected after \, found" first pattern] | |
return none | |
] | |
escaped: true | |
] | |
#"." = ch [ | |
insert tail cr 'skip | |
] | |
#"(" = ch [ | |
insert/only tail cr nr: copy [] | |
insert/only rule-stk nr ; new rule | |
] | |
#")" = ch [ | |
remove rule-stk | |
] | |
#"|" = ch [ | |
insert tail cr '| | |
] | |
#"[" = ch [ | |
fch: none | |
frst: true | |
compl: false | |
rng: false | |
brexp: make bitset! 128 | |
while [not tail? pattern][ | |
ch: take pattern | |
if all [frst #"^^" = ch][compl: true frst: false continue] | |
if all [not frst #"]" = ch][ break] | |
frst: false | |
either all [not rng #"-" = ch] | |
[ | |
rng: true | |
][ | |
either rng | |
[ | |
either ch > fch | |
[ | |
brexp: union brexp charset compose [(fch) - (ch)] | |
rng: false | |
fch: none | |
][ | |
print ["pattern error: range inverted" fch "-" ch] | |
return none | |
] | |
][ | |
either #"["= ch | |
[ | |
either #":" = first pattern | |
[ | |
remove pattern | |
unless clb: find pattern ":]" [ ; closing bracket | |
print ["pattern error: expected :], found" pattern] | |
return none | |
] | |
class: take/part pattern clb | |
remove/part pattern 2 | |
unless chset: select char-classes class [ | |
print ["pattern error: wrong character class" class] | |
return none | |
] | |
brexp: union brexp chset | |
][ | |
print ["pattern error: expected :, found" first pattern] | |
return none | |
] | |
][ | |
either fch | |
[ | |
brexp: union brexp charset fch | |
fch: none | |
][ | |
fch: ch | |
] | |
brexp: union brexp charset ch | |
] | |
] | |
] | |
] | |
either #"]" <> ch | |
[ | |
print "pattern error: range not closed" | |
return none | |
][ | |
if compl [brexp: complement brexp] | |
rng-nr: rng-nr + 1 | |
r: to word! rejoin ["_range" rng-nr] | |
set r brexp | |
insert tail cr r | |
] | |
] | |
#"{" = ch [ | |
unless clb: find pattern #"}" [ | |
print ["pattern error: expected }, found" pattern] | |
return none | |
] | |
limits: split take/part pattern clb #"," | |
ll: length? limits | |
if any [ll < 1 ll > 2][ | |
print ["pattern error, expected one or two numbers, found" limits] | |
return none | |
] | |
unless lo: attempt [to integer! limits/1] [ | |
print ["pattern error: expected number, found" limits/1] | |
return none | |
] | |
unless lo >= 0 [ | |
print ["pattern error: negative number" lo] | |
return none | |
] | |
if 2 = ll [ | |
unless hi: attempt [to integer! limits/2] [ | |
print ["pattern error: expected number, found" limits/2] | |
return none | |
] | |
unless hi > lo [ | |
print ["pattern error: upper bound lower than lower one" hi] | |
return none | |
] | |
] | |
el: last cr | |
either string? el | |
[ | |
lc: last el | |
rst: head remove back tail el | |
either empty? rst | |
[ | |
remove back tail cr | |
][ | |
change back tail cr form rst | |
] | |
insert tail cr lo | |
if ll = 2 [insert tail cr hi] | |
insert tail cr form lc | |
][ | |
insert back tail cr lo | |
if ll = 2 [insert back tail cr hi] | |
] | |
] | |
any [ | |
#"?" = ch #"*" = ch #"+" = ch | |
][ | |
qf: switch ch [ | |
#"?" ['opt] | |
#"*" ['any] | |
#"+" ['some] | |
] | |
el: last cr | |
either string? el | |
[ | |
lc: last el | |
rst: head remove back tail el | |
either empty? rst | |
[ | |
remove back tail cr | |
][ | |
change back tail cr form rst | |
] | |
insert tail cr qf | |
insert tail cr form lc | |
][ | |
insert back tail cr qf | |
] | |
] | |
] | |
; print "after" | |
; print mold res | |
] | |
; final check for balanced ( ) | |
unless 1 = length? rule-stk [ | |
print "pattern error: unmatched parentheses" | |
return none | |
] | |
copy res | |
] | |
set 'grep func [ | |
{partial implementation of Unix/Linux grep pattern matching engine | |
documentation consulted: GNU Grep 2.26 and grep(1) - Linux man page} | |
pattern [string!] "the pattern (extended regular expression) to match" | |
input [string! file! url! block!] {a single string, possibly containing | |
newlines, a file, a url or a block of strings, or files and/or urls} | |
/case "case-sensitive comparison" | |
/invert "print only non-matching lines" | |
/count "print a count of (non-)matching lines only" | |
/quiet "suppress printing when no lines found" | |
/local fst tm | |
][ | |
if empty? input [ | |
print "empty input" | |
exit | |
] | |
if empty? pattern [ | |
print "error, no pattern specified" | |
exit | |
] | |
unless rules: make-rules pattern [ | |
print "error in rules" | |
exit | |
] | |
rules: head insert/only next copy [thru to end] rules | |
options/case: case | |
options/invert: invert | |
options/count: count | |
options/quiet: quiet | |
stats/lines: 0 | |
stats/files: 0 | |
stats/matched: 0 | |
stats/time: now/time/precise | |
switch type?/word input [ | |
string! [ | |
report split input #"^/" "matching text lines" | |
] | |
file! [ | |
either #"/" = first input | |
[ | |
process reduce [input] %"" | |
][ | |
process reduce [input] what-dir | |
] | |
] | |
url! [ | |
process reduce [input] %"" | |
] | |
block! [ | |
fst: first input | |
unless any [string? fst file? fst url? fst][ | |
print ["error, string, file or url expected:" fst] | |
exit | |
] | |
either string? fst | |
[ | |
report input "matching text lines" | |
][ | |
process input what-dir | |
] | |
] | |
] | |
tm: to integer! now/time/precise - stats/time * 1000 | |
prin ["done, checked" stats/lines "lines in "] | |
unless stats/files = 0 [ | |
prin [stats/files "files/urls in "] | |
] | |
print [tm "msec, found" stats/matched "matches"] | |
] | |
process: func [ | |
{process a block of files/urls; | |
if some of the files are directories, | |
process the files in them recursively} | |
input [block!] "block of files/urls" | |
pdir [file!] "parent directory for files" | |
/local lines src | |
][ | |
foreach src input [ | |
if file? src [src: append copy pdir src] | |
case [ | |
any [ | |
url? src | |
all [file? src not dir? src] | |
][ | |
either lines: attempt [read/lines src] | |
[ | |
report lines ["matching" fmt length? lines 5 "lines from" mold src] | |
][ | |
unless options/quiet [ | |
print [mold src "cannot be read, skipped"] | |
] | |
] | |
] | |
file? src [ ; this is a dir!!! | |
process read src src | |
] | |
true [ | |
print ["error, file or url expected, found:" type? src] | |
] | |
] | |
] | |
] | |
report: func [ | |
{match and report according to options} | |
lines [block!] "block of lines to match" | |
text [string! block!] "heading for each string/file/url" | |
/local buffer line res line-count i | |
][ | |
if any [not lines empty? lines ][exit] | |
if block? text [stats/files: stats/files + 1] | |
stats/lines: stats/lines + length? lines | |
buffer: copy [] | |
line-count: 0 | |
repeat i length? lines [ | |
line: lines/:i | |
unless string? line [ | |
print ["error, string expected, found:" type? line] | |
exit | |
] | |
res: either options/case [parse/case line rules][parse line rules] | |
if options/invert [res: not res] | |
if res [ | |
line-count: line-count + 1 | |
insert tail buffer rejoin [fmt i 5 " " line] | |
] | |
] | |
stats/matched: stats/matched + line-count | |
if any [ | |
line-count <> 0 | |
not options/quiet | |
][ | |
print text | |
either options/count | |
[ | |
print [fmt line-count 5 "matching line(s) found"] | |
][ | |
repeat i length? buffer [print buffer/:i] | |
] | |
] | |
] | |
fmt: func [ | |
{convert an integer to a right aligned string of given size; | |
pad with blanks on left; if too narrow, show ***} | |
i [integer!] "the integer to convert" | |
s [integer!] "the size in characters" | |
/local f lf | |
][ | |
s: max 1 s | |
f: form i | |
lf: length? f | |
either lf <= s | |
[ | |
f: head insert/dup f #" " s - lf | |
][ | |
f: head insert/dup copy "" #"*" s | |
] | |
] | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment