toomasv · November 20, 2017 18:22
diff --git a/regex.red b/regex.red
 Red [
 	Author: 	"Toomas Vooglaid"
 	file: 		"%regex.red"
 	Purpose: 	{Regex to parse converter}
 	History:	{Started: 2017-05-09
 				v.0.1: 2017-05-12 -- first working version with: 
 					start `^^` and end `$` anchors, 
 					non-capturing groups (one level) with alterns, 
 					quantifiers (possessive), 
 					some character-classes.
 				v.0.2: 2017-05-15 -- 
 					hack to avoid recursive calls, got nested groups,
 					added some wordclasses
 				v.0.3: 2017-05-16 --
 					made re-ctx reactive, 
 					implemented singleline and multiline refinements,
 					wordboundaries,
 					added some special characters
 				2017-05-17 --
 					added negative and positive on-spot charclasses, without subtraction
 				v.0.4: 2017-05-18 --
 					noncapturing groups (just correct syntax)
 					added capturing groups with backreferences to captured strings (\1, \2...)
 				2017-05-19 -- restructured the output: by default `regex` now just takes `re` and returns the spec block. 
 					With refinement `/string str` string to be parsed by regex may be provided. In this 
 					case spec block is created and provided string parsed with this spec. 
 					With refinement `/withspec` spec block is also printed out. 
 					`/case` refinement still there, as well as 
 					`/singleline` and `/multiline`.
 					Also added primitive `/debug` refinement, which shows collected quantifiers and sequences on each step, 
 					and preliminary `/try flavor` refinement, which will be used to try out major flavors of `re`s.
 				2017-05-20 -- changed `/string str` refinement into `/parse str` 
 					and `/withspec` refinement into `spec`
 				2017-05-22 -- corrected groups nesting and backreferences, compacted code
 				v.0.5: 2017-05-23 --
 					added soubroutines with several syntactic flavors (PCRE, Perl, Ruby):
 					a) named groups declaration
 						(?P<name>regex), (?'name'regex), (?<name>regex)
 					b) referring subroutines
 						absolute reference
 						(?1), \g'1', \g<1> - number referring to the capturing group (see example with ip-v4)
 						relative reference
 						(?+1), (?-2), \g'+3', \g<-4>
 						named reference
 						(?&name), (?P>name), \g'name', \g<name>
 					added backreferences with several syntactic flavors:
 					a) numbered backreferences
 						\1, \2, ...
 					b) named backreferences to captured strings
 						(?P=name), \k'name', \k<name>, \k{name} 
 					added some examples
 				2017-05-23 -- made regex case-sensitive by default, 
 					changed `/case` refinement into `/icase` to turn it case-insensitive
 					added inline comments syntax - (?# Comment )
 				2017-05-24 -- added `/freespace` refinement - see corresponding example
 				2017-05-25 -- added short refinements `/i`, `/s`, `/m` and `/x`, with same meaning as 
 					`/icase`, `/singleline`, `/multiline` and `/freespace` correspondingly.
 					Also added `/modes options` refinement, where shortcoded switches can be set in one string.
 					Made freespace more permissive and made an enhanced example for it.
 				v.0.6:	2017-05-27 -- 
 					normalized freespace handling
 					added capturing of overall match into `br_0` (changed later into `&0`)
 					added global mode refinements `/global` and `/g`
 					and a map `br_`, which holds captured strings on numbered or named keys:
 						if in global mode -- in blocks, otherwise in strings (see examples)
 				2017-05-28 -- added refinements `/simplex` with short alias `/n` to turn on non-capturing mode, 
 					where only named groups are captured (and numbered)
 				2017-05-29 -- changed `br_` into tcl-like `&` and made it changeable. It can be changed
 					as eg. `re-ctx/symbol: '¤`. See examples. Earlier examples presume symbol `br_`.
 				v.0.7:	2017-05-30 --
 					added first bit of `/replace replacement` refinement. `replacement` is string (so far), 
 					which may include backreferences to captured strings in the same form as in matching, 
 					or as `\'1'` or `\'name'`. `name` may be assigned a value in matching, but also outside 
 					of the `regex`. The overall match is replaced by evaluated replacement. 
 					Also in `global` mode. Examples.
 				2017-05-31 -- corrected some mistakes. Reorganised replacement code. 
 					Added block! as replacement. In case of global parsing replaces matches in the block order,
 					eg. `a: "first" regex/parse/g/replace "\w+" x: "1 second 3" [a "\0" "third"] head x` would yield
 					`"first second third"`. Only as many matches will be changed as there are elements in the block, 
 					superfluous elements in the block will be disregarded. 
 					Some stylistic change of code in `build` function.
 				2017-06-01 -- Allowed any-string! as `/parse string` argument, 
 					so that emails, urls, tags and filenames could be parsed directly.
 				2017-06-18 -- Further development in repo: https://github.com/toomasv/regex
 			}
 	TBD: 	{atomic groups, "soft" quantifiers, character-class subtraction, switches, substitution, look-around etc
 				inline created words are leaking into global environment}
 ]

 re-ctx: make reactor! [
 	_spec: 			clear []
 	starting: 		'loose
 	ending: 		'loose
 	nest-level:		0
 	sl: ml: 		off
 	nocase: 		off
 	freespace: 		off
 	glob: 			off
 	simp:			off
 	debugging: 		off
 	longout: 		clear []
 	shortout:		clear []
 	rpt: seq: 		clear []
 	levelgrp: 		clear []
 	levelgr2:		clear []
 	levelcap: 		clear []
 	levelsym: 		clear []
 	levelnam: 		clear []
 	capturing: 		off
 	to-short: 		off
 	symbol:			'&
 	brsymb:			is [to-string symbol]
 	full-match:		is [to-word append copy brsymb 0]
 	sbrsymb1:		is [append copy "_" brsymb]
 	sbrsymb2:		is [append copy "_'" brsymb]
 	defs: 			clear []
 	sym:			none
 	sbrdef1:		none
 	sbrdef2:		none
 	bckref: 		none
 	assignments: 		clear []
 	replace:		off
 	replacement:		none
 	br-num: 		0
 	
 	next-br: does [br-num: br-num + 1]
 	map: function [series [series!] fn [any-function!] /only][
 		out: make type? series []
 		foreach i series [
 			either only [
 				append/only out fn i
 			][
 				append out fn i
 			]
 		]
 	]

 	cs-num: 0 
 	empty-cs?: false
 	cs-open?: false
 	next-cs: does [to-word append copy "cs_" cs-num: cs-num + 1] 							; charset-number-word generator
 		
 	make-charset: func [s /local c e s1 s2 negated cs rpt cb][							; cb -- charset definition; e -- continuation string
 		c: copy [] rpt: none											; c -- whole charset expression; rpt -- quantifier
 		cs-open?: true
 		if negated: to-logic find/match s #"^^"	[s: next s]							; if charset is negated, jump over ^
 		s1: index? s												; register position after possible ^
 		system/words/parse s [											; let's form the charset
 			collect set cb some [										; collect charset definitional elements
 				 #"]" s2: [										; register current position after ]
 					if (s1 < ((index? s2) - 1))	[						; if ] is not in the beginning of charclass
 						any #"]" opt [collect set rpt repeater]					; we are in the end of charset, check for quantifiers, register, exit
 						(cs-open?: false) e: break						; register closing of charset definition
 					] | [ 
 						if (not empty-cs?) [keep (#"]")] 					; if empty charsets are not allowed, then keep ] as part of charset definition
 						| (cs-open?: false) e: break						; otherwise, register closing, position and exit
 					]
 				]  											
 				| "-]" keep (#"-") 									; if - occurs before closing ]
 					   any #"]" opt [collect set rpt repeater] 					; collect quantifier, if any
 					   (cs-open?: false) e: break							; declare charclass closed, register position, go back
 				| "^^" keep (#"^^")									; we can keep ^, because its negation meaning is taken care of
 				| ["\n" keep (#"^/") | "\r" keep (#"^M")]						; keep linebreakers
 				| #"\" [
 					ahead 	[clmetaset | metaset] 							; metachars may be escaped 
 					keep 	[clmetaset | metaset] 							
 					| (cause-error 'user 'message ["Unescaped \ in char-class!"])			; only one, which sould necessarily be escaped
 				]					
 				| #"-" s2: [if (s1 = ((index? s2) - 1)) keep (#"-")					; we have just a dash
 					| keep ('-) ]									; we have a range
 				| keep clliteral 									; keep more or less anything
 				| (cause-error 'user 'message ["Malformed charset?"])					; just for checking
 			]
 		]
 		cs: next-cs												; new word to bind charset to
 		if (cs-open? and empty? e) [
 			cause-error 'user 'message ["Character class unclosed!"]
 		]
 		if to-logic negated [cb: compose/deep [not [(cb)]]]							; if charset is negated, make negation
 		append defs copy compose/deep [ 
 			(to-set-word cs) charset [(cb)]
 		] 													; escape parse to actually make a charset
 		if rpt [append c either block? rpt/1 [rpt/1][rpt]]							; did we have quantifiers? Append them here
 		append c cs 												; and finally a word referring to created charset
 		compose/deep [[(c)] (e)]										; send proudly back charset def and remaining string after charset
 	]
 	group: 	[if (not cs-open?) [
 		#"(" 													; named group
 		[	"?P<" 	copy gname to #">" 
 		| 	"?'" 	copy gname to #"'" 
 		| 	"?<" 	copy gname to #">"
 		] skip
 		(
 			insert levelcap capturing
 			capturing: on
 			if 1 < length? levelcap [to-short: on]
 			insert levelnam copy gname
 			insert levelsym next-br
 			insert/only levelgrp copy longout 
 			insert/only levelgr2 copy shortout
 			if debugging [probe compose ["lgrp:" (levelgrp)]]
 			longout:	clear []
 			shortout: 	clear []
 		)
 	|	[ #"("  "?+" 	copy n number  #")"									; relative subroutine (forward)
 		| "\g"  
 			[	"<+" 	copy n number  #">"
 			| 	"'+" 	copy n number  #"'"]
 		] keep (to-word append copy sbrsymb1 br-num + to-integer n)
 	|	[ #"("  "?-" 	copy n number  #")"									; relative subroutine (backward)
 		| "\g"  
 			[	"<-" 	copy n number  #">"
 			| 	"'-" 	copy n number  #"'"]
 		] keep (to-word append copy sbrsymb1 br-num + 1 - to-integer n)
 	|	[ #"("  #"?" 	 copy n number  #")" 									; absolute subroutine reference
 		| "\g"  
 			[	#"<" 	 copy n number  #">"
 			| 	#"'" 	 copy n number  #"'"]
 		] keep (to-word append copy sbrsymb1 n)
 	|	[ #"("  												; named subroutine reference
 			[	"?&" 	copy gname to #")" 
 			| 	"?P>" 	copy gname to  #")"]
 		| "\g"  
 			[	#"<" 	copy gname to  #">"
 			| 	#"'" 	copy gname to  #"'"]
 		] skip 
 		keep (to-word append copy "_" gname)
 	|	"(?:" (													; non-capturing group
 			insert levelcap capturing 
 			capturing: off
 			insert/only levelgrp copy longout 
 			insert/only levelgr2 copy shortout 
 			longout: 	clear []
 			shortout: 	clear []
 		) 
 	|	#"(" ( 													; capturing group
 			insert levelcap capturing
 			either simp [											; unless simplex
 				capturing: off
 			][
 				capturing: on
 				if 1 < length? levelcap [to-short: on]
 				insert levelsym next-br 
 				insert levelnam none 
 			]
 			insert/only levelgrp copy longout
 			insert/only levelgr2 copy shortout			
 			if debugging [probe compose ["lgrp:" (levelgrp)]]
 			longout: 	clear []
 			shortout: 	clear []
 		)
 	| 	#")"  opt collect set rpt repeater 									; end of group, check for quantifier
 		(
 			either capturing [										; are we capturing?
 				all [
 					nam: take levelnam 
 					append defs to-set-word copy append copy "_" nam
 					nam: to-word copy nam
 				] 
 				sym: take levelsym 
 				sbrdef1: to-word append copy sbrsymb1 sym
 				sbrdef2: to-word append either to-short [copy sbrsymb2][copy sbrsymb1] sym
 				bckref: to-word append copy brsymb sym
 				append/only append defs to-set-word sbrdef1 copy longout 
 				all [
 					starting = 'loose 
 					to-short
 					append/only append defs to-set-word sbrdef2 copy shortout 
 				]
 				if debugging [probe compose ["defs:" (defs)]]
 				if block? first rpt [rpt: first rpt]
 				longout: append append take levelgrp rpt compose [copy (bckref) (sbrdef1)]
 				shortout: append append take levelgr2 rpt sbrdef2
 				assignments: make block! 5
 				either glob [
 					unless block? select get symbol sym compose [extend (symbol) reduce [sym make block! 5]]
 					append assignments compose [append select (symbol) (sym) (bckref)]
 					if nam [
 						unless block? select get symbol nam compose [extend (symbol) reduce [nam make block! 5]]
 						append assignments compose [
 							(to-set-word nam) (bckref) 
 							append select (symbol) (to-lit-word nam) (bckref)
 						]
 					]
 				][
 					append assignments compose [put (symbol) (sym) (bckref)]
 					if nam [
 						append assignments compose [
 							(to-set-word nam) (bckref) 
 							put (symbol) (to-lit-word nam) (bckref)
 						]
 					]
 				]
 				append/only longout to-paren assignments
 				if debugging [probe compose ["lout:" (longout)]]
 			][												; we are not capturing
 				longout: 	append/only append take levelgrp rpt copy longout
 				shortout: 	append/only append take levelgr2 rpt copy shortout
 			]
 			capturing: take levelcap
 		) 
 	| 	keep literal
 	]]
 	class: [
 		"[:" copy cl to ":]" 2 skip keep (to-word cl)								; this is in wrong place?
 	| 	#"[" s: 
 		(
 			set [c e] make-charset s 									; send string to the charset-factory
 			append longout c										; put the charset into place
 			append shortout c
 			e: any [find/last s e tail s]
 		)
 		:e													; continue after the charclass
 	]
 	special: [
 		  "\d" 		keep ('digit)
 		| "\D"		keep ('nondigit)
 		| "\w" 		keep ('word)
 		| "\W"		keep ('nonword)
 		| "\s" 		keep ('wspace)
 		| "\S"		keep ('nonwspace)
 		| "\t"		keep (#"^-")
 		| "\n"		keep (#"^/")
 		| "\N"		keep ('nonlinebreak)
 		| "\r"		keep (#"^M")
 		| "\f"		keep (#"^L")
 		| "\v"		keep (#"^K")
 	]
 	backref: [
 		[ #"("	"?P=" 	copy gname to #")" 									; reference to named captured string
 		| "\k"  
 			[	#"'" 	copy gname to #"'" 
 			| 	#"<" 	copy gname to #">" 
 			| 	#"{" 	copy gname to #"}"]
 		] skip 
 		keep (to-word gname)
 	|	#"\" copy n number 
 		keep (to-word append copy brsymb n)
 	]
 	replref: [#"\" [
 		[	#"'"	copy n number #"'" 
 		| 	#"<"	copy n number #">" 
 		| 	#"^{"	copy n number #"^}" 
 		] 	keep (to-word append copy brsymb n)
 	| 	[	#"'"	copy gname to #"'" 
 		| 	#"<"	copy gname to #">"	
 		| 	#"^{"	copy gname to #"^}"
 		] 	skip keep (to-word gname)
 	]]
 comment {		]
 		|
 		[if (glob) 
 			#"#" copy n number
 			#"/" copy m number
 			keep (
 				to-paren compose [pick select (symbol) (to-integer n) (to-integer m)]
 			)
 		]
 }
 	linestart: 		is [either ml [[#"^^" keep (#"^/")]][[#"^^" (starting: 'strict)]]]
 	lineend:		is [either ml [[#"$" keep ([ahead [opt #"^/" end | #"^/"]])]][[#"$" keep ([opt #"^/" end])]]] 
 	wordboundary:	["\b" keep (											; does not react on changing word / nonword values
 		[s: [
 			if ((1 = index? s) or find nonword first back s) 	[ahead word] 
 			| if (find word first back s) 				[ahead [nonword | end]]
 		]]
 	)]
 	anchor:			[linestart | lineend | wordboundary]
 	paren: 			charset "()"
 	square: 		charset "[]"
 	lower: 			charset [#"a" - #"z" #"ß" - #"ö" #"ø" - #"ÿ"]
 	upper: 			charset [#"A" - #"Z" #"À" - #"Ö" #"Ø" - #"Þ"]
 	alpha: 			is [union lower upper]
 	digit: 			charset "0123456789"
 	nondigit:		complement digit
 	number:			[some digit]
 	alnum:			is [union alpha digit]
 	word:			is [union alnum charset "_"]
 	nonword:		is [complement word]
 	blank:			charset [#" " #"^-"]
 	nonblank:		complement blank
 	linebreak:		charset [#"^M" #"^/" #"^L"]
 	nonlinebreak:		is [complement linebreak]
 	wspace: 		is [union blank linebreak] 
 	nonwspace:		is [complement wspace]
 	punct:			charset ",;!^"':-" ;"
 	nonpunct: 		is [complement punct]
 	meta: 			[#"\" #"^^" #"$"  #"." #"|" #"?" #"*" #"+" #"(" #")" #"[" #"^{"] 
 	metaset: 		charset meta
 	literal: 		is [complement metaset]
 	clmeta:			[#"\" #"^^" #"-" #"]"]
 	clmetas:		[#"\" | #"^^" | #"-" | #"]"]
 	clmetaset:		charset clmeta
 	clliteral:		is [complement clmetaset]
 	closing-paren:		charset [not #")"]
 	not-closing-paren: 	complement closing-paren
 	
 	comm: 			["(?#" thru #")"]
 	anychar: is [either sl [
 		union metaset literal
 	][
 		exclude union metaset literal charset [#"^/" #"^M"]]
 	]
 	escaped: [
 		#"\" keep metaset 
 	| 	"$$" keep #"$" 
 	]
 	altern: 		[#"|" keep ('|)]
 	char: 			[keep literal]
 	exception: 		[
 		  #"\" 		(cause-error 'user 'message ["Unescaped \!"])
 		| #")" 		(cause-error 'user 'message ["Invalid use of closing parentheses!"])
 	]
 	rmfree: [any [
 			remove [any wspace #"#" thru linebreak]							; comments
 		| 	change "\ " " "
 		|	"\[" | "\]"
 		|	#"[" (cs-open?: yes)
 		|	#"]" (cs-open?: no)
 		|	if (not cs-open?) remove some wspace
 		| 	skip
 	]]
 	sequence: [
 			escaped
 		| 	anchor
 		| 	altern
 		| 	comm
 		| 	backref
 		| 	group
 		| 	class
 		| 	special
 		| 	#"." 	keep ('anychar)  
 		| 	char 
 		| 	exception
 	]
 	repeater: [
 			#"^{"  copy n1 number  #","  copy n2 number  #"^}" 
 									keep (reduce [to-integer n1 to-integer n2])
 		| 	#"^{"  #","  copy n2 number  #"^}" 		keep (reduce [0  to-integer n2])
 		| 	#"^{"  copy n1 number  #","  "^}"		keep (reduce [to-integer n1 10000])
 		| 	#"^{"  copy n1 number  #"^}"			keep (to-integer n1)
 		| 	#"?" 						keep ('opt)
 		| 	#"+" 						keep ('some)
 		| 	#"*" 						keep ('any) 
 	]
 	build: func [inner /local s e c t r n out][				; main workhorse
 		longout: 	clear []
 		shortout: 	clear []
 		parse/case inner [
 			any [
 				collect set seq sequence  
 				opt collect set rpt repeater
 				(
 					if block? first rpt [rpt: first rpt] 
 					if debugging [probe compose ["seq rpt:" (seq) (rpt)]]
 					append longout 	compose [(rpt) (seq)] 
 					append shortout compose [(rpt) (seq)]
 					if debugging [probe compose/deep ["short long:" [(shortout)] [(longout)]]]
 				) 											 
 			]
 		]
 		unless ending = 'loose [
 			foreach out [shortout longout][
 				append get out switch ending [
 					strict 		[[ahead [opt #"^/" end]]] 
 					strictissima 	[[ahead end]]
 				]
 			]
 		]
 		compose/deep [[(shortout)] [(longout)]]
 	]

 	finish: func [inner /local short long repl][
 		set [short long] build copy inner 

 		append _spec either replace [
 			switch type?/word replacement [
 				string! [
 					parse replacement [collect set repl any [replref | backref | char]]
 					if debugging [probe compose/deep ["repl:" [(repl)]]]
 					compose/deep [
 						s: copy (full-match) (either starting = 'loose ['thru][]) [(long)] 
 						(to-paren append copy either glob [[append select]][[put]] compose [(symbol) 0 (full-match)])
 						:s change [(short)] (to-paren append/only copy [rejoin] compose [(repl)])
 					]
 				]
 				block! [
 					replacement: either empty? replacement [clear []][
 						map/only replacement func [rep][
 							parse reduce rep [collect any [replref | backref | char]]
 						]
 					]
 					compose/deep [
 						s: copy (full-match) (either starting = 'loose ['thru][]) [(long)] 
 						(to-paren append copy either glob [[append select]][[put]] compose [(symbol) 0 (full-match)])
 						:s change [(short)] 
 						(to-paren append/only copy [rejoin] 
 							compose [
 								(to-paren compose/deep [
 									either reduce pick replacement length? select (symbol) 0 [
 										reduce pick replacement length? select (symbol) 0
 									][(full-match)]
 								])
 							]
 						)
 					]					
 				]
 				map! [
 					
 				]
 				function! [
 					
 				]
 			]
 		][
 			compose/deep [
 				copy (full-match) (either starting = 'loose ['thru][]) [(long)] 
 				(to-paren append copy either glob [[append select]][[put]] compose [(symbol) 0 (full-match)])
 			]
 		]
 		all [
 			starting = 'loose 
 			insert _spec compose/deep [to [(short)]]
 			ending = 'loose
 			glob 
 			_spec: append/only copy [some] compose [(_spec)]
 		]
 		append _spec switch ending [
 			strict 		[[opt #"^/" end]] 
 			strictissima 	['end]
 			loose 		[[to end]]
 		]
 		if debugging [probe compose/deep ["_spec:" [(_spec)]]]
 	]
 	flavors: [JS [empty-cs?: true]]							; just a probe so far
 	
 	set 'regex func [
 		"Regex to parse converter"
 		re [string!]  
 		/parse str [any-string!] 						"string to parse"
 		/debug									"turns on debugging"
 		/spec 									"prints out generated spec"
 		/modes "passes all the modes in one string"  optstr [string!] 		"shortcoded modes"
 		/icase		"see next"	/i					"turns on case-insensitivity"
 		/multiline	"see next"	/m					"lets ^^ and $ match beginning and end of line also"
 		/singleline	"see next"	/s					"lets dot match linebreaks also"
 		/freespace	"see next"	/x					"lets you use whitespace, to make re more readable"
 		/global		"see next"	/g					"global mode, puts captured strings into block"
 		/simplex	"see next"	/n					"non-numbering mode, only named groups are captured"
 		/replace 	"Captured matches are used in replacements"
 			replacement [string! block!] {String replaces any overall matches, 
 				block replaces global overall matches in order 
 				and map specifies numbered and named groups to use in replacement}
 		/try		"try specific flavor of regexp"	flavor	[word!] "flavor to try"
 		/local inner
 	][  
 		debugging: 		any [debug off]
 		cs-num:   		0
 		br-num:			0
 		_spec:			make block! 100
 		inner: 			clear ""
 		nocase:			any [icase 	i all [modes find optstr "i"] off]
 		self/ml: 		any [multiline  m all [modes find optstr "m"] off]
 		self/sl: 		any [singleline s all [modes find optstr "s"] off]
 		freesp: 		any [freespace 	x all [modes find optstr "x"] off]
 		glob:			any [global 	g all [modes find optstr "g"] off]
 		simp:			any [simplex 	n all [modes find optstr "n"] off]
 		self/replace:		any [replace off]
 		self/replacement:	any [replacement none]
 		empty-cs?: 		false
 		cs-open?: 		false
 		levelgrp: 		make block! 5
 		levelgr2:		make block! 5
 		levelcap: 		make block! 5
 		levelsym: 		make block! 5
 		levelnam: 		make block! 5
 		capturing: 		off
 		to-short: 		off
 		defs: 			make block! 5
 		set symbol		either glob [make map! reduce [0 make block! 10]][make map! reduce [0 make string! 20]]
 		
 		if freesp [system/words/parse re rmfree cs-open?: no]
 		
 		if try [unless do select flavors flavor [print append to-string flavor " is not supported :("]]
 		
 		system/words/parse re [
 			[
 				  ["\A" | "\`" | if (not ml) ["^^"]]					(starting: 'strict)
 				| 									(starting: 'loose)
 			]
 			copy inner [
 				to [
 					#"\" copy s skip 
 					if (find [39 90] to-integer to-char s) end 			;#"Z" #"'"
 				| 	if (not ml) #"$" end
 				]									(ending: 'strict) 
 			| 	to [
 					#"\" copy s skip 
 					if (122 = to-integer to-char s) end
 				]									(ending: 'strictissima) ;#"z" 
 			| 	to end									(ending: 'loose)
 			] 
 			(finish copy inner)
 		]
 		_spec: either empty? defs [_spec][head insert/only _spec to-paren defs]
 		bind _spec: load mold _spec re-ctx							; rebinding corrects some strange behavior
 		if spec [print mold _spec]
 		either parse [
 			return either nocase [system/words/parse str _spec][system/words/parse/case str _spec]
 		][
 			return _spec
 		]
 	]
 ]
 examples: :comment 
 examples [
 regex/parse "[a-c]+" "abcaaabbbcaab"
 ;== true
 regex/parse/spec "[a-c]+" "abcaaabbbcaab"
 ; [(cs_1: charset [#"a" - #"c"]) to [some cs_1] copy br_0 thru [some cs_1] (br_/0: br_0) to end]

 ip-v4: "\A(25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?1)){3}\z"
 regex/parse/spec ip-v4 "127.0.0.25"
 ; [(cs_1: charset [#"0" - #"5"] cs_2: charset [#"0" - #"4"] cs_3: charset [#"1" - #"9"] 
 ;	_1: [#"2" #"5" cs_1 | #"2" cs_2 digit | #"1" digit digit | cs_3 digit | digit]) 
 ;	copy br_0 thru [copy br_1 _1 (put br_ 1 br_1) 3 [#"." _1]] (br_/0: br_0) end]
 regex/parse ip-v4 "255.255.255.256"
 ;== false

 email: "^^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+$"
 regex/spec/parse email "[email protected]"
 ; [(cs_1: charset [#"a" - #"z" #"A" - #"Z" #"0" - #"9" #"_" #"." #"+" #"-"] 
 ;	cs_2: charset [#"a" - #"z" #"A" - #"Z" #"0" - #"9" #"-"] 
 ;	cs_3: charset [#"a" - #"z" #"A" - #"Z" #"0" - #"9" #"." #"-"]) 
 ;	copy br_0 thru [some cs_1 #"@" some cs_2 #"." some cs_3] (br_/0: br_0) opt #"^/" end]

 parse "[email protected]" regex email
 ;== true

 ; named groups, captured strings 
 regex/parse/spec "<h1[^^>]*>\n?(?P<title>[^^<\n]*)\n?</h1>" read http://www.red-lang.org/
 ; [(cs_1: charset [not [#">"]] cs_2: charset [not [#"<" #"^/"]] _title: _1: [any cs_2]) 
 ;	to [#"<" #"h" #"1" any cs_1 #">" opt #"^/" _1 opt #"^/" #"<" #"/" #"h" #"1" #">"] 
 ;	copy br_0 thru [#"<" #"h" #"1" any cs_1 #">" opt #"^/" copy br_1 _1 
 ;		(put br_ 1 br_1 title: br_1 put br_ 'title br_1) opt #"^/" #"<" #"/" #"h" #"1" #">"] 
 ;	(br_/0: br_0) to end]
 ;== true
 &  ; was br_
 ;== #(
 ;	0 "<h1 class='title'>^/Red Programming Language^/</h1>"
 ;	1 "Red Programming Language"
 ;	title: "Red Programming Language"
 ;)
 print title
 ; Red Programming Language

 ; absolute, relative and named subroutines in Perl syntax
 regex/spec/parse "(?+1)(?'name'[abc])(?1)(?-1)(?&name)" "abcab"
 ; [(cs_1: charset [#"a" #"b" #"c"] _name: _1: [cs_1]) 
 ; to [_1 _1 _1 _1 _name] 
 ; copy br_0 thru [_1 copy br_1 _1 (put br_ 1 br_1 name: br_1 put br_ 'name br_1) _1 _1 _name] 
 ; (br_/0: br_0) to end] 

 ; same in Ruby syntax
 regex/spec/parse "\g'+1'(?'name'[abc])\g'1'\g'-1'\g'name'" "bbccc"
 ; [(cs_1: charset [#"a" #"b" #"c"] _name: _1: [cs_1]) 
 ; to [_1 _1 _1 _1 _name] 
 ; copy br_0 thru [_1 copy br_1 _1 (put br_ 1 br_1 name: br_1 put br_ 'name br_1) _1 _1 _name] 
 ; (br_/0: br_0) to end]

 ; PCRE
 regex/spec/parse "(?P<name>[abc])(?1)(?P>name)" "bca"
 ; [(cs_1: charset [#"a" #"b" #"c"] _name: _1: [cs_1]) 
 ; to [_1 _1 _name] 
 ; copy br_0 thru [copy br_1 _1 (put br_ 1 br_1 name: br_1 put br_ 'name br_1) _1 _name] 
 ; (br_/0: br_0) to end]

 ; comments
 regex/spec "abc(?# Simple re )"
 ; [to [#"a" #"b" #"c"] copy br_0 thru [#"a" #"b" #"c"] (br_/0: br_0) to end]

 ; freespace demonstration + named group + wordboundary + backreference
 ; without wordboundary invalid IP addresses like eg 192.186.1.265 will be 
 ; reckognized as 192.186.1.26
 ip-v4: {
 (   						# We are capturing the whole address
 	?P< ipaddr >				# Let's give it a name
 	(					# Begin definition of quad number
 		\b (?:					# Quad starts (wordboundary + noncapturing group)
 				25[0-5]				# Highest quads			250-255
 			|	2[0-4]\d			# Second highest quads 		200-249
 			|	1\d\d				# Quads in second hundred 	100-199
 			|	[1-9]\d				# Quads 			 10-99
 			|	\d				# Quads 			  0-9
 		) \b					# Quad ends (nc-group ends + wordboundary)
 	)					# End definition of quad number,
 						# and check first quad
 	(?: 					# Begin non-capturing group
 		\.					# check for dot
 		(? 2 ) 					# and call quad definition subroutine
 	)    					# End group
 	{ 3 }					# Iterate last group 3 times 
 )
 }
 regex/parse/freespace ip-v4 "some text 192.168.1.65 around the address"
 ;== true
 ipaddr
 ;== "192.168.1.65"
 regex/parse/freespace ip-v4 "some text 192.168.1.265 around the address"
 ;== false

 ; global mode with nested groups
 regex/parse/g "(\w(\w{1,2}))\W(\w+)" "per aspera ad astra"
 ;== true
 &  ; was br_
 ;== #(
 ;	0 ["per aspera" "ad astra"]
 ;	2 ["er" "d"]
 ;	1 ["per" "ad"]
 ;	3 ["aspera" "astra"]
 ;)

 ; global + simplex (non-capturing) modes 
 regex/parse/g/n "(\w(\w{1,2}))\W(\w+)" "per aspera ad astra"
 ;== true
 &  ; was br_
 ;== #(
 ;	0 ["per aspera" "ad astra"]
 ;)

 ; as previous + named groups
 regex/parse/g/n "(?<pre>\w(\w{1,2}))\W(?<nom>\w+)" "per aspera ad astra"
 ;== true
 &  ; was br_
 ;== #(
 ;	0 ["per aspera" "ad astra"]
 ;	1 ["per" "ad"]
 ;	pre: ["per" "ad"]
 ;	2 ["aspera" "astra"]
 ;	nom: ["aspera" "astra"]
 ;)

 ; different groups with same name
 regex/parse/g/n "(?<pre>\w(\w{1,2}))\W(?<pre>\w+)" "per aspera ad astra"
 ;== true
 br_ 
 ;== #(
 ;	0 ["per aspera" "ad astra"]
 ;	1 ["per" "ad"]
 ;	pre: ["per" "aspera" "ad" "astra"]
 ;	2 ["aspera" "astra"]
 ;)

 ; changing match-map's symbol
 re-ctx/symbol: '¤
 ;== ¤
 regex/parse "a(b)c" "xabcx"
 ¤0
 ;=="abc"
 ¤1
 ;== "b"
 ¤
 ;== #(
 ;	0 "abc"
 ;	1 "b"
 ;)
 re-ctx/symbol: 'backreference
 ;== backreference
 regex/parse "a(b)c" "xabcx" backreference
 ;== #(
 ;	0 "abc"
 ;	1 "b"
 ;)
 parse "xabcx" regex "a(b)c" backreference
 ;== #(
 ;	0 "abc"
 ;	1 "b"
 ;)

 ; replacement of overall match
 regex/parse/replace "like" x: "I like Red" "love" head x
 ;== "I love Red"
 ; replacement of outer word
 var: "trick" regex/parse/replace "abc" x: "xabcx" "\'var'" head x
 ;== "xtrickx"
 var: "track" parse x: "xabcx" regex/replace "abc" "\'var'" head x
 ;== "xtrackx"

 ;global replacement of overall match
 regex/parse/replace/g "(\w+) (\w+)( ?)" x: "first second third fourth" "\2 \1\3" head x
 ;== "second first fourth third"

 ;global replacement2
 regex/parse/replace/g "(\w\w)\w+" x: "first second third fourth" "\1" head x
 ;== "fi se th fo"

 ; global replacement with block
 a: "first" regex/parse/g/replace "\w+" x: "1 second 2" [a "\0" "third"] head x
 ;== "first second third"
 ]
No results found