meijeru · March 5, 2018 21:33
diff --git a/red-lexer-adapted.r b/red-lexer-adapted.r
 REBOL [
 	Title:   "Red Lexical Scanner"
 	Author:  "Nenad Rakocevic"
 	File: 	 %lexer.r
 	Tabs:	 4
 	Rights:  "Copyright (C) 2011-2015 Nenad Rakocevic. All rights reserved."
 	License: "BSD-3 - https://github.com/red/red/blob/master/BSD-3-License.txt"
 	Comments: {Adapted by Rudolf W. Meijer for making the Red concordance.
 			   This involves parsing Red, Red/System and REBOL programs by one lexer.
 			   The changes are:
 			   - the header rule accepts any word or path followed by a block
 			   - the escaped rule accepts anything within #[ and ]
 			   - throw-error prints the filename; global source-file must be set
 			   It is a testimony to the syntactic similarity of all three languages
 			   that the changes are so few ...
 			  }]

 ;-- Patch NEW-LINE and NEW-LINE? natives to accept paren! --
 append first find third :new-line  block! paren!
 append first find third :new-line? block! paren!


 lexer: context [
 	verbose: 0
 	
 	old-line: none
 	line: 	none									;-- source code lines counter
 	count?: yes										;-- if TRUE, lines counter is enabled
 	cnt:	none									;-- counts nested {} in multi-line strings
 	pos:	none									;-- source input position (error reporting)
 	mark:	none									;-- use for keeping input cursor at same position
 	path:	none									;-- path input position (error reporting)
 	s:		none									;-- mark start position of new value
 	e:		none									;-- mark end position of new value
 	series: none									;-- temporary hold last stack series
 	value:	none									;-- new value
 	value2:	none									;-- secondary new value
 	fail?:	none									;-- used for failing some parsing rules
 	type:	none									;-- define the type of the new value
 	rs?:	no 										;-- if TRUE, do lexing for Red/System
 	neg?:	no										;-- if TRUE, denotes a negative number value
 	base:	16										;-- binary base
 	otag: 	none
 	ot:		none
 	ct:		none
 	sep:	none
 	year:	none
 	month:	none
 	day:	none
 	hour:	none
 	mn:		none
 	sec:	none
 	date:	none
 	ee:		none
 	
 	;====== Parsing rules ======

 	four:	  charset "01234"
 	half:	  charset "012345"
 	non-zero: charset "123456789"
 	digit:	  union non-zero charset "0"
 	dot:	  #"."
 	comma:	  #","

 	byte: [
 		"25" half
 		| #"2" four digit
 		| #"1" digit digit
 		| opt #"0" non-zero digit
 		| 0 2 #"0" digit
 		| 1 2 #"0"
 	]

 	hexa:		 union digit charset "ABCDEF"
 	hexa-char:	 union hexa charset "abcdef"
 	alpha:		 charset [#"A" - #"Z" #"a" - #"z"]
 	base64-char: union digit union alpha charset "+/="
 	
 	;-- UTF-8 encoding rules from: http://tools.ietf.org/html/rfc3629#section-4
 	UTF-8-BOM: #{EFBBBF}
 	ws-ASCII:  charset " ^-^M"						;-- ASCII common whitespaces
 	ws-U+2k:   charset [#"^(80)" - #"^(8A)"]		;-- Unicode spaces in the U+2000-U+200A range
 	UTF8-tail: charset [#"^(80)" - #"^(BF)"]
 	UTF8-1:    charset [#"^(00)" - #"^(7F)"]
 	
 	UTF8-2: reduce [
 		charset [#"^(C2)" - #"^(DF)"]
 		UTF8-tail
 	]
 	
 	UTF8-3: reduce [
 		#{E0} 	 charset [#"^(A0)" - #"^(BF)"]   UTF8-tail
 		'| 		 charset [#"^(E1)" - #"^(EC)"] 2 UTF8-tail
 		'| #{ED} charset [#"^(80)" - #"^(9F)"]   UTF8-tail
 		'| 		 charset [#"^(EE)" - #"^(EF)"] 2 UTF8-tail
 	]
 	
 	UTF8-4: reduce [
 		#{F0} 	 charset [#"^(90)" - #"^(BF)"] 2 UTF8-tail 
 		'| 		 charset [#"^(F1)" - #"^(F3)"] 3 UTF8-tail
 		'| #{F4} charset [#"^(80)" - #"^(8F)"] 2 UTF8-tail
 	]
 	
 	UTF8-char: [pos: UTF8-1 | UTF8-2 | UTF8-3 | UTF8-4]
 	
 	not-word-char:	charset {/\^^,[](){}"#%$@:;}
 	not-word-1st:	union union not-word-char digit charset {'}
 	not-file-char:	charset {[](){}"@:;}
 	not-url-char:	charset {[](){}";}
 	not-email-char:	union not-file-char union ws-ASCII charset "<^/"
 	not-str-char:	#"^""
 	not-mstr-char:	#"}"
 	not-tag-1st:	complement union ws-ASCII charset "=><[](){};^""
 	not-tag-char:	complement charset ">"
 	tag-char:		charset "<>"
 	caret-char:		charset [#"^(40)" - #"^(5F)"]
 	non-printable-char: charset [#"^(00)" - #"^(1F)"]
 	pair-end:		charset {^{"[]();:}
 	integer-end:	charset {^{"[]();:xX}
 	path-end:		charset {^{"[]();}
 	file-end:		charset {^{[]();}
 	date-sep:		charset "/-"
 	time-sep:		charset "T/"
 	stop:			none

 	control-char: reduce [ 							;-- Control characters
 		charset [#"^(00)" - #"^(1F)"] 				;-- C0 control codes
 		'| #"^(C2)" charset [#"^(80)" - #"^(9F)"] 	;-- C1 control codes (UTF-8 encoded)
 	]
 	
 	UTF8-filtered-char: [
 		[pos: stop :pos (fail?: [end skip]) | UTF8-char e: (fail?: none)]
 		fail?
 	]
 	
 	UTF8-printable: [
 		[non-printable-char | not-str-char (fail?: [end skip]) | UTF8-char (fail?: none)]
 		fail?
 	]
 	
 	;-- Whitespaces list from: http://en.wikipedia.org/wiki/Whitespace_character
 	ws: [
 		#"^/" (
 			if count? [
 				line: line + 1
 				stack/nl?: yes
 			]
 		)
 		| ws-ASCII									;-- only the common whitespaces are matched
 		| #{C2} [
 			#{85}									;-- U+0085 (Newline)
 			| #{A0}									;-- U+00A0 (No-break space)
 		]
 		;| #{E1} [
 		;	#{9A80}									;-- U+1680 (Ogham space mark)
 		;	| #{A08E}								;-- U+180E (Mongolian vowel separator)
 		;]
 		;| #{E2} [
 		;	#{80} [
 		;		ws-U+2k								;-- U+2000-U+200A range
 		;		| #{A8}								;-- U+2028 (Line separator)
 		;		| #{A9}								;-- U+2029 (Paragraph separator)
 		;		| #{AF}								;-- U+202F (Narrow no-break space)
 		;	]
 		;	| #{819F}								;-- U+205F (Medium mathematical space)
 		;]
 		;| #{E38080}									;-- U+3000 (Ideographic space)
 	]
 	
 	newline-char: [
 		#"^/"
 		| #{C285}									;-- U+0085 (Newline)
 		| #{E280} [
 			#{A8}									;-- U+2028 (Line separator)
 			| #{A9}									;-- U+2029 (Paragraph separator)
 		]
 	]
 	
 	counted-newline: [pos: #"^/" (line: line + 1)]
 	
 	ws-no-count: [(count?: no) ws (count?: yes)]
 	
 	any-ws: [pos: any ws]
 	
 	symbol-rule: [
 		(stop: [not-word-char | ws-no-count | control-char | tag-char] otag: #"<" ot: none)
 		some [
 			otag ot: [#"/" (otag: [end skip] ot: back ot) :ot | none] ;-- a</b>
 			| #">" ct: (if ot [otag: [end skip] ct: back ot]) :ct	  ;-- a<b>
 			| UTF8-filtered-char
 		] e:
 	]
 	
 	begin-symbol-rule: [							;-- 1st char in symbols is restricted
 		(stop: [not-word-1st | ws-no-count | control-char])
 		UTF8-filtered-char
 		opt symbol-rule
 	]
 	
 	path-rule: [
 		pos: slash :pos (							;-- path detection barrier
 			stack/allocate block! 4
 			stack/push to type copy/part s e		;-- push 1st path element
 		)
 		some [
 			slash
 			s: [
 				integer-number-rule
 				| begin-symbol-rule			(type: word!)
 				| paren-rule 				(type: paren!)
 				| #":" s: begin-symbol-rule	(type: get-word!)
 				;@@ add more datatypes here
 			] (
 				stack/push either type = paren! [	;-- append path element
 					value
 				][
 					to type copy/part s e
 				]
 				type: path!
 			)
 		]
 		opt [#":" (type: set-path!)]
 		e: [path-end | ws-no-count | end | (pos: path throw-error)] :e ;-- detect invalid tail characters				
 		(value: stack/pop type)
 	]
 	
 	word-rule: 	[
 		(type: word!)
 		#"%" [ws-no-count | pos: file-end :pos | end] (value: "%")	;-- special case for remainder op!
 		| path: s: begin-symbol-rule [
 			url-rule
 			| path-rule 							;-- path matched
 			| (value: copy/part s e)				;-- word matched
 			  opt [#":" (type: set-word!)]
 		] 
 	]
 	
 	get-word-rule: [
 		#":" (type: get-word!) s: begin-symbol-rule [
 			path-rule (
 				value/1: to get-word! value/1		;-- workaround missing get-path! in R2
 			)
 			| (
 				type: get-word!
 				value: copy/part s e				;-- word matched
 			)
 		]
 	]
 	
 	lit-word-rule: [
 		#"'" (type: word!) [
 			#"/" (type: lit-word! value: "/")
 			| s: begin-symbol-rule [
 				path-rule (type: lit-path!)				;-- path matched
 				| (
 					type: lit-word!
 					value: copy/part s e				;-- word matched
 				)
 			]
 		][s: #":" :s (throw-error) | none]
 	]
 	
 	map-rule: [
 		"#(" (stack/allocate block! 10) any-value #")" (
 			stack/prefix #!map!
 			value: stack/pop block!
 		)
 	]
 	
 	issue-rule: [#"#" (type: issue!) s: symbol-rule]
 	
 	refinement-rule: [slash (type: refinement!) s: symbol-rule]
 	
 	slash-rule: [s: [slash opt slash] e:]
 	
 	hexa-rule: [2 8 hexa e: #"h" pos: [integer-end | ws-no-count | end ] :pos (type: integer!)]

 	sticky-word-rule: [								;-- protect from sticky words typos
 		mark: [integer-end | ws-no-count | end | (pos: s throw-error)] :mark
 	]

 	tuple-value-rule: [
 		(type: tuple!)
 		byte dot byte 1 12 [dot byte] e:
 	]

 	tuple-rule: [tuple-value-rule sticky-word-rule]
 	
 	time-rule: [
 		s: positive-integer-rule [
 			decimal-number-rule (value: as-time 0 value load-number copy/part s e neg?) ;-- mm:ss.dd
 			| (value2: load-number copy/part s e) [
 				#":" s: positive-integer-rule opt decimal-number-rule
 				  (value: as-time value value2 load-number copy/part s e neg?)	;-- hh:mm:ss[.dd]
 				| (value: as-time value value2 0 neg?)							;-- hh:mm
 			]
 		] (type: time!)
 	]

 	month-rule: [(m: none)
 		  "January"		(m: 1)
 		| "February"	(m: 2)
 		| "March"		(m: 3)
 		| "April"		(m: 4)
 		| "May"			(m: 5)
 		| "June"		(m: 6)
 		| "July"		(m: 7)
 		| "August"		(m: 8)
 		| "September"	(m: 9)
 		| "October"		(m: 10)
 		| "November"	(m: 11)
 		| "December"	(m: 12)
 	]
 	mon-rule: [(m: none)
 		  "Jan" (m: 1)
 		| "Feb" (m: 2)
 		| "Mar" (m: 3)
 		| "Apr" (m: 4)
 		| "May" (m: 5)
 		| "Jun" (m: 6)
 		| "Jul" (m: 7)
 		| "Aug" (m: 8)
 		| "Sep" (m: 9)
 		| "Oct" (m: 10)
 		| "Nov" (m: 11)
 		| "Dec" (m: 12)
 	]
 	
 	day-year-rule: [
 		(neg?: no) opt [#"-" (neg?: yes)]
 		s: 3 4 digit e: (year: load-number copy/part s e if neg? [year: 65536 - year])
 		| 1 2 digit e: (
 			value: load-number copy/part s e no
 			either day [year: value + pick [2000 1900] 50 > value][day: value]
 		)
 	]

 	date-rule: [
 		pos: [opt #"-" 1 4 digit date-sep | 8 digit #"T"] :pos [ ;-- quick lookhead
 			s: 8 digit ee: #"T" (							;-- yyyymmddT
 				year:  load-number copy/part s 4
 				month: load-number copy/part skip s 4 2
 				day:   load-number copy/part skip s 6 2
 				date:  make date! reduce [day month year]
 			) :ee
 			| day-year-rule sep: date-sep (sep: sep/1) [
 				s: 1 2 digit e: (month: load-number copy/part s e no)
 				| some alpha e: (
 					fail?: either all [parse/all copy/part s e [month-rule | mon-rule] m][month: m none][[end skip]]
 				) fail?
 			]
 			sep day-year-rule (
 				fail?: either all [day month year][
 					date: make date! reduce [day month year]
 					none
 				][[end skip]]
 			) fail?
 			| s: 4 digit #"-" (
 				year: load-number copy/part s 4
 				date: make date! reduce [1 1 year]
 			)[
 				"W" s: 2 digit (ee: none) opt [#"-" ee: non-zero] (	;-- yyyy-Www
 					date: to-iso-week date load-number copy/part s 2
 					if ee [date: to-weekday date to integer! s/4 - #"0"] ;-- yyyy-Www-d
 				)
 				| s: 3 digit (date: date + (load-number copy/part s 3) - 1) ;-- yyyy-ddd
 			] (month: -1)
 		](
 			type: date!
 			if all [
 				month <> -1 any [date/year <> year date/month <> month date/day <> day]
 			][throw-error]
 			day: month: year: none
 		) opt [
 			time-sep (ee: no) [
 				s: 6 digit opt [#"." 1 9 digit ee:] (	;-- Thhmmss[.sss]
 					hour: load-number copy/part s 2
 					mn:	  load-number copy/part skip s 2 2
 					sec: load-number either ee [copy/part skip s 4 ee][copy/part skip s 4 2]
 					date/time: as-time hour mn sec no
 				)
 				| 4 digit (								;-- Thhmm
 					hour: load-number copy/part s 2
 					mn:	  load-number copy/part skip s 2 2
 					date/time: as-time hour mn 0 no
 				)
 				| s: positive-integer-rule (value: load-number copy/part s e)
 				#":" [(neg?: no) time-rule (date/time: value) | (throw-error)]
 			]
 			opt [
 				#"Z" | [#"-" (neg?: yes) | #"+" (neg?: no)][
 					s: 4 digit (						;-- +/-hhmm
 						hour: load-number copy/part s e: skip s 2
 						mn:   load-number copy/part e e: skip e 2
 					)
 					| 1 2 digit e: (hour: load-number copy/part s e mn: none) ;-- +/-h, +/-hh
 					opt [#":" s: 2 digit e: (mn: load-number copy/part s e)]
 				](
 					either all [mn find [15 45] mn: round/floor/to mn 15][
 						date: reduce [#!date! date as-time hour mn 0 neg?] ;-- special encoding for 15/45
 					][
 						date/zone: as-time hour any [mn 0] 0 neg?
 					]
 				)
 			]
 		] sticky-word-rule (value: date)
 	]

 	positive-integer-rule: [digit any digit e: (type: integer!)]
 	
 	integer-number-rule: [
 		opt [#"-" (neg?: yes) | #"+" (neg?: no)] digit any [digit | #"'" digit] e:
 		(type: integer!)
 	]
 	
 	integer-rule: [
 		pos: decimal-special e:								;-- escape path for NaN, INFs
 		(type: issue! value: load-number copy/part s e)
 		|	(neg?: no) integer-number-rule
 			opt [decimal-number-rule | decimal-exp-rule e: (type: decimal!)]
 			opt [#"%" e: (type: issue!)]
 			sticky-word-rule
 			(value: load-number copy/part s e)
 			opt [
 				[#"x" | #"X"] (
 					type: pair!
 					value2: to pair! reduce [value 0]
 				)
 				[s: integer-number-rule | (type: pair! throw-error)]
 				mark: [pair-end | ws-no-count | end | (type: pair! throw-error)] :mark
 				(value2/2: load-number copy/part s e value: value2)
 			]
 			opt [#":" [time-rule | (throw-error)]]
 	]

 	decimal-special: [
 		s: "-0.0" | (neg?: no) opt [#"-" (neg?: yes)] "1.#" s: [
 			[[#"N" | #"n"] [#"a" | #"A"] [#"N" | #"n"]]
 			| [[#"I" | #"i"] [#"N" | #"n"] [#"F" | #"f"]]
 		]
 	]
 	
 	decimal-exp-rule: [
 		[[#"e" | #"E"] opt [#"-" | #"+"] 1 3 digit]
 	]
 	
 	decimal-number-rule: [
 		[dot | comma] digit any [digit | #"'" digit]
 		opt decimal-exp-rule e: (type: decimal!)
 	]

 	decimal-rule: [
 		decimal-number-rule opt [#"%" e: (type: issue!)]
 		sticky-word-rule
 	]
 		
 	block-rule: [#"[" (stack/allocate block! 10) any-value #"]" (value: stack/pop block!)]
 	
 	paren-rule: [#"(" (stack/allocate paren! 10) any-value	#")" (value: stack/pop paren!)]
 	
 	escaped-char: [
 		"^^(" [
 			[										;-- special case first
 				"null" 	 (value: #"^(00)")
 				| "back" (value: #"^(08)")
 				| "tab"  (value: #"^(09)")
 				| "line" (value: #"^(0A)")
 				| "page" (value: #"^(0C)")
 				| "esc"  (value: #"^(1B)")
 				| "del"	 (value: #"^~")
 			]
 			| pos: [2 6 hexa-char] e: (				;-- Unicode values allowed up to 10FFFFh
 					either rs? [
 						value: to-char to-integer debase/base copy/part pos e 16
 					][value: encode-UTF8-char pos e]
 			)
 		] #")"
 		| #"^^" [
 			[
 				#"/" 	(value: #"^/")
 				| #"-"	(value: #"^-")
 				| #"~" 	(value: #"^(del)")
 				| #"^^" (value: #"^^")				;-- caret escaping case
 				| #"{"	(value: #"{")
 				| #"}"	(value: #"}")
 				| #"^""	(value: #"^"")
 			]
 			| pos: caret-char (value: pos/1 - 64)
 		]
 	]
 	
 	char-rule: [
 		{#"} (type: char!) [
 			s: escaped-char
 			| copy value UTF8-printable (value: as-binary value)
 			| #"^-" (value: s/1)
 		] {"}
 	]
 	
 	line-string: [
 		{"} s: (type: string! stop: [not-str-char | newline-char])
 		any [{^^"} | escaped-char | UTF8-filtered-char]
 		e: {"}
 	]
 	
 	nested-curly-braces: [
 		(cnt: 1 fail?: none)
 		any [[
 				counted-newline 
 				| "^^^^"
 				| "^^{"
 				| "^^}"
 				| #"{" (cnt: cnt + 1)
 				| e: #"}" (if zero? cnt: cnt - 1 [fail?: [end skip]])
 				| UTF8-char
 			] fail?
 		]
 		#"}" (old-line: line)
 	]
 	
 	multiline-string: [#"{" s: (type: string!) nested-curly-braces]
 	
 	string-rule: [line-string | multiline-string]
 	
 	tag-rule: [
 		#"<" s: not-tag-1st (type: tag!)
 		 any [#"^"" thru #"^"" | #"'" thru #"'" | not-tag-char] e: #">"
 	]
 	
 	email-rule: [
 		(stop: [not-email-char])
 		s: opt [some UTF8-filtered-char] #"@" (type: email!)
 		any UTF8-filtered-char e: (value: dehex copy/part s e)
 	]

 	base-2-rule: [
 		"2#{" (type: binary!) [
 			s: any [counted-newline | 8 [#"0" | #"1" ] | ws-no-count | comment-rule]
 			e: #"}" (base: 2)
 			| (pos: skip s -3 throw-error)
 		]
 	]
 	
 	base-16-rule: [
 		opt "16" "#{" (type: binary!) [
 			s: any [counted-newline | 2 hexa-char | ws-no-count | comment-rule]
 			e: #"}" (base: 16)
 			| (pos: skip s -2 throw-error)
 		]
 	]

 	base-64-rule: [
 		"64#{" (type: binary!) [
 			s: any [counted-newline | base64-char | ws-no-count | comment-rule]
 			e: #"}" (
 				cnt: offset? s e
 				if all [0 < cnt cnt < 4][pos: skip s -4 throw-error]
 				base: 64
 			)
 			| (pos: skip s -4 throw-error)
 		]
 	]

 	binary-rule: [[base-16-rule | base-64-rule | base-2-rule] (old-line: line)]

 	file-rule: [
 		pos: #"%" (type: file! stop: [not-file-char | ws-no-count]) [
 			#"{" (throw-error)
 			| line-string e: (value: encode-file s e)
 			| s: any UTF8-filtered-char e: (value: copy/part s e)
 		]
 	]

 	url-rule: [
 		#":" (type: url! stop: [not-url-char | ws-no-count])
 		some UTF8-filtered-char e: (value: dehex copy/part s e)
 	]

 	escaped-rule: [
 		"#["
 		any-value
 ;		any-ws [
 ;			  "true"  (value: true)
 ;			| "false" (value: false)
 ;			| s: [
 ;				"none!" | "logic!" | "block!" | "integer!" | "word!" 
 ;				| "set-word!" | "get-word!" | "lit-word!" | "refinement!"
 ;				| "binary!" | "string!"	| "char!" | "bitset!" | "path!"
 ;				| "set-path!" | "lit-path!" | "native!"	| "action!"
 ;				| "issue!" | "paren!" | "function!" 
 ;			] e: (value: get to word! copy/part s e)
 ;			| "none" (value: none)
 ;		]  any-ws
 		 #"]"
 	]
 	
 	comment-rule: [#";" [to #"^/" | to end]]
 	
 	wrong-end: [(
 			ending: either 1 < length? stack/stk [
 				value: switch type?/word stack/top [
 					block! [#"]"]
 					paren! [#")"]
 				]
 				first [(throw-error/with ["missing" mold value "character"])]
 			][none]
 		)
 		ending
 	]

 	literal-value: [
 		pos: (e: none) s: [
 			comment-rule
 			| escaped-rule    (stack/push value)
 			| tuple-rule	  (stack/push load-tuple	 copy/part s e)
 			| hexa-rule		  (stack/push decode-hexa	 copy/part s e)
 			| binary-rule	  (stack/push load-binary s e base)
 			| email-rule	  (stack/push to email! value)
 			| date-rule		  (stack/push value)
 			| integer-rule	  (stack/push value)
 			| decimal-rule	  (stack/push load-decimal	 copy/part s e)
 			| tag-rule		  (stack/push to tag!		 copy/part s e)
 			| word-rule		  (stack/push to type value)
 			| lit-word-rule	  (stack/push to type value)
 			| get-word-rule	  (stack/push to type value)
 			| refinement-rule (stack/push to refinement! copy/part s e)
 			| slash-rule	  (stack/push to word!		 copy/part s e)
 			| file-rule		  (stack/push load-file value)
 			| char-rule		  (stack/push decode-UTF8-char value)
 			| block-rule	  (stack/push value)
 			| paren-rule	  (stack/push value)
 			| string-rule	  (stack/push load-string s e)
 			| map-rule		  (stack/push value)
 			| issue-rule	  (stack/push to issue!		 copy/part s e)
 		]
 	]
 	
 	any-value: [pos: any [literal-value | ws]]

 	header: [
 		pos:  ["Red/System" | "Red" | "REBOL"]
 		any-ws block-rule (stack/push value)
 		| (throw-error/with "Invalid Red/REBOL program") end skip
 	]

 	program: [
 		pos: opt UTF-8-BOM
 		header
 		any-value
 		opt wrong-end
 	]
 	
 	;====== Helper functions ======
 	
 	stack: context [
 		stk: []
 		nl?: no
 		
 		allocate: func [type [datatype!] size [integer!] /local new pos][
 			pos: insert/only tail stk new: make type size
 			if nl? [new-line back pos yes nl?: no]
 			new
 		]
 		
 		prefix: func [value][insert/only last stk :value]
 		
 		push: func [value][
 			value: insert/only tail last stk :value
 			if nl? [new-line back value yes nl?: no]
 			value
 		]
 		
 		pop: func [type [datatype!] /local pos][
 			pos: back tail stk
 			nl?: new-line? pos
 			
 			either any [type = path! type = set-path!][
 				change/only pos to type pos/1
 			][
 				if type <> type? pos/1 [
 					throw-error/with ["invalid" mold type "closing delimiter"]
 				]
 			]
 			also pos/1 remove pos
 		]
 		
 		top: does [last stk]
 		
 		reset: does [clear stk]
 	]
 	
 	throw-error: func [/with msg [string! block!]][
 		print rejoin [
 			"*** Syntax Error: " either with [
 				uppercase/part reform msg 1
 			][
 				reform ["Invalid" mold type "value"]
 			]
 			"^/*** line: " line
 			"^/*** at: " mold copy/part pos 40
 			"^/*** in: " mold source-file
 		]
 ;		either encap? [quit][halt]
 	]

 	
 	pad-head: func [s [string!]][
 		head insert/dup s #"0" 8 - length? s
 	]
 	
 	encode-UTF8-char: func [s [string!] e [string!] /local c code new][
 		c: debase/base pad-head copy/part s e 16
 		while [c/1 = 0][c: next c]					;-- trim heading zeros
 		code: to integer! c
 		
 		case [
 			code <= 127  [
 				new: to char! code					;-- c <= 7Fh
 			]
 			code <= 2047 [							;-- c <= 07FFh
 				new: (shift/left (shift code 6) or #"^(C0)" 8)
 						or (code and #"^(3F)") or #"^(80)"
 			]
 			code <= 65535 [							;-- c <= FFFFh
 				new: (shift/left (shift code 12) or #"^(E0)" 16)
 						or (shift/left (shift code 6) and #"^(3F)" or #"^(80)" 8)
 						or (code and #"^(3F)") or #"^(80)"
 			]
 			code <= 1114111 [						;-- c <= 10FFFFh
 				new: (shift/left (shift code 18) or #"^(F0)" 24)
 						or (shift/left (shift code 12) and #"^(3F)" or #"^(80)" 16)
 						or (shift/left (shift code 6)  and #"^(3F)" or #"^(80)" 8)
 						or (code and #"^(3F)") or #"^(80)"
 			]
 			'else [
 				throw-error/with "Codepoints above U+10FFFF are not supported"
 			]
 		]
 		if integer? new [
 			new: debase/base to-hex new 16
 			remove-each byte new [byte = #"^(null)"]
 		]	
 		new
 	]
 	
 	decode-UTF8-char: func [value /redbin][
 		if all [not redbin char? value][return encode-char to integer! value]
 		
 		value: switch/default length? value [
 			1 [value]
 			2 [
 				value: value and #{1F3F}
 				value: add shift/left value/1 6 value/2
 			]
 			3 [
 				value: value and #{0F3F3F}
 				value: add add
 					shift/left value/1 12
 					shift/left value/2 6
 					value/3
 			]
 			4 [
 				value: value and #{073F3F3F}
 				value: add add add
 					shift/left value/1 18
 					shift/left value/2 12
 					shift/left value/3 6
 					value/4
 			]
 		][
 			throw-error/with "Unsupported or invalid UTF-8 encoding"
 		]
 		
 		either redbin [value][
 			encode-char to integer! value				;-- special encoding for Unicode char!
 		]
 	]
 	
 	decode-UTF8-string: func [str [string!] /local new s e][
 		new: make string! length? str
 		parse/all str [
 			some [
 				s: UTF8-char e: (
 					append new debase/base skip decode-UTF8-char as-binary copy/part s e 7 16
 				)
 			]
 		]
 		head change/part str new tail str
 	]
 	
 	encode-char: func [value [integer!]][
 		head insert to-hex value #"'"
 	]
 	
 	decode-hexa: func [s [string!]][
 		to integer! debase/base s 16
 	]
 	
 	as-time: func [h [integer!] m [integer!] s [integer! decimal!] neg? [logic!] /local t][
 		if any [all [h <> 0 m < 0] all [s s < 0]][type: time! throw-error]
 		t: to time! reduce [abs h abs m abs s]
 		either neg? [negate t][t]
 	]
 	
 	to-weekday: func [d [date!] wd [integer!]][
 		if negative? wd: wd - d/weekday [wd: 7 + wd]
 		d + wd
 	]
 	
 	to-iso-week: func [d [date!] w [integer!] /local wd d1][
 		d1: make date! reduce [1 1 d/year]
 		wd: d1/weekday
 		d1 + (w - 1 * 7 + (either wd < 5 [1][8]) - wd)
 	]
 	
 	load-tuple: func [s [string!] /local new byte p e][
 		new: join make issue! 1 + length? s #"~"
 		byte: [p: 1 3 digit e: (append new skip to-hex load copy/part p e 6)]
 		unless parse s [byte 2 11 [dot byte]][throw-error]
 		new
 	]

 	load-number: func [s [string!]][
 		switch/default type [
 			#[datatype! decimal!][s: load-decimal s]
 			#[datatype! issue!  ][
 				if s = "-0.0" [s: "0-"]					;-- re-encoded for consistency
 				s: to issue! either #"%" = last s [s][join "." s]
 				if neg? [append s #"-"]
 			]
 		][
 			unless find [integer! decimal!] type?/word s: to integer! s [throw-error]
 		]
 		s
 	]

 	load-decimal: func [s [string!]][
 		unless attempt [s: to decimal! s][throw-error]
 		s
 	]

 	load-string: func [s [string!] e [string!] /local new filter][
 		new: make string! offset? s e				;-- allocated size close to final size
 		filter: get pick [UTF8-char UTF8-filtered-char] s/-1 = #"{"

 		parse/all/case copy/part s e [
 			any [
 				escaped-char   (insert tail new value)
 				| s: filter e: (insert/part tail new s e)
 			]										;-- exit on matching " or }
 		]
 		new
 	]
 	
 	load-binary: func [s [string!] e [string!] base [integer!] /local new str][
 		new: make string! offset? s e				;-- allocated size above final size

 		parse/all/case s [
 			some [
 				copy str some base64-char (insert tail new str)
 				| ws | comment-rule
 				| #"}" end skip
 			]
 		]
 		new: debase/base new base
 		if none? new [throw-error]
 		new
 	]

 	load-file: func [s [string!]][
 		parse s [any [#"%" [2 hexa | (pos: skip pos negate 1 + length? s throw-error)] | skip]]
 		to file! replace/all dehex s #"\" #"/"
 	]
 	
 	encode-file: func [s [string!] e [string!]][
 		replace/all copy/part s back e "%" "%25"
 	]
 	
 	process: func [src [string! binary!] /local blk][
 		old-line: line: 1
 		count?: yes
 		blk: stack/allocate block! 100				;-- root block
 		
 		unless parse/all/case src program [throw-error]
 		stack/reset
 		blk
 	]
 ]
No results found