Last active
June 2, 2016 03:55
-
-
Save greggirwin/66d7c6892fc310097cd91ab354189542 to your computer and use it in GitHub Desktop.
An updated `split` function for Red
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
map-each: function [ | |
"Evaluates body for each value(s) in a series, returning all results." | |
'word [word! block!] "Word, or words, to set on each iteration" | |
data [series! map!] | |
body [block!] | |
] [ | |
collect [ | |
foreach :word data [ | |
if not unset? set/any 'tmp do body [keep/only :tmp] | |
] | |
] | |
] | |
collect: function [ | |
"Collect in a new block all the values passed to KEEP function from the body block" | |
body [block!] "Block to evaluate" | |
/into "Insert into a buffer instead (returns position after insert)" | |
collected [series!] "The buffer series (modified)" | |
][ | |
keep: func [v /only][either only [append/only collected v][append collected v]] | |
unless collected [collected: make block! 16] | |
parse body rule: [ ;-- selective binding (needs BIND/ONLY support) | |
any [pos: ['keep | 'collected] (pos/1: bind pos/1 'keep) | any-string! | into rule | skip] | |
] | |
do body | |
either into [collected][head collected] | |
] | |
split: function [ | |
"Split a series into pieces; fixed or variable size, fixed number, or at delimiters" | |
series [series!] "The series to split" | |
;!! If we support /at, dlm could be any-value. | |
dlm ;[block! integer! char! bitset! any-string! any-function!] "Split size, delimiter(s), predicate, or rule(s)." | |
/parts "If dlm is an integer, split into n pieces, rather than pieces of length n." | |
/at "Split into 2, at the index position if an integer or the first occurrence of the dlm" | |
][ | |
if any-function? :dlm [ | |
res: reduce [ copy [] copy [] ] | |
foreach value series [ | |
append/only pick res make logic! dlm :value :value | |
] | |
return res | |
] | |
if at [ | |
return reduce either integer? dlm [ | |
[ | |
copy/part series dlm | |
copy system/words/at series dlm + 1 | |
] | |
][ | |
;-- Without adding a /tail refinement, we don't know if they want | |
; to split at the head or tail of the delimiter, so we'll exclude | |
; the delimiter from the result entirely. They know what the dlm | |
; was that they passed in, so they can add it back to either side | |
; of the result if they want to. | |
[ | |
copy/part series find series :dlm | |
copy find/tail series :dlm | |
] | |
] | |
] | |
;print ['split 'parts? parts mold series mold dlm] | |
either all [block? dlm parse dlm [some integer!]][ | |
map-each len dlm [ | |
either positive? len [ | |
copy/part series series: skip series len | |
][ | |
series: skip series negate len | |
() ;-- return unset so that nothing is added to output | |
] | |
] | |
][ | |
size: dlm ;-- alias for readability | |
res: collect [ | |
;print ['split 'parts? parts mold series mold dlm newline] | |
parse series case [ | |
all [integer? dlm parts][ | |
if size < 1 [cause-error 'Script 'invalid-arg size] | |
count: size - 1 | |
piece-size: to integer! round/down divide length? series size | |
if zero? piece-size [piece-size: 1] | |
[ | |
count [copy series piece-size skip (keep/only series)] | |
copy series to end (keep/only series) | |
] | |
] | |
integer? dlm [ | |
if size < 1 [cause-error 'Script 'invalid-arg size] | |
[any [copy series 1 size skip (keep/only series)]] | |
] | |
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm] | |
[any [mk1: some [mk2: dlm break | skip] (keep/only copy/part mk1 mk2)]] | |
] | |
] | |
] | |
;-- Special processing, to handle cases where they spec'd more items in | |
; /parts than the series contains (so we want to append empty items), | |
; or where the dlm was a char/string/charset and it was the last char | |
; (so we want to append an empty field that the above rule misses). | |
fill-val: does [copy either any-block? series [ [] ][ "" ]] | |
add-fill-val: does [append/only res fill-val] | |
case [ | |
all [integer? size parts][ | |
;-- If the result is too short, i.e., less items than 'size, add | |
; empty items to fill it to 'size. | |
; We loop here, because insert/dup doesn't copy the value inserted. | |
if size > length? res [ | |
loop (size - length? res) [add-fill-val] | |
] | |
] | |
;-- integer? size | |
; If they spec'd an integer size, but did not use /parts, there is | |
; no special filing to be done. The final element may be less than | |
; size, which is intentional. | |
;-- | |
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm] | |
;-- If the last thing in the series is a delimiter, there is an | |
; implied empty field after it, which we add here. | |
case [ | |
bitset? dlm [ | |
;-- ATTEMPT is here because LAST will return NONE for an | |
; empty series, and finding none in a bitest is not allowed. | |
if attempt [find dlm last series][add-fill-val] | |
] | |
char? dlm [ | |
if dlm = last series [add-fill-val] | |
] | |
string? dlm [ | |
if all [ | |
find series dlm | |
empty? find/last/tail series dlm | |
][add-fill-val] | |
] | |
] | |
] | |
] | |
res | |
] | |
] | |
test: func [block expected-result /local res err] [ | |
if error? set/any 'err try [ | |
print [mold/only :block newline tab mold res: do block] | |
if res <> expected-result [print [tab 'FAILED! tab 'expected mold expected-result]] | |
][ | |
print [mold/only :block newline tab "ERROR!" mold err] | |
] | |
] | |
test [split "1234567812345678" 4] ["1234" "5678" "1234" "5678"] | |
test [split "1234567812345678" 3] ["123" "456" "781" "234" "567" "8"] | |
test [split "1234567812345678" 5] ["12345" "67812" "34567" "8"] | |
test [split/parts [1 2 3 4 5 6] 2] [[1 2 3] [4 5 6]] | |
test [split/parts "1234567812345678" 2] ["12345678" "12345678"] | |
test [split/parts "1234567812345678" 3] ["12345" "67812" "345678"] | |
test [split/parts "1234567812345678" 5] ["123" "456" "781" "234" "5678"] | |
; Dlm longer than series | |
test [split/parts "123" 6] ["1" "2" "3" "" "" ""] ;or ["1" "2" "3"] | |
test [split/parts [1 2 3] 6] [[1] [2] [3] [] [] []] ;or [1 2 3] | |
;test [split/parts [1 2 3] 6] [[1] [2] [3] none none none] ;or [1 2 3] | |
test [split [1 2 3 4 5 6] [2 1 3]] [[1 2] [3] [4 5 6]] | |
test [split "1234567812345678" [4 4 2 2 1 1 1 1]] ["1234" "5678" "12" "34" "5" "6" "7" "8"] | |
test [split first [(1 2 3 4 5 6 7 8 9)] 3] [(1 2 3) (4 5 6) (7 8 9)] | |
;!! Red doesn't have binary! yet | |
;test [split #{0102030405060708090A} [4 3 1 2]] [#{01020304} #{050607} #{08} #{090A}] | |
test [split [1 2 3 4 5 6] [2 1]] [[1 2] [3]] | |
test [split [1 2 3 4 5 6] [2 1 3 5]] [[1 2] [3] [4 5 6] []] | |
test [split [1 2 3 4 5 6] [2 1 6]] [[1 2] [3] [4 5 6]] | |
; Old design for negative skip vals | |
;test [split [1 2 3 4 5 6] [3 2 2 -2 2 -4 3]] [[1 2 3] [4 5] [6] [5 6] [3 4 5]] | |
; New design for negative skip vals | |
test [split [1 2 3 4 5 6] [2 -2 2]] [[1 2] [5 6]] | |
test [split "abc,de,fghi,jk" #","] ["abc" "de" "fghi" "jk"] | |
;!! Red doesn't have tag! yet | |
;test [split "abc<br>de<br>fghi<br>jk" <br>] ["abc" "de" "fghi" "jk"] | |
test [split "a.b.c" "."] ["a" "b" "c"] | |
test [split "c c" " "] ["c" "c"] | |
test [split "1,2,3" " "] ["1,2,3"] | |
test [split "1,2,3" ","] ["1" "2" "3"] | |
test [split "1,2,3," ","] ["1" "2" "3" ""] | |
test [split "1,2,3," charset ",."] ["1" "2" "3" ""] | |
test [split "1.2,3." charset ",."] ["1" "2" "3" ""] | |
test [split "-a-a" ["a"]] ["-" "-"] | |
test [split "-a-a'" ["a"]] ["-" "-" "'"] | |
;------------------------------------------------------------------------------- | |
; to/thru bitset! is broken in R3 now. | |
test [split "abc|de/fghi:jk" charset "|/:"] ["abc" "de" "fghi" "jk"] | |
; to/thru block! is broken in R3 now. | |
test [split "abc^M^Jde^Mfghi^Jjk" [crlf | #"^M" | newline]] ["abc" "de" "fghi" "jk"] | |
test [split "abc de fghi jk" [some #" "]] ["abc" "de" "fghi" "jk"] | |
;------------------------------------------------------------------------------- | |
test [split [1 2 3 4 5 6] :even?] [[2 4 6] [1 3 5]] | |
test [split [1 2 3 4 5 6] :odd?] [[1 3 5] [2 4 6]] | |
test [split [1 2.3 /a word "str" #iss x: :y] :refinement?] [[/a] [1 2.3 word "str" #iss x: :y]] | |
test [split [1 2.3 /a word "str" #iss x: :y] :number?] [[1 2.3] [/a word "str" #iss x: :y]] | |
test [split [1 2.3 /a word "str" #iss x: :y] :any-word?] [[/a word #iss x: :y] [1 2.3 "str"]] | |
;------------------------------------------------------------------------------- | |
test [split/at [1 2.3 /a word "str" #iss x: :y] 4] [[1 2.3 /a word] ["str" #iss x: :y]] | |
;!! Splitting /at with a non-integer excludes the delimiter from the result | |
test [split/at [1 2.3 /a word "str" #iss x: :y] "str"] [[1 2.3 /a word] [#iss x: :y]] | |
test [split/at [1 2.3 /a word "str" #iss x: :y] 'word] [[1 2.3 /a] ["str" #iss x: :y]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment