Skip to content

Instantly share code, notes, and snippets.

@shirok
Last active April 12, 2016 14:04
Show Gist options
  • Save shirok/13d71397348d8966cd84d4272897f307 to your computer and use it in GitHub Desktop.
Save shirok/13d71397348d8966cd84d4272897f307 to your computer and use it in GitHub Desktop.
;; begin reference implementation
(define (string-split s delimiter . args)
;; The argument checking part might be refactored with other srfi-130
;; routines.
(unless (string? s) (error "string expected" s))
(unless (string? delimiter) (error "string expected" delimiter))
(let ((slen (string-length s)))
(receive (grammar limit no-limit start end)
(if (pair? args)
(if (pair? (cdr args))
(if (pair? (cddr args))
(if (pair? (cdddr args))
(values (car args) (cadr args) #f (caddr args) (cadddr args))
(values (car args) (cadr args) #f (caddr args) slen))
(values (car args) (cadr args) #f 0 slen))
(values (car args) #f #t 0 slen))
(values 'infix #f #t 0 slen))
(unless (memq grammar '(infix strict-infix prefix suffix))
(error "grammar must be one of (infix strict-infix prefix suffix)" grammar))
(unless (or no-limit
(and (exact? limit) (integer? limit) (>= limit 0)))
(error "limit must be exact nonnegative integer" limit))
(unless (and (exact? start) (integer? start))
(error "start argument must be exact integer" start))
(unless (<= 0 start slen)
(error "start argument out of range" start))
(unless (<= 0 end slen)
(error "end argument out of range" end))
(unless (<= start end)
(error "start argument is greater than end argument" (list start end)))
(cond ((string-cursor=? s start end)
(if (eq? grammar 'strict-infix)
(error "empty string cannot be spilt with strict-infix grammar")
'()))
((string-null? delimiter)
(%string-split-chars s start end limit))
(else (%string-split s start end delimiter grammar limit))))))
(define (%string-split-chars s start end limit)
(if (not limit)
(map string (string->list/cursors s start end))
(let loop ((r '()) (c start) (n 0))
(cond ((string-cursor=? s c end) (reverse r))
((>= n limit) (reverse (cons (substring s c end) r)))
(else (loop (cons (string (string-ref s c)) r)
(string-cursor-forward s c 1)
(+ n 1)))))))
(define (%string-split s start end delimiter grammar limit)
(let ((dlen (string-length delimiter)))
(define (finish r c)
(let ((rest (substring/cursors s c end)))
(if (and (eq? grammar 'suffix) (string-null? rest))
(reverse r)
(reverse (cons rest r)))))
(define (scan r c n)
(if (and limit (>= n limit))
(finish r c)
(let ((i (string-contains s delimiter c end)))
(if i
(let ((fragment (substring/cursors s c i)))
(if (and (= n 0) (eq? grammar 'prefix) (string-null? fragment))
(scan r (string-cursor-forward s i dlen) (+ n 1))
(scan (cons fragment r)
(string-cursor-forward s i dlen)
(+ n 1))))
(finish r c)))))
(scan '() start 0)))
;; end reference implementation
;; begin gauche-specific stuff
(use srfi-13)
(define substring/cursors substring)
(define string->list/cursors string->list)
(define (string-cursor=? s c1 c2) (= c1 c2))
(define (string-cursor-forward s cursor nchars)
(when (> (+ cursor nchars) (string-length s)) (error "cursor overrun"))
(+ cursor nchars))
(use gauche.test)
(use compat.chibi-test)
(test-start "string-split")
(chibi-test
;; end gauche-specific stuff
(test '() (string-split "" ":=:"))
(test '("" "") (string-split ":=:" ":=:"))
(test '("abc") (string-split "abc" ":=:"))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:"))
(test '("abc" "" "ghi") (string-split "abc:=::=:ghi" ":=:"))
(test '("abc" "=:ghi") (string-split "abc:=:=:ghi" ":=:"))
(test '("abc" "def" "") (string-split "abc:=:def:=:" ":=:"))
(test '("" "def" "ghi") (string-split ":=:def:=:ghi" ":=:"))
(test '("abc:=:def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'infix 0))
(test '("abc" "def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'infix 1))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'infix 2))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'infix 3))
(test-error (string-split "" ":=:" 'strict-infix))
(test '("" "") (string-split ":=:" ":=:" 'strict-infix))
(test '("abc") (string-split "abc" ":=:" 'strict-infix))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'strict-infix))
(test '("abc" "" "ghi") (string-split "abc:=::=:ghi" ":=:" 'strict-infix))
(test '("abc" "=:ghi") (string-split "abc:=:=:ghi" ":=:" 'strict-infix))
(test '("abc" "def" "") (string-split "abc:=:def:=:" ":=:" 'strict-infix))
(test '("" "def" "ghi") (string-split ":=:def:=:ghi" ":=:" 'strict-infix))
(test '() (string-split "" ":=:" 'prefix))
(test '("") (string-split ":=:" ":=:" 'prefix))
(test '("abc") (string-split "abc" ":=:" 'prefix))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'prefix))
(test '("abc" "" "ghi") (string-split "abc:=::=:ghi" ":=:" 'prefix))
(test '("abc" "=:ghi") (string-split "abc:=:=:ghi" ":=:" 'prefix))
(test '("abc" "def" "") (string-split "abc:=:def:=:" ":=:" 'prefix))
(test '("def" "ghi") (string-split ":=:def:=:ghi" ":=:" 'prefix))
(test '("abc:=:def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'prefix 0))
(test '("abc" "def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'prefix 1))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'prefix 2))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'prefix 3))
(test '(":=:def:=:ghi") (string-split ":=:def:=:ghi" ":=:" 'prefix 0))
(test '("def:=:ghi") (string-split ":=:def:=:ghi" ":=:" 'prefix 1))
(test '("def" "ghi") (string-split ":=:def:=:ghi" ":=:" 'prefix 2))
(test '() (string-split "" ":=:" 'suffix))
(test '("") (string-split ":=:" ":=:" 'suffix))
(test '("abc") (string-split "abc" ":=:" 'suffix))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'suffix))
(test '("abc" "" "ghi") (string-split "abc:=::=:ghi" ":=:" 'suffix))
(test '("abc" "=:ghi") (string-split "abc:=:=:ghi" ":=:" 'suffix))
(test '("abc" "def") (string-split "abc:=:def:=:" ":=:" 'suffix))
(test '("" "def" "ghi") (string-split ":=:def:=:ghi" ":=:" 'suffix))
(test '("abc:=:def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'suffix 0))
(test '("abc" "def:=:ghi") (string-split "abc:=:def:=:ghi" ":=:" 'suffix 1))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'suffix 2))
(test '("abc" "def" "ghi") (string-split "abc:=:def:=:ghi" ":=:" 'suffix 3))
(test '("abc:=:def:=:") (string-split "abc:=:def:=:" ":=:" 'suffix 0))
(test '("abc" "def:=:") (string-split "abc:=:def:=:" ":=:" 'suffix 1))
(test '("abc" "def") (string-split "abc:=:def:=:" ":=:" 'suffix 2))
;; empty delimiter
(test '() (string-split "" "")) ;**
(test '("a" "b" "c" "d" "e") (string-split "abcde" ""))
(test '("abcde") (string-split "abcde" "" 'infix 0))
(test '("a" "bcde") (string-split "abcde" "" 'infix 1))
(test '("a" "b" "cde") (string-split "abcde" "" 'infix 2))
(test '("a" "b" "c" "de") (string-split "abcde" "" 'infix 3))
(test '("a" "b" "c" "d" "e") (string-split "abcde" "" 'infix 4))
(test '("a" "b" "c" "d" "e") (string-split "abcde" "" 'infix 5))
(test '("a" "b" "c" "d" "e") (string-split "abcde" "" 'infix 6))
;; bounded
(test '("d" "e" "f") (string-split "a/b/c/d/e/f/g/h/i" "/" 'infix 10
6 11))
(test '("d" "e/f") (string-split "a/b/c/d/e/f/g/h/i" "/" 'infix 1
6 11))
(test '("" "d" "e" "f" "") (string-split "a/b/c/d/e/f/g/h/i" "/" 'infix 10
5 12))
(test '("" "d" "e/f/") (string-split "a/b/c/d/e/f/g/h/i" "/" 'infix 2
5 12))
(test '() (string-split "a/b/c" "/" 'infix 10 0 0))
(test '() (string-split "a/b/c" "/" 'infix 10 5 5))
;; error conditions
(test-error (string-split "abc" "def" 'foo))
(test-error (string-split "abc" "def" 'infix -1))
(test-error (string-split "abc" "def" 'infix 10 5 8))
(test-error (string-split "abc" "def" 'infix 10 0 8))
(test-error (string-split "abc" "def" 'infix 10 2 1))
;; begin gauche-specific stuff
)
(test-end)
;; end gauche-specific stuff
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment