Skip to content

Instantly share code, notes, and snippets.

@bizenn
Created January 23, 2013 00:05
Show Gist options
  • Save bizenn/4600197 to your computer and use it in GitHub Desktop.
Save bizenn/4600197 to your computer and use it in GitHub Desktop.
Patch to add limit optional parameter to string-split on Gauche. Sorry but not including document updates.
diff --git a/lib/gauche/stringutil.scm b/lib/gauche/stringutil.scm
index d252322..388eb88 100644
--- a/lib/gauche/stringutil.scm
+++ b/lib/gauche/stringutil.scm
@@ -44,12 +44,20 @@
;; Generic string-split
;; splitter can be a character, a char-set, a string, or a regexp.
-(define (string-split string splitter)
- (cond ((char? splitter) (%string-split-by-char string splitter))
- ((and (string? splitter) (= (string-length splitter) 1))
- (%string-split-by-char string (string-ref splitter 0)))
- (else (%string-split string (%string-split-scanner splitter)))
- ))
+(define (string-split string splitter :optional limit)
+ (let* ((limit_ (cond ((undefined? limit) (string-length string))
+ ((< limit 0) (error "positive exact integer or 0 required for limit, byt got" limit))
+ (else limit)))
+ ;; In order to build Gauche itself by old Gauche.
+ (%%string-split-by-char
+ (if (undefined? limit)
+ (cut %string-split-by-char <> <>)
+ (cut %string-split-by-char <> <> limit_)
+ )))
+ (cond ((char? splitter) (%%string-split-by-char string splitter))
+ ((and (string? splitter) (= (string-length splitter) 1))
+ (%%string-split-by-char string (string-ref splitter 0)))
+ (else (%string-split string (%string-split-scanner splitter) limit_)))))
;; aux fns
(define (%string-split-scanner splitter)
@@ -94,11 +102,15 @@
[else (values before (string-pointer-substring p :after #t))])))
(lambda (s) (scan-in (make-string-pointer s))))
-(define (%string-split string scanner)
- (let loop ((s string)
- (r '()))
- (receive (before after) (scanner s)
- (if after
- (loop after (cons before r))
- (reverse! (cons before r))))))
-
+(define (%string-split string scanner limit)
+ (if (= 0 limit)
+ `(,string)
+ (let loop ((s string)
+ (count limit)
+ (r '()))
+ (if (< count 1)
+ (reverse! (cons s r))
+ (receive (before after) (scanner s)
+ (if after
+ (loop after (- count 1) (cons before r))
+ (reverse! (cons before r))))))))
diff --git a/src/gauche/string.h b/src/gauche/string.h
index 374f688..855ebd1 100644
--- a/src/gauche/string.h
+++ b/src/gauche/string.h
@@ -231,7 +231,7 @@ enum {
* Searching
*/
-SCM_EXTERN ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch);
+SCM_EXTERN ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch, ScmObj limit);
SCM_EXTERN ScmObj Scm_StringScan(ScmString *s1, ScmString *s2, int retmode);
SCM_EXTERN ScmObj Scm_StringScanChar(ScmString *s1, ScmChar ch, int retmode);
SCM_EXTERN ScmObj Scm_StringScanRight(ScmString *s1, ScmString *s2, int retmode);
diff --git a/src/libstr.scm b/src/libstr.scm
index b30bca4..193ab30 100644
--- a/src/libstr.scm
+++ b/src/libstr.scm
@@ -109,7 +109,7 @@
(select-module gauche.internal)
;; see lib/gauche/string for generic string-split
-(define-cproc %string-split-by-char (s::<string> ch::<char>)
+(define-cproc %string-split-by-char (s::<string> ch::<char> :optional limit)
Scm_StringSplitByChar)
(define-cproc %maybe-substring (str::<string> :optional start end)
diff --git a/src/load.c b/src/load.c
index b3fb37b..f35d823 100644
--- a/src/load.c
+++ b/src/load.c
@@ -421,7 +421,7 @@ static ScmObj break_env_paths(const char *envname)
return SCM_NIL;
} else {
return Scm_StringSplitByChar(SCM_STRING(SCM_MAKE_STR_COPYING(e)),
- delim);
+ delim, SCM_UNBOUND);
}
}
diff --git a/src/main.c b/src/main.c
index 6ceae4c..0e197b7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -489,7 +489,7 @@ int main(int argc, char **argv)
break;
case 'u':
if (Scm_Require(Scm_StringJoin(Scm_StringSplitByChar(SCM_STRING(v),
- '.'),
+ '.', SCM_UNBOUND),
SCM_STRING(SCM_MAKE_STR("/")),
SCM_STRING_JOIN_INFIX),
0, &lpak) < 0) {
diff --git a/src/string.c b/src/string.c
index ed73ac3..fa36feb 100644
--- a/src/string.c
+++ b/src/string.c
@@ -852,7 +852,7 @@ ScmObj Scm_MaybeSubstring(ScmString *x, ScmObj start, ScmObj end)
/* Split string by char. Char itself is not included in the result. */
/* TODO: fix semantics. What should be returned for (string-split "" #\.)? */
-ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch)
+ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch, ScmObj limit)
{
const ScmStringBody *strb = SCM_STRING_BODY(str);
int size = SCM_STRING_BODY_SIZE(strb), sizecnt = 0;
@@ -860,6 +860,12 @@ ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch)
const char *s = SCM_STRING_BODY_START(strb), *p = s, *e = s + size;
ScmObj head = SCM_NIL, tail = SCM_NIL;
+ int ilimit;
+ if (SCM_UNBOUNDP(limit))
+ ilimit = SCM_STRING_BODY_LENGTH(strb);
+ else
+ ilimit = SCM_INT_VALUE(limit);
+
if (SCM_STRING_BODY_INCOMPLETE_P(strb)) {
/* TODO: fix the policy of handling incomplete string */
Scm_Error("incomplete string not accepted: %S", str);
@@ -871,11 +877,12 @@ ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch)
SCM_CHAR_GET(p, cc);
ncc = SCM_CHAR_NBYTES(cc);
- if (ch == cc) {
+ if ((ilimit > 0) && (ch == cc)) {
SCM_APPEND1(head, tail, Scm_MakeString(s, sizecnt, lencnt, 0));
sizecnt = lencnt = 0;
p += ncc;
s = p;
+ --ilimit;
} else {
p += ncc;
sizecnt += ncc;
diff --git a/test/string.scm b/test/string.scm
index de48ff3..73b8ebe 100644
--- a/test/string.scm
+++ b/test/string.scm
@@ -208,6 +208,99 @@
(test* "string-split (predicate)" '("" "---" "***" "&" "")
(string-split "aa---bbb***c&d" char-alphabetic?))
+(test-section "string-split(with limit)")
+
+(test* "string-split (char)" '("aa*bbb*c**")
+ (string-split "aa*bbb*c**" #\* 0))
+(test* "string-split (char)" '("aa" "bbb*c**")
+ (string-split "aa*bbb*c**" #\* 1))
+(test* "string-split (char)" '("aa" "bbb" "c**")
+ (string-split "aa*bbb*c**" #\* 2))
+
+(test* "string-split (char)" '("aa")
+ (string-split "aa" #\* 1))
+(test* "string-split (char)" '("")
+ (string-split "" #\* 2))
+(test* "string-split (char)" '("" "")
+ (string-split "*" #\* 3))
+(test* "string-split (char)" '("*")
+ (string-split "*" #\* 0))
+
+(test* "string-split (1-char string)" '("aa" "bbb*c")
+ (string-split "aa*bbb*c" "*" 1))
+(test* "string-split (1-char string)" '("aa" "bbb" "c")
+ (string-split "aa*bbb*c" "*" 2))
+
+(test* "string-split (string)" '("aa**bbb**c*c")
+ (string-split "aa**bbb**c*c" "**" 0))
+(test* "string-split (string)" '("aa" "bbb**c*c")
+ (string-split "aa**bbb**c*c" "**" 1))
+(test* "string-split (string)" '("aa" "bbb" "c*c")
+ (string-split "aa**bbb**c*c" "**" 2))
+(test* "string-split (string)" '("aa" "bbb" "c*c")
+ (string-split "aa**bbb**c*c" "**" 3))
+(test* "string-split (string)" '("aa**bbb**c*c")
+ (string-split "aa**bbb**c*c" "--" 2))
+(test* "string-split (string)" '("aa**bbb**c*c**")
+ (string-split "aa**bbb**c*c**" "**" 0))
+(test* "string-split (string)" '("aa" "bbb**c*c**")
+ (string-split "aa**bbb**c*c**" "**" 1))
+(test* "string-split (string)" '("aa" "bbb" "c*c**")
+ (string-split "aa**bbb**c*c**" "**" 2))
+(test* "string-split (string)" '("aa" "bbb" "c*c" "")
+ (string-split "aa**bbb**c*c**" "**" 3))
+(test* "string-split (string)" '("")
+ (string-split "" "**" 0))
+(test* "string-split (string)" '("")
+ (string-split "" "**" 1))
+(test* "string-split (string)" '("**")
+ (string-split "**" "**" 0))
+(test* "string-split (string)" '("" "")
+ (string-split "**" "**" 1))
+
+(test* "string-split (regexp)" '("aa--bbb--c-c")
+ (string-split "aa--bbb--c-c" #/-+/ 0))
+(test* "string-split (regexp)" '("aa" "bbb--c-c")
+ (string-split "aa--bbb--c-c" #/-+/ 1))
+(test* "string-split (regexp)" '("aa" "bbb" "c-c")
+ (string-split "aa--bbb--c-c" #/-+/ 2))
+(test* "string-split (regexp)" '("aa" "bbb" "c" "c")
+ (string-split "aa--bbb--c-c" #/-+/ 3))
+(test* "string-split (regexp)" '("aa" "bbb" "c" "c")
+ (string-split "aa--bbb--c-c" #/-+/ 4))
+(test* "string-split (regexp)" '("aa--bbb---c-c")
+ (string-split "aa--bbb---c-c" #/--/ 0))
+(test* "string-split (regexp)" '("aa" "bbb---c-c")
+ (string-split "aa--bbb---c-c" #/--/ 1))
+(test* "string-split (regexp)" '("aa" "bbb" "-c-c")
+ (string-split "aa--bbb---c-c" #/--/ 2))
+(test* "string-split (regexp)" '("aa" "bbb" "-c-c")
+ (string-split "aa--bbb---c-c" #/--/ 3))
+(test* "string-split (regexp)" '("--aa--bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/-+/ 0))
+(test* "string-split (regexp)" '("" "aa--bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/-+/ 1))
+(test* "string-split (regexp)" '("" "aa" "bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/-+/ 2))
+(test* "string-split (regexp)" '("" "aa" "bbb" "c-c-")
+ (string-split "--aa--bbb---c-c-" #/-+/ 3))
+(test* "string-split (regexp)" '("" "aa" "bbb" "c" "c-")
+ (string-split "--aa--bbb---c-c-" #/-+/ 4))
+(test* "string-split (regexp)" '("--aa--bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/\w+/ 0))
+(test* "string-split (regexp)" '("--" "--bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/\w+/ 1))
+(test* "string-split (regexp)" '("--" "--" "---c-c-")
+ (string-split "--aa--bbb---c-c-" #/\w+/ 2))
+(test* "string-split (regexp)" '("--" "--" "---" "-c-")
+ (string-split "--aa--bbb---c-c-" #/\w+/ 3))
+(test* "string-split (regexp)" '("--aa--bbb---c-c-")
+ (string-split "--aa--bbb---c-c-" #/z+/ 2))
+(test* "string-split (regexp)" (test-error) ;; test detection of infinite loop
+ (string-split "--aa--bbb---c-c-" #/-*/ 2))
+(test* "string-split (negative limit)" (test-error)
+ (string-split "--aa--bbb---c-c-" #/-+/ -1))
+
;;-------------------------------------------------------------------
(test-section "incomplete strings")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment