Created
January 23, 2013 00:05
-
-
Save bizenn/4600197 to your computer and use it in GitHub Desktop.
Patch to add limit optional parameter to string-split on Gauche. Sorry but not including document updates.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/gauche/stringutil.scm b/lib/gauche/stringutil.scm | |
index d252322..388eb88 100644 | |
--- a/lib/gauche/stringutil.scm | |
+++ b/lib/gauche/stringutil.scm | |
@@ -44,12 +44,20 @@ | |
;; Generic string-split | |
;; splitter can be a character, a char-set, a string, or a regexp. | |
-(define (string-split string splitter) | |
- (cond ((char? splitter) (%string-split-by-char string splitter)) | |
- ((and (string? splitter) (= (string-length splitter) 1)) | |
- (%string-split-by-char string (string-ref splitter 0))) | |
- (else (%string-split string (%string-split-scanner splitter))) | |
- )) | |
+(define (string-split string splitter :optional limit) | |
+ (let* ((limit_ (cond ((undefined? limit) (string-length string)) | |
+ ((< limit 0) (error "positive exact integer or 0 required for limit, byt got" limit)) | |
+ (else limit))) | |
+ ;; In order to build Gauche itself by old Gauche. | |
+ (%%string-split-by-char | |
+ (if (undefined? limit) | |
+ (cut %string-split-by-char <> <>) | |
+ (cut %string-split-by-char <> <> limit_) | |
+ ))) | |
+ (cond ((char? splitter) (%%string-split-by-char string splitter)) | |
+ ((and (string? splitter) (= (string-length splitter) 1)) | |
+ (%%string-split-by-char string (string-ref splitter 0))) | |
+ (else (%string-split string (%string-split-scanner splitter) limit_))))) | |
;; aux fns | |
(define (%string-split-scanner splitter) | |
@@ -94,11 +102,15 @@ | |
[else (values before (string-pointer-substring p :after #t))]))) | |
(lambda (s) (scan-in (make-string-pointer s)))) | |
-(define (%string-split string scanner) | |
- (let loop ((s string) | |
- (r '())) | |
- (receive (before after) (scanner s) | |
- (if after | |
- (loop after (cons before r)) | |
- (reverse! (cons before r)))))) | |
- | |
+(define (%string-split string scanner limit) | |
+ (if (= 0 limit) | |
+ `(,string) | |
+ (let loop ((s string) | |
+ (count limit) | |
+ (r '())) | |
+ (if (< count 1) | |
+ (reverse! (cons s r)) | |
+ (receive (before after) (scanner s) | |
+ (if after | |
+ (loop after (- count 1) (cons before r)) | |
+ (reverse! (cons before r)))))))) | |
diff --git a/src/gauche/string.h b/src/gauche/string.h | |
index 374f688..855ebd1 100644 | |
--- a/src/gauche/string.h | |
+++ b/src/gauche/string.h | |
@@ -231,7 +231,7 @@ enum { | |
* Searching | |
*/ | |
-SCM_EXTERN ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch); | |
+SCM_EXTERN ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch, ScmObj limit); | |
SCM_EXTERN ScmObj Scm_StringScan(ScmString *s1, ScmString *s2, int retmode); | |
SCM_EXTERN ScmObj Scm_StringScanChar(ScmString *s1, ScmChar ch, int retmode); | |
SCM_EXTERN ScmObj Scm_StringScanRight(ScmString *s1, ScmString *s2, int retmode); | |
diff --git a/src/libstr.scm b/src/libstr.scm | |
index b30bca4..193ab30 100644 | |
--- a/src/libstr.scm | |
+++ b/src/libstr.scm | |
@@ -109,7 +109,7 @@ | |
(select-module gauche.internal) | |
;; see lib/gauche/string for generic string-split | |
-(define-cproc %string-split-by-char (s::<string> ch::<char>) | |
+(define-cproc %string-split-by-char (s::<string> ch::<char> :optional limit) | |
Scm_StringSplitByChar) | |
(define-cproc %maybe-substring (str::<string> :optional start end) | |
diff --git a/src/load.c b/src/load.c | |
index b3fb37b..f35d823 100644 | |
--- a/src/load.c | |
+++ b/src/load.c | |
@@ -421,7 +421,7 @@ static ScmObj break_env_paths(const char *envname) | |
return SCM_NIL; | |
} else { | |
return Scm_StringSplitByChar(SCM_STRING(SCM_MAKE_STR_COPYING(e)), | |
- delim); | |
+ delim, SCM_UNBOUND); | |
} | |
} | |
diff --git a/src/main.c b/src/main.c | |
index 6ceae4c..0e197b7 100644 | |
--- a/src/main.c | |
+++ b/src/main.c | |
@@ -489,7 +489,7 @@ int main(int argc, char **argv) | |
break; | |
case 'u': | |
if (Scm_Require(Scm_StringJoin(Scm_StringSplitByChar(SCM_STRING(v), | |
- '.'), | |
+ '.', SCM_UNBOUND), | |
SCM_STRING(SCM_MAKE_STR("/")), | |
SCM_STRING_JOIN_INFIX), | |
0, &lpak) < 0) { | |
diff --git a/src/string.c b/src/string.c | |
index ed73ac3..fa36feb 100644 | |
--- a/src/string.c | |
+++ b/src/string.c | |
@@ -852,7 +852,7 @@ ScmObj Scm_MaybeSubstring(ScmString *x, ScmObj start, ScmObj end) | |
/* Split string by char. Char itself is not included in the result. */ | |
/* TODO: fix semantics. What should be returned for (string-split "" #\.)? */ | |
-ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch) | |
+ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch, ScmObj limit) | |
{ | |
const ScmStringBody *strb = SCM_STRING_BODY(str); | |
int size = SCM_STRING_BODY_SIZE(strb), sizecnt = 0; | |
@@ -860,6 +860,12 @@ ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch) | |
const char *s = SCM_STRING_BODY_START(strb), *p = s, *e = s + size; | |
ScmObj head = SCM_NIL, tail = SCM_NIL; | |
+ int ilimit; | |
+ if (SCM_UNBOUNDP(limit)) | |
+ ilimit = SCM_STRING_BODY_LENGTH(strb); | |
+ else | |
+ ilimit = SCM_INT_VALUE(limit); | |
+ | |
if (SCM_STRING_BODY_INCOMPLETE_P(strb)) { | |
/* TODO: fix the policy of handling incomplete string */ | |
Scm_Error("incomplete string not accepted: %S", str); | |
@@ -871,11 +877,12 @@ ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch) | |
SCM_CHAR_GET(p, cc); | |
ncc = SCM_CHAR_NBYTES(cc); | |
- if (ch == cc) { | |
+ if ((ilimit > 0) && (ch == cc)) { | |
SCM_APPEND1(head, tail, Scm_MakeString(s, sizecnt, lencnt, 0)); | |
sizecnt = lencnt = 0; | |
p += ncc; | |
s = p; | |
+ --ilimit; | |
} else { | |
p += ncc; | |
sizecnt += ncc; | |
diff --git a/test/string.scm b/test/string.scm | |
index de48ff3..73b8ebe 100644 | |
--- a/test/string.scm | |
+++ b/test/string.scm | |
@@ -208,6 +208,99 @@ | |
(test* "string-split (predicate)" '("" "---" "***" "&" "") | |
(string-split "aa---bbb***c&d" char-alphabetic?)) | |
+(test-section "string-split(with limit)") | |
+ | |
+(test* "string-split (char)" '("aa*bbb*c**") | |
+ (string-split "aa*bbb*c**" #\* 0)) | |
+(test* "string-split (char)" '("aa" "bbb*c**") | |
+ (string-split "aa*bbb*c**" #\* 1)) | |
+(test* "string-split (char)" '("aa" "bbb" "c**") | |
+ (string-split "aa*bbb*c**" #\* 2)) | |
+ | |
+(test* "string-split (char)" '("aa") | |
+ (string-split "aa" #\* 1)) | |
+(test* "string-split (char)" '("") | |
+ (string-split "" #\* 2)) | |
+(test* "string-split (char)" '("" "") | |
+ (string-split "*" #\* 3)) | |
+(test* "string-split (char)" '("*") | |
+ (string-split "*" #\* 0)) | |
+ | |
+(test* "string-split (1-char string)" '("aa" "bbb*c") | |
+ (string-split "aa*bbb*c" "*" 1)) | |
+(test* "string-split (1-char string)" '("aa" "bbb" "c") | |
+ (string-split "aa*bbb*c" "*" 2)) | |
+ | |
+(test* "string-split (string)" '("aa**bbb**c*c") | |
+ (string-split "aa**bbb**c*c" "**" 0)) | |
+(test* "string-split (string)" '("aa" "bbb**c*c") | |
+ (string-split "aa**bbb**c*c" "**" 1)) | |
+(test* "string-split (string)" '("aa" "bbb" "c*c") | |
+ (string-split "aa**bbb**c*c" "**" 2)) | |
+(test* "string-split (string)" '("aa" "bbb" "c*c") | |
+ (string-split "aa**bbb**c*c" "**" 3)) | |
+(test* "string-split (string)" '("aa**bbb**c*c") | |
+ (string-split "aa**bbb**c*c" "--" 2)) | |
+(test* "string-split (string)" '("aa**bbb**c*c**") | |
+ (string-split "aa**bbb**c*c**" "**" 0)) | |
+(test* "string-split (string)" '("aa" "bbb**c*c**") | |
+ (string-split "aa**bbb**c*c**" "**" 1)) | |
+(test* "string-split (string)" '("aa" "bbb" "c*c**") | |
+ (string-split "aa**bbb**c*c**" "**" 2)) | |
+(test* "string-split (string)" '("aa" "bbb" "c*c" "") | |
+ (string-split "aa**bbb**c*c**" "**" 3)) | |
+(test* "string-split (string)" '("") | |
+ (string-split "" "**" 0)) | |
+(test* "string-split (string)" '("") | |
+ (string-split "" "**" 1)) | |
+(test* "string-split (string)" '("**") | |
+ (string-split "**" "**" 0)) | |
+(test* "string-split (string)" '("" "") | |
+ (string-split "**" "**" 1)) | |
+ | |
+(test* "string-split (regexp)" '("aa--bbb--c-c") | |
+ (string-split "aa--bbb--c-c" #/-+/ 0)) | |
+(test* "string-split (regexp)" '("aa" "bbb--c-c") | |
+ (string-split "aa--bbb--c-c" #/-+/ 1)) | |
+(test* "string-split (regexp)" '("aa" "bbb" "c-c") | |
+ (string-split "aa--bbb--c-c" #/-+/ 2)) | |
+(test* "string-split (regexp)" '("aa" "bbb" "c" "c") | |
+ (string-split "aa--bbb--c-c" #/-+/ 3)) | |
+(test* "string-split (regexp)" '("aa" "bbb" "c" "c") | |
+ (string-split "aa--bbb--c-c" #/-+/ 4)) | |
+(test* "string-split (regexp)" '("aa--bbb---c-c") | |
+ (string-split "aa--bbb---c-c" #/--/ 0)) | |
+(test* "string-split (regexp)" '("aa" "bbb---c-c") | |
+ (string-split "aa--bbb---c-c" #/--/ 1)) | |
+(test* "string-split (regexp)" '("aa" "bbb" "-c-c") | |
+ (string-split "aa--bbb---c-c" #/--/ 2)) | |
+(test* "string-split (regexp)" '("aa" "bbb" "-c-c") | |
+ (string-split "aa--bbb---c-c" #/--/ 3)) | |
+(test* "string-split (regexp)" '("--aa--bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/-+/ 0)) | |
+(test* "string-split (regexp)" '("" "aa--bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/-+/ 1)) | |
+(test* "string-split (regexp)" '("" "aa" "bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/-+/ 2)) | |
+(test* "string-split (regexp)" '("" "aa" "bbb" "c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/-+/ 3)) | |
+(test* "string-split (regexp)" '("" "aa" "bbb" "c" "c-") | |
+ (string-split "--aa--bbb---c-c-" #/-+/ 4)) | |
+(test* "string-split (regexp)" '("--aa--bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/\w+/ 0)) | |
+(test* "string-split (regexp)" '("--" "--bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/\w+/ 1)) | |
+(test* "string-split (regexp)" '("--" "--" "---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/\w+/ 2)) | |
+(test* "string-split (regexp)" '("--" "--" "---" "-c-") | |
+ (string-split "--aa--bbb---c-c-" #/\w+/ 3)) | |
+(test* "string-split (regexp)" '("--aa--bbb---c-c-") | |
+ (string-split "--aa--bbb---c-c-" #/z+/ 2)) | |
+(test* "string-split (regexp)" (test-error) ;; test detection of infinite loop | |
+ (string-split "--aa--bbb---c-c-" #/-*/ 2)) | |
+(test* "string-split (negative limit)" (test-error) | |
+ (string-split "--aa--bbb---c-c-" #/-+/ -1)) | |
+ | |
;;------------------------------------------------------------------- | |
(test-section "incomplete strings") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment