Created
November 4, 2010 17:12
-
-
Save afiedler/662786 to your computer and use it in GitHub Desktop.
C string tokenizer that returns empty strings for adjacent delimiters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TOKENIZE -- demonstrates a way to tokenize a string, with adjacent delimiters
 * returned as empty strings.
 * Partially based on: http://stackoverflow.com/questions/874161
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Function declarations */
unsigned int tokenize(const char* text, char delim, char*** output);
void reclaim2D(char ***store, unsigned int itemCount);
/* Run one tokenizer test case: tokenize `text` on ',', print every token
 * (1-based), then release the token array. */
static void run_case(const char *label, const char *text) {
    char **tokens = NULL;
    unsigned int i, c;

    printf("%s: '%s'\n", label, text);
    c = tokenize(text, ',', &tokens);
    for (i = 0; i < c; i++) {
        printf("\ttoken %d: '%s'\n", i + 1, tokens[i]);
    }
    reclaim2D(&tokens, c);
}

/* Tokenizer test driver: exercises interior, leading, trailing, and
 * adjacent delimiters, plus the delimiter-only and empty strings. */
int main(void) {
    run_case("string1", "ABC,DE,F");
    run_case("string2", "AB,C,DEF,G,");
    run_case("string3", ",");
    run_case("string4", "");
    run_case("string5", ",,A,,");
    run_case("string6", ",ABC,");
    return 0;
}
/* Split `text` on `delim`, returning the tokens through `*output`.
 *
 * Adjacent delimiters yield empty-string tokens, so the token count is
 * always (number of delimiters + 1); the empty string yields one empty
 * token. The caller MUST pass *output == NULL — this function allocates
 * its own storage — and later release it with reclaim2D(output, count).
 *
 * Returns the token count, or (unsigned int)-1 if *output was non-NULL
 * or an allocation failed (in which case *output is left NULL and
 * nothing is leaked).
 */
unsigned int tokenize(const char* text, char delim, char*** output) {
    size_t ndelims = 0, ntokens, i, start, tok;

    if ((*output) != NULL) return (unsigned int)-1; /* I will allocate my own storage */

    /* First pass: count delimiters. Tokens = delimiters + 1. */
    for (i = 0; text[i] != '\0'; i++) {
        if (text[i] == delim) ndelims++;
    }
    ntokens = ndelims + 1;

    /* Array of char* slots, one per token. No cast on malloc; size from
     * the dereferenced pointer, not a spelled-out type. */
    *output = malloc(ntokens * sizeof **output);
    if (*output == NULL) return (unsigned int)-1;

    /* Second pass: copy each token (possibly empty) into its own buffer.
     * Treating '\0' like a delimiter folds the tail-token case into the
     * loop instead of duplicating the copy code after it. */
    start = 0;
    tok = 0;
    for (i = 0; ; i++) {
        if (text[i] == delim || text[i] == '\0') {
            size_t len = i - start;
            char *s = malloc(len + 1);
            if (s == NULL) {
                /* Allocation failed mid-way: undo all partial work so the
                 * caller sees no allocation at all. */
                while (tok > 0) free((*output)[--tok]);
                free(*output);
                *output = NULL;
                return (unsigned int)-1;
            }
            memcpy(s, &text[start], len); /* len may be 0: memcpy of 0 is fine */
            s[len] = '\0';
            (*output)[tok++] = s;
            start = i + 1; /* next token begins past this delimiter */
            if (text[i] == '\0') break;
        }
    }
    return (unsigned int)ntokens;
}
/* Release the token array produced by tokenize(): frees each of the
 * `itemCount` strings, then the pointer array itself, and NULLs *store
 * so the caller cannot double-free or use it after reclamation. Safe to
 * call when *store is already NULL. */
void reclaim2D(char ***store, unsigned int itemCount)
{
    unsigned int x; /* unsigned: matches itemCount, avoids sign-compare */

    if (*store == NULL) return; /* nothing to reclaim */

    for (x = 0; x < itemCount; x++) {
        free((*store)[x]); /* free(NULL) is a no-op, so no guard needed */
        (*store)[x] = NULL;
    }
    free(*store);
    *store = NULL;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment