Created
October 12, 2014 02:42
-
-
Save roman-yepishev/2ca95c0bb26b8727acf1 to your computer and use it in GitHub Desktop.
String tokenizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
/*
 * Report whether the character `s` is a member of the separator set.
 *
 * s          - character to look up.
 * separators - NUL-terminated string listing every separator character;
 *              must not be NULL.
 *
 * Returns 1 when `s` equals one of the characters in `separators`,
 * 0 otherwise. The terminating '\0' of `separators` is not part of the
 * set, so is_sep('\0', ...) is always 0.
 */
int is_sep(const char s, const char *separators)
{
    const char *p;

    /*
     * Walk the set once, stopping at its terminator, instead of
     * recomputing strlen(separators) on every loop iteration.
     */
    for (p = separators; *p != '\0'; p++) {
        if (*p == s) {
            return 1;
        }
    }
    return 0;
}
/*
 * Split `input` into tokens delimited by any character found in `sep`.
 *
 * Consecutive separators are collapsed and leading/trailing separators are
 * ignored, so no empty tokens are ever produced. An empty `sep` yields the
 * whole (non-empty) input as a single token.
 *
 * input - NUL-terminated string to tokenize; it is not modified.
 * sep   - NUL-terminated set of separator characters.
 *
 * Returns a newly allocated array of newly allocated NUL-terminated
 * strings, terminated by a NULL entry. The caller owns the array and every
 * string in it (free each string, then the array; free_r() does both).
 * Returns NULL when `input` or `sep` is NULL or when an allocation fails;
 * on failure everything allocated so far is released.
 */
char **split(const char *input, const char *sep)
{
    size_t capacity = 1;        /* slots allocated, terminator included */
    size_t token_count = 0;
    size_t token_length = 0;
    size_t i;
    char **result = NULL;
    char **tmp = NULL;
    char *token = NULL;
    const char *cursor = input;

    if (!input || !sep) {
        return NULL;
    }

    result = calloc(capacity, sizeof(*result));
    if (!result) {
        perror("calloc");
        return NULL;
    }

    for (;;) {
        /* Skip any run of separators in front of the next token. */
        cursor += strspn(cursor, sep);
        if (*cursor == '\0') {
            break;
        }

        /* The token extends up to the next separator or end of input. */
        token_length = strcspn(cursor, sep);

        /*
         * Always keep one spare slot for the terminating NULL so that no
         * allocation (and therefore no failure) is needed after the loop.
         */
        if (token_count + 1 >= capacity) {
            if (capacity > ((size_t)-1) / (2 * sizeof(*result))) {
                fprintf(stderr, "split: too many tokens\n");
                goto err_out;
            }
            capacity *= 2;
            tmp = realloc(result, sizeof(*result) * capacity);
            if (!tmp) {
                perror("realloc");
                goto err_out;
            }
            result = tmp;
        }

        /* +1 for the terminating null character */
        token = malloc(token_length + 1);
        if (!token) {
            perror("malloc");
            goto err_out;
        }
        memcpy(token, cursor, token_length);
        token[token_length] = '\0';

        result[token_count] = token;
        token_count++;
        cursor += token_length;
    }

    result[token_count] = NULL;
    return result;

err_out:
    /* Release every token copied so far, then the array itself. */
    for (i = 0; i < token_count; i++) {
        free(result[i]);
    }
    free(result);
    return NULL;
}
/*
 * Free a NULL-terminated array of heap pointers together with the array.
 *
 * ptr - array whose entries, up to the first NULL entry, were each
 *       obtained from malloc/calloc/realloc, as was the array itself.
 *       May be NULL, in which case nothing happens (mirrors free(NULL)).
 */
void free_r(void **ptr)
{
    size_t i;

    if (ptr == NULL) {
        return;
    }

    for (i = 0; ptr[i] != NULL; i++) {
        free(ptr[i]);
    }
    free(ptr);
}
/*
 * Command-line driver: tokenizes argv[1] using the separator characters in
 * argv[2] and prints each token on its own line.
 *
 * Returns 0 on success, 1 on wrong usage or when split() fails.
 */
int main(int argc, char **argv)
{
    char **result = NULL;
    int i;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s string separator\n", argv[0]);
        return 1;
    }

    result = split(argv[1], argv[2]);
    if (!result) {
        /* Diagnostics belong on stderr, like the usage message above. */
        fprintf(stderr, "split() failed\n");
        return 1;
    }

    /* The array returned by split() ends with a NULL entry. */
    for (i = 0; result[i] != NULL; i++) {
        printf("%s\n", result[i]);
    }

    free_r((void **)result);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment