Created
January 6, 2021 12:30
-
-
Save EdThePro101/14b2ac5507fbe36be92adaafb556801c to your computer and use it in GitHub Desktop.
Tokenise a string in C by whitespace.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
// Check if a character is a decimal digit.
// Returns 1 when curr is one of '0'..'9', otherwise 0.
char is_digit(char curr) {
    return (curr >= '0' && curr <= '9');
}
// Check if a character is an unaccented Latin letter.
// Returns 1 when curr is in 'A'..'Z' or 'a'..'z', otherwise 0.
// The range comparisons rely on the letters being contiguous in the
// execution character set, which holds for ASCII-compatible encodings.
char is_letter(char curr) {
    const int is_upper = (curr >= 'A' && curr <= 'Z');
    const int is_lower = (curr >= 'a' && curr <= 'z');
    return (char)(is_upper || is_lower);
}
// Check if a character is whitespace.
// Returns 1 for space, newline, carriage return, horizontal tab,
// vertical tab and form feed (the six white-space characters of the
// "C" locale), otherwise 0.
char is_whitespace(char curr) {
    switch (curr) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
    case '\f':
        return 1;
    default:
        return 0;
    }
}
// Tokenise a string on whitespace and print one token per line to stdout.
//
// Each token is printed as "<LABEL>: <characters>\n", where the label is
// "IDENT" when the token's first accepted character is a letter and
// "NUMBER" when it is a digit. Only letters and digits are accepted into a
// token; any other non-whitespace character is skipped without ending the
// current token (so "a+b" prints as "IDENT: ab").
//
// string: NUL-terminated input; it is only read. A NULL pointer is
//         treated as empty input and prints nothing.
//
// Characters are written as they are scanned instead of being collected in
// a fixed-size buffer, so tokens of any length are handled safely.
void tokenise_string(char *string) {
    if (string == NULL) {
        return;
    }

    // Non-zero while the label of the current token has been printed and
    // its terminating newline has not.
    int in_token = 0;

    // Walk to the terminator directly; no repeated strlen() per iteration.
    for (const char *cursor = string; *cursor != '\0'; ++cursor) {
        const char curr = *cursor;
        const int letter = is_letter(curr);

        if (letter || is_digit(curr)) {
            // The first accepted character decides the token's label.
            if (!in_token) {
                fputs(letter ? "IDENT: " : "NUMBER: ", stdout);
                in_token = 1;
            }
            putchar(curr);
        } else if (is_whitespace(curr)) {
            // Whitespace closes the open token, if there is one.
            if (in_token) {
                putchar('\n');
            }
            in_token = 0;
        }
        // Anything else is neither token content nor a separator: ignore it.
    }

    // Input ended in the middle of a token: finish its line.
    if (in_token) {
        putchar('\n');
    }
}
// Demo entry point: prints a sample sentence, then each of its tokens.
int main(void) {
    // Held in a writable array because tokenise_string takes a non-const
    // char pointer.
    char string[] = "5 plus 6 equals 11";

    printf("Splitting string:\n%s\n", string);
    tokenise_string(string);

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment