Created
October 22, 2016 18:13
-
-
Save drpventura/0d2ec68e4412b7b25b34f2a5ffde22ca to your computer and use it in GitHub Desktop.
Splitting a string based on a regular expression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Based on code taken from
// http://en.cppreference.com/w/cpp/regex/regex_iterator#Example,
// last accessed 10/22/2016.
#include <iostream>
#include <regex>
#include <string>
#include <vector>

using namespace std;
/**
 * Demonstrates pulling tokens out of words with std::sregex_iterator.
 *
 * For each sample word, prints the word, the number of regex matches
 * ("tokens") found in it, and the matches themselves, one word per line.
 * Note that the tokens are the substrings the regex MATCHES (runs of
 * non-vowels), not the text left over between matches.
 */
int main() {
    // some words to pick apart
    const std::vector<std::string> words = {"Harry", "hairy", "hare", "the"};

    // A case-insensitive regular expression matching one or more consecutive
    // characters that are not a, e, i, o, u or y (y is treated as a vowel).
    const std::regex words_regex(
        "[^aeiouy]+",
        std::regex_constants::ECMAScript | std::regex_constants::icase);

    // Bind by const reference: the words are only read, so copying is waste.
    for (const auto& word : words) {
        // Run the regex over the word once and keep every match, so the count
        // and the printout below share a single scan.
        std::vector<std::string> tokens;
        for (auto it = std::sregex_iterator(word.begin(), word.end(), words_regex);
             it != std::sregex_iterator(); ++it) {
            tokens.push_back(it->str());
        }

        // print the word and how many tokens it produced, pluralizing "token"
        std::cout << word << ", " << tokens.size() << " token"
                  << (tokens.size() != 1 ? "s: " : ": ");

        // print out the tokens, each followed by a space
        for (const auto& token : tokens) {
            std::cout << token << ' ';
        }

        // '\n' instead of endl: no need to flush the stream after every word
        std::cout << '\n';
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment