Last active
May 17, 2018 17:31
-
-
Save larytet/247f09c0edae49fd2f2cea1fdfdc68d0 to your computer and use it in GitHub Desktop.
C++ Boost regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cctype>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <boost/regex.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
// Result sink written by every test_* routine in this file and never read.
// NOTE(review): presumably declared volatile so the optimizer cannot discard
// the benchmarked work as dead stores -- confirm.
volatile int groupindex{0};
// Browser families recognised by the benchmarks in this file.
// The numeric values are significant: they are stored into groupindex and
// used as zero-based indices into the browser-name tables and (plus one)
// as capture-group numbers of the browser regex.
enum BrowserType
{
    FIREFOX = 0,
    CHROME  = 1,
    SAFARI  = 2,
    OPERA   = 3,
    IE      = 4,
    EDGE    = 5,
    OTHER   = 6
};
struct browser_type_sym : boost::spirit::qi::symbols<char, BrowserType> { | |
browser_type_sym() { | |
this->add | |
("Firefox", FIREFOX) | |
("Chrome", CHROME) | |
("Safari", SAFARI) | |
("Opera", OPERA) | |
("MSIE", IE) | |
("Edge", EDGE) | |
("Trident", OTHER); | |
} | |
} static const browser_type; | |
template <typename Types> | |
bool extract_browser_ids(std::string const& userAgent, Types& into) { | |
using boost::spirit::repository::qi::seek; | |
return parse(userAgent.begin(), userAgent.end(), *seek [ browser_type ], into); | |
} | |
// Alternation of the known browser tokens. Each alternative is its own
// capture group, listed in BrowserType order, so capture group N (1-based)
// corresponds to the BrowserType with value N-1.
boost::regex const BROWSERS_REGEX(
    "(Firefox)|(Chrome)|(Safari)|(Opera)|(MSIE)|(Edge)|(Trident)");
int test_vector() { | |
std::string const input("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 Chrome/62.0.3202.94"); | |
std::vector<BrowserType> types; | |
extract_browser_ids(input, types); | |
for(auto type : types) | |
{ | |
groupindex = type; | |
} | |
return 0; | |
} | |
int test_regex() | |
{ | |
// I expect two matches here CHROME and SAFARI | |
std::string input("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 Chrome/62.0.3202.94"); | |
boost::sregex_iterator res(input.begin(), input.end(), BROWSERS_REGEX); | |
boost::sregex_iterator end; | |
for(; res != end; ++res) | |
{ | |
// elude copy here ? | |
boost::smatch what = *res; | |
// Can I know the index of the matching group w/o 'for'? | |
for (int type = 0;type < OTHER;type++) | |
{ | |
if (what[type+1].matched) | |
{ | |
groupindex = type; | |
} | |
} | |
} | |
return 0; | |
} | |
void test_non_boost() | |
{ | |
const char *input = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 Chrome/62.0.3202.94"; | |
const char *browsers[7] = {"Firefox", "Chrome", "Safari", "Opera", "MSIE", "Edge", "Trident"}; | |
int i = 0; | |
for (const char **browser = &browsers[0];browser <= &browsers[6];browser++, i++) | |
{ | |
if (strstr(input, *browser)) | |
{ | |
groupindex = i; | |
} | |
} | |
} | |
void test_isalpha() | |
{ | |
const char *input = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 Chrome/62.0.3202.94"; | |
const char *browsers[OTHER+1] = {"Firefox", "Chrome", "Safari", "Opera", "MSIE", "Edge", "Trident"}; | |
const size_t browsers_len[OTHER+1] = {7, 5, 6, 5, 4, 4, 6}; | |
bool isFirstLetter = true; | |
size_t _minSize = 4; | |
size_t length = 0; | |
for (const char *i = &input[0] ; *i != 0; ++i) | |
{ | |
//If alpha its may be part of token | |
if(isalpha(*i)) | |
{ | |
//Mark beginning of token | |
if(isFirstLetter) | |
{ | |
isFirstLetter = false; | |
//Mark position in string | |
} | |
++length; | |
} | |
else | |
{ | |
if(length >= _minSize) | |
{ | |
//compare | |
for(size_t j = 0; j < (OTHER+1) ; ++j) | |
{ | |
if(browsers_len[j] == length && strncmp (browsers[j], i,length) == 0) | |
{ | |
groupindex = j; | |
} | |
} | |
} | |
length = 0; | |
isFirstLetter = true; | |
} | |
} | |
} | |
int main() | |
{ | |
for (int i =0;i < 500*1000;i++) | |
{ | |
// test_vector(); | |
} | |
for (int i =0;i < 500*1000;i++) | |
{ | |
// test_regex(); | |
} | |
for (int i =0;i < 500*1000;i++) | |
{ | |
// test_non_boost(); | |
} | |
for (int i =0;i < 500*1000;i++) | |
{ | |
test_isalpha(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment