Created
May 25, 2014 12:41
-
-
Save ryanjdew/4f70b7a9dabca36dba85 to your computer and use it in GitHub Desktop.
C++ map MarkLogic UDF function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void BucketeerRegex:: | |
map(TupleIterator& values, Reporter& reporter) | |
{ | |
int reti = 0; | |
for(; !values.done(); values.next()) { | |
if(!values.null(0)) { | |
String cur; | |
values.value(0,cur); | |
/* Execute regular expression */ | |
regmatch_t pmatch[1]; | |
reti = regexec(®ex_compiled, cur.get(), 1, pmatch, 0); | |
if( !reti ){ | |
// add a place for the trailing NULL | |
int match_length = (pmatch[0].rm_eo - (pmatch[0].rm_so)) + 1; | |
char match_str[match_length]; | |
/* If matches then create a copy of the string */ | |
size_t str_length = strlen(cur.get()); | |
char cp_str[str_length]; | |
strcpy(cp_str,cur.get()); | |
// don't set the last value since it needs to be NULL | |
for (int i = 0; i < (match_length - 1); i++) { | |
int str_pos = pmatch[0].rm_so + i; | |
match_str[i] = cp_str[str_pos]; | |
} | |
// add trailing NULL to match | |
match_str[match_length - 1] = '\0'; | |
String* match; | |
// make everything lowercase if case_insensitive option is passed | |
if (case_insensitive) { | |
std::string tmp_str = std::string(match_str); | |
std::transform(tmp_str.begin(), tmp_str.end(),tmp_str.begin(), ::tolower); | |
match = new String(tmp_str.c_str(),cur.collation()); | |
} else { | |
match = new String(match_str,cur.collation()); | |
} | |
/* Store the pointer to the marklogic::String for output later */ | |
buckets.insert(std::pair<String, String>(*(match),*(new String(cp_str,cur.collation())))); | |
} else if (capture_overflow) { | |
size_t str_length = strlen(cur.get()); | |
char cp_str[str_length]; | |
strcpy(cp_str,cur.get()); | |
buckets.insert(std::pair<String, String>(*(new String(overflow_bucket.c_str(),cur.collation())),*(new String(cp_str,cur.collation())))); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment