Skip to content

Instantly share code, notes, and snippets.

@ryanjdew
Created May 25, 2014 12:41
Show Gist options
  • Save ryanjdew/4f70b7a9dabca36dba85 to your computer and use it in GitHub Desktop.
Save ryanjdew/4f70b7a9dabca36dba85 to your computer and use it in GitHub Desktop.
C++ map MarkLogic UDF function
/**
 * Buckets every non-null value found in column 0 of `values` by the first
 * match of `regex_compiled` against that value.
 *
 * For each tuple:
 *   - if the regex matches, the matched substring (lower-cased when
 *     `case_insensitive` is set) is used as the bucket key and the full
 *     value is inserted into `buckets` under that key;
 *   - otherwise, when `capture_overflow` is set, the full value is inserted
 *     under the `overflow_bucket` key;
 *   - otherwise the value is skipped.
 *
 * @param values   tuple iterator; only column 0 is read.
 * @param reporter not used by this method.
 *
 * NOTE(review): the String keys/values inserted below are built from
 * std::string buffers that live only for the current loop iteration. If
 * marklogic::String stores the pointer instead of copying the bytes, the
 * entries kept in `buckets` dangle afterwards (the original stack-array
 * version had the same property) -- confirm against the String definition
 * and the declared type of `buckets`.
 */
void BucketeerRegex::
map(TupleIterator& values, Reporter& reporter)
{
    for (; !values.done(); values.next()) {
        if (values.null(0)) {
            continue;
        }

        String cur;
        values.value(0, cur);

        /* Execute regular expression; pmatch[0] receives the overall match. */
        regmatch_t pmatch[1];
        const int rc = regexec(&regex_compiled, cur.get(), 1, pmatch, 0);

        if (rc == 0) {
            // std::string sizes itself including the terminator, unlike the
            // former `char buf[strlen(s)]` + strcpy, which overflowed by one.
            const std::string full_text(cur.get());

            // Extract just the matched span [rm_so, rm_eo).
            const std::string::size_type match_begin =
                static_cast<std::string::size_type>(pmatch[0].rm_so);
            const std::string::size_type match_size =
                static_cast<std::string::size_type>(pmatch[0].rm_eo - pmatch[0].rm_so);
            std::string match_text(full_text, match_begin, match_size);

            // make everything lowercase if case_insensitive option is passed
            if (case_insensitive) {
                for (std::string::size_type i = 0; i < match_text.size(); ++i) {
                    // tolower requires a value representable as unsigned char;
                    // passing a negative plain char is undefined behaviour.
                    match_text[i] = static_cast<char>(
                        ::tolower(static_cast<unsigned char>(match_text[i])));
                }
            }

            // Temporaries are copied into the pair; nothing is heap-allocated
            // with `new`, so nothing leaks.
            buckets.insert(std::pair<String, String>(
                String(match_text.c_str(), cur.collation()),
                String(full_text.c_str(), cur.collation())));
        } else if (capture_overflow) {
            // Any non-zero regexec result (no match or error) lands here.
            const std::string full_text(cur.get());
            buckets.insert(std::pair<String, String>(
                String(overflow_bucket.c_str(), cur.collation()),
                String(full_text.c_str(), cur.collation())));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment