Created
November 11, 2011 20:20
-
-
Save Solution/1359119 to your computer and use it in GitHub Desktop.
My C version of preg_match()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * File:   main.c
 * Author: solution
 *
 * Created on 9 November 2011, 22:08
 */
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include "Regpx.h" | |
/* | |
* | |
*/ | |
int main(int argc, char** argv) | |
{ | |
char **results; | |
int i = 0; | |
results = preg_match("(\\w+):(\\w+)", "Word:abbrevation:discous:dictionary:list:hashmap"); | |
while(results[i] != NULL) | |
{ | |
printf("%s\n", results[i]); | |
i++; | |
} | |
printf("\t --- \t\n"); | |
results = NULL; | |
struct resulter **resultset = preg_match_all("(\\w+):(\\w+)", "Word:abbrevation:discous:dictionary:list:hashmap"); | |
i = 0; | |
printf("Počet výsledků: %d\n", resultset[0]->info->countOfMatches); | |
printf("Velikost výsledků: %d\n", sizeof(resultset)); | |
while(resultset[i]) | |
{ | |
printf("%s\n", resultset[i]->match); | |
printf("%s\n", resultset[i]->info->pattern); | |
i++; | |
} | |
return (EXIT_SUCCESS); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * File:   Regxp.h
 * Author: solution
 *
 * Created on 11 November 2011, 16:00
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#define COUNT(a, b) (sizeof(a)/sizeof(b)) | |
#ifndef REGXP_H | |
#define REGXP_H | |
#define OVECCOUNT 30 | |
struct resulter{ | |
char *subStringName; | |
char *match; | |
struct resulter *subMatch; | |
struct pcreInfo *info; | |
}; | |
struct pcreInfo{ | |
char *pattern; | |
int patternGroups; | |
int countOfMatches; | |
}; | |
char *preg_error(int rc); | |
pcre *preg_compile(char* regxp); | |
char **preg_match(char *regxp, char *data); | |
struct resulter **preg_match_all(char *regxp, char *data); | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
#ifdef __cplusplus | |
} | |
#endif | |
#endif /* REGXP_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "Regpx.h" | |
pcre *preg_compile(char* regxp) | |
{ | |
pcre *re; // Pcre pointer | |
const char *error; // Const char error pointer | |
int errorOffset; | |
re = pcre_compile( | |
regxp, /* regexp */ | |
PCRE_MULTILINE, /* default options */ | |
&error, /* for error message */ | |
&errorOffset, /* for error offset */ | |
NULL /* use default character tab*/ | |
); | |
if(!re) | |
{ | |
printf("PCRE compilation failed at expression offset %d: %s\n", | |
errorOffset, | |
error); | |
exit(1); | |
} | |
return re; | |
} | |
char *preg_error(int rc) | |
{ | |
char *errorMsg = (char*) malloc(128); | |
switch(rc) | |
{ | |
case PCRE_ERROR_NOMATCH : | |
errorMsg = "No match found in string \n"; | |
break; | |
case PCRE_ERROR_MATCHLIMIT : | |
errorMsg = "Match is out of possible matches \n"; | |
break; | |
case PCRE_ERROR_NOMEMORY : | |
errorMsg = "Out of memory \n"; | |
break; | |
default: | |
sprintf(errorMsg, "Match error %d \n", rc); | |
break; | |
} | |
return errorMsg; | |
} | |
char **preg_match(char *regxp, char *data) | |
{ | |
pcre *re = preg_compile(regxp); // Pcre pointer | |
int rc, ovector[OVECCOUNT], groupsCount; //state integers | |
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &groupsCount); | |
rc = pcre_exec( | |
re, /* The compiled pattern */ | |
NULL, /* No extra data */ | |
data, /* the subject string */ | |
strlen(data), /* Length of subject */ | |
0, /* Start at offset 0*/ | |
0, /* Default options */ | |
ovector, /* Output vector for substring info */ | |
OVECCOUNT); /* Number of elements in the output */ | |
if(rc < 0) | |
{ | |
printf("%s", preg_error(rc)); | |
exit(1); | |
} | |
char **results = (char**) malloc(OVECCOUNT * sizeof(char)); | |
if(rc < 0) | |
{ | |
printf("Match did not catch all the groups \n"); | |
} | |
else{ | |
char *tmp; | |
int i = 3, it = 0, tmpInt = 0; | |
for(it; it < groupsCount; it++) | |
{ | |
tmp = data + ovector[i-1]; | |
tmpInt = ovector[i] - ovector[i-1]; | |
results[it] = (char*) malloc(tmpInt + 1); | |
sprintf(results[it], "%.*s\0", tmpInt, tmp); | |
i += 2; | |
} | |
} | |
return results; | |
} | |
struct resulter **preg_match_all(char *regxp, char *data) | |
{ | |
pcre *re = preg_compile(regxp); // Pcre pointer | |
int rc, ovector[OVECCOUNT], offsetStart = 0, groupsCount; //state integers | |
static struct pcreInfo info; | |
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &groupsCount); | |
info.pattern = regxp; | |
info.patternGroups = groupsCount; | |
// char **results = (char**) malloc(OVECCOUNT * sizeof(char)); | |
int it = 0, ite = 0, i, tmpInt; // Iterators and tmpInteger for size | |
struct resulter **results; | |
results = (struct resulter**) malloc(OVECCOUNT * sizeof(struct resulter*)); | |
while((rc = pcre_exec( | |
re, /* The compiled pattern */ | |
NULL, /* No extra data */ | |
data, /* the subject string */ | |
strlen(data), /* Length of subject */ | |
offsetStart, /* Start at offset 0*/ | |
0, /* Default options */ | |
ovector, /* Output vector for substring info */ | |
OVECCOUNT)) > 0) /* Number of elements in the output */ | |
{ | |
char *tmp; | |
tmpInt = 0, i = 3; | |
for(it = 0; it < groupsCount; it++) | |
{ | |
tmp = data + ovector[i-1]; | |
tmpInt = ovector[i] - ovector[i-1]; | |
if(tmpInt > 0) | |
{ | |
results[ite] = (struct resulter*) malloc(sizeof(struct resulter)); | |
results[ite]->match = (char*) malloc(tmpInt + 1); | |
sprintf(results[ite]->match, "%.*s\0", tmpInt, tmp); | |
results[ite]->info = &info; | |
} | |
i += 2; ++ite; | |
} | |
offsetStart = ovector[i-2]; | |
(*ovector) = 0; | |
} | |
info.countOfMatches = ite; | |
return results; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for your example! But this code does not work:
struct resulter *resultset = preg_match_all("/M[^zZ][zZ]/", "M806 86l-74 74c-57 -50 -131 -80 -212 -80c-177 0 -320 143 -320 320s143 320 320 320s320 -143 320 -320c0 -82 -30 -156 -80 -212l74 -74c8 -8 8 -20 0 -28s-20 -8 -28 0zM594 298c-8 8 -8 20 0 28s20 8 28 0l109 -110c43 49 69 113 69 184c0 155 -125 280 -280 280 s-280 -125 -280 -280s125 -280 280 -280c70 0 134 26 183 68c-108 109 -109 109 -109 110z");
=(