-
-
Save varvaruc/4aecaf564db9b3a79b92f59f8b184b35 to your computer and use it in GitHub Desktop.
My C version of preg_match()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* File: main.c | |
* Author: solution | |
* | |
* Created on 9. listopad 2011, 22:08 | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include "Regpx.h" | |
/* | |
* | |
*/ | |
int main(int argc, char** argv) | |
{ | |
char **results; | |
int i = 0; | |
results = preg_match("(\\w+):(\\w+)", "Word:abbrevation:discous:dictionary:list:hashmap"); | |
while(results[i] != NULL) | |
{ | |
printf("%s\n", results[i]); | |
i++; | |
} | |
printf("\t --- \t\n"); | |
results = NULL; | |
struct resulter **resultset = preg_match_all("(\\w+):(\\w+)", "Word:abbrevation:discous:dictionary:list:hashmap"); | |
i = 0; | |
printf("Počet výsledků: %d\n", resultset[0]->info->countOfMatches); | |
printf("Velikost výsledků: %d\n", sizeof(resultset)); | |
while(resultset[i]) | |
{ | |
printf("%s\n", resultset[i]->match); | |
printf("%s\n", resultset[i]->info->pattern); | |
i++; | |
} | |
return (EXIT_SUCCESS); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* File: Regxp.h | |
* Author: solution | |
* | |
* Created on 11. listopad 2011, 16:00 | |
*/ | |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#define COUNT(a, b) (sizeof(a)/sizeof(b)) | |
#ifndef REGXP_H | |
#define REGXP_H | |
#define OVECCOUNT 30 | |
struct resulter{ | |
char *subStringName; | |
char *match; | |
struct resulter *subMatch; | |
struct pcreInfo *info; | |
}; | |
struct pcreInfo{ | |
char *pattern; | |
int patternGroups; | |
int countOfMatches; | |
}; | |
char *preg_error(int rc); | |
pcre *preg_compile(char* regxp); | |
char **preg_match(char *regxp, char *data); | |
struct resulter **preg_match_all(char *regxp, char *data); | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
#ifdef __cplusplus | |
} | |
#endif | |
#endif /* REGXP_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "Regpx.h" | |
pcre *preg_compile(char* regxp) | |
{ | |
pcre *re; // Pcre pointer | |
const char *error; // Const char error pointer | |
int errorOffset; | |
re = pcre_compile( | |
regxp, /* regexp */ | |
PCRE_MULTILINE, /* default options */ | |
&error, /* for error message */ | |
&errorOffset, /* for error offset */ | |
NULL /* use default character tab*/ | |
); | |
if(!re) | |
{ | |
printf("PCRE compilation failed at expression offset %d: %s\n", | |
errorOffset, | |
error); | |
exit(1); | |
} | |
return re; | |
} | |
char *preg_error(int rc) | |
{ | |
char *errorMsg = (char*) malloc(128); | |
switch(rc) | |
{ | |
case PCRE_ERROR_NOMATCH : | |
errorMsg = "No match found in string \n"; | |
break; | |
case PCRE_ERROR_MATCHLIMIT : | |
errorMsg = "Match is out of possible matches \n"; | |
break; | |
case PCRE_ERROR_NOMEMORY : | |
errorMsg = "Out of memory \n"; | |
break; | |
default: | |
sprintf(errorMsg, "Match error %d \n", rc); | |
break; | |
} | |
return errorMsg; | |
} | |
char **preg_match(char *regxp, char *data) | |
{ | |
pcre *re = preg_compile(regxp); // Pcre pointer | |
int rc, ovector[OVECCOUNT], groupsCount; //state integers | |
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &groupsCount); | |
rc = pcre_exec( | |
re, /* The compiled pattern */ | |
NULL, /* No extra data */ | |
data, /* the subject string */ | |
strlen(data), /* Length of subject */ | |
0, /* Start at offset 0*/ | |
0, /* Default options */ | |
ovector, /* Output vector for substring info */ | |
OVECCOUNT); /* Number of elements in the output */ | |
if(rc < 0) | |
{ | |
printf("%s", preg_error(rc)); | |
exit(1); | |
} | |
char **results = (char**) malloc(OVECCOUNT * sizeof(char)); | |
if(rc < 0) | |
{ | |
printf("Match did not catch all the groups \n"); | |
} | |
else{ | |
char *tmp; | |
int i = 3, it = 0, tmpInt = 0; | |
for(it; it < groupsCount; it++) | |
{ | |
tmp = data + ovector[i-1]; | |
tmpInt = ovector[i] - ovector[i-1]; | |
results[it] = (char*) malloc(tmpInt + 1); | |
sprintf(results[it], "%.*s\0", tmpInt, tmp); | |
i += 2; | |
} | |
} | |
return results; | |
} | |
struct resulter **preg_match_all(char *regxp, char *data) | |
{ | |
pcre *re = preg_compile(regxp); // Pcre pointer | |
int rc, ovector[OVECCOUNT], offsetStart = 0, groupsCount; //state integers | |
static struct pcreInfo info; | |
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &groupsCount); | |
info.pattern = regxp; | |
info.patternGroups = groupsCount; | |
// char **results = (char**) malloc(OVECCOUNT * sizeof(char)); | |
int it = 0, ite = 0, i, tmpInt; // Iterators and tmpInteger for size | |
struct resulter **results; | |
results = (struct resulter**) malloc(OVECCOUNT * sizeof(struct resulter*)); | |
while((rc = pcre_exec( | |
re, /* The compiled pattern */ | |
NULL, /* No extra data */ | |
data, /* the subject string */ | |
strlen(data), /* Length of subject */ | |
offsetStart, /* Start at offset 0*/ | |
0, /* Default options */ | |
ovector, /* Output vector for substring info */ | |
OVECCOUNT)) > 0) /* Number of elements in the output */ | |
{ | |
char *tmp; | |
tmpInt = 0, i = 3; | |
for(it = 0; it < groupsCount; it++) | |
{ | |
tmp = data + ovector[i-1]; | |
tmpInt = ovector[i] - ovector[i-1]; | |
if(tmpInt > 0) | |
{ | |
results[ite] = (struct resulter*) malloc(sizeof(struct resulter)); | |
results[ite]->match = (char*) malloc(tmpInt + 1); | |
sprintf(results[ite]->match, "%.*s\0", tmpInt, tmp); | |
results[ite]->info = &info; | |
} | |
i += 2; ++ite; | |
} | |
offsetStart = ovector[i-2]; | |
(*ovector) = 0; | |
} | |
info.countOfMatches = ite; | |
return results; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment