Last active
February 24, 2023 22:00
-
-
Save Agnishom/5877230ce7d0a9632e0049edd8b0deab to your computer and use it in GitHub Desktop.
PCRE Execution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <pcre.h> | |
#include <chrono> | |
using namespace std; | |
int main() { | |
string pattern; | |
string input; | |
getline(cin, pattern); | |
// Compile the regex pattern | |
const char *error; | |
int error_offset; | |
pcre *re = pcre_compile(pattern.c_str(), 0, &error, &error_offset, nullptr); | |
if (re == nullptr) { | |
cerr << "Error: Failed to compile regex pattern: " << error << endl; | |
return 1; | |
} | |
// Read the input string from stdin | |
getline(cin, input); | |
// Measure the time required for the regex match | |
auto start_time = chrono::high_resolution_clock::now(); | |
// Execute the regex match against the input string | |
const int ovector_size = 30; | |
int ovector[ovector_size]; | |
int rc = pcre_exec(re, nullptr, input.c_str(), input.length(), 0, 0, ovector, ovector_size); | |
if (rc < 0) { | |
cerr << "Error: Failed to match input string against regex pattern" << endl; | |
return 1; | |
} | |
auto end_time = chrono::high_resolution_clock::now(); | |
auto duration = chrono::duration_cast<chrono::microseconds>(end_time - start_time); | |
// Print the matching substring(s) | |
for (int i = 0; i < rc; i++) { | |
int start = ovector[2 * i]; | |
int end = ovector[2 * i + 1]; | |
cout << "Match " << i << ": " << input.substr(start, end - start) << endl; | |
} | |
cout << "Time taken: " << duration.count() << " microseconds" << endl; | |
// Clean up resources | |
pcre_free(re); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// g++ -o pcre2 pcre2.cpp -lpcre2-8 | |
#define PCRE2_CODE_UNIT_WIDTH 8 | |
#include <iostream> | |
#include <fstream> | |
#include <chrono> | |
#include <pcre2.h> | |
// PCRE2_CODE_UNIT_WIDTH (defined above, before <pcre2.h> is included) selects the 8-bit PCRE2 library.
int main(int argc, char **argv) { | |
if (argc != 3) { | |
std::cerr << "Usage: " << argv[0] << " pattern.in text.in" << std::endl; | |
return 1; | |
} | |
const char *pattern_filename = argv[1]; | |
const char *text_filename = argv[2]; | |
// Read the regular expression pattern from the file | |
std::ifstream pattern_file(pattern_filename); | |
if (!pattern_file.is_open()) { | |
std::cerr << "Failed to open pattern file: " << pattern_filename << std::endl; | |
return 2; | |
} | |
std::string pattern_str((std::istreambuf_iterator<char>(pattern_file)), std::istreambuf_iterator<char>()); | |
pattern_file.close(); | |
// Read the input text from the file | |
std::ifstream text_file(text_filename); | |
if (!text_file.is_open()) { | |
std::cerr << "Failed to open text file: " << text_filename << std::endl; | |
return 3; | |
} | |
std::string text_str((std::istreambuf_iterator<char>(text_file)), std::istreambuf_iterator<char>()); | |
text_file.close(); | |
// Compile the regular expression pattern | |
int error_code; | |
PCRE2_SIZE error_offset; | |
pcre2_code *pattern = pcre2_compile( | |
(PCRE2_SPTR)pattern_str.c_str(), // pattern string | |
PCRE2_ZERO_TERMINATED, // length of pattern string | |
0, // PCRE2 options | |
&error_code, // error code | |
&error_offset, // error offset | |
0 // compile context | |
); | |
if (pattern == NULL) { | |
PCRE2_UCHAR buffer[256]; | |
pcre2_get_error_message(error_code, buffer, sizeof(buffer)); | |
std::cerr << "Failed to compile pattern: " << buffer << std::endl; | |
return 4; | |
} | |
// Create a match data block | |
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(pattern, NULL); | |
// Match the input text against the pattern | |
pcre2_match_context *match_context = pcre2_match_context_create(NULL); | |
uint32_t match_limit = 0xffffffff; | |
pcre2_set_match_limit(match_context, match_limit); | |
auto start_time = std::chrono::steady_clock::now(); | |
int result = pcre2_match( | |
pattern, // compiled pattern | |
(PCRE2_SPTR)text_str.c_str(), // input text | |
PCRE2_ZERO_TERMINATED, // length of input text | |
0, // start offset | |
0, // PCRE2 options | |
match_data, // match data block | |
match_context // match context | |
); | |
auto end_time = std::chrono::steady_clock::now(); | |
// Report the time spent on matching | |
auto elapsed_time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count(); | |
std::cout << "Time taken for matching: " << elapsed_time << " microseconds" << std::endl; | |
// Print the result of the match | |
if (result > 0) { | |
std::cout << "Match found!" << std::endl; | |
} else if (result == 0) { | |
std::cout << "No match found." << std::endl; | |
} else { | |
std::cerr << "Matching error: " << result << std::endl; | |
return 3; | |
} | |
// Clean up | |
pcre2_match_data_free(match_data); | |
pcre2_code_free(pattern); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment