Last active
September 14, 2023 18:18
-
-
Save Agnishom/4ce98a7e7165fe9cdd06965a16962a94 to your computer and use it in GitHub Desktop.
Regex Execution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// g++ -o boostregex-test boostregex-test.cpp -lboost_regex
#include <fstream> | |
#include <vector> | |
#include <string> | |
#include <iostream> | |
#include <sstream> | |
#include <iomanip> | |
#include <chrono> | |
#include <boost/regex.hpp> | |
// Loads the file at `fileName` and returns its contents split into lines
// (line terminators are dropped by std::getline). If the file cannot be
// opened, a diagnostic is written to stderr and an empty vector is returned.
std::vector<std::string> readLines(const std::string& fileName) {
    std::ifstream in(fileName);
    std::vector<std::string> lines;

    if (in.fail()) {
        std::cerr << "Error in read_lines: Unable to open file " << fileName << std::endl;
        return lines;
    }

    // The for-scoped string keeps the scratch buffer local to the loop.
    for (std::string current; std::getline(in, current);) {
        lines.push_back(current);
    }
    return lines;
}
// Plain value type holding the outcome of one timed regex search.
struct MatchRecord {
    int regexId;     // identifier of the regex this record belongs to
    double durMsec;  // measured duration, in milliseconds
    int output;      // 1 when a match was found, 0 otherwise

    MatchRecord(int regexId, double durMsec, int output) {
        // Parameters shadow the members, hence the explicit this->.
        this->regexId = regexId;
        this->durMsec = durMsec;
        this->output = output;
    }
};
int main(int argc, char **argv){ | |
if (argc < 2) { | |
std::cerr << "Not enough arguments." << std::endl; | |
return 1; | |
} | |
std::vector<char> buffer; | |
char ch; | |
while (std::cin.get(ch)) { | |
buffer.push_back(ch); | |
} | |
if (!std::cin.eof() && std::cin.fail()) { | |
std::cerr << "Error reading input" << std::endl; | |
return 1; | |
} | |
std::string inputText(buffer.begin(), buffer.end()); | |
int textLen = inputText.length(); | |
std::cout << "input loaded, length = " << std::setprecision(2) << textLen << " bytes" << std::endl; | |
std::string patternFile = argv[1]; | |
std::vector<std::string> patterns = readLines(patternFile); | |
std::vector<MatchRecord> results; | |
for (int i = 0; i < patterns.size(); i++){ | |
int regexId = i; | |
std::string pattern_str = patterns[i]; | |
std::cout << "[" << regexId << "] " << patternFile << std::endl; | |
std::cout << "regex: " << pattern_str << std::endl; | |
// Create a regular expression object from the pattern string | |
boost::regex pattern; | |
boost::smatch match; | |
try { | |
pattern = boost::regex(pattern_str); | |
} catch (boost::regex_error& e) { | |
std::cerr << "Error compiling regex: " << e.what() << std::endl; | |
std::cout << std::endl; | |
continue; | |
} | |
// Measure the time it takes to perform the match | |
auto start_time = std::chrono::steady_clock::now(); | |
bool match_found = false; | |
try { | |
match_found = boost::regex_search(inputText, match, pattern); | |
} catch (boost::regex_error& e) { | |
std::cerr << "Error matching regex: " << e.what() << std::endl; | |
std::cout << std::endl; | |
continue; | |
} | |
auto end_time = std::chrono::steady_clock::now(); | |
// Print the results | |
if (match_found) { | |
std::cout << "Match found at [" << match.position() << "," << match.position() + match.length() << "]" << std::endl; | |
} else { | |
std::cout << "No match found" << std::endl; | |
} | |
std::cout << "text length = " << textLen << " bytes" << std::endl; | |
auto durNano = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count(); | |
std::cout << "duration (msec) = " << durNano / 1000000.0 << std::endl; | |
double throughput = (double) inputText.length() / durNano * 1000000000.0; | |
std::cout << "throughput (bytes/second) = " << throughput << std::endl; | |
std::cout << std::endl; | |
// add to the results | |
double durMsec = ((double) durNano) / 1000000.0; | |
int output = match_found ? 1 : 0; | |
MatchRecord outputRecord(regexId, durMsec, output); | |
results.push_back(outputRecord); | |
} | |
std::cout << "HEADER: id,text_len,duration,output" << std::endl; | |
for (MatchRecord r : results) { | |
std::cout << r.regexId << "," << textLen << "," << r.durMsec << "," << r.output << std::endl; | |
} | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// g++ -o stdregex stdregex-test.cpp
#include <fstream> | |
#include <vector> | |
#include <string> | |
#include <iostream> | |
#include <sstream> | |
#include <iomanip> | |
#include <regex> | |
#include <chrono> | |
// Returns every line of the file named `fileName`, in order, without line
// terminators. When the file cannot be opened an error is reported on stderr
// and the returned vector is empty.
std::vector<std::string> readLines(const std::string& fileName) {
    std::vector<std::string> collected;
    std::ifstream source(fileName);

    if (source) {
        std::string text;
        while (std::getline(source, text)) {
            collected.push_back(text);
        }
    } else {
        std::cerr << "Error in read_lines: Unable to open file " << fileName << std::endl;
    }
    return collected;
}
// Result of timing one regex search: the regex id, the elapsed time and a
// match flag.
class MatchRecord {
public:
    int regexId;
    double durMsec;  // elapsed time in milliseconds
    int output;      // match flag: 1 = matched, 0 = did not match

    MatchRecord(int regexId, double durMsec, int output)
        : regexId{regexId},
          durMsec{durMsec},
          output{output} {}
};
int main(int argc, char **argv){ | |
if (argc < 2) { | |
std::cerr << "Not enough arguments." << std::endl; | |
return 1; | |
} | |
std::vector<char> buffer; | |
char ch; | |
while (std::cin.get(ch)) { | |
buffer.push_back(ch); | |
} | |
if (!std::cin.eof() && std::cin.fail()) { | |
std::cerr << "Error reading input" << std::endl; | |
return 1; | |
} | |
std::string inputText(buffer.begin(), buffer.end()); | |
int textLen = inputText.length(); | |
std::cout << "input loaded, length = " << std::setprecision(2) << textLen << " bytes" << std::endl; | |
std::string patternFile = argv[1]; | |
std::vector<std::string> patterns = readLines(patternFile); | |
std::vector<MatchRecord> results; | |
for (int i = 0; i < patterns.size(); i++){ | |
int regexId = i; | |
std::string pattern_str = patterns[i]; | |
std::cout << "[" << regexId << "] " << patternFile << std::endl; | |
std::cout << "regex: " << pattern_str << std::endl; | |
// Create a regular expression object from the pattern string | |
std::regex pattern; | |
std::smatch match; | |
try { | |
pattern = std::regex(pattern_str, std::regex::ECMAScript | std::regex::optimize); | |
} catch (std::regex_error& e) { | |
std::cerr << "Error compiling regex: " << e.what() << std::endl; | |
std::cout << std::endl; | |
continue; | |
} | |
// Measure the time it takes to perform the match | |
auto start_time = std::chrono::steady_clock::now(); | |
bool match_found = false; | |
try { | |
match_found = std::regex_search(inputText, match, pattern); | |
} catch (std::regex_error& e) { | |
std::cerr << "Error matching regex: " << e.what() << std::endl; | |
std::cout << std::endl; | |
continue; | |
} | |
auto end_time = std::chrono::steady_clock::now(); | |
// Print the results | |
if (match_found) { | |
std::cout << "Match found at [" << match.position() << "," << match.position() + match.length() << "]" << std::endl; | |
} else { | |
std::cout << "No match found" << std::endl; | |
} | |
std::cout << "text length = " << textLen << " bytes" << std::endl; | |
auto durNano = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count(); | |
std::cout << "duration (msec) = " << durNano / 1000000.0 << std::endl; | |
double throughput = (double) inputText.length() / durNano * 1000000000.0; | |
std::cout << "throughput (bytes/second) = " << throughput << std::endl; | |
std::cout << std::endl; | |
// add to the results | |
double durMsec = ((double) durNano) / 1000000.0; | |
int output = match_found ? 1 : 0; | |
MatchRecord outputRecord(regexId, durMsec, output); | |
results.push_back(outputRecord); | |
} | |
std::cout << "HEADER: id,text_len,duration,output" << std::endl; | |
for (MatchRecord r : results) { | |
std::cout << r.regexId << "," << textLen << "," << r.durMsec << "," << r.output << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment