Last active
August 1, 2019 07:29
-
-
Save artemklevtsov/0b15d5be768dc2d153076b692da6adac to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <sstream>
#include <string>
#include <unordered_map>

#include <Rcpp.h>
// String-to-string hash map; used in this file by map_to_env() and
// parse_headers_cpp() to hold header name -> header value pairs.
using map = std::unordered_map<std::string, std::string>; | |
// Removes leading and trailing whitespace from `s` in place.
//
// Whitespace is whatever std::isspace reports for the current C locale
// (space, tab, CR, LF, ... in the default "C" locale). A string made up
// entirely of whitespace becomes empty.
static inline void trim(std::string& s) {
  // The <cctype> classifiers have undefined behaviour for negative arguments
  // other than EOF, so every char is converted to unsigned char first. This
  // matters for bytes >= 0x80 on platforms where plain char is signed.
  const auto is_space = [](unsigned char ch) { return std::isspace(ch) != 0; };

  // Trailing side first: the reverse iterator's base() points just past the
  // last non-space character.
  s.erase(std::find_if_not(s.rbegin(), s.rend(), is_space).base(), s.end());
  s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), is_space));
}
// Lower-cases every character of `s` in place using std::tolower (current C
// locale). Characters without a lower-case mapping are left unchanged.
static inline void tolower(std::string& s) {
  // A lambda is used instead of passing `::tolower` directly: inside this
  // function that name also refers to this very overload, so it is an
  // overload set that std::transform cannot deduce as a callable. The
  // unsigned char parameter also keeps negative char values away from
  // std::tolower, for which its behaviour is undefined.
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char ch) {
    return static_cast<char>(std::tolower(ch));
  });
}
// Copies every key/value pair of `x` into a freshly created R environment,
// binding each key to its value, and returns that environment.
Rcpp::Environment map_to_env(const map& x) {
  Rcpp::Environment result = Rcpp::new_env();
  for (auto entry = x.begin(); entry != x.end(); ++entry) {
    result.assign(entry->first, entry->second);
  }
  return result;
}
// Parses a block of "Name: value" header lines into a name -> value map and
// returns it wrapped as an R object.
//
// - Input is read line by line; parsing stops at the first line that is
//   exactly "\r" (the blank line ending a CRLF-delimited header block) or at
//   end of input.
// - Lines without a ':' are ignored.
// - The name is everything before the first ':', trimmed and lower-cased;
//   the value is everything after it, trimmed.
// - Repeated names are combined into one value in the order they appear,
//   joined with "; " for "cookie" and ", " for every other name.
//
// [[Rcpp::export]]
Rcpp::RObject parse_headers_cpp(std::string headers) {
  map res;
  std::istringstream stream(headers);
  std::string buffer;
  while (std::getline(stream, buffer) && buffer != "\r") {
    const std::string::size_type index = buffer.find(':');
    if (index == std::string::npos) {
      continue;  // not a "Name: value" line
    }
    std::string key = buffer.substr(0, index);
    std::string val = buffer.substr(index + 1);
    trim(key);
    tolower(key);
    trim(val);  // also drops the trailing '\r' left by getline on CRLF input

    const auto existing = res.find(key);
    if (existing == res.end()) {
      res.emplace(key, val);
    } else {
      // Append to the stored value. unordered_map::insert leaves an existing
      // entry untouched, so the combined value must be written through the
      // iterator or it would be lost.
      existing->second += (key == "cookie") ? "; " : ", ";
      existing->second += val;
    }
  }
  return Rcpp::wrap(res);
}
// Same parsing rules as parse_headers_cpp, but the result is written
// directly into a new R environment instead of an intermediate C++ map.
//
// - Parsing stops at the first line that is exactly "\r" or at end of input.
// - Lines without a ':' are ignored.
// - Names are trimmed and lower-cased; values are trimmed.
// - Repeated names are combined in the order they appear, joined with "; "
//   for "cookie" and ", " for every other name.
//
// [[Rcpp::export]]
Rcpp::Environment parse_headers_cpp2(std::string headers) {
  Rcpp::Environment res = Rcpp::new_env();
  std::istringstream stream(headers);
  std::string buffer;
  while (std::getline(stream, buffer) && buffer != "\r") {
    const std::string::size_type index = buffer.find(':');
    if (index == std::string::npos) {
      continue;  // not a "Name: value" line
    }
    std::string key = buffer.substr(0, index);
    std::string val = buffer.substr(index + 1);
    trim(key);
    tolower(key);
    trim(val);  // also drops the trailing '\r' left by getline on CRLF input

    if (res.exists(key)) {
      // Earlier value first, then the new one, so the combined value keeps
      // the order in which the header lines were received.
      const std::string previous = Rcpp::as<std::string>(res[key]);
      const char* separator = (key == "cookie") ? "; " : ", ";
      val = previous + separator + val;
    }
    res.assign(key, val);
  }
  return res;
}
// You can include R code blocks in C++ files processed with sourceCpp | |
// (useful for testing and development). The R code will be automatically | |
// run after the compilation. | |
// | |
/*** R | |
parse_headers_r = function(headers) {
  # Raw input is decoded to a character string before parsing.
  if (is.raw(headers)) headers = rawToChar(headers)

  res = new.env(parent = emptyenv())

  # Anything that is not a non-empty character vector yields an empty result.
  if (!is.character(headers) || length(headers) == 0L) {
    return(res)
  }

  # Join continuation lines (line break followed by blanks) onto the previous
  # line, then split the first element into individual lines.
  unfolded = gsub("[\r\n]+[ \t]+", " ", headers)
  lines = strsplit(unfolded, "[\r\n]+")[[1]]

  # Keep only lines that look like "name: value".
  lines = lines[grepl("^[^:]+:", lines)]
  keys = tolower(sub(":.*", "", lines))
  values = sub("^[^:]*:[[:space:]]*", "", lines)

  for (i in seq_along(lines)) {
    key = keys[[i]]
    value = values[[i]]
    previous = res[[key]]
    if (is.null(previous)) {
      # first occurrence of this header name
      res[[key]] = value
    } else {
      # repeated header: cookies are joined with "; ", everything else with ", "
      sep = if (key == "cookie") "; " else ", "
      res[[key]] = paste(previous, value, sep = sep)
    }
  }
  return(res)
}
# Sample request header block used as input for the benchmark below. Each
# header ends with an escaped "\r\n" followed by the literal line break of
# this multi-line string; the block ends with "\r\n\r\n".
s = "Host: stackoverflow.com\r\n | |
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0\r\n | |
Accept: text/html,application/xhtml+xml,application/xml\r\n | |
Accept-Language: ru-RU,ru\r\n | |
Accept-Encoding: gzip, deflate, br\r\n | |
Referer: https://www.google.ru/\r\n | |
DNT: 1\r\n | |
Connection: keep-alive\r\n | |
Cookie: prov=f1e12498-1231-c8f0-8f53-97a8a6b17754; notice-ctt=4%3B1560153827826; mfnes=6e32CJ0CEAMaCwishdGGwpPLNxAFIJsCKAEyCDYyZGY0OTJh; acct=t=cmBi7gQEMWgxdi6kOiPwqAVNqmbEPdVj&s=E9Ly%2bCeEeAGmK9wDx2Zaseg6tiyi2hd8; sgt=id=3f4b96f5-b5ef-4ab1-96af-5ebce2950bcc\r\n | |
Upgrade-Insecure-Requests: 1\r\n | |
Cache-Control: max-age=0\r\n | |
TE: Trailers\r\n\r\n" | |
# Time the three parsers on the same input, each converted to a list.
# check = FALSE: presumably skips bench's comparison of results between
# expressions -- confirm against the bench::mark documentation.
bench::mark( | |
as.list(parse_headers_r(s)), | |
as.list(parse_headers_cpp(s)), | |
as.list(parse_headers_cpp2(s)), | |
min_iterations = 1000, | |
check = FALSE | |
) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.