Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
Last active August 1, 2019 07:29
Show Gist options
  • Save artemklevtsov/0b15d5be768dc2d153076b692da6adac to your computer and use it in GitHub Desktop.
Save artemklevtsov/0b15d5be768dc2d153076b692da6adac to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cctype>
#include <sstream>
#include <string>
#include <unordered_map>
#include <Rcpp.h>
// String-to-string hash map; used in this file to hold header name -> value pairs.
using map = std::unordered_map<std::string, std::string>;
// Removes leading and trailing whitespace (as classified by std::isspace)
// from `s` in place. A string consisting only of whitespace becomes empty.
static inline void trim(std::string& s) {
  // The <cctype> classifiers have undefined behaviour for negative arguments
  // other than EOF, so every character is converted to unsigned char first.
  const auto not_space = [](unsigned char ch) { return std::isspace(ch) == 0; };
  // Trailing whitespace first: the reverse search yields the last non-space
  // character, and base() points one past it.
  s.erase(std::find_if(s.rbegin(), s.rend(), not_space).base(), s.end());
  // Then everything before the first non-space character.
  s.erase(s.begin(), std::find_if(s.begin(), s.end(), not_space));
}
// Lower-cases every character of `s` in place using std::tolower.
static inline void tolower(std::string& s) {
  // A lambda is used instead of passing `::tolower` directly: this helper
  // shares its name with the C function, so `::tolower` would name an overload
  // set that std::transform cannot deduce a functor type from. Taking the
  // character as unsigned char also avoids the undefined behaviour that
  // <cctype> functions have for negative arguments.
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char ch) {
    return static_cast<char>(std::tolower(ch));
  });
}
// Builds a fresh R environment and binds every key of `x` to its mapped
// string value. Returns the populated environment.
Rcpp::Environment map_to_env(const map& x) {
  Rcpp::Environment result = Rcpp::new_env();
  for (auto entry = x.begin(); entry != x.end(); ++entry) {
    result.assign(entry->first, entry->second);
  }
  return result;
}
// Parses a block of HTTP header lines into a name -> value map and returns it
// converted with Rcpp::wrap.
//
// - Input is read line by line (split on '\n'); parsing stops at the first
//   line that is exactly "\r", i.e. the blank CRLF line ending the headers.
// - Lines without a ':' are ignored, as are lines whose name is empty after
//   trimming (the R reference implementation in this file also requires at
//   least one character before the colon).
// - Names are trimmed and lower-cased; values are trimmed, which also removes
//   the trailing '\r' left behind by std::getline.
// - Repeated names are merged in order of appearance: "existing, new", or
//   "existing; new" for the "cookie" header.
//
// [[Rcpp::export]]
Rcpp::RObject parse_headers_cpp(std::string headers) {
  map res;
  std::istringstream stream(headers);
  std::string buffer;
  while (std::getline(stream, buffer) && buffer != "\r") {
    const std::string::size_type index = buffer.find(':');
    if (index == std::string::npos) {
      continue;  // not a "name: value" line
    }
    std::string key = buffer.substr(0, index);
    std::string val = buffer.substr(index + 1);
    trim(key);
    tolower(key);
    trim(val);
    if (key.empty()) {
      continue;  // a header needs a name
    }
    const auto found = res.find(key);
    if (found == res.end()) {
      res.emplace(key, val);
    } else {
      // Append to the stored value in place. The previous code called
      // insert(), which is a no-op for an existing key, so the merged value
      // was silently dropped.
      found->second += (key == "cookie") ? "; " : ", ";
      found->second += val;
    }
  }
  return Rcpp::wrap(res);
}
// Parses a block of HTTP header lines directly into a new R environment, with
// one binding per (lower-cased) header name.
//
// - Input is read line by line (split on '\n'); parsing stops at the first
//   line that is exactly "\r", i.e. the blank CRLF line ending the headers.
// - Lines without a ':' are ignored, as are lines whose name is empty after
//   trimming.
// - Names are trimmed and lower-cased; values are trimmed, which also removes
//   the trailing '\r' left behind by std::getline.
// - Repeated names are merged in order of appearance: "existing, new", or
//   "existing; new" for the "cookie" header.
//
// [[Rcpp::export]]
Rcpp::Environment parse_headers_cpp2(std::string headers) {
  Rcpp::Environment res = Rcpp::new_env();
  std::istringstream stream(headers);
  std::string buffer;
  while (std::getline(stream, buffer) && buffer != "\r") {
    const std::string::size_type index = buffer.find(':');
    if (index == std::string::npos) {
      continue;  // not a "name: value" line
    }
    std::string key = buffer.substr(0, index);
    std::string val = buffer.substr(index + 1);
    trim(key);
    tolower(key);
    trim(val);
    if (key.empty()) {
      // R does not allow zero-length variable names, so such a line cannot be
      // stored in the environment; skip it instead of raising an R error.
      continue;
    }
    if (res.exists(key)) {
      // Keep the earlier value first so the combined value preserves the
      // order in which the header lines appeared.
      const std::string previous = Rcpp::as<std::string>(res[key]);
      val = previous + (key == "cookie" ? "; " : ", ") + val;
    }
    res.assign(key, val);
  }
  return res;
}
// You can include R code blocks in C++ files processed with sourceCpp
// (useful for testing and development). The R code will be automatically
// run after the compilation.
//
/*** R
parse_headers_r = function(headers) {
  # Raw input is decoded to a character string before parsing.
  if (is.raw(headers)) {
    headers = rawToChar(headers)
  }
  res = new.env(parent = emptyenv())
  # Anything that is not a non-empty character vector yields an empty environment.
  if (!is.character(headers) || length(headers) == 0L) {
    return(res)
  }
  ## Join continuation lines (line break followed by blanks) with their parent
  ## line, then split the first element of `headers` into individual lines.
  unfolded = gsub("[\r\n]+[ \t]+", " ", headers)
  lines = strsplit(unfolded, "[\r\n]+")[[1]]
  ## Keep only lines of the form "name: value" with a non-empty name.
  is_header = grepl("^[^:]+:", lines)
  lines = lines[is_header]
  keys = tolower(gsub(":.*", "", lines))
  values = gsub("^[^:]*:[[:space:]]*", "", lines)
  for (i in seq_along(keys)) {
    key = keys[[i]]
    value = values[[i]]
    previous = res[[key]]
    if (is.null(previous)) {
      # First occurrence of this header name.
      res[[key]] = value
    } else {
      # Repeated header: append to the stored value. Cookies are joined
      # with "; ", every other header with ", ".
      separator = if (key == "cookie") "; " else ", "
      res[[key]] = paste(previous, value, sep = separator)
    }
  }
  return(res)
}
# Sample request headers used as the benchmark input.
# NOTE: each "\r\n" escape below is followed by a real line break inside the
# literal, so consecutive header lines are separated by CR LF LF; the string
# itself ends with "\r\n\r\n".
s = "Host: stackoverflow.com\r\n
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0\r\n
Accept: text/html,application/xhtml+xml,application/xml\r\n
Accept-Language: ru-RU,ru\r\n
Accept-Encoding: gzip, deflate, br\r\n
Referer: https://www.google.ru/\r\n
DNT: 1\r\n
Connection: keep-alive\r\n
Cookie: prov=f1e12498-1231-c8f0-8f53-97a8a6b17754; notice-ctt=4%3B1560153827826; mfnes=6e32CJ0CEAMaCwishdGGwpPLNxAFIJsCKAEyCDYyZGY0OTJh; acct=t=cmBi7gQEMWgxdi6kOiPwqAVNqmbEPdVj&s=E9Ly%2bCeEeAGmK9wDx2Zaseg6tiyi2hd8; sgt=id=3f4b96f5-b5ef-4ab1-96af-5ebce2950bcc\r\n
Upgrade-Insecure-Requests: 1\r\n
Cache-Control: max-age=0\r\n
TE: Trailers\r\n\r\n"
# Time the three implementations on the same input. Each result is passed
# through as.list() before timing ends; check = FALSE turns off bench::mark's
# comparison of the results against each other.
bench::mark(
as.list(parse_headers_r(s)),
as.list(parse_headers_cpp(s)),
as.list(parse_headers_cpp2(s)),
min_iterations = 1000,
check = FALSE
)
*/
@artemklevtsov
Copy link
Author

artemklevtsov commented Aug 1, 2019

  expression                         min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
  <bch:expr>                     <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
1 as.list(parse_headers_r(s))    133.9µs 140.7µs     5483.        0B     2.03  2706     1      494ms
2 as.list(parse_headers_cpp(s))   10.4µs  11.9µs    82718.        0B     8.27  9999     1      121ms
3 as.list(parse_headers_cpp2(s))  25.7µs  28.6µs    34276.        0B    10.3   9997     3      292ms
# … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment