Skip to content

Instantly share code, notes, and snippets.

@vmx
Created September 24, 2015 14:04
Show Gist options
  • Save vmx/5623881edcefdaecf9c0 to your computer and use it in GitHub Desktop.
Save vmx/5623881edcefdaecf9c0 to your computer and use it in GitHub Desktop.
Extracting a single field out of some data using FBSON and Subdoc
#include <sys/time.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "fbson/FbsonJsonParser.h"
// Adapted from http://stackoverflow.com/a/1861493/935109
// Integer type used to hold a time expressed in microseconds.
using timestamp_t = unsigned long long;

// Returns the current time reported by gettimeofday() as one microsecond
// count: the seconds part scaled to microseconds plus the microsecond part.
static timestamp_t get_timestamp()
{
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    const timestamp_t whole_seconds_us = static_cast<timestamp_t>(tv.tv_sec) * 1000000;
    return tv.tv_usec + whole_seconds_us;
}
// Load the documents to process: every line of `filename` becomes one entry
// of the returned vector, in file order. When the file cannot be opened the
// read loop never runs, so the result is simply empty.
std::vector<std::string> readData(const char *filename)
{
    std::vector<std::string> lines;
    std::ifstream input(filename);
    for (std::string current; std::getline(input, current); ) {
        lines.push_back(current);
    }
    return lines;
}
int main(int argc, char* argv[]) {
// Read the docs we want to process
std::vector<std::string> data = readData(
"/home/vmx/src/c/v8/mapfun/data_fbson.txt");
// The data as FBSON
std::vector<std::string> bin_data;
// Create FBSON out of JSON
for (size_t ii = 0; ii < data.size(); ii++) {
fbson::FbsonJsonParser parser;
if (!parser.parse(data[ii])) {
printf("%d\n", parser.getErrorCode());
return (int)parser.getErrorCode();
}
bin_data.push_back(std::string(
parser.getWriter().getOutput()->getBuffer(),
parser.getWriter().getOutput()->getSize()));
}
// Extract the title and time it
timestamp_t t0 = get_timestamp();
for(std::vector<std::string>::size_type i = 0; i != bin_data.size(); i++) {
auto fbsonDoc = fbson::FbsonDocument::createValue(bin_data[i].c_str(),
bin_data[i].size());
std::string key_path = "title";
fbson::FbsonValue *result_val = fbsonDoc->findPath(key_path.c_str(),
key_path.size());
std::string result = std::string(result_val->getValuePtr(),
result_val->size());
std::cout << result << std::endl;
}
timestamp_t t1 = get_timestamp();
double secs = (t1 - t0) / 1000000.0L;
std::cout << "It took: " << secs << "s" << std::endl;
return 0;
}
#include <sys/time.h>
#include <iostream>
#include <fstream>
#include <vector>
#include "subdoc/operations.h"
// Based on http://stackoverflow.com/a/1861493/935109
typedef unsigned long long timestamp_t;

// Reads the time of day via gettimeofday() and folds it into a single
// microsecond count (seconds scaled by one million, plus the microseconds).
static timestamp_t get_timestamp()
{
    struct timeval current;
    gettimeofday(&current, NULL);
    timestamp_t micros = (timestamp_t)current.tv_sec;
    micros *= 1000000;
    micros += current.tv_usec;
    return micros;
}
// Collect the documents to process. The file named by `filename` is read
// line by line and each line is stored, in order, as one element of the
// result. A file that cannot be opened produces an empty vector.
std::vector<std::string> readData(const char *filename)
{
    std::ifstream source(filename);
    std::vector<std::string> documents;
    std::string buffer;
    for (;;) {
        if (!std::getline(source, buffer)) {
            break;
        }
        documents.push_back(buffer);
    }
    return documents;
}
int main(int argc, char* argv[]) {
// Read the docs we want to process
std::vector<std::string> data = readData(
"/home/vmx/src/c/v8/mapfun/data_fbson.txt");
// Extract the title and time it
timestamp_t t0 = get_timestamp();
for (size_t ii = 0; ii < data.size(); ii++) {
Subdoc::Operation op;
Subdoc::Result res;
uint8_t opcode = Subdoc::Command::GET;
res.clear();
op.set_doc(data[ii]);
op.set_result_buf(&res);
Subdoc::Error rv = op.op_exec("title");
if (!rv.success()) {
std::cout << rv.description() << std::endl;
throw rv;
}
std::string match = res.matchloc().to_string();
std::cout << match << std::endl;
}
timestamp_t t1 = get_timestamp();
double secs = (t1 - t0) / 1000000.0L;
std::cout << "It took: " << secs << "s" << std::endl;
return 0;
}
@daverigby
Copy link

One thing jumps out straight away:
#42: Creating a Subdoc::Operation is expensive - try moving the creation of this out of the loop, and then call op.clear() after each call to op_exec

Also, this will be massively dependent on the input data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment