Created
September 24, 2015 14:04
-
-
Save vmx/5623881edcefdaecf9c0 to your computer and use it in GitHub Desktop.
Extracting a single field out of some data using FBSON and Subdoc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/time.h> | |
#include <fstream> | |
#include <vector> | |
#include "fbson/FbsonJsonParser.h" | |
// From http://stackoverflow.com/a/1861493/935109
// Microsecond timestamp type used for the timing measurements below.
using timestamp_t = unsigned long long;

// Returns the time reported by gettimeofday() as a single count of
// microseconds (seconds * 1,000,000 + microseconds).
static timestamp_t get_timestamp()
{
    struct timeval now;
    gettimeofday(&now, nullptr);
    // Widen the seconds before multiplying so the product is computed in
    // timestamp_t rather than in the (possibly narrower) type of tv_sec.
    return static_cast<timestamp_t>(now.tv_sec) * 1000000
           + static_cast<timestamp_t>(now.tv_usec);
}
// Read the docs we want to process.
//
// Reads `filename` line by line and returns every line (without its trailing
// newline) in file order. If the file cannot be opened the stream is in a
// failed state, the loop body never runs, and an empty vector is returned.
std::vector<std::string> readData(const char *filename)
{
    std::ifstream infile(filename);
    std::vector<std::string> file_contents;
    std::string line;
    while (std::getline(infile, line)) {
        file_contents.push_back(line);
    }
    return file_contents;
}
int main(int argc, char* argv[]) { | |
// Read the docs we want to process | |
std::vector<std::string> data = readData( | |
"/home/vmx/src/c/v8/mapfun/data_fbson.txt"); | |
// The data as FBSON | |
std::vector<std::string> bin_data; | |
// Create FBSON out of JSON | |
for (size_t ii = 0; ii < data.size(); ii++) { | |
fbson::FbsonJsonParser parser; | |
if (!parser.parse(data[ii])) { | |
printf("%d\n", parser.getErrorCode()); | |
return (int)parser.getErrorCode(); | |
} | |
bin_data.push_back(std::string( | |
parser.getWriter().getOutput()->getBuffer(), | |
parser.getWriter().getOutput()->getSize())); | |
} | |
// Extract the title and time it | |
timestamp_t t0 = get_timestamp(); | |
for(std::vector<std::string>::size_type i = 0; i != bin_data.size(); i++) { | |
auto fbsonDoc = fbson::FbsonDocument::createValue(bin_data[i].c_str(), | |
bin_data[i].size()); | |
std::string key_path = "title"; | |
fbson::FbsonValue *result_val = fbsonDoc->findPath(key_path.c_str(), | |
key_path.size()); | |
std::string result = std::string(result_val->getValuePtr(), | |
result_val->size()); | |
std::cout << result << std::endl; | |
} | |
timestamp_t t1 = get_timestamp(); | |
double secs = (t1 - t0) / 1000000.0L; | |
std::cout << "It took: " << secs << "s" << std::endl; | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/time.h> | |
#include <iostream> | |
#include <fstream> | |
#include <vector> | |
#include "subdoc/operations.h" | |
// From http://stackoverflow.com/a/1861493/935109
// Microsecond timestamp type used for the timing measurements below.
using timestamp_t = unsigned long long;

// Returns the time reported by gettimeofday() as a single count of
// microseconds (seconds * 1,000,000 + microseconds).
static timestamp_t get_timestamp()
{
    struct timeval now;
    gettimeofday(&now, nullptr);
    // Widen the seconds before multiplying so the product is computed in
    // timestamp_t rather than in the (possibly narrower) type of tv_sec.
    const timestamp_t whole_seconds = static_cast<timestamp_t>(now.tv_sec);
    const timestamp_t micros = static_cast<timestamp_t>(now.tv_usec);
    return whole_seconds * 1000000 + micros;
}
// Read the docs we want to process.
//
// Reads `filename` line by line and returns every line (without its trailing
// newline) in file order. If the file cannot be opened the stream is in a
// failed state, the loop body never runs, and an empty vector is returned.
std::vector<std::string> readData(const char *filename)
{
    std::ifstream infile(filename);
    std::vector<std::string> file_contents;
    for (std::string line; std::getline(infile, line);) {
        file_contents.push_back(line);
    }
    return file_contents;
}
int main(int argc, char* argv[]) { | |
// Read the docs we want to process | |
std::vector<std::string> data = readData( | |
"/home/vmx/src/c/v8/mapfun/data_fbson.txt"); | |
// Extract the title and time it | |
timestamp_t t0 = get_timestamp(); | |
for (size_t ii = 0; ii < data.size(); ii++) { | |
Subdoc::Operation op; | |
Subdoc::Result res; | |
uint8_t opcode = Subdoc::Command::GET; | |
res.clear(); | |
op.set_doc(data[ii]); | |
op.set_result_buf(&res); | |
Subdoc::Error rv = op.op_exec("title"); | |
if (!rv.success()) { | |
std::cout << rv.description() << std::endl; | |
throw rv; | |
} | |
std::string match = res.matchloc().to_string(); | |
std::cout << match << std::endl; | |
} | |
timestamp_t t1 = get_timestamp(); | |
double secs = (t1 - t0) / 1000000.0L; | |
std::cout << "It took: " << secs << "s" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
One thing jumps out straight away:
#42: Creating a Subdoc::Operation is expensive - try moving the creation of this out of the loop, and then call
op.clear()
after each call to op_exec
Also, this will be massively dependent on the input data.