Created
December 11, 2011 12:59
-
-
Save chenshuo/1460482 to your computer and use it in GitHub Desktop.
Join two sets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <iterator> | |
#include <vector> | |
#include <boost/unordered_set.hpp> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <sys/time.h> | |
using namespace std; | |
double now() | |
{ | |
struct timeval tv; | |
gettimeofday(&tv, NULL); | |
return tv.tv_sec + tv.tv_usec / 1e6; | |
} | |
vector<int64_t> generate(int n) | |
{ | |
vector<int64_t> result; | |
result.resize(n); | |
generate(result.begin(), result.end(), lrand48); | |
return result; | |
} | |
int unique_count(vector<int64_t> in) | |
{ | |
sort(in.begin(), in.end()); | |
vector<int64_t>::iterator newend = unique(in.begin(), in.end()); | |
size_t unique_elements = newend - in.begin(); | |
return unique_elements; | |
} | |
struct is_in | |
{ | |
is_in(const boost::unordered_set<int64_t>& set) | |
: set_(set) | |
{ | |
} | |
bool operator()(int64_t x) | |
{ | |
return set_.find(x) != set_.end(); | |
} | |
const boost::unordered_set<int64_t>& set_; | |
}; | |
vector<int64_t> join1(const vector<int64_t>& in1, | |
const vector<int64_t>& in2) | |
{ | |
vector<int64_t> result; | |
boost::unordered_set<int64_t> set1(in1.begin(), in1.end()); | |
remove_copy_if(in2.begin(), in2.end(), back_inserter(result), is_in(set1)); | |
return result; | |
} | |
vector<int64_t> join2(vector<int64_t>& set1, | |
vector<int64_t>& set2) | |
{ | |
vector<int64_t> result; | |
sort(set1.begin(), set1.end()); | |
sort(set2.begin(), set2.end()); | |
set_difference(set2.begin(), set2.end(), | |
set1.begin(), set1.end(), | |
back_inserter(result)); | |
return result; | |
} | |
int main() | |
{ | |
srand48(42); | |
vector<int64_t> set1 = generate(100*1000); | |
vector<int64_t> diff = generate(10*1000); | |
vector<int64_t> set2 = set1; | |
set2.insert(set2.end(), diff.begin(), diff.end()); | |
random_shuffle(set2.begin(), set2.end()); | |
printf("set1 unique_elements %d\n", unique_count(set1)); | |
printf("set2 unique_elements %d\n", unique_count(set2)); | |
vector<int64_t> result1, result2; | |
{ | |
double start = now(); | |
result1 = join1(set1, set2); | |
double finish1 = now(); | |
printf("join1 %f seconds, result size %zd\n", finish1 - start, result1.size()); | |
} | |
{ | |
double start = now(); | |
result2 = join2(set1, set2); | |
double finish2 = now(); | |
printf("join2 %f seconds, result size %zd\n", finish2 - start, result2.size()); | |
} | |
printf("difference of results %zd\n", join1(result1, result2).size()); | |
printf("correctness of result %d\n", join1(result1, diff).size() == 0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment