Skip to content

Instantly share code, notes, and snippets.

@chenshuo
Created December 11, 2011 12:59
Show Gist options
  • Save chenshuo/1460482 to your computer and use it in GitHub Desktop.
Save chenshuo/1460482 to your computer and use it in GitHub Desktop.
Join two sets
#include <algorithm>
#include <iterator>
#include <vector>
#include <boost/unordered_set.hpp>
#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>
using namespace std;
double now()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec / 1e6;
}
vector<int64_t> generate(int n)
{
vector<int64_t> result;
result.resize(n);
generate(result.begin(), result.end(), lrand48);
return result;
}
int unique_count(vector<int64_t> in)
{
sort(in.begin(), in.end());
vector<int64_t>::iterator newend = unique(in.begin(), in.end());
size_t unique_elements = newend - in.begin();
return unique_elements;
}
struct is_in
{
is_in(const boost::unordered_set<int64_t>& set)
: set_(set)
{
}
bool operator()(int64_t x)
{
return set_.find(x) != set_.end();
}
const boost::unordered_set<int64_t>& set_;
};
vector<int64_t> join1(const vector<int64_t>& in1,
const vector<int64_t>& in2)
{
vector<int64_t> result;
boost::unordered_set<int64_t> set1(in1.begin(), in1.end());
remove_copy_if(in2.begin(), in2.end(), back_inserter(result), is_in(set1));
return result;
}
vector<int64_t> join2(vector<int64_t>& set1,
vector<int64_t>& set2)
{
vector<int64_t> result;
sort(set1.begin(), set1.end());
sort(set2.begin(), set2.end());
set_difference(set2.begin(), set2.end(),
set1.begin(), set1.end(),
back_inserter(result));
return result;
}
int main()
{
srand48(42);
vector<int64_t> set1 = generate(100*1000);
vector<int64_t> diff = generate(10*1000);
vector<int64_t> set2 = set1;
set2.insert(set2.end(), diff.begin(), diff.end());
random_shuffle(set2.begin(), set2.end());
printf("set1 unique_elements %d\n", unique_count(set1));
printf("set2 unique_elements %d\n", unique_count(set2));
vector<int64_t> result1, result2;
{
double start = now();
result1 = join1(set1, set2);
double finish1 = now();
printf("join1 %f seconds, result size %zd\n", finish1 - start, result1.size());
}
{
double start = now();
result2 = join2(set1, set2);
double finish2 = now();
printf("join2 %f seconds, result size %zd\n", finish2 - start, result2.size());
}
printf("difference of results %zd\n", join1(result1, result2).size());
printf("correctness of result %d\n", join1(result1, diff).size() == 0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment