Skip to content

Instantly share code, notes, and snippets.

@liancheng
Last active December 25, 2015 00:09
Show Gist options
  • Save liancheng/6885257 to your computer and use it in GitHub Desktop.
Save liancheng/6885257 to your computer and use it in GitHub Desktop.
#!/bin/bash
mkdir -p data
for i in `seq $1`; do
file=data/$i
for j in `seq $2`; do
echo ${RANDOM} >> $file
done
sort -g $file > tmp.$$
mv tmp.$$ $file
done
find data/ -type f | ./a.out
find data/ -type f | xargs cat | sort -g | diff - result
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <queue>
#include <string>
#include <tuple>
#include <vector>
using namespace std;
template<typename InputRangeIterator, typename OutputIterator>
OutputIterator n_way_merge(InputRangeIterator first,
InputRangeIterator last,
OutputIterator out)
{
typedef typename iterator_traits<InputRangeIterator>::value_type InputRange;
typedef typename InputRange::first_type InputIterator;
typedef typename iterator_traits<InputIterator>::value_type Value;
struct HeapElement {
InputIterator next;
InputIterator last;
Value value;
HeapElement(InputIterator next, InputIterator last, Value const& value) :
next(next), last(last), value(value)
{}
bool operator<(HeapElement const& that) const
{
return this->value > that.value;
}
};
vector<HeapElement> queue;
for_each(first, last, [&](InputRange& range) {
if (range.first != range.second) {
Value value = *range.first++;
queue.emplace_back(range.first, range.second, value);
}
});
make_heap(begin(queue), end(queue));
while (!queue.empty()) {
HeapElement const& element = queue.front();
InputIterator next = element.next;
InputIterator last = element.last;
*out++ = element.value;
pop_heap(begin(queue), end(queue));
if (next != last) {
Value value = *next++;
queue.back() = HeapElement(next, last, value);
push_heap(begin(queue), end(queue));
}
else {
queue.pop_back();
}
}
return out;
}
template<typename Value, typename PathIterator, typename OutputIterator>
OutputIterator merge_files(PathIterator first,
PathIterator last,
OutputIterator out)
{
vector<unique_ptr<ifstream>> streams;
vector<pair<istream_iterator<Value>, istream_iterator<Value>>> ranges;
for_each(first, last, [&](string const& path) {
streams.emplace_back(new ifstream(path.c_str()));
ranges.push_back(make_pair(istream_iterator<Value>(*streams.back()),
istream_iterator<Value>()));
});
return n_way_merge(begin(ranges), end(ranges), out);
}
int main(int argc, char* argv[])
{
ofstream out("result");
vector<string> paths((istream_iterator<string>(cin)), istream_iterator<string>());
merge_files<int>(begin(paths), end(paths), ostream_iterator<int>(out, "\n"));
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment