Created
December 12, 2011 15:01
-
-
Save kogemrka/1467668 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <memory> | |
#include <fstream> | |
#include <exception> | |
#include <algorithm> | |
#include <stdexcept> | |
/* TODO (вот она, десяточка моей мечты) | |
- Причесать функторы | |
- Воспользоваться трюком с наследованием для создания псевдонимов | |
*/ | |
// Only for POD types | |
template <class T> | |
void serialize(const T& obj, std::ostream& out) | |
{ | |
out.write((char*)(&obj), sizeof(obj)); | |
} | |
template <class T> | |
void deserialize(T &obj, std::istream& in) | |
{ | |
in.read((char*)(&obj), sizeof(obj)); | |
} | |
//for Strings | |
void serialize(const std::string &obj, std::ostream& out) | |
{ | |
size_t size = obj.length(); | |
out.write((char*)(&size), sizeof(size_t)); | |
out.write(obj.c_str(), size); | |
} | |
void deserialize(std::string &obj, std::istream& in) | |
{ | |
size_t size; | |
in.read((char*)(&size), sizeof(size_t)); | |
char *data = new char[size + 1]; | |
try | |
{ | |
in.read(data, size); | |
obj.assign(data, size); | |
} | |
catch(...) | |
{ | |
delete [] data; | |
throw; | |
} | |
delete [] data; | |
} | |
// for Vectors | |
template <class T> | |
void serialize(const std::vector<T>& obj, std::ostream& out) | |
{ | |
size_t size = obj.size(); | |
out.write((char*) (&size), sizeof(size_t)); | |
for (size_t i = 0; i < obj.size(); ++i) | |
serialize(obj[i], out); | |
} | |
template <class T> | |
void deserialize(std::vector<T>& obj, std::istream& in) | |
{ | |
size_t size; | |
in.read((char*)(&size), sizeof(size_t)); | |
obj.clear(); | |
obj.resize(size); | |
for (size_t i = 0; i < size; ++i) | |
deserialize(obj[i], in); | |
} | |
template <class T> | |
class FileKeeper; | |
template <class T> | |
class SortSegment | |
{ | |
public: | |
void operator() (std::vector<T> &segment) | |
{ | |
std::sort(segment.begin(), segment.end()); | |
} | |
}; | |
template <class T> | |
class SortNextFile | |
{ | |
public: | |
SortNextFile(): lastSize(-1) {}; | |
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &files) | |
{ | |
if (lastSize == -1) | |
{ | |
std::make_heap(files.begin(), files.end(), AntiCmp()); | |
} | |
else if ((size_t) lastSize == files.size()) | |
std::push_heap(files.begin(), files.end(), AntiCmp()); | |
std::pop_heap(files.begin(), files.end(), AntiCmp()); | |
lastSize = files.size(); | |
} | |
private: | |
int lastSize; | |
struct AntiCmp | |
{ | |
bool operator() (std::shared_ptr<FileKeeper<T> > a, std::shared_ptr<FileKeeper<T> > b) | |
{ | |
return *b < *a; | |
} | |
}; | |
}; | |
template <class T> | |
class ReverseSegment | |
{ | |
public: | |
void operator() (std::vector<T> &segment) | |
{ | |
std::reverse(segment.begin(), segment.end()); | |
} | |
}; | |
template <class T> | |
class ReverseNextFile | |
{ | |
public: | |
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &) | |
{ | |
} | |
}; | |
template <class T> | |
class RandomizeSegment | |
{ | |
public: | |
void operator() (std::vector<T> &segment) | |
{ | |
std::random_shuffle(segment.begin(), segment.end()); | |
} | |
}; | |
template <class T> | |
class RandomizeNextFile | |
{ | |
public: | |
void operator() (std::vector<std::shared_ptr<FileKeeper<T> > > &files) | |
{ | |
std::swap(files[files.size() - 1], files[rand() % files.size()]); | |
} | |
}; | |
template <class T, class SegmentFunc, class NextFileFunc> | |
class External_Op | |
{ | |
public: | |
// static const size_t defaultSize = 1024; | |
External_Op(SegmentFunc, NextFileFunc, size_t size); | |
void push(T elem); | |
T next(); | |
void finish(); | |
bool isEnd() const; | |
private: | |
bool isFinished; | |
void processSegment(); | |
External_Op(const External_Op&); | |
External_Op & operator=(const External_Op&); | |
std::vector<std::shared_ptr<FileKeeper<T> > > files; | |
std::vector<T> buffer; | |
size_t buffer_size; | |
SegmentFunc SegmentFunctor; | |
NextFileFunc NextFileFunctor; | |
}; | |
template <class T> | |
class FileKeeper | |
{ | |
public: | |
FileKeeper(const std::vector<T> &array); | |
~FileKeeper(); | |
bool eof() const; | |
T next(); | |
bool operator < (const FileKeeper<T> &) const; | |
private: | |
T lastval; | |
bool is_eof; | |
std::string filename; | |
std::ifstream file; | |
}; | |
int main() | |
{ | |
External_Op<std::vector<int>, SortSegment<std::vector<int> >, SortNextFile<std::vector<int> > > | |
Sorter(SortSegment<std::vector<int> >(), SortNextFile<std::vector<int> >(), 10000); | |
for (int i = 0; i < 100000; ++i) | |
{ | |
std::vector<int> v1; | |
for (int j = 0; j < 3; ++j) | |
v1.push_back(rand()%256); | |
Sorter.push(v1); | |
} | |
Sorter.finish(); | |
External_Op<std::vector<int>, ReverseSegment<std::vector<int> >, ReverseNextFile<std::vector<int> > > | |
Reverser(ReverseSegment<std::vector<int> >(), ReverseNextFile<std::vector<int> >(), 10000); | |
while (!Sorter.isEnd()) | |
Reverser.push(Sorter.next()); | |
Reverser.finish(); | |
while (!Reverser.isEnd()) | |
{ | |
std::vector <int> v = Reverser.next(); | |
for (int i = 0; i < 3; ++i) | |
std::cout << v[i] << "\t"; | |
std::cout << std::endl; | |
} | |
return 0; | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
External_Op<T, SegmentFunc, NextFileFunc>::External_Op(SegmentFunc SegmentProc, NextFileFunc NextFileProc, size_t size): | |
SegmentFunctor(SegmentProc), | |
NextFileFunctor(NextFileProc) | |
{ | |
isFinished = false; | |
if (size > 0) | |
{ | |
buffer_size = size; | |
buffer.reserve(size); | |
} | |
else | |
throw(std::range_error("Size <= 0")); | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
void External_Op<T, SegmentFunc, NextFileFunc>::push(T elem) | |
{ | |
if (isFinished) | |
throw(std::runtime_error("Don`t push data after finish")); | |
if (buffer.size() >= buffer_size) | |
{ | |
processSegment(); | |
} | |
buffer.push_back(elem); | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
void External_Op<T, SegmentFunc, NextFileFunc>::finish() | |
{ | |
if (isFinished) | |
throw(std::runtime_error("Don`t use finish twice")); | |
processSegment(); | |
isFinished = true; | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
bool External_Op<T, SegmentFunc, NextFileFunc>::isEnd() const | |
{ | |
return files.empty(); | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
T External_Op<T, SegmentFunc, NextFileFunc>::next() | |
{ | |
// std::pop_heap(files.begin(), files.end(), AntiCmp()); | |
if (!isFinished) | |
throw(std::runtime_error("Operation is not finished yet")); | |
if(isEnd()) | |
throw(std::runtime_error("Object is empty")); | |
NextFileFunctor(files); | |
FileKeeper<T> &lastfile = **(--files.end()); | |
T result = lastfile.next(); | |
if (lastfile.eof()) | |
{ | |
files.pop_back(); | |
} | |
return result; | |
} | |
template <class T, class SegmentFunc, class NextFileFunc> | |
void External_Op<T, SegmentFunc, NextFileFunc>::processSegment() | |
{ | |
SegmentFunctor(buffer); | |
files.push_back(std::shared_ptr<FileKeeper<T> >(new FileKeeper<T> (buffer))); | |
buffer.erase(buffer.begin(), buffer.end()); | |
} | |
template <class T> | |
FileKeeper<T>::FileKeeper(const std::vector<T> &array) | |
{ | |
filename = tmpnam(NULL); | |
typename std::vector<T>::const_iterator it = array.begin(); | |
lastval = *it; | |
++it; | |
try | |
{ | |
std::ofstream output(filename); | |
typename std::vector<T>::const_iterator et = array.end(); | |
is_eof = (it == et); | |
for (; it != et; ++it) | |
serialize(*it, output); | |
output.close(); | |
} | |
catch(...) | |
{ | |
remove(filename.c_str()); | |
throw; | |
} | |
file.open(filename, std::ifstream::binary); | |
} | |
template <class T> | |
bool FileKeeper<T>::eof() const | |
{ | |
return is_eof; | |
} | |
template <class T> | |
T FileKeeper<T>::next() | |
{ | |
T result = lastval; | |
if (!file.eof()) | |
deserialize(lastval, file); | |
is_eof = file.eof(); | |
return result; | |
} | |
template <class T> | |
bool FileKeeper<T>::operator < (const FileKeeper<T> &rhs) const | |
{ | |
return lastval < rhs.lastval; | |
} | |
template <class T> | |
FileKeeper<T>::~FileKeeper() | |
{ | |
file.close(); | |
remove(filename.c_str()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment