Created
March 31, 2016 17:39
-
-
Save ijklr/0c7f66df5cfac209fecf143e20028c5a to your computer and use it in GitHub Desktop.
To compile: g++ -std=c++11 lzw.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <deque> | |
#include <iostream> | |
#include <unordered_map> | |
#include <fstream> | |
#include <cstdint> | |
#include <thread> | |
#include <string> | |
#include <cstring> | |
using namespace std; | |
union ben{ | |
int16_t num; | |
char c[2]; | |
}; | |
class MyStream { | |
public: | |
string peek() { | |
if(data_.empty()) { | |
cout<<"data is empty!"<<endl; | |
return ""; | |
} | |
auto t = data_.front(); | |
string tmp = lookup(t.num); | |
return string(1, tmp[0]); | |
} | |
string read() { | |
if(data_.empty()) { | |
cout<<"read data is empty!"<<endl; | |
return ""; | |
} | |
auto t = data_.front(); | |
data_.pop_front(); | |
fillq(); | |
string ans = lookup(t.num); | |
cout <<"read:"<<ans<<endl; | |
return ans; | |
} | |
void hash_insert(const string &str) { | |
static int key = 256; | |
//if(hash_.size() < 60000) | |
hash_.insert(make_pair(key++, str)); | |
} | |
MyStream(const string &filename) { | |
ifs_.open(filename); | |
fillq(); | |
} | |
~MyStream(){ | |
ifs_.close(); | |
} | |
private: | |
void fillq() { | |
ben bb; | |
if(ifs_.get(bb.c[0]) && ifs_.get(bb.c[1])) | |
data_.push_back(bb); | |
} | |
string lookup(int16_t i) { | |
auto found = hash_.find(i); | |
if(found == end(hash_)) { | |
if(i>128) cout<<"i="<<i<<endl; | |
return string(1,i); | |
} else { | |
cout<<"found "<<found->second<<endl; | |
return found->second; | |
} | |
} | |
deque<ben> data_; | |
ifstream ifs_; | |
unordered_map<int16_t, string> hash_; | |
}; | |
void uncompress(const string &input, const string &output) | |
{ | |
MyStream stream(input); | |
ofstream ofs(output.c_str(), ios::binary); | |
string current_str = stream.read(); | |
while(!current_str.empty()){ | |
string hash_str = current_str + stream.peek(); | |
ofs << current_str; | |
stream.hash_insert(hash_str); | |
current_str = stream.read(); | |
} | |
ofs.close(); | |
} | |
class MyOutStream{ | |
public: | |
void hash_insert(const string & str) { | |
static int i = 256; | |
if(hash_.size() < 60000) { | |
hash_.insert(make_pair(str, i++)); | |
//last_hash_key = str; | |
} else { | |
cout<<"reached capacity, hash won't get bigger."<<endl; | |
} | |
} | |
MyOutStream(const string &filename) { | |
ofs_.open(filename); | |
} | |
void write(const string &str) { | |
//if(0 == str.compare(last_hash_key)) { | |
// ben bb; | |
// bb.num = str[0]; | |
// ofs_.write(bb.c, 2); | |
//} | |
cout<<"writing:"<<str<<endl; | |
auto found = hash_.find(str); | |
if(found == end(hash_)) { | |
for(auto c:str) { | |
ben bb; | |
bb.num = c; | |
ofs_.write(bb.c, 2); | |
} | |
} else { | |
ben bb; | |
bb.num = found->second; | |
ofs_.write(bb.c,2); | |
} | |
} | |
bool exist(const string &str) { | |
if(str.length() == 1) return true; | |
else return hash_.find(str) != end(hash_); | |
} | |
private: | |
unordered_map<string, int16_t> hash_; | |
ofstream ofs_; | |
//string last_hash_key; | |
}; | |
void compress(const string &input, const string &output) | |
{ | |
ifstream ifs(input.c_str()); | |
string current_str; | |
MyOutStream stream(output); | |
char c; | |
while(ifs.get(c)){ | |
current_str += c; | |
int pk = ifs.peek(); | |
if(pk == EOF) { | |
stream.write(current_str); | |
break; | |
} | |
string key_str = current_str + static_cast<char>(pk); | |
if(stream.exist(current_str) && !stream.exist(key_str)) { | |
stream.write(current_str); | |
stream.hash_insert(key_str); | |
current_str.clear(); | |
} | |
} | |
ifs.close(); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
if(argc != 4) { | |
cout <<argv[0]<<" [compress/uncompress] input output"<<endl; | |
} | |
string option(argv[1]); | |
string input(argv[2]); | |
string output(argv[3]); | |
if(0 == option.compare("compress")) { | |
compress(input, output); | |
} else if(0 == option.compare("uncompress")) { | |
uncompress(input, output); | |
} else { | |
cout <<"invalid option!"<<endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment