Skip to content

Instantly share code, notes, and snippets.

@ijklr
Created March 31, 2016 17:39
Show Gist options
  • Save ijklr/0c7f66df5cfac209fecf143e20028c5a to your computer and use it in GitHub Desktop.
Save ijklr/0c7f66df5cfac209fecf143e20028c5a to your computer and use it in GitHub Desktop.
To compile: g++ -std=c++11 lzw.cpp
#include <deque>
#include <iostream>
#include <unordered_map>
#include <fstream>
#include <cstdint>
#include <thread>
#include <string>
#include <cstring>
using namespace std;
union ben{
int16_t num;
char c[2];
};
class MyStream {
public:
string peek() {
if(data_.empty()) {
cout<<"data is empty!"<<endl;
return "";
}
auto t = data_.front();
string tmp = lookup(t.num);
return string(1, tmp[0]);
}
string read() {
if(data_.empty()) {
cout<<"read data is empty!"<<endl;
return "";
}
auto t = data_.front();
data_.pop_front();
fillq();
string ans = lookup(t.num);
cout <<"read:"<<ans<<endl;
return ans;
}
void hash_insert(const string &str) {
static int key = 256;
//if(hash_.size() < 60000)
hash_.insert(make_pair(key++, str));
}
MyStream(const string &filename) {
ifs_.open(filename);
fillq();
}
~MyStream(){
ifs_.close();
}
private:
void fillq() {
ben bb;
if(ifs_.get(bb.c[0]) && ifs_.get(bb.c[1]))
data_.push_back(bb);
}
string lookup(int16_t i) {
auto found = hash_.find(i);
if(found == end(hash_)) {
if(i>128) cout<<"i="<<i<<endl;
return string(1,i);
} else {
cout<<"found "<<found->second<<endl;
return found->second;
}
}
deque<ben> data_;
ifstream ifs_;
unordered_map<int16_t, string> hash_;
};
void uncompress(const string &input, const string &output)
{
MyStream stream(input);
ofstream ofs(output.c_str(), ios::binary);
string current_str = stream.read();
while(!current_str.empty()){
string hash_str = current_str + stream.peek();
ofs << current_str;
stream.hash_insert(hash_str);
current_str = stream.read();
}
ofs.close();
}
class MyOutStream{
public:
void hash_insert(const string & str) {
static int i = 256;
if(hash_.size() < 60000) {
hash_.insert(make_pair(str, i++));
//last_hash_key = str;
} else {
cout<<"reached capacity, hash won't get bigger."<<endl;
}
}
MyOutStream(const string &filename) {
ofs_.open(filename);
}
void write(const string &str) {
//if(0 == str.compare(last_hash_key)) {
// ben bb;
// bb.num = str[0];
// ofs_.write(bb.c, 2);
//}
cout<<"writing:"<<str<<endl;
auto found = hash_.find(str);
if(found == end(hash_)) {
for(auto c:str) {
ben bb;
bb.num = c;
ofs_.write(bb.c, 2);
}
} else {
ben bb;
bb.num = found->second;
ofs_.write(bb.c,2);
}
}
bool exist(const string &str) {
if(str.length() == 1) return true;
else return hash_.find(str) != end(hash_);
}
private:
unordered_map<string, int16_t> hash_;
ofstream ofs_;
//string last_hash_key;
};
void compress(const string &input, const string &output)
{
ifstream ifs(input.c_str());
string current_str;
MyOutStream stream(output);
char c;
while(ifs.get(c)){
current_str += c;
int pk = ifs.peek();
if(pk == EOF) {
stream.write(current_str);
break;
}
string key_str = current_str + static_cast<char>(pk);
if(stream.exist(current_str) && !stream.exist(key_str)) {
stream.write(current_str);
stream.hash_insert(key_str);
current_str.clear();
}
}
ifs.close();
}
int main(int argc, char *argv[])
{
if(argc != 4) {
cout <<argv[0]<<" [compress/uncompress] input output"<<endl;
}
string option(argv[1]);
string input(argv[2]);
string output(argv[3]);
if(0 == option.compare("compress")) {
compress(input, output);
} else if(0 == option.compare("uncompress")) {
uncompress(input, output);
} else {
cout <<"invalid option!"<<endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment