Last active
August 29, 2015 14:03
-
-
Save shonenada/db93ea52894848da0bb4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// 字符集的最大大小 | |
#define MAX_SIZE 256 | |
class ArithmeitcCoding { | |
private: | |
// 记录字符集合总大小 | |
int size; | |
// 区间下限 | |
double lower; | |
// 区间上限 | |
double upper; | |
// 字符集 | |
char char_set[MAX_SIZE]; | |
// 每一个字符出现的频数 | |
int frequency[MAX_SIZE]; | |
// 计算所有字符出现的频数 | |
int get_total(); | |
// 计算下限 | |
int get_prev_size(int idx); | |
// 判斷字符是否在字符集中 | |
int is_in_set(char chr); | |
// 获取输入的字符的下标 | |
int index_of(char chr); | |
public: | |
ArithmeitcCoding(); | |
// 初始化成员变量 | |
void clean(); | |
// 初始化字符集 | |
void init_charset(char* string); | |
// 更新频率区间 | |
void update_range(int idx); | |
// 輸入字符集 | |
void input_char_set(); | |
// 对输入的字符串做算术编码 | |
double encode(char* string); | |
}; | |
ArithmeitcCoding::ArithmeitcCoding() { } | |
// 计算所有字符出现的总频数,用来确定划分区间的单位长度 | |
int ArithmeitcCoding::get_total() { | |
int i; | |
int total = 0; | |
for (i=0; i<size; ++i) { | |
total += frequency[i]; | |
} | |
return total; | |
} | |
// 计算 idx 下标前有多少个单位长度,用来确定新区间的下标。 | |
int ArithmeitcCoding::get_prev_size(int idx) { | |
int i; | |
int prev_size = 0; | |
for(i=0; i<idx; ++i) { | |
prev_size += frequency[i]; | |
} | |
return prev_size; | |
} | |
// 判断输入的字符是否存在于字符集中 | |
// 存在则返回 1 | |
// 不存在返回 0 | |
int ArithmeitcCoding::is_in_set(char chr) { | |
int i; | |
for (i=0; i<size; ++i) { | |
if (char_set[i] == chr) { | |
return 1; | |
} | |
} | |
return 0; | |
} | |
// 判断输入的字符是否存在于字符集中 | |
// 存在则返回 下標 | |
// 不存在返回 -1 | |
int ArithmeitcCoding::index_of(char chr) { | |
int i; | |
for (i=0; i<size; ++i) { | |
if (char_set[i] == chr) { | |
return i; | |
} | |
} | |
return -1; | |
} | |
// 初始化成员变量,每次对字符串进行编码之前都需要进行初始化。 | |
void ArithmeitcCoding::clean() { | |
lower = 0; | |
upper = 1; | |
int i; | |
for (i=0; i<size; ++i) { | |
frequency[i] = 1; | |
} | |
} | |
// 初始化字符集,将字符串的字符存入字符集合中 | |
void ArithmeitcCoding::init_charset(char* string) { | |
int i=0; | |
while(string[i] != '\0') { | |
if (!is_in_set(string[i])) { | |
char_set[size] = string[i]; | |
size += 1; | |
} | |
i += 1; | |
} | |
} | |
// 计算并且更新区间范围 | |
void ArithmeitcCoding::update_range(int idx) { | |
int total = get_total(); | |
double prange = upper - lower; | |
double slice_size = prange / total; | |
int prev_size = get_prev_size(idx); | |
lower = lower + slice_size * prev_size; | |
upper = lower + slice_size * frequency[idx]; | |
} | |
// 输入字符集 | |
void ArithmeitcCoding::input_char_set() { | |
char cset[MAX_SIZE]; | |
printf("请输入字符集:"); | |
scanf("%s", cset); | |
init_charset(cset); | |
} | |
// 对输入的字符串做编码,对每个字符做一个区间更新的操作。最后输出区间范围内的值,这里输出了区间的中间值。 | |
double ArithmeitcCoding::encode(char* string) { | |
clean(); | |
int i = 0; | |
while (string[i] != '\0') { | |
update_range(index_of(string[i])); | |
frequency[index_of(string[i])] += 1; | |
i += 1; | |
} | |
update_range(index_of(string[i-1])); | |
return (upper + lower) / 2; | |
} | |
int main(int argc, char* argv[]) { | |
char string[MAX_SIZE]; | |
ArithmeitcCoding encoder; | |
encoder.input_char_set(); | |
printf("请输入待压缩字符串:"); | |
scanf("%s", string); | |
printf("%lf\n", encoder.encode(string)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment