Skip to content

Instantly share code, notes, and snippets.

@Rag0n
Last active August 29, 2015 13:56
Show Gist options
  • Save Rag0n/9216175 to your computer and use it in GitHub Desktop.
Save Rag0n/9216175 to your computer and use it in GitHub Desktop.
bwt[quicksort] + mtf + huffman
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// #include <stdbool.h> // bool
#define BLOCKSIZE 5000
#define MAX_TREE_HT 100
// Преобразование Барроуза-Уиллера
// int f = 53; // колво символов в алфавите mtf
struct CharAndFreq
{
unsigned ch;
unsigned freq;
};
struct CharAndBit
{
unsigned ch;
int bits[8];
};
struct Node
{
struct CharAndFreq data[1];
struct Node *left, *right; // Left and right child of this node
};
struct Heap
{
unsigned size; // Current size of min heap
unsigned capacity; // capacity of min heap (count)
// unsigned int size; // Size of the allocated memory (in number of items)
// unsigned int capacity; // capacity of the elements in the heap
struct Node **array; // Array of Heap node pointers
};
// A utility function to print an array of size n
void printArr(int arr[], int n, struct CharAndBit *result, int *pos)
{
int i;
for (i = 0; i < n; ++i)
{
printf("%d", arr[i]);
result[*pos].bits[i] = arr[i];
}
result[*pos].bits[i] = 2;
printf("\n");
}
struct Heap* createHeap(int capacity) //
{
struct Heap *heap = (struct Heap*)malloc(sizeof(struct Heap));
heap->size = 0;
heap->capacity = capacity;
heap->array = (struct Node**) malloc(heap->capacity * sizeof(struct Node*));
return heap;
}
// A utility function to swap two min heap nodes
void swap(struct Node **one, struct Node **two)
{
struct Node *temp = *one;
*one = *two;
*two = temp;
}
// The standard minHeapify function.
void Heapify(struct Heap* heap, int i) //
{
int minimum = i;
int left = 2*i + 1;
int right = 2*i + 2;
if (left < heap->size && heap->array[left]->data[0].freq < heap->array[minimum]->data[0].freq)
minimum = left;
if (right < heap->size && heap->array[right]->data[0].freq < heap->array[minimum]->data[0].freq)
minimum = right;
if (minimum != i)
{
swap(&heap->array[minimum], &heap->array[i]);
Heapify(heap, minimum);
}
}
struct Node* extract_min(struct Heap *heap)
{
struct Mode *temp = heap->array[0];
heap->array[0] = heap->array[heap->size - 1];
heap->size--;
Heapify(heap, 0);
return temp;
}
// A standard funvtion to build min heap
void buildHeap(struct Heap *heap) //
{
int lenght = heap->size - 1;
for (int i = lenght / 2; i > -1; --i)
Heapify(heap, i);
}
struct Node* newNode(char data, int freq)
{
struct Node* temp = (struct Node*)malloc(sizeof(struct Node));
temp->left = temp->right = NULL; // обнуляем указатели
temp->data[0].ch = data;
temp->data[0].freq = freq;
return temp;
}
void add(struct Heap *heap, struct Node *node)
{
int i = heap->size;
heap->size++;
while(i > 0 && node->data[0].freq < heap->array[(i - 1)/2]->data[0].freq)
{
heap->array[i] = heap->array[(i - 1)/2];
i = (i - 1)/2;
}
heap->array[i] = node;
}
struct Node* buildTree(struct CharAndFreq data[], int size) //
{
struct Node *top, *left, *right;
// создаем минимальную кучу
struct Heap *heap = createHeap(size);
for (int i = 0; i < size; ++i)
{
heap->array[i] = newNode(data[i].ch, data[i].freq);
}
heap->size = size;
buildHeap(heap);
while(heap->size > 1)
{
left = extract_min(heap);
right = extract_min(heap);
top = newNode('$', left->data[0].freq + right->data[0].freq);
top->left = left;
top->right = right;
add(heap, top);
}
// последний элемент - корень
return extract_min(heap);
}
// Prints huffman codes from the root of Huffman Tree. It uses arr[] to
// store codes
void HuffmanCodes(struct Node* root, int arr[], int top, struct CharAndBit result[], int *pos)
{
// Assign 0 to left edge and recur
if (root->left)
{
arr[top] = 0;
HuffmanCodes(root->left, arr, top + 1, result, pos);
}
// Assign 1 to right edge and recur
if (root->right)
{
arr[top] = 1;
HuffmanCodes(root->right, arr, top + 1, result, pos);
}
// If this is a leaf node, then it contains one of the input
// characters, print the character and its code from arr[]
if (root->left == NULL && root->right == NULL)
{
printf("%d: ", root->data[0].ch);
result[*pos].ch = root->data[0].ch;
printArr(arr, top, result, pos);
(*pos)++;
}
}
// MTF:
int mtf_move(int index, char *dict)
{
int tmp1, tmp2, i = 1, x = dict[index];
tmp1 = dict[i];
dict[i] = x;
while(tmp1 != x)
{
i++;
tmp2 = tmp1;
tmp1 = dict[i];
dict[i] = tmp2;
}
return 0;
}
int mtf(int sym_mem, char *result_bwt)
{
// составляем словарь
char dict[97] = {'e', 't', 'a', 'o', 'i', 'n', 's', 'h', 'r', 'd', 'l', 'c', 'u', 'm', 'w', 'f', 'g', 'y', 'p', \
'b', 'v', 'k', 'j', 'x', 'q', 'z', 'E', 'T', 'A', 'O', 'I', 'N', 'S', 'H', 'R', 'D', 'L', 'C', 'U', 'M', 'W', 'F', 'G', 'Y', 'P', 'B', \
'V', 'K', 'X', 'J', 'Q', 'Z'};
int f = 53;
for (int d = 32; d < 65; ++d)
{
dict[f] = d;
f++;
}
for (int d = 91; d < 97; ++d)
{
dict[f] = d;
f++;
}
for (int d = 123; d < 127; ++d)
{
dict[f] = d;
f++;
}
// printf("%d\n", f);
// можно сделать бинарный поиск
int j, tmp;
for (int i = 0; i < sym_mem; ++i)
{
// переписать на while?
for (j = 0; j < f; ++j)
if (result_bwt[i] == dict[j])
break;
printf("%d ", j);
result_bwt[i] = j;
if (j > 1)
{
mtf_move(j, dict);
}
else if (j == 1)
{
tmp = dict[0];
dict[0] = dict[1];
dict[1] = tmp;
}
}
printf("\n");
return f;
}
// BWT:
void bwtswap(char *f, char *l, int memory)
{
char temp;
for (int i = 0; i < memory; ++i)
{
temp = f[i];
f[i] = l[i];
l[i] = temp;
}
}
int mystrcmp(const char *a, const char *b){
while ( *a && *b && *a == *b )
++a, ++b;
return (*a - *b);
}
int cmp_min(char *f, char *l)
{
int res = mystrcmp(f, l);
if (res <= 0)
return 1;
return 0;
}
int cmp_max(char *f, char *l)
{
int res = mystrcmp(f, l);
if (res > 0)
return 1;
return 0;
}
void quickSortR(char *s_arr, int first, int last, int memory)
{
// На входе - массив a[], a[N] - его последний элемент.
long i = 0, j = last; // поставить указатели на исходные места
char *x = s_arr+(first+(last-first)/2)*(memory+1);
// процедура разделения
do {
while (mystrcmp(s_arr+i*(memory+1), x) < 0 ) i++;
while (mystrcmp(s_arr+j*(memory+1), x) > 0 ) j--;
if (i <= j) {
bwtswap(s_arr+i*(memory+1), s_arr+j*(memory+1), memory);
i++; j--;
}
} while ( i<=j );
// рекурсивные вызовы, если есть, что сортировать
if (i < last)
quickSortR(s_arr, i, last, memory);
if (first < j)
quickSortR(s_arr, first,j, memory);
}
int bwt(int memory, FILE *file, int *current_pos, char *result_bwt)
{
// memory+1 по сути лишнее, но рефакторинг потом
char bwt_array[memory][memory+1]; // +1 для терминального нуля
for (int i = 0; i < memory-1; ++i)
bwt_array[0][i] = fgetc(file);
bwt_array[0][memory-1] = '$';
for (int i = 1; i < memory; ++i)
{
bwt_array[i][memory-1] = bwt_array[i-1][0];
for (int j = 0; j < memory-1; ++j)
bwt_array[i][j] = bwt_array[i-1][j+1];
}
for (int i = 0; i < memory; ++i)
bwt_array[i][memory] = '\0';
for (int i = 0; i < memory; ++i)
printf("%s\n", bwt_array[i]);
printf("=======\n");
quickSortR(bwt_array, 0, memory-1, memory);
for (int i = 0; i < memory; ++i)
printf("%s\n", bwt_array[i]);
printf("=======\n");
// выбираем последний столбец
for (int i = 0, j = *current_pos; j < memory + *current_pos; ++i, ++j)
result_bwt[j] = bwt_array[i][memory-1];
*current_pos += memory;
return 0;
}
int main()
{
FILE *file, *en_file;
file = fopen("/home/rag0n/Programming/C/MyProjects/kursovaya/text/sample.txt", "r");
int i, symbol, counter = 0, sum_mem = 0, current_pos = 0, alphabet;
for (i = 0; symbol != EOF; ++i)
symbol = fgetc(file);
// исправить на фсик
fclose(file);
file = fopen("/home/rag0n/Programming/C/MyProjects/kursovaya/text/sample.txt", "r");
i -= 2;
printf("%d\n", i);
while(i > 0)
{
counter++;
i -= BLOCKSIZE;
sum_mem += BLOCKSIZE+1;
}
sum_mem += i; // суммарное кол-во памяти для bwt
char result_bwt[sum_mem];
// printf("i: %d\n", i);
// printf("%d\n", counter);
printf("sum mem %d\n", sum_mem);
while(counter > 1)
{
bwt(BLOCKSIZE+1, file, &current_pos, result_bwt);
counter--;
}
// printf("%d\n", BLOCKSIZE+i+1);
bwt(BLOCKSIZE+i+1, file, &current_pos, result_bwt);
printf("\n\n");
// for (int i = 0; i < sum_mem; ++i)
// printf("%c", result_bwt[i]);
printf("%s\n", result_bwt);
// printf("\n");
alphabet = mtf(sum_mem, result_bwt);
for (int i = 0; i < sum_mem; ++i)
printf("%d ", result_bwt[i]);
printf("\n");
fclose(file);
int j, pos = 0, *pnt; // частота вхождения символа
struct CharAndFreq data[256];
struct CharAndBit result[256];
int k = 0; // указатель на массив и кол-во массивов одновременно;
printf("---------------------\n");
for (int i = 0; i < alphabet; ++i)
{
j = 0;
data[k].freq = 0;
while(j < sum_mem)
{
if (result_bwt[j] == i)
{
data[k].freq++;
}
j++;
}
if (data[k].freq != 0)
{
data[k].ch = i;
k++;
}
}
printf("%d\n", k);
printf("Частота встречаемости символов:\n");
for (int i = 0; i < k; ++i)
{
printf("%d %d\n", data[i].ch, data[i].freq);
}
struct Node *root = buildTree(data, k);
int codes[256], top = 0;
printf("---------------------\nHuffman codes:\n");
HuffmanCodes(root, codes, top, result, &pos);
// printf("===============\n");
// for (int i = 0; i < k; ++i)
// {
// printf("%d ", result[i].ch);
// for (pnt = result[i].bits; *pnt != 2; pnt++)
// printf("%d", *pnt);
// printf("\n");
// }
// encode
file = fopen("/home/rag0n/Programming/C/MyProjects/kursovaya/text/sample.txt", "r");
en_file = fopen("/home/rag0n/Programming/C/MyProjects/kursovaya/text/sample_result.txt", "w");
int cnt = 0;
printf("%d\n", result[5].ch);
while(cnt < sum_mem)
{
for (int i = 0; i < k; ++i)
{
if (result_bwt[cnt] == result[i].ch)
{
for (pnt = result[i].bits; *pnt != 2; pnt++)
{
printf("%d", *pnt);
fprintf(en_file, "%d", *pnt);
}
break;
}
}
cnt++;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment