首页 >> 大全

C语言实现Huffman的编码和解码

2023-06-24 大全 43 作者:考证青年

tags: [‘C/C++’, ‘’]

原文链接:

说起 的算法原理其实很简单,难在实现过程中对细节的控制,比如 字串流转换成 比特流, 比特流转换回 字串流,这类操作极易出错;再比如要使 解码过程效率更高,需要让 游标指针在逐个获取 比特位的过程中高效地从根节点移动到目标节点,从而获取目标节点对应的解码字符;再就是针对解码所必须的 字符集频率表如何设计才能最大限度减少体积。总之我的体会就是,若要亲手实现一个各方面鲁棒性良好的 Code 其过程并不那么轻松。

输入数据测试

为检测 Tree的构建是否正常,我写了一个测试功能,可以输入字符以及频率来构建一颗 Tree,并打印 Tree的 树形图和 编码表,下面展示的是我以数据 {A:1, B:2, C:3, D:4}的构建效果:

文件的字符集频率表的设计

下面说说文件的压缩和解压,文件存储不光要存储压缩数据,还需要在文件头部额外存储 字符集频率表,目的是为了文件解压时,可根据 字符集频率表重新构建回 Tree,进而在构建的 Tree上将压缩数据解码成原始数据。 字符集频率表应最大限度减少体积,这样才能降低文件的总体积。所以根据上述说法,文件内容将分为两部分: 文件头部信息块和 数据区, 文件头部信息块内含 文件头标识和 字符集频率表。

还是拿上面的数据 {A:1, B:2, C:3, D:4}为案例,我的 文件头部信息块设计如下:

文件头的两个字节是类型标识 :FX,用来标识这是一个压缩文件,通过扫描文件头标识,可判断对该文件的操作是压缩还是解压。

文件头标识之后是 字符集频率表,第一个字节是表长,特别注意,它的值在 0 ~ 255之间,表示的表长的范围是 1 ~ 256之间,所以字符集 {A:1, B:2, C:3, D:4}的表长是3而不是4。接下来以每5个字节代表一个字符信息块,其中1个字节存储字符编码剩下4个字节存储该字符的频率,例如 A的频率是1,所以4个字节中存放的是 {0,0,0,1},由此可见我的设计尚有压缩空间,如果我用2个 比特位标识该字符的频率所占用的字节数,那么4个字节的占用将压缩到1个字节,整个 字符集频率表在理想状况下能缩小一倍以上。如我目前存储上述 字符集频率表信息需要 1+5*4=21字节,采用这种方式能压缩到 1+2*4=9字节。这点留待以后再优化吧。

文件的压缩和解压测试

拿源文件本身来测试压缩和解压:

源程序

程序仅使用C的常用标准库函数,且编写采用 C89标准,其目的是为了让程序拥有更广泛的适应性,以下是程序的关键代码,可供参考:

/********************************************
* Huffman Code Demo
* Copyright (C) i@foxzzz.com
*
* C implementation of the Huffman Code's
* encoding and decoding.
*********************************************/#include 
#include 
#include 
#include 
#include /*数据列表长度*/
#define LIST_SIZE 256
/*构建Huffman树需要产生的森林长度*/
#define FOREST_SIZE (LIST_SIZE * 2 - 1)
/*单个数据产生的Huffman编码文本串的最大容量*/
#define CODE_MAX 2048
/*文件路径长度*/
#define PATH_MAX 1024
/*文件头标识*/
const char FILE_HEADER_FLAG[] = { 'F', 'X' };/*节点标识*/
enum {NODE_FLAG_ROOT,          /*根节点*/NODE_FLAG_LEFT,          /*左孩子节点*/NODE_FLAG_RIGHT          /*右孩子节点*/
};/*节点类型*/
enum {NODE_TYPE_DATA,          /*数据节点*/NODE_TYPE_BLANK          /*空节点*/
};/*文件类型*/
enum {FILE_TYPE_NULL,          /*读取出错*/FILE_TYPE_ENCODE,        /*原始数据文件*/FILE_TYPE_DECODE,        /*压缩数据文件*/
};/*字节类型*/
typedef unsigned char Byte;/*Huffman树节点*/
typedef struct _tNode {int type;                /*节点类型*/int data;                /*节点数据*/int weight;              /*节点权重*/char code[CODE_MAX];     /*Huffman编码*/struct _tNode* left;     /*左孩子*/struct _tNode* right;    /*右孩子*/
}Node, * pNode;/*Huffman树信息*/
typedef struct _tHuffmanTree {pNode root;              /*根节点*/int total                /*总字节数*/;
}HuffmanTree, * pHuffmanTree;/*得到当前时间戳*/
struct timeval startTimestamp() {struct timeval stamp;gettimeofday(&stamp, NULL);return stamp;
}/*计算从时间戳到当前时间的毫秒*/
double endTimestamp(struct timeval start) {int diff_sec = 0;double start_msec = 0;double end_msec = 0;struct timeval end;gettimeofday(&end, NULL);diff_sec = (int)(end.tv_sec - start.tv_sec);start_msec = (double)start.tv_usec / 1000.0;end_msec = (diff_sec * 1000) + ((double)end.tv_usec / 1000.0);return (end_msec - start_msec);
}/*创建Huffman树的数据节点*/
pNode createDataNode(int data, int weight) {pNode node = (pNode)malloc(sizeof(Node));memset(node, 0, sizeof(Node));node->type = NODE_TYPE_DATA;node->data = data;node->weight = weight;return node;
}/*创建Huffman树的空节点*/
pNode createBlankNode(int weight) {pNode node = (pNode)malloc(sizeof(Node));memset(node, 0, sizeof(Node));node->type = NODE_TYPE_BLANK;node->weight = weight;return node;
}/*将Huffman树节点添加到森林列表*/
void addNodeToList(pNode nodelist[], int size, pNode node) {int index;for (index = 0; index < size; ++index) {if (nodelist[index] == NULL) {/*从表中找到空位放入新节点*/nodelist[index] = node;break;}}
}/*从森林列表弹出权重最低的Huffman树节点*/
pNode popMinNodeFromList(pNode nodelist[], int size) {int min = -1;int index;for (index = 0; index < size; ++index) {if (nodelist[index]) {if (min == -1) {min = index;} else {if (nodelist[min]->weight > nodelist[index]->weight) {/*当发现存在更小权重节点时候更新记录*/min = index;}}}}if (min != -1) {pNode node = nodelist[min];nodelist[min] = NULL;return node;}return NULL;
}/*通过递归遍历方式为Huffman树中的的所有节点产生Huffman编码*/
void generateHuffmanCode(pNode root) {if (root) {if (root->left) {strcpy(root->left->code, root->code);strcat(root->left->code, "0");generateHuffmanCode(root->left);}if (root->right) {strcpy(root->right->code, root->code);strcat(root->right->code, "1");generateHuffmanCode(root->right);}}
}/*传入权重表构建Huffman树*/
pNode buildHuffmanTree(int times[]) {pNode nodelist[FOREST_SIZE] = { NULL };pNode root = NULL;struct timeval startstamp = startTimestamp();int index;/*创建森林表*/for (index = 0; index < LIST_SIZE; ++index) {if (times[index] > 0) {/*将所有节点逐个放入森林表*/addNodeToList(nodelist, FOREST_SIZE, createDataNode(index, times[index]));}}/*构建Huffman树*/while (1) {pNode left = popMinNodeFromList(nodelist, FOREST_SIZE);pNode right = popMinNodeFromList(nodelist, FOREST_SIZE);if (right == NULL) {/*当森林中只剩下一颗树节点的时候表示整个Huffman树构建完成*/root = left;break;} else {pNode node = createBlankNode(left->weight + right->weight);node->left = left;node->right = right;/*每次从森林表中取出两个最小的节点,并创建新节点重新放入森林表*/addNodeToList(nodelist, FOREST_SIZE, node);}}generateHuffmanCode(root);printf("       bulid huffman tree : %lf msc\n", endTimestamp(startstamp));return root;
}/*在Huffman树中前进一步*/
pNode setpHuffmanTree(pNode root, int flag) {switch (flag) {case NODE_FLAG_LEFT:return root->left;case NODE_FLAG_RIGHT:return root->right;}return NULL;
}/*通过后序遍历的方式销毁Huffman树*/
void destroyHuffmanTree(pNode root) {if (root) {destroyHuffmanTree(root->left);destroyHuffmanTree(root->right);free(root);}
}/*从文件构建Huffman树*/
pNode buildHuffmanTreeFromFile(FILE* input) {int times[LIST_SIZE] = { 0 };Byte byte;while (fread(&byte, sizeof(byte), 1, input) == 1) {++times[byte];}return buildHuffmanTree(times);
}/*计算Huffman树的权重总值*/
int countHuffmanTreeWeightTotal(pNode root) {int total = 0;if (root) {/*只获取有效字符节点*/if (root->type == NODE_TYPE_DATA) {total = root->weight;}total += countHuffmanTreeWeightTotal(root->left);total += countHuffmanTreeWeightTotal(root->right);}return total;
}/*通过递归遍历将Huffman树转换成Huffman表*/
void convertTreeToList(pNode root, pNode nodelist[]) {if (root) {/*只获取有效字符节点*/if (root->type == NODE_TYPE_DATA) {nodelist[root->data] = root;}convertTreeToList(root->left, nodelist);convertTreeToList(root->right, nodelist);}
}/*清理Huffman表中的空指针,并返回实际的表元素数量*/
int trimNodeList(pNode nodelist[], int size) {int count = 0;int index;for (index = 0; index < size; ++index) {pNode node = nodelist[index];if (node) {nodelist[count++] = node;}}return count;
}/*对文件数据进行Huffman编码*/
int encodeFileData(pNode root, FILE* input, FILE* output) {int total = 0;int count = 0;if (root) {Byte byte;int buffer = 0;pNode nodelist[LIST_SIZE] = { NULL };/*将Huffman树转换成Huffman表*/convertTreeToList(root, nodelist);while (fread(&byte, sizeof(byte), 1, input) == 1) {char* cursor = nodelist[byte]->code;while (*cursor) {buffer <<= 1;if (*cursor == '0') {buffer |= 0;} else {buffer |= 1;}++count;if (count == 8) {Byte byte = (Byte)buffer;fwrite(&byte, sizeof(byte), 1, output);count = 0;buffer = 0;++total;}++cursor;}}if (count > 0) {buffer <<= (8 - count);char byte = (char)buffer;fwrite(&byte, 1, 1, output);++total;}}return total;
}/*对文件数据进行Huffman解码*/
void decodeFileData(pNode root, FILE* input, FILE* output, int count) {if (root) {Byte byte;pNode cursor = root;while (fread(&byte, sizeof(byte), 1, input) == 1) {int buffer = byte;int index;for (index = 0; index < 8; ++index) {buffer <<= 1;if (!cursor->left || !cursor->right) {Byte data = (Byte)cursor->data;fwrite(&data, sizeof(data), 1, output);if (--count == 0) {break;}cursor = root;}if (buffer & ~0xff) {cursor = setpHuffmanTree(cursor, NODE_FLAG_RIGHT);} else {cursor = setpHuffmanTree(cursor, NODE_FLAG_LEFT);}buffer &= 0xff;}}}
}/*检测是否是可显示字符*/
int canShowChar(char ch) {return (ch > 32 && ch < 127);
}/*通过递归遍历方式打印Huffman树*/
void outputHuffmanTree(FILE* output, pNode root, int flag) {if (root) {int index;char content[128] = { 0 };const char* flagname[] = { "ROOT", "LEFT", "RIGHT" };int offset = (int)strlen(root->code) - 1;for (index = 0; index < offset; ++index) {if (root->code[index] == '0') {fprintf(output, " │ ");} else {fprintf(output, "   ");}}switch (root->type) {case NODE_TYPE_DATA:sprintf(content, "> %-6s #%-4d 0xX : '%c'", flagname[flag], root->weight, root->data, canShowChar((char)root->data) ? root->data : ' ');break;case NODE_TYPE_BLANK:sprintf(content, "[+] %-6s #%-4d", flagname[flag], root->weight);break;}switch (flag) {case NODE_FLAG_ROOT:fprintf(output, "%s", content);break;case NODE_FLAG_LEFT:fprintf(output, " ├─%s", content);break;case NODE_FLAG_RIGHT:fprintf(output, " └─%s", content);break;}if (root->type == NODE_TYPE_DATA) {fprintf(output, " CODE : %s\n", root->code);} else {fprintf(output, "\n");}outputHuffmanTree(output, root->left, NODE_FLAG_LEFT);outputHuffmanTree(output, root->right, NODE_FLAG_RIGHT);}
}/*打印Huffman树*/
void printHuffmanTree(FILE* output, pNode root) {fprintf(output, "    *******************************\n");fprintf(output, "       Print Huffman Tree\n");fprintf(output, "\n");outputHuffmanTree(output, root, NODE_FLAG_ROOT);fprintf(output, "\n");
}/*将Huffman表中的数据输出成编码和权重统计表*/
void printHuffmanList(FILE* output, pNode root) {pNode nodelist[LIST_SIZE] = { NULL };int index;int listcount = 0;/*将Huffman树转换成Huffman表*/convertTreeToList(root, nodelist);listcount = trimNodeList(nodelist, LIST_SIZE);fprintf(output, "    *******************************\n");fprintf(output, "        # Print Huffman Code List #\n");fprintf(output, "\n");fprintf(output, "                       Total : %d\n", listcount);fprintf(output, "\n");fprintf(output, "     %-7s%-10s%-10s%s\n", "ASCII", "Char", "Weight", "Code");for (index = 0; index < listcount; ++index) {pNode node = nodelist[index];Byte ch = (Byte)node->data;if (canShowChar((char)ch)) {/*可显示字符的输出*/fprintf(output, "     %-7d%-10c%-10d%s\n", ch, ch, node->weight, node->code);} else {/*不可显示字符的输出*/fprintf(output, "     %-7d%-10s%-10d%s\n", ch, "NOShow", node->weight, node->code);}}printf("\n");
}/*统计输入的字符权重*/
void contUserInputTimes(int times[]) {int index, count;printf("    *******************************\n");printf("        # Input data to test #\n");printf("\n");printf("        Number of input nodes : ");scanf("%d", &count);printf("        Enter the letters and weights for each node : \n");for (index = 0; index < count; ++index) {char str[128] = { 0 };int weight = 0;printf("        Char   : ");scanf("%s", str);printf("        Weight : ");scanf("%d", &weight);times[(int)str[0]] = weight;}
}/*输入数据构建Huffman树选项*/
pNode inputDataToBuildHuffmanTreeOption() {int times[LIST_SIZE] = { 0 };contUserInputTimes(times);return buildHuffmanTree(times);
}/*获取输入选项*/
int inputOption(int begin, int end) {do {int opt;if (scanf("%d", &opt) == 1) {if (opt >= begin && opt <= end) {return opt;} else {printf("       error : The input range should be between %d and %d.\n", begin, end);}} else {printf("       error : Please enter integer type.\n");/*清空缓冲区*/setbuf(stdin, NULL);}} while (1);
}/*检测文件类型*/
int getFileType(const char filename[]) {int type = FILE_TYPE_NULL;FILE* input = fopen(filename, "rb");if (input) {char buffer[2] = { 0 };type = FILE_TYPE_ENCODE;if (fread(buffer, 2, 1, input) == 1) {if (buffer[0] == FILE_HEADER_FLAG[0] && buffer[1] == FILE_HEADER_FLAG[1]) {type = FILE_TYPE_DECODE;}}fclose(input);}return type;
}/*写入文件头信息(文件头含文件头标识和字符权重集)*/
int writeFileHeader(pNode root, FILE* output) {pNode nodelist[LIST_SIZE] = { NULL };Byte total = 0;int index;/*写入文件头标识*/fwrite(FILE_HEADER_FLAG, 2, 1, output);convertTreeToList(root, nodelist);/** 为节省空间字符集总量存储为1个字节* 总量1用0表示,总量256用255表示* 所以总量 - 1*/total = (Byte)(trimNodeList(nodelist, LIST_SIZE) - 1);/*写入字符集总数*/fwrite(&total, sizeof(total), 1, output);/*写入每个字符以及权重*/for (index = 0; index <= total; ++index) {pNode node = nodelist[index];Byte byte = (Byte)node->data;fwrite(&byte, sizeof(byte), 1, output);fwrite(&node->weight, sizeof(node->weight), 1, output);}/*返回写入的文件头总字节数*/return (total * 5 + 1 + 2);
}/*读取文件头信息(读取字符权重集)*/
void readFileHeader(FILE* input, int times[]) {Byte total;int index;/*跳过文件头*/fseek(input, 2, SEEK_CUR);fread(&total, sizeof(total), 1, input);for (index = 0; index <= total; ++index) {Byte byte;int weight;fread(&byte, sizeof(byte), 1, input);fread(&weight, sizeof(weight), 1, input);times[byte] = weight;}
}

关于我们

最火推荐

小编推荐

联系我们


版权声明:本站内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 88@qq.com 举报,一经查实,本站将立刻删除。备案号:桂ICP备2021009421号
Powered By Z-BlogPHP.
复制成功
微信号:
我知道了