实验内容及要求:
从字符文件读取若干个大写英文字符(英文字符种类数m建议为6至8种,如:m=6,则英文字符可取A-F),统计m种英文字符的出现频度,构造Huffman二叉树,对所有英文字符进行Huffman编码,将编码后的比特流用byte型(或char型)数组实现存储。在屏幕上输出该比特流的压缩率,然后利用该数组和Huffman二叉树进行译码,将译码后的字符序列输出到另一个字符文件。
提示:(1) 输入与输出字符文件每10个字符一行;
(2) 输入文件中的不可显示字符(如:回车、换行符)不进行统计和编码;
(3) 压缩率=平均编码长度/log2m。
#include <iostream>
#include <fstream>
#include <vector>
#include <cmath>
#define READ_ROOT "../readfile.txt"
#define WRITE_ROOT "../writefile.txt"
//最大字符数
#define MAX_TEXT 1000
//最大不同字符个数
#define MAX_LEAF 32
//最多26个字母,所以最少用五位来表示
#define MAX_BITS (MAX_LEAF - 1)
//最大节点个数
#define MAX_NODE (MAX_LEAF * 2 - 1)
using namespace std;
typedef struct HTNode {
float weight;
int parent;
int lchild;
int rchild;
}HFNode;
typedef struct HCode {
char code[MAX_BITS];
int start;
}HFCode;
class Huffman {
private:
fstream file;
//字符种类数
int kinds{};
//源文本
char src_string[MAX_TEXT]{};
//所有字符
vector<char> text;
//频数
vector<int> count;
//频率(权值)
vector<float> weight;
//权值最小的节点索引
int s1{}, s2{};
HFCode hfCode[MAX_LEAF]{};
HFNode hfNode[MAX_NODE]{};
public:
Huffman() = default;
//文件读取
bool DataRead(const string& root);
//统计
void Statistics();
//选择权值最小的两个节点
void Select(int range);
//构建哈夫曼树
bool createHFTree();
//构建哈夫曼编码
void createHFCode();
//求压缩率
void solveCompressibility();
//输出哈夫曼编码
bool writeHFCode(const string& root);
~Huffman() = default;
};
bool Huffman::DataRead(const std::string &root) {
file.open(root, ios::in);
if (!file.is_open()) {
cout << "Wrong Root!" << endl;
return false;
} else {
int i = 0;
while (file >> src_string[i++]);
file.close();
return true;
}
}
void Huffman::Statistics() {
for (auto &item: src_string) {
if (item >= 'A' && item <= 'Z') {
bool exist = false;
for (auto &i: text) {
if (item == i) {
exist = true;
break;
}
}
if (!exist) {
text.push_back(item);
count.push_back(1);
} else {
for (int i = 0; i < text.size(); i++) {
if (text[i] == item) {
count[i]++;
break;
}
}
}
}
}
int sum = 0;
for (auto &item : count) {
sum += item;
}
for (int i : count) {
float temp = static_cast<float>(i) / static_cast<float>(sum);
weight.push_back(temp);
}
kinds = static_cast<int>(text.size());
}
void Huffman::Select(int range) {
float min1 = MAX_LEAF;
for (int i = 0; i < range; i++) {
if (hfNode[i].weight < min1 && hfNode[i].parent == -1) {
min1 = hfNode[i].weight;
s1 = i;
}
}
float min2 = MAX_LEAF;
for (int i = 0; i < range; i++) {
if (hfNode[i].weight < min2 && hfNode[i].parent == -1 && i != s1) {
min2 = hfNode[i].weight;
s2 = i;
}
}
}
bool Huffman::createHFTree() {
if (kinds > MAX_LEAF) {
cout << "Out Range!" << endl;
return false;
}
if (kinds <= 1) {
cout << "Only One Character" << endl;
return false;
}
int m = 2 * kinds - 1;
for (int i = 0; i < kinds; i++) {
hfNode[i].weight = weight[i];
hfNode[i].parent = -1;
hfNode[i].lchild = -1;
hfNode[i].rchild = -1;
}
for (int i = kinds; i < m; i++) {
hfNode[i].weight = 0;
hfNode[i].parent = -1;
hfNode[i].lchild = -1;
hfNode[i].rchild = -1;
}
for (int i = kinds; i < m; i++) {
Select(i);
hfNode[s1].parent = i;
hfNode[s2].parent = i;
hfNode[i].lchild = s1;
hfNode[i].rchild = s2;
hfNode[i].weight = hfNode[s1].weight + hfNode[s2].weight;
}
return true;
}
void Huffman::createHFCode() {
int start;
for (int i = 0; i < kinds; i++) {
start = kinds - 2;
for (int c = i, f = hfNode[i].parent; f != -1; c = f, f = hfNode[f].parent) {
if (c == hfNode[f].lchild) {
hfCode[i].code[start--] = '0';
} else {
hfCode[i].code[start--] = '1';
}
}
hfCode[i].start = start + 1;
}
}
void Huffman::solveCompressibility() {
float average_length = 0;
for (int i = 0; i < kinds; i++) {
int length = kinds - hfCode[i].start - 1;
average_length += static_cast<float>(length) * weight[i];
}
cout << "Compressibility: " << average_length / log2(kinds) << endl;
}
bool Huffman::writeHFCode(const std::string &root) {
file.open(root, ios::out);
if (!file.is_open()) {
cout << "Wrong Root!" << endl;
return false;
} else {
//输出每个字母的哈夫曼编码
for (int i = 0; i < kinds; i++) {
file << text[i] << ":" << endl;
for (int j = hfCode[i].start; j < kinds - 1; j++) {
file << hfCode[i].code[j];
}
file << endl;
}
file << endl;
//输出源字符串的哈夫曼编码
int counter = 0;
for (int i = 0; src_string[i] != '\0'; i++) {
for (int j = 0; j < text.size(); j++) {
if (src_string[i] == text[j]) {
for (int k = hfCode[j].start; k < kinds - 1; k++) {
file << hfCode[j].code[k];
counter++;
if (counter == 10) {
file << endl;
counter = 0;
}
}
break;
}
}
}
return true;
}
}
int main() {
Huffman huffman;
if (!huffman.DataRead(READ_ROOT)) {
return -1;
}
huffman.Statistics();
if (!huffman.createHFTree()) {
return -1;
}
huffman.createHFCode();
huffman.solveCompressibility();
if (!huffman.writeHFCode(WRITE_ROOT)) {
return -1;
}
return 0;
}
路径记得自己改
示例输入:
AAAAAAAAAA BBBCCCCDDD EEEEEEEEEF AAAAAABBBB CCCCCCDDDD DDDDDDDDDD
程序框输出:
Compressibility: 0.934894
文件输出:
A: 01 B: 1101 C: 00 D: 10 E: 111 F: 1100 0101010101 0101010101 1101110111 0100000000 1010101111 1111111111 1111111111 1111100010 1010101011 1011101110 1110100000 0000000101 0101010101 0101010101 01010
参考博客:
数据结构(C语言)-哈夫曼(Huffman)树编码操作 - 知乎 (zhihu.com)
说实话感觉压缩率有点低。。
文章评论