当前位置:网站首页>Huffman tree: (1) input each character and its weight (2) construct Huffman tree (3) carry out Huffman coding (4) find hc[i], and get the Huffman coding of each character
Huffman tree: (1) input each character and its weight (2) construct Huffman tree (3) carry out Huffman coding (4) find hc[i], and get the Huffman coding of each character
2022-07-02 15:14:00 【MASJLE】
#include <iostream>
#include <fstream>
#include <string.h>
using namespace std;
#define MaxSize 1024 // Upper limit of read in file
#define OK 1
#define ERROR 0
typedef int Status;
typedef struct wordcnt{ // Count characters and corresponding times
char ch;
int cnt = 0;
}Count;
typedef struct NumCount{ // Count the number of external packages
Count count[MaxSize];
int length = 0;
}NumCount;
typedef struct HTree{ // Huffman tree structure
char data;
int weight;
int parent,lchild,rchild;
}HTNode,*HuffmanTree;
typedef struct HCode{ // Coding structure
char data;
char* str;
}*HuffmanCode;
Status ReadData(char *source); // Read in file
Status WordCount(char *data,NumCount *paraCnt); // Count the times
Status Show(NumCount *paraCnt); // Number of shows
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray); // Create the Huffman tree
Status select(HuffmanTree HT,int top,int *s1,int *s2); // Select the two nodes with the least weight
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length); // Create Huffman code
Status Encode(char *data,HuffmanCode HC,int length); // Encode the read file , writes txt file
Status Decode(HuffmanTree HT,int length); // Read the encoded file , decode
int main(int argc, char** argv) {
char data[MaxSize];
NumCount Cntarray;
ReadData(data); // Read in the data
WordCount(data,&Cntarray); // Count the times
// Show(&Cntarray); // You can view the corresponding number of occurrences of each word
HuffmanTree tree;
CreateHuffmanTree(tree,Cntarray.length,Cntarray); // Build up trees
HuffmanCode code;
CreateHuffmanCode(tree,code,Cntarray.length); // Create code
Encode(data,code,Cntarray.length); // Generate encoding file
Decode(tree,Cntarray.length); // decode
cout<<"Please view the generated TXT file to check the result"<<endl;
return 0;
}
Status ReadData(char *source)
{
// Open the file and read in the data
ifstream infile;
infile.open("in.txt");
cout<<"Reading..."<<endl;
cout<<"the input file is:"<<endl;
infile.getline(source,MaxSize);
cout<<source<<endl;
infile.close();
cout<<endl;
return OK;
}
Status WordCount(char *data,NumCount *paraCnt)
{
int flag;// Identify whether it has been recorded
int len = strlen(data);
for(int i = 0;i < len;++i)
{
flag = 0;
for(int j = 0;j < paraCnt->length;++j)
{
if(paraCnt->count[j].ch == data[i]) // If there are records , direct ++
{
++paraCnt->count[j].cnt;
flag = 1;
break;
}
}
if(!flag) // There is no record , Then add
{
paraCnt->count[paraCnt->length].ch = data[i];
++paraCnt->count[paraCnt->length].cnt;
++paraCnt->length;
}
}
return OK;
}
Status Show(NumCount *paraCnt)
{
cout<<"the length is "<<paraCnt->length<<endl;
for(int i = 0;i < paraCnt->length;++i)
{
cout<<"The character "<<paraCnt->count[i].ch<<" appears "<<paraCnt->count[i].cnt<<endl;
}
cout<<endl;
return OK;
}
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray)
{
if(length <= 1) return ERROR;
int s1,s2;
int m = length*2-1; // There is no measure of 1 The node of , The conclusion is 2* Number of leaf nodes -1 individual
HT = new HTNode[m+1];
for(int i = 1;i <= m;++i) // initialization
{
HT[i].parent = 0;
HT[i].lchild = 0;
HT[i].rchild = 0;
}
for(int i = 1;i <= length;++i)
{
HT[i].data = cntarray.count[i-1].ch;
HT[i].weight = cntarray.count[i-1].cnt;
}
for(int i = length + 1;i <= m;++i)
{
select(HT,i-1,&s1,&s2); // Select the two nodes with the least weight from the previous range
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].lchild = s1;
HT[i].rchild = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight; // Get a new node
}
return OK;
}
Status select(HuffmanTree HT,int top,int *s1,int *s2)
{
int min = INT_MAX;
for(int i = 1;i <= top;++i) // Select nodes without parents , The node with the smallest weight
{
if(HT[i].weight < min && HT[i].parent == 0)
{
min = HT[i].weight;
*s1 = i;
}
}
min = INT_MAX;
for(int i = 1;i <= top;++i) // Select nodes without parents , The node with the second smallest weight
{
if(HT[i].weight < min && i != *s1 && HT[i].parent == 0)
{
min = HT[i].weight;
*s2 = i;
}
}
return OK;
}
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length)
{
HC = new HCode[length+1];
char *cd = new char[length]; // Temporary space for storing codes
cd[length-1] = '\0'; // Call after convenience strcpy function
int c,f,start;
for(int i = 1;i <= length;++i)
{
start = length-1; // start Indicates the starting subscript of the code in the temporary space , Because it is traced back from the leaf node , So start at the end
c = i;
f = HT[c].parent;
while(f != 0)
{
--start; // Because it is backtracking , So from the end of the temporary space
if(HT[f].lchild == c)
cd[start] = '0';
else
cd[start] = '1';
c = f;
f = HT[c].parent;
}
HC[i].str = new char[length-start]; // Last , The actual encoding space used is length-start
HC[i].data = HT[i].data;
strcpy(HC[i].str,&cd[start]); // Start from actual start address , Copy to coding structure
}
delete cd;
}
Status Encode(char *data,HuffmanCode HC,int length)
{
ofstream outfile;
outfile.open("code.txt");
for(int i = 0;i < strlen(data);++i) // Read the data in sequence , Find the corresponding code , Write encoded file
{
for(int j = 1;j <= length;++j)
{
if(data[i] == HC[j].data)
{
outfile<<HC[j].str;
}
}
}
outfile.close();
cout<<"the code txt has been written"<<endl;
cout<<endl;
return OK;
}
Status Decode(HuffmanTree HT,int length)
{
char codetxt[MaxSize*length];
ifstream infile;
infile.open("code.txt");
infile.getline(codetxt,MaxSize*length);
infile.close();
ofstream outfile;
outfile.open("out.txt");
int root = 2*length-1; // Traverse from root
for(int i = 0;i < strlen(codetxt);++i)
{
if(codetxt[i] == '0') root = HT[root].lchild; // by 0 Indicates left traversal
else if(codetxt[i] == '1') root = HT[root].rchild; // by 1 Indicates traversal to the right
if(HT[root].lchild == 0 && HT[root].rchild == 0) // If it is already a leaf node , Output to output file , Then go back to the root node
{
outfile<<HT[root].data;
root = 2*length-1;
}
}
outfile.close();
cout<<"the output txt has been written"<<endl;
cout<<endl;
return OK;
}
边栏推荐
- 【NOI模拟赛】刮痧(动态规划)
- Practice of compiling principle course -- implementing an interpreter or compiler of elementary function operation language
- Learn the method code of using PHP to realize the conversion of Gregorian calendar and lunar calendar
- Advanced C language (realize simple address book)
- 03_线性表_链表
- Advanced C language (learn malloc & calloc & realloc & free in simple dynamic memory management)
- TiDB跨数据中心部署拓扑
- Internet Explorer officially retired
- N皇后问题的解决
- 你不知道的Set集合
猜你喜欢

Jenkins Pipeline 应用与实践

Have you learned the wrong usage of foreach

【NOI模拟赛】伊莉斯elis(贪心,模拟)
![[C language] explain the initial and advanced levels of the pointer and points for attention (1)](/img/61/1619bd2e959bae1b769963f66bab4e.png)
[C language] explain the initial and advanced levels of the pointer and points for attention (1)

Introduction to C language -- array

Practice of compiling principle course -- implementing an interpreter or compiler of elementary function operation language

Error: NPM warn config global ` --global`, `--local` are deprecated Use `--location=global` instead.

05_队列

LeetCode 2320. Count the number of ways to place the house

MFC timer usage
随机推荐
Printf function and scanf function in C language
.NET Core 日志系统
Arithmetic operations and related exercises in C language
Topology architecture of the minimum deployment of tidb cluster
LeetCode 209. Minimum length subarray
HUSTPC2022
LeetCode 209. 长度最小的子数组
Points clés de l'examen de principe de compilation pour l'année scolaire 2021 - 2022 [Université chinoise d'outre - mer]
[C language] explain the initial and advanced levels of the pointer and points for attention (1)
【NOI模拟赛】刮痧(动态规划)
如何对 TiDB 进行 TPC-C 测试
为什么只会编程的程序员无法成为优秀的开发者?
Li Chuang EDA learning notes 15: draw border or import border (DXF file)
Implementation of n queen in C language
XML配置文件
【C语音】详解指针进阶和注意点(2)
03_线性表_链表
2021-2022學年編譯原理考試重點[華僑大學]
Some Chinese character codes in the user privacy agreement are not standardized, which leads to the display of garbled codes on the web page. It needs to be found and handled uniformly
LeetCode 2310. The number of digits is the sum of integers of K