当前位置:网站首页>Huffman encoded compressed file

Huffman encoded compressed file

2022-07-07 09:47:00 moyangxian

#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<cstdio>

using namespace std;
const long INF = 0x7FFFFFFF;

struct HuffNode {
    
	unsigned char b;
	long count;
	long parent;
	long lch, rch;
	char bits[256];
};
HuffNode HuffTree[512];

/*  Function description : initialization HuffTree  Function name :init  Return value :void  Parameters to describe : nothing   matters needing attention : nothing  */
void init() {
    
	for (int i = 0; i < 512; i++) {
        // initialization 
		HuffTree[i].b = 0;
		HuffTree[i].count = 0;
		HuffTree[i].parent = -1;
		HuffTree[i].lch = -1;
		HuffTree[i].rch = -1;
		HuffTree[i].bits[0] = 0;
	}
}

/*  Function description : Customize cmp function , Sort by weight from large to small   Return value :bool  Parameters to describe : nothing   matters needing attention : nothing  */

bool cmp(HuffNode A, HuffNode B) {
    	// Sort by weight from large to small 
	return A.count > B.count;
}

void compress() {
    	// Compress 
	init();	// initialization 
	char filename[255],outputfile[255],buf[255];
	unsigned char c;
	long f, p, l;
	long n, m, i, j;
	long min1, pt1;
	long flength=0,length1;
	FILE* ifp, * ofp;
	printf("\t Please enter the file name to be compressed :");
	//getchar();
	//gets_s(filename);
	scanf("%s", &filename);		// File path 
	ifp = fopen(filename, "rb");	// Open the file as a binary stream 
	if (ifp == NULL) {
    	// Judge whether the file exists 
		printf("\n\t File opening failure !\n\n");
		return;
	}
	printf("\t Will be compressed under the current directory , Please enter the compressed file name including the extension :");
	//gets_s(outputfile);
	scanf("%s", outputfile);
	ofp = fopen(outputfile, "wb");
	if (ofp == NULL) {
    
		printf("\n\t Failed to unzip file !\n");
		return;
	}
	while (!feof(ifp)) {
    
		fread(&c, 1, 1, ifp);	// Read a byte from the file to c
		HuffTree[c].count++;
		flength++;	// Length of original document 
	}
	flength--;
	length1 = flength;
	HuffTree[c].count--;
	for (int i = 0; i < 512; i++) {
    
		if (HuffTree[i].count != 0) {
    
			HuffTree[i].b = (unsigned char)i;
		}
		else {
    
			HuffTree[i].b = 0;
		}
		HuffTree[i].parent = -1;
		HuffTree[i].lch = HuffTree[i].rch = -1;
	}
	sort(HuffTree,HuffTree+256,cmp);	// Sort by weight 
	for (i = 0; i < 256; i++)
		if (HuffTree[i].count == 0) break;
	n = i;	// Number of leaf nodes 
	m = 2 * n - 1;	// Huffman sums up the points 
	for (i = n; i < m; i++) {
    
		min1 = INF;
		for (j = 0; j < i; j++) {
    	// Take out the node with the smallest weight 
			if (HuffTree[j].parent != -1)	// This node is already in the Huffman tree 
				continue;
			if (min1 > HuffTree[j].count) {
    
				pt1 = j;
				min1 = HuffTree[j].count;
				continue;
			}
		}
		// Take the extracted node as the left node of Huffman tree 
		HuffTree[i].count = HuffTree[pt1].count;
		HuffTree[pt1].parent = i;
		HuffTree[i].lch = pt1;
		min1 = INF;
		for (j = 0; j < i; j++) {
    	// Repeat the operation of extracting the node with the smallest weight 
			if (HuffTree[j].parent != -1)
				continue;
			if (min1 > HuffTree[j].count) {
    
				pt1 = j;
				min1 = HuffTree[j].count;
				continue;
			}
		}
		// Take the extracted node as the right node of Huffman tree 
		HuffTree[i].count += HuffTree[pt1].count;
		HuffTree[i].rch = pt1;
		HuffTree[pt1].parent = i;
	}
	for (i = 0; i < n; i++) {
    	// Huffman code 
		f = i;
		HuffTree[i].bits[0] = 0;
		while (HuffTree[f].parent != -1) {
    
			j = f;
			f = HuffTree[f].parent;
			if (HuffTree[f].lch == j) {
    
				j = strlen(HuffTree[i].bits);
				memmove(HuffTree[i].bits + 1, HuffTree[i].bits, j + 1);
				HuffTree[i].bits[0] = '0';
			}
			else {
    
				j = strlen(HuffTree[i].bits);
				memmove(HuffTree[i].bits + 1, HuffTree[i].bits, j + 1);
				HuffTree[i].bits[0] = '1';
			}
		}
	}
	/* for (int i = 0; i < n; i++) { printf("%d %c %d %d %d %d %s\n", i, HuffTree[i].b, HuffTree[i].count, HuffTree[i].parent, HuffTree[i].lch, HuffTree[i].rch, HuffTree[i].bits); } */
	
	fseek(ifp, 0, SEEK_SET);
	fseek(ofp, 8, SEEK_SET);
	buf[0] = 0;
	pt1 = 8;
	f = 0;
	while (!feof(ifp)) {
    	// Scan original file 
		c = fgetc(ifp);
		//cout << c;
		f++;
		for (i = 0; i < n; i++) {
    
			if (c == HuffTree[i].b)
				break;
		}
		//cout << HuffTree[i].bits<<" ";
		strcat(buf, HuffTree[i].bits);
		j = strlen(buf);
		c = 0;
		while (j >= 8) {
    	// If the length is greater than 8 Split write 
			for (i = 0; i < 8; i++) {
    
				if (buf[i] == '1')c = (c << 1) | 1;
				else c = c << 1;
			}
			fwrite(&c, 1, 1, ofp);	// Write a byte code into the file 
			pt1++;
			strcpy(buf, buf + 8);
			j = strlen(buf);
		}
		if (f == flength)
			break;
	}
	if (j > 0) {
    	// if buf There is still content in and less than a byte , Need to be supplemented 0
		strcat(buf, "00000000");
		for (i = 0; i < 8; i++) {
    
			if (buf[i] == '1')c = (c << 1) | 1;
			else c <<= 1;
		}
		fwrite(&c, 1, 1, ofp);
		pt1++;
	}
	fseek(ofp, 0, SEEK_SET);
	fwrite(&flength, sizeof(long), 1, ofp);
	fwrite(&pt1, sizeof(long), 1, ofp);
	fseek(ofp, pt1, SEEK_SET);
	fwrite(&n, sizeof(long), 1, ofp);
	//cout << pt1 << " " << n << endl;
	for (i = 0; i < n; i++) {
    
		fwrite(&(HuffTree[i].b), 1, 1, ofp);	// Write the representative character of each byte 
		c = strlen(HuffTree[i].bits);
		fwrite(&c, 1, 1, ofp);	// Write the Huffman encoding length of each character 
		j = strlen(HuffTree[i].bits);
		if (j % 8 != 0) {
    	// If not 8 Multiple , Need to be supplemented 0
			for (f = j % 8; f < 8; f++)
				strcat(HuffTree[i].bits, "0");
		}
		while (HuffTree[i].bits[0] != 0) {
    
			c = 0;
			for (j = 0; j < 8; j++) {
    
				if (HuffTree[i].bits[j] == '1')
					c = (c << 1) | 1;
				else
					c = c << 1;
			}
			strcpy(HuffTree[i].bits, HuffTree[i].bits + 8);
			fwrite(&c, 1, 1, ofp);
		}
	}
	long length2 = pt1--;
	double div = ((double)length1 - (double)length2) / (double)length1;
	fclose(ifp);
	fclose(ofp);
	printf("\n\t Compressed file successfully !\n");
	printf("\t The compression ratio is  %f%%\n\n", div * 100);
}

void uncompress() {
    	// decompression 
	init();
	char filename[255], outputfile[255], buf[255], bx[255];
	unsigned char c;
	long i, j, m, n, f, p, l;
	long flength;
	FILE* ifp, * ofp;
	printf("\t Please enter the file name that needs to be decompressed :");
	//getchar();
	scanf("%s", &filename);
	//gets_s(filename);
	ifp = fopen(filename, "rb");
	if (ifp == NULL) {
    
		printf("\n\t File opening failure !\n");
		return;
	}
	printf("\t Will unzip in the current directory , Please enter the unzipped file name, including the extension :");
	scanf("%s", &outputfile);
	//gets_s(outputfile);
	ofp = fopen(outputfile, "wb");
	if (ofp == NULL) {
    
		printf("\n\t Failed to unzip file !\n");
		return;
	}

	fseek(ifp, 0, SEEK_SET);

	for (int i = 0; i < 9; i++) {
    
		fread(&c, 1, 1, ifp);
	}

	fseek(ifp, 0, SEEK_SET);
	fread(&flength, sizeof(long), 1, ifp);
	fread(&f, sizeof(long), 1, ifp);
	fseek(ifp, f, SEEK_SET);
	fread(&n, sizeof(long), 1, ifp);
	//cout << flength<<" "<<f << " " << n << endl;
	for (i = 0; i < n; i++) {
    	// structure Huffman Treelike n Leaf node 
		fread(&HuffTree[i].b, 1, 1, ifp);
		fread(&c, 1, 1, ifp);
		p = (long)c;
		HuffTree[i].count = p;
		HuffTree[i].bits[0] = 0;

		if (p % 8 > 0)
			m = p / 8 + 1;
		else
			m = p / 8;
		for (j = 0; j < m; j++) {
    
			fread(&c, 1, 1, ifp);
			f = c;	// Hexadecimal to decimal 
			itoa(f, buf, 2);
			f = strlen(buf);
			for (l = 8; l > f; l--) {
    
				strcat(HuffTree[i].bits, "0");
			}
			strcat(HuffTree[i].bits, buf);
		}
		HuffTree[i].bits[p] = 0;
	}
	HuffNode tmp;
	for (i = 0; i < n; i++) {
    	// Sort according to the length of Huffman code 
		for (j = i+1; j < n; j++) {
    
			if (strlen(HuffTree[i].bits) > strlen(HuffTree[j].bits)) {
    
				tmp = HuffTree[i];
				HuffTree[i] = HuffTree[j];
				HuffTree[j] = tmp;
			}
		}
	}
	p = strlen(HuffTree[n - 1].bits);
	fseek(ifp, 8, SEEK_SET);
	m = 0;
	bx[0] = 0;
	while (1) {
    	// decode 
		while (strlen(bx) < (unsigned int)p) {
    
			fread(&c, 1, 1, ifp);
			f = c;
			itoa(f, buf, 2);
			f = strlen(buf);
			for (l = 8; l > f; l--) {
    
				strcat(bx, "0");
			}
			strcat(bx, buf);
		}
		for (i = 0; i < n; i++) {
    
			if (memcmp(HuffTree[i].bits, bx, HuffTree[i].count) == 0)
				break;
		}
		strcpy(bx, bx + HuffTree[i].count);
		c = HuffTree[i].b;
		fwrite(&c, 1, 1, ofp);
		m++;
		if (m == flength) break;
	}
	fclose(ifp);
	fclose(ofp);
	printf("\n\t Unzip file successfully !\n");
	if (m == flength)
		printf("\t The extracted file is the same as the original file !\n\n");
	else
		printf("\t The extracted file is different from the original file !\n\n");
	return;
}

int main() {
    
	int c;
	while (1) {
    
		printf("\t_________________________________________\n");
		printf("\n");
		printf("\t *  Compress 、 decompression   Gadget  * \n");
		printf("\t_________________________________________\n");
		printf("\t_________________________________________\n");
		printf("\t| |\n");
		printf("\t|1. Compress  |\n");
		printf("\t|2. decompression  |\n");
		printf("\t|0. sign out  |\n");
		printf("\n");
		printf("\t  explain :(1) Huffman coding \n");
		printf("\t (2) Applicable to character text files \n");
		printf("\n");
		do {
    
			printf("\n\t* Please select the corresponding function (0-2):");
			c = getchar();
			//printf("%c\n", c);
			if (c != '0' && c != '1' && c != '2') {
    
				printf("\[email protected][email protected] Please check that the number you entered is 0~2 Between !\n");
				printf("\t Please input one side again !\n");
			}
		} while (c != '0' && c != '1' && c != '2');
		if (c == '1') {
    
			compress();
		}
		else if (c == '2') {
    
			uncompress();
		}
		else {
    
			printf("\t Welcome to use this tool again ^_^\n");
			exit(0);
		}
		system("pause");
		system("cls");
	}


	return 0;
}

原网站

版权声明
本文为[moyangxian]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/188/202207070709201367.html