【数据结构】哈夫曼树实现文件的压缩与解压缩

  • Post author:
  • Post category:其他



哈夫曼树实现文件的压缩与解压缩

被哈夫曼树折磨地好痛苦啊!

在这里插入图片描述

如果c是无符号char型的话,直接ifs.read(c,1)会造成光标闪烁。

二进制文件困扰了很久。

一次读入或写入一个字节,速度死慢,所以稍微理解了缓冲区的重要性啊。

#pragma once
#include<iostream>
#include<queue>
#include<unordered_map>
#include<fstream>
#include<bitset>
#include<string>
#include<iomanip>
#include<math.h>
#include<time.h>
using namespace std;
struct huffmanNode
{
	unsigned char element;
	long long weight;
	huffmanNode* leftChild, * rightChild;
	huffmanNode(unsigned char element = NULL, long long weight = 0, huffmanNode* leftChild = NULL,
		huffmanNode* rightChild = NULL) {
		this->element = element;
		this->weight = weight;
		this->leftChild = leftChild;
		this->rightChild = rightChild;
	}

};
struct cmp
{
	bool operator()(huffmanNode* x, huffmanNode* y) {
		return x->weight > y->weight;
	}

};
class FileCompress
{
private:
	unordered_map<unsigned char, long long> map_weight;
	unordered_map<unsigned char, string> map_code;
	unordered_map<string, unsigned char> map_change;
	huffmanNode* root;
	long long BeforeSize;
	long long AfterSize;
	string CompressFilepath;//压缩后文件的路径
	double CompressTime;
	double UnCompressTime;
	void dispose(huffmanNode* t);
	void code(huffmanNode* t, string str);

public:
	FileCompress();
	~FileCompress();
	void compress(const char*path);//压缩
	void uncompress(const char*path);//解压
	void PrintBeforeSize();
	void PrintAfterSize();
	void compressTime();
	void unCompressTime();

};


#include "FileCompress.h"

void FileCompress::dispose(huffmanNode* t)
{
	if (t != NULL) {
		dispose(t->leftChild);
		dispose(t->rightChild);
		delete t;
	}
}

void FileCompress::code(huffmanNode* t, string str)
{
	if (t != NULL) {
		if (t->leftChild == NULL && t->rightChild == NULL) {
			map_code[t->element] = str;
			map_change[str] = t->element;
		}
		else {
			code(t->leftChild, str + "0");
			code(t->rightChild, str + "1");
		}
	}

}

FileCompress::FileCompress()
{
	BeforeSize = 0;
	AfterSize = 0;
	root = NULL;
	CompressFilepath = "";

}

FileCompress::~FileCompress()
{
	dispose(root);
	root = NULL;
}

void FileCompress::compress(const char* path)
{
	if (root != NULL) {
		dispose(root);
		root = NULL;
		map_weight.clear();
		map_code.clear();
		map_change.clear();
	}
	double start = clock();
	ifstream ifs;
	ifs.open(path, ios::binary);
	unsigned char buf[1024];
	BeforeSize = 0;
	int n;
	while (!ifs.eof()) {
		ifs.read((char*)&buf, 1024);
		n = ifs.gcount();
		for (int i = 0; i < n; i++) {
			map_weight[buf[i]]++;

		}
		BeforeSize += n;
	}

	ifs.close();
	priority_queue<huffmanNode*, vector<huffmanNode*>, cmp> q;
	unordered_map<unsigned char, long long>::iterator
		it = map_weight.begin();
	for (; it != map_weight.end(); it++) {
		huffmanNode* node = new huffmanNode(it->first, it->second);
		q.push(node);
	}

	huffmanNode* w, * x, * y;
	while (q.size() > 1) {
		x = q.top();
		q.pop();
		y = q.top();
		q.pop();
		long long weight = x->weight + y->weight;
		w = new huffmanNode(0, weight, x, y);
		q.push(w);
	}
	root = q.top();
	code(root, "");


	string name = path;
	size_t index = name.rfind('.');
	CompressFilepath = name.substr(0, index);
	CompressFilepath += ".huf";




	ofstream ofs;
	ofs.open(CompressFilepath, ios::binary);
	unsigned char value = 0;
	int pos = 0;

	unsigned char buf2[1024];
	int t = 0;
	ifs.open(path, ios::binary);
	string str;
	while (!ifs.eof()) {
		ifs.read((char*)&buf, 1024);
		n = ifs.gcount();
		for (int j = 0; j < n; j++) {
			str = map_code[buf[j]];
			for (size_t i = 0; i < str.length(); i++) {
				value = value * 2 + (str[i] - '0');
				pos++;
				if (pos == 8) {
					buf2[t] = value;
					t++;
					if (t == 1024) {
						ofs.write((char*)&buf2, 1024);
						t = 0;
					}
					pos = 0;
					value = 0;
				}
			}
		}
	}
	if (0<pos < 8) {
		value <<= (8 - pos);
		buf2[t] = value;
		t++;
		ofs.write((char*)&buf2, t);

	}

	ifs.close();
	ofs.close();
	double end = clock();
	CompressTime = end - start;
}

void FileCompress::uncompress(const char* path)
{
	double start = clock();
	ifstream ifs;
	ifs.open(CompressFilepath, ios::binary);
	ofstream ofs;
	ofs.open(path, ios::binary);
	unsigned char buf[1024];
	unsigned char buf2[1024];
	int t = 0;
	long long count = 0;
	string coding = "";
	AfterSize = 0;
	int n;
	while (!ifs.eof()) {
		ifs.read((char*)&buf, 1024);
		n = ifs.gcount();
		AfterSize += n;
		for (int j = 0; j < n; j++) {
			string str = bitset<8>(buf[j]).to_string();
			for (size_t i = 0; i < str.length(); i++) {
				coding += str[i];
				if (map_change.count(coding)) {
					buf2[t] = map_change[coding];
					t++;
					count++;
					if (t == 1024) {
						ofs.write((char*)&buf2, 1024);
						t = 0;
					}
					if (count == BeforeSize) {
						if (t != 1024) {
							ofs.write((char*)&buf2, t);
							
						}
						ifs.close();
						ofs.close();
						double end = clock();
						UnCompressTime = end - start;
						return;
					}

					
					coding = "";
				}
			}


		}

		

	}
	
}

void FileCompress::PrintBeforeSize()
{
	double t = BeforeSize;
	cout << fixed << setprecision(2) << setiosflags(ios::right);
	if (t > pow(2, 30)) {
		cout << "原始文件大小:" << t/pow(2,30) << "GB\n";
	}
	else if (t > pow(2, 20)) {
		cout << "原始文件大小:" << t / pow(2, 20) << "MB\n";
	}
	else if(t > pow(2, 10)) {
		cout << "原始文件大小:" << t / pow(2, 10) << "KB\n";
	}
	else {
		cout << "原始文件大小:" << t << "B\n";
	}

}

void FileCompress::PrintAfterSize()
{
	double t = AfterSize;
	cout << fixed << setprecision(2) << setiosflags(ios::right);
	if (t > pow(2, 30)) {
		cout << "压缩后文件大小:" << t / pow(2, 30) << "GB\n";
	}
	else if (t > pow(2, 20)) {
		cout << "压缩后文件大小:" << t / pow(2, 20) << "MB\n";
	}
	else if (t > pow(2, 10)) {
		cout << "压缩后文件大小:" << t / pow(2, 10) << "KB\n";
	}
	else {
		cout << "压缩后文件大小:" << t << "B\n";
	}
}

void FileCompress::compressTime()
{
	cout << fixed << setprecision(2) << setiosflags(ios::right);
	if (CompressTime > 60 * 1000) {
		cout << "压缩时间:" << CompressTime / 60 * 1000 << "分钟\n";
	}
	else if(CompressTime > 1000) {
		cout << "压缩时间:" << CompressTime / 1000 << "秒\n";
	}
	else {
		cout << "压缩时间:" << CompressTime << "毫秒\n";
	}
}

void FileCompress::unCompressTime()
{
	cout << fixed << setprecision(2) << setiosflags(ios::right);
	if (UnCompressTime > 60 * 1000) {
		cout << "解压时间:" << UnCompressTime / 60 * 1000 << "分钟\n";
	}
	else if (UnCompressTime > 1000) {
		cout << "解压时间:" << UnCompressTime / 1000 << "秒\n";
	}
	else {
		cout << "解压时间:" << UnCompressTime<< "毫秒\n";
	}
}


#include<iostream>
using namespace std;
#include"FileCompress.h"
int main() {
	FileCompress f;
	f.compress("D:\\1.jpg");
	f.uncompress("E:\\1.jpg");
	cout << "图片压缩\n";
	f.PrintBeforeSize();
	f.PrintAfterSize();
	f.compressTime();
	f.unCompressTime();
	cout << "\n";
	
	cout << "音频压缩\n";
	f.compress("D:\\喜剧之王.mp3");
	f.uncompress("E:\\喜剧之王.mp3");
	f.PrintBeforeSize();
	f.PrintAfterSize();
	f.compressTime();
	f.unCompressTime();
	cout << "\n";

	cout << "文本压缩\n";
	f.compress("D:\\test.txt");
	f.uncompress("E:\\test.txt");
	f.PrintBeforeSize();
	f.PrintAfterSize();
	f.compressTime();
	f.unCompressTime();
	

	return 0;
}

在这里插入图片描述

在这里插入图片描述

在这里插入图片描述


解压的时候应该通过哈夫曼树来找对应的编码,我使用的是map保存对应的编码->字符,这样是不可取的,但是我懒得改了!

应该是我的存储结构选的不好,导致压缩上M文件就很耗时间

最后的结果压缩也没好到哪去啊

确实I/O读写是很耗时间的。

还有待改进啊!



版权声明:本文为weixin_48180029原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。